File Coverage

blib/lib/PPI/Token/Word.pm

Criterion	Covered	Total	%
statement	102	119	85.7
branch	58	72	80.5
condition	42	45	93.3
subroutine	8	8	100.0
pod	2	2	100.0
total	212	246	86.1

line	stmt	bran	cond	sub	pod	time	code
1							package PPI::Token::Word;
2
3							=pod
4
5							=head1 NAME
6
7							PPI::Token::Word - The generic "word" Token
8
9							=head1 INHERITANCE
10
11							PPI::Token::Word
12							isa PPI::Token
13							isa PPI::Element
14
15							=head1 DESCRIPTION
16
17							A C<PPI::Token::Word> object is a PPI-specific representation of several
18							different types of word-like things, and is one of the most common Token
19							classes found in typical documents.
20
21							Specifically, it includes not only barewords, but also any other valid
22							Perl identifier including non-operator keywords and core functions, and
23							any include C<::> separators inside it, as long as it fits the
24							format of a class, function, etc.
25
26							=head1 METHODS
27
28							There are no methods available for C<PPI::Token::Word> beyond those
29							provided by its L<PPI::Token> and L<PPI::Element> parent
30							classes.
31
32							We expect to add additional methods to help further resolve a Word as
33							a function, method, etc over time. If you need such a thing right
34							now, look at L<Perl::Critic::Utils>.
35
36							=cut
37
38	67			67		377	use strict;
	67					98
	67					1937
39	67			67		249	use PPI::Token ();
	67					81
	67					1313
40	67			67		207	use PPI::Singletons qw' %OPERATOR %QUOTELIKE %KEYWORDS ';
	67					102
	67					121957
41
42							our $VERSION = '1.284';
43
44							our @ISA = "PPI::Token";
45
46							=pod
47
48							=head2 literal
49
50							Returns the value of the Word as a string. This assumes (often
51							incorrectly) that the Word is a bareword and not a function, method,
52							keyword, etc. This differs from C<content> because C<Foo'Bar> expands
53							to C<Foo::Bar>.
54
55							=cut
56
57							sub literal {
58	7			7	1	882	my $self = shift;
59	7					18	my $word = $self->content;
60
61							# Expand Foo'Bar to Foo::Bar
62	7					16	$word =~ s/\'/::/g;
63
64	7					23	return $word;
65							}
66
67							=pod
68
69							=head2 method_call
70
71							Answers whether this is the name of a method in a method call. Returns true if
72							yes, false if no, and nothing if unknown.
73
74							=cut
75
76							sub method_call {
77	22			22	1	4864	my $self = shift;
78
79	22					54	my $previous = $self->sprevious_sibling;
80	22	100	100			86	if (
			100
81							$previous
82							and
83							$previous->isa('PPI::Token::Operator')
84							and
85							$previous->content eq '->'
86							) {
87	4					14	return 1;
88							}
89
90	18					36	my $snext = $self->snext_sibling;
91	18	100				39	return 0 unless $snext;
92
93	15	100	100			138	if (
			100
			100
			100
94							$snext->isa('PPI::Structure::List')
95							or
96							$snext->isa('PPI::Token::Structure')
97							or
98							$snext->isa('PPI::Token::Operator')
99							and (
100							$snext->content eq ','
101							or
102							$snext->content eq '=>'
103							)
104							) {
105	9					46	return 0;
106							}
107
108	6	100	100			20	if (
109							$snext->isa('PPI::Token::Word')
110							and
111							$snext->content =~ m< \w :: \z >xms
112							) {
113	1					3	return 1;
114							}
115
116	5					17	return;
117							}
118
119
120							sub __TOKENIZER__on_char {
121	18			18		47	my $class = shift;
122	18					32	my $t = shift;
123
124							# Suck in till the end of the bareword
125	18					62	pos $t->{line} = $t->{line_cursor};
126	18	100				143	if ( $t->{line} =~ m/\G(\w+(?:(?:\'\|::)\w+)*(?:::)?)/gc ) {
127	11					49	my $word = $1;
128							# Special Case: If we accidentally treat eq'foo' like
129							# the word "eq'foo", then just make 'eq' (or whatever
130							# else is in the %KEYWORDS hash.
131	11	50	66			48	if ( $word =~ /^(\w+)'/ && $KEYWORDS{$1} ) {
132	1					3	$word = $1;
133							}
134	11					28	$t->{token}->{content} .= $word;
135	11					25	$t->{line_cursor} += length $word;
136
137							}
138
139							# We might be a subroutine attribute.
140	18	50				60	if ( __current_token_is_attribute($t) ) {
141	0					0	$t->{class} = $t->{token}->set_class( 'Attribute' );
142	0					0	return $t->{class}->__TOKENIZER__commit( $t );
143							}
144
145	18					83	my $word = $t->{token}->{content};
146	18	50				69	if ( $KEYWORDS{$word} ) {
147							# Check for a Perl keyword that is forced to be a normal word instead
148	0	0				0	if ( $t->__current_token_is_forced_word ) {
149	0					0	$t->{class} = $t->{token}->set_class( 'Word' );
150	0					0	return $t->{class}->__TOKENIZER__on_char( $t );
151							}
152
153							# Check for a quote like operator. %QUOTELIKE must be subset of %KEYWORDS
154	0	0				0	if ( $QUOTELIKE{$word} ) {
155	0					0	$t->{class} = $t->{token}->set_class( $QUOTELIKE{$word} );
156	0					0	return $t->{class}->__TOKENIZER__on_char( $t );
157							}
158
159							# Or one of the word operators. %OPERATOR must be subset of %KEYWORDS
160	0	0				0	if ( $OPERATOR{$word} ) {
161	0					0	$t->{class} = $t->{token}->set_class( 'Operator' );
162	0					0	return $t->_finalize_token->__TOKENIZER__on_char( $t );
163							}
164							}
165
166							# Unless this is a simple identifier, at this point
167							# it has to be a normal bareword
168	18	100				85	if ( $word =~ /\:/ ) {
169	10					23	return $t->_finalize_token->__TOKENIZER__on_char( $t );
170							}
171
172							# If the NEXT character in the line is a colon, this
173							# is a label.
174	8					25	my $char = substr( $t->{line}, $t->{line_cursor}, 1 );
175	8	50				57	if ( $char eq ':' ) {
		100
176	0					0	$t->{token}->{content} .= ':';
177	0					0	$t->{line_cursor}++;
178	0					0	$t->{class} = $t->{token}->set_class( 'Label' );
179
180							# If not a label, '_' on its own is the magic filehandle
181							} elsif ( $word eq '_' ) {
182	1					7	$t->{class} = $t->{token}->set_class( 'Magic' );
183
184							}
185
186							# Finalise and process the character again
187	8					29	$t->_finalize_token->__TOKENIZER__on_char( $t );
188							}
189
190
191
192							# We are committed to being a bareword.
193							# Or so we would like to believe.
194							sub __TOKENIZER__commit {
195	56605			56605		84563	my ($class, $t) = @_;
196
197							# Our current position is the first character of the bareword.
198							# Capture the bareword.
199	56605					150235	pos $t->{line} = $t->{line_cursor};
200	56605	50				302652	unless ( $t->{line} =~ m/\G((?!\d)\w+(?:(?:\'\|::)\w+)*(?:::)?)/gc ) {
201							# Programmer error
202	0					0	die sprintf "Fatal error... regex failed to match in '%s' when expected", substr $t->{line}, $t->{line_cursor};
203							}
204
205							# Special Case: If we accidentally treat eq'foo' like the word "eq'foo",
206							# then unwind it and just make it 'eq' (or the other stringy comparitors)
207	56605					119774	my $word = $1;
208	56605	100	100			119161	if ( $word =~ /^(\w+)'/ && $KEYWORDS{$1} ) {
209	260					460	$word = $1;
210							}
211
212							# Advance the position one after the end of the bareword
213	56605					76740	$t->{line_cursor} += length $word;
214
215							# We might be a subroutine attribute.
216	56605	100				101071	if ( __current_token_is_attribute($t) ) {
217	1065					2281	$t->_new_token( 'Attribute', $word );
218							return ($t->{line_cursor} >= $t->{line_length}) ? 0
219	1065	50				3826	: $t->{class}->__TOKENIZER__on_char($t);
220							}
221
222							# Check for the end of the file
223	55540	100				114102	if ( $word eq '__END__' ) {
224							# Create the token for the __END__ itself
225	8					26	$t->_new_token( 'Separator', $1 );
226	8					24	$t->_finalize_token;
227
228							# Move into the End zone (heh)
229	8					15	$t->{zone} = 'PPI::Token::End';
230
231							# Add the rest of the line as a comment, and a whitespace newline
232							# Anything after the __END__ on the line is "ignored". So we must
233							# also ignore it, by turning it into a comment.
234	8					18	my $end_rest = substr( $t->{line}, $t->{line_cursor} );
235	8					15	$t->{line_cursor} = length $t->{line};
236	8	100				32	if ( $end_rest =~ /\n$/ ) {
237	6					15	chomp $end_rest;
238	6	100				20	$t->_new_token( 'Comment', $end_rest ) if length $end_rest;
239	6					17	$t->_new_token( 'Whitespace', "\n" );
240							} else {
241	2	100				7	$t->_new_token( 'Comment', $end_rest ) if length $end_rest;
242							}
243	8					20	$t->_finalize_token;
244
245	8					26	return 0;
246							}
247
248							# Check for the data section
249	55532	100				86258	if ( $word eq '__DATA__' ) {
250							# Create the token for the __DATA__ itself
251	6					26	$t->_new_token( 'Separator', "$1" );
252	6					17	$t->_finalize_token;
253
254							# Move into the Data zone
255	6					11	$t->{zone} = 'PPI::Token::Data';
256
257							# Add the rest of the line as the Data token
258	6					13	my $data_rest = substr( $t->{line}, $t->{line_cursor} );
259	6					12	$t->{line_cursor} = length $t->{line};
260	6	100				23	if ( $data_rest =~ /\n$/ ) {
261	4					11	chomp $data_rest;
262	4	100				14	$t->_new_token( 'Comment', $data_rest ) if length $data_rest;
263	4					12	$t->_new_token( 'Whitespace', "\n" );
264							} else {
265	2	100				5	$t->_new_token( 'Comment', $data_rest ) if length $data_rest;
266							}
267	6					29	$t->_finalize_token;
268
269	6					18	return 0;
270							}
271
272	55526					63290	my $token_class;
273	55526	100	100			267521	if ( $word =~ /\:/ ) {
		100	100
		100	66
		100
274							# Since it's not a simple identifier...
275	1293					2047	$token_class = 'Word';
276
277							} elsif ( $KEYWORDS{$word} and $t->__current_token_is_forced_word ) {
278	6624					9868	$token_class = 'Word';
279
280							} elsif ( $QUOTELIKE{$word} ) {
281							# Special Case: A Quote-like operator
282	2611					6892	$t->_new_token( $QUOTELIKE{$word}, $word );
283							return ($t->{line_cursor} >= $t->{line_length}) ? 0
284	2611	50				11276	: $t->{class}->__TOKENIZER__on_char( $t );
285
286							} elsif ( $OPERATOR{$word} && ($word ne 'x' \|\| $t->_current_x_is_operator) ) {
287							# Word operator
288	1648					2620	$token_class = 'Operator';
289
290							} else {
291							# Get tokens early to be sure to not disturb state set up by pos and m//gc.
292	43350					76448	my @tokens = $t->_previous_significant_tokens(1);
293
294							# If the next character is a ':' then it's a label...
295	43350					82495	pos $t->{line} = $t->{line_cursor};
296	43350	100				144051	if ( $t->{line} =~ m/\G(\s*:)(?!:)/gc ) {
		100
297	1374	100	100			8784	if ( $tokens[0] and $tokens[0]->{content} eq 'sub' ) {
		100	100
298							# ... UNLESS it's after 'sub' in which
299							# case it is a sub name and an attribute
300							# operator.
301							# We COULD have checked this at the top
302							# level of checks, but this would impose
303							# an additional performance per-word
304							# penalty, and every other case where the
305							# attribute operator doesn't directly
306							# touch the object name already works.
307	624					949	$token_class = 'Word';
308							} elsif ( !($tokens[0] and $tokens[0]->isa('PPI::Token::Operator')) ) {
309	442					1192	$word .= $1;
310	442					856	$t->{line_cursor} += length($1);
311	442					779	$token_class = 'Label';
312							} else {
313	308					549	$token_class = 'Word';
314							}
315							} elsif ( $word eq '_' ) {
316	433					922	$token_class = 'Magic';
317							} else {
318	41543					58740	$token_class = 'Word';
319							}
320							}
321
322							# Create the new token and finalise
323	52915					118015	$t->_new_token( $token_class, $word );
324	52915	50				97974	if ( $t->{line_cursor} >= $t->{line_length} ) {
325							# End of the line
326	0					0	$t->_finalize_token;
327	0					0	return 0;
328							}
329	52915					85837	$t->_finalize_token->__TOKENIZER__on_char($t);
330							}
331
332
333
334							# Is the current Word really a subroutine attribute?
335							sub __current_token_is_attribute {
336	56623			56623		73876	my ( $t ) = @_;
337	56623					119151	my @tokens = $t->_previous_significant_tokens(1);
338							return (
339							$tokens[0]
340							and (
341							# hint from tokenizer
342							$tokens[0]->{_attribute}
343							# nothing between attribute and us except whitespace
344	56623		66			411224	or $tokens[0]->isa('PPI::Token::Attribute')
345							)
346							);
347							}
348
349							1;
350
351							=pod
352
353							=head1 TO DO
354
355							- Add C<function>, C<method> etc detector methods
356
357							=head1 SUPPORT
358
359							See the L<support section\|PPI/SUPPORT> in the main module.
360
361							=head1 AUTHOR
362
363							Adam Kennedy E<lt>adamk@cpan.orgE<gt>
364
365							=head1 COPYRIGHT
366
367							Copyright 2001 - 2011 Adam Kennedy.
368
369							This program is free software; you can redistribute
370							it and/or modify it under the same terms as Perl itself.
371
372							The full text of the license can be found in the
373							LICENSE file included with this module.
374
375							=cut