File Coverage

blib/lib/PPI/Token/HereDoc.pm

Criterion	Covered	Total	%
statement	74	76	97.3
branch	41	42	97.6
condition	5	6	83.3
subroutine	7	8	87.5
pod	3	3	100.0
total	130	135	96.3

line	stmt	bran	cond	sub	pod	time	code
1							package PPI::Token::HereDoc;
2
3							=pod
4
5							=head1 NAME
6
7							PPI::Token::HereDoc - Token class for the here-doc
8
9							=head1 INHERITANCE
10
11							PPI::Token::HereDoc
12							isa PPI::Token
13							isa PPI::Element
14
15							=head1 DESCRIPTION
16
17							Here-docs are incredibly handy when writing Perl, but incredibly tricky
18							when parsing it, primarily because they don't follow the general flow of
19							input.
20
21							They jump ahead and nab lines directly off the input buffer. Whitespace
22							and newlines may not matter in most Perl code, but they matter in here-docs.
23
24							They are also tricky to store as an object. They look sort of like an
25							operator and a string, but they don't act like it. And they have a second
26							section that should be something like a separate token, but isn't because a
27							string can span from above the here-doc content to below it.
28
29							So when parsing, this is what we do.
30
31							Firstly, the PPI::Token::HereDoc object, does not represent the C<<< << >>>
32							operator, or the "END_FLAG", or the content, or even the terminator.
33
34							It represents all of them at once.
35
36							The token itself has only the declaration part as its "content".
37
38							# This is what the content of a HereDoc token is
39							<
40
41							# Or this
42							<<"FOO"
43
44							# Or even this
45							<< 'FOO'
46
47							That is, the "operator", any whitespace separator, and the quoted or bare
48							terminator. So when you call the C method on a HereDoc token, you
49							get '<< "FOO"'.
50
51							As for the content and the terminator, when treated purely in "content" terms
52							they do not exist.
53
54							The content is made available with the C method, and the name of
55							the terminator with the C method.
56
57							To make things work in the way you expect, PPI has to play some games
58							when doing line/column location calculation for tokens, and also during
59							the content parsing and generation processes.
60
61							Documents cannot simply by recreated by stitching together the token
62							contents, and involve a somewhat more expensive procedure, but the extra
63							expense should be relatively negligible unless you are doing huge
64							quantities of them.
65
66							Please note that due to the immature nature of PPI in general, we expect
67							C to be a rich (bad) source of corner-case bugs for quite a while,
68							but for the most part they should more or less DWYM.
69
70							=head2 Comparison to other string types
71
72							Although technically it can be considered a quote, for the time being
73							C are being treated as a completely separate C subclass,
74							and will not be found in a search for L or
75							L objects.
76
77							This may change in the future, with it most likely to end up under
78							QuoteLike.
79
80							=head1 METHODS
81
82							Although it has the standard set of C methods, C objects
83							have a relatively large number of unique methods all of their own.
84
85							=cut
86
87	65			65		417	use strict;
	65					110
	65					62968
88
89							our $VERSION = '1.276';
90
91							our @ISA = "PPI::Token";
92
93
94
95
96
97							#####################################################################
98							# PPI::Token::HereDoc Methods
99
100							=pod
101
102							=head2 heredoc
103
104							The C method is the authoritative method for accessing the contents
105							of the C object.
106
107							It returns the contents of the here-doc as a list of newline-terminated
108							strings. If called in scalar context, it returns the number of lines in
109							the here-doc, B the terminator line.
110
111							=cut
112
113	667			667	1	50175	sub heredoc { @{shift->{_heredoc}} }
	667					1776
114
115							=pod
116
117							=head2 indentation
118
119							The C method returns the indentation string of an indented
120							here-doc if that can be determined. If the indented here-doc is damaged
121							(say, missing terminator) or the here-doc was not indented, it returns
122							C.
123
124							=cut
125
126	603			603	1	2192	sub indentation { shift->{_indentation} }
127
128							=pod
129
130							=head2 terminator
131
132							The C method returns the name of the terminating string for the
133							here-doc.
134
135							Returns the terminating string as an unescaped string (in the rare case
136							the terminator has an escaped quote in it).
137
138							=cut
139
140							sub terminator {
141	0			0	1	0	shift->{_terminator};
142							}
143
144							sub _is_terminator {
145	1431			1431		2678	my ( $self, $terminator, $line, $indented ) = @_;
146	1431	100				2131	if ( $indented ) {
147	729					10613	return $line =~ /^\s*\Q$terminator\E$/;
148							} else {
149	702					1478	return $line eq $terminator;
150							}
151							}
152
153							sub _indent {
154	16			16		26	my ( $self, $token ) = @_;
155	16					43	my ($indent) = $token->{_terminator_line} =~ /^(\s*)/;
156	16					30	return $indent;
157							}
158
159							sub _is_match_indent {
160	16			16		26	my ( $self, $token, $indent ) = @_;
161	16	100				19	return (grep { /^$indent/ \|\| $_ eq "\n" } @{$token->{_heredoc}}) == @{$token->{_heredoc}};
	33					132
	16					29
	16					61
162							}
163
164
165
166
167							#####################################################################
168							# Tokenizer Methods
169
170							# Parse in the entire here-doc in one call
171							sub __TOKENIZER__on_char {
172	717			717		1449	my ( $self, $t ) = @_;
173
174							# We are currently located on the first char after the <<
175
176							# Handle the most common form first for simplicity and speed reasons
177							### FIXME - This regex, and this method in general, do not yet allow
178							### for the null here-doc, which terminates at the first
179							### empty line.
180	717					1519	pos $t->{line} = $t->{line_cursor};
181
182	717	100				3377	if ( $t->{line} !~ m/\G( ~? \s* (?: "[^"]" \| '[^']' \| `[^`]*` \| \\?\w+ ) )/gcx ) {
183							# Degenerate to a left-shift operation
184	99					278	$t->{token}->set_class('Operator');
185	99					202	return $t->_finalize_token->__TOKENIZER__on_char( $t );
186							}
187
188							# Add the rest of the token, work out what type it is,
189							# and suck in the content until the end.
190	618					977	my $token = $t->{token};
191	618					1960	$token->{content} .= $1;
192	618					1032	$t->{line_cursor} += length $1;
193
194							# Find the terminator, clean it up and determine
195							# the type of here-doc we are dealing with.
196	618					988	my $content = $token->{content};
197	618	100				3554	if ( $content =~ /^\<\<(~?)(\w+)$/ ) {
		100
		100
		100
		50
198							# Bareword
199	133					251	$token->{_mode} = 'interpolate';
200	133	100				278	$token->{_indented} = 1 if $1 eq '~';
201	133					277	$token->{_terminator} = $2;
202
203							} elsif ( $content =~ /^\<\<(~?)\s\'(.)\'$/ ) {
204							# ''-quoted literal
205	245					664	$token->{_mode} = 'literal';
206	245	100				827	$token->{_indented} = 1 if $1 eq '~';
207	245					603	$token->{_terminator} = $2;
208	245					604	$token->{_terminator} =~ s/\\'/'/g;
209
210							} elsif ( $content =~ /^\<\<(~?)\s\"(.)\"$/ ) {
211							# ""-quoted literal
212	38					87	$token->{_mode} = 'interpolate';
213	38	100				117	$token->{_indented} = 1 if $1 eq '~';
214	38					98	$token->{_terminator} = $2;
215	38					98	$token->{_terminator} =~ s/\\"/"/g;
216
217							} elsif ( $content =~ /^\<\<(~?)\s\`(.)\`$/ ) {
218							# ``-quoted command
219	182					329	$token->{_mode} = 'command';
220	182	100				462	$token->{_indented} = 1 if $1 eq '~';
221	182					367	$token->{_terminator} = $2;
222	182					320	$token->{_terminator} =~ s/\\`/`/g;
223
224							} elsif ( $content =~ /^\<\<(~?)\\(\w+)$/ ) {
225							# Legacy forward-slashed bareword
226	20					41	$token->{_mode} = 'literal';
227	20	100				55	$token->{_indented} = 1 if $1 eq '~';
228	20					49	$token->{_terminator} = $2;
229
230							} else {
231							# WTF?
232	0					0	return undef;
233							}
234
235							# Suck in the HEREDOC
236	618					1167	$token->{_heredoc} = \my @heredoc;
237	618					1253	my $terminator = $token->{_terminator} . "\n";
238	618					1504	while ( defined( my $line = $t->_get_line ) ) {
239	1143	100				2465	if ( $self->_is_terminator( $terminator, $line, $token->{_indented} ) ) {
240							# Keep the actual termination line for consistency
241							# when we are re-assembling the file
242	147					335	$token->{_terminator_line} = $line;
243
244	147	100				348	if ( $token->{_indented} ) {
245	10					20	my $indent = $self->_indent( $token );
246	10					23	$token->{_indentation} = $indent;
247							# Indentation of here-doc doesn't match delimiter
248	10	100				26	unless ( $self->_is_match_indent( $token, $indent ) ) {
249	1					2	push @heredoc, $line;
250	1					2	last;
251							}
252
253	9					81	s/^$indent// for @heredoc, $token->{_terminator_line};
254							}
255
256							# The HereDoc is now fully parsed
257	146					324	return $t->_finalize_token->__TOKENIZER__on_char( $t );
258							}
259
260							# Add the line
261	996					2766	push @heredoc, $line;
262							}
263
264							# End of file.
265							# Error: Didn't reach end of here-doc before end of file.
266
267							# If the here-doc block is not empty, look at the last line to determine if
268							# the here-doc terminator is missing a newline (which Perl would fail to
269							# compile but is easy to detect) or if the here-doc block was just not
270							# terminated at all (which Perl would fail to compile as well).
271	472					784	$token->{_terminator_line} = undef;
272	472	100	66			1754	if ( @heredoc and defined $heredoc[-1] ) {
273							# See PPI::Tokenizer, the algorithm there adds a space at the end of the
274							# document that we need to make sure we remove.
275	288	100				679	if ( $t->{source_eof_chop} ) {
276	272					451	chop $heredoc[-1];
277	272					457	$t->{source_eof_chop} = '';
278							}
279
280							# Check if the last line of the file matches the terminator without
281							# newline at the end. If so, remove it from the content and set it as
282							# the terminator line.
283							$token->{_terminator_line} = pop @heredoc
284	288	100				660	if $self->_is_terminator( $token->{_terminator}, $heredoc[-1], $token->{_indented} );
285							}
286
287	472	100	100			1785	if ( $token->{_indented} && $token->{_terminator_line} ) {
288	6					11	my $indent = $self->_indent( $token );
289	6					10	$token->{_indentation} = $indent;
290	6	100				12	if ( $self->_is_match_indent( $token, $indent ) ) {
291							# Remove indent from here-doc as much as possible
292	5					35	s/^$indent// for @heredoc;
293							}
294
295	6					28	s/^$indent// for $token->{_terminator_line};
296							}
297
298							# Set a hint for PPI::Document->serialize so it can
299							# inexpensively repair it if needed when writing back out.
300	472					745	$token->{_damaged} = 1;
301
302							# The HereDoc is not fully parsed
303	472					998	$t->_finalize_token->__TOKENIZER__on_char( $t );
304							}
305
306							1;
307
308							=pod
309
310							=head1 TO DO
311
312							- Implement PPI::Token::Quote interface compatibility
313
314							- Check CPAN for any use of the null here-doc or here-doc-in-s///e
315
316							- Add support for the null here-doc
317
318							- Add support for here-doc in s///e
319
320							=head1 SUPPORT
321
322							See the L in the main module.
323
324							=head1 AUTHOR
325
326							Adam Kennedy Eadamk@cpan.orgE
327
328							=head1 COPYRIGHT
329
330							Copyright 2001 - 2011 Adam Kennedy.
331
332							This program is free software; you can redistribute
333							it and/or modify it under the same terms as Perl itself.
334
335							The full text of the license can be found in the
336							LICENSE file included with this module.
337
338							=cut