File Coverage

blib/lib/PPI/Document.pm
Criterion Covered Total %
statement 252 275 91.6
branch 90 128 70.3
condition 18 29 62.0
subroutine 47 51 92.1
pod 20 24 83.3
total 427 507 84.2


line stmt bran cond sub pod time code
1             package PPI::Document;
2              
3             =pod
4              
5             =head1 NAME
6              
7             PPI::Document - Object representation of a Perl document
8              
9             =head1 INHERITANCE
10              
11             PPI::Document
12             isa PPI::Node
13             isa PPI::Element
14              
15             =head1 SYNOPSIS
16              
17             use PPI;
18            
19             # Load a document from a file
20             my $Document = PPI::Document->new('My/Module.pm');
21            
22             # Strip out comments
23             $Document->prune('PPI::Token::Comment');
24            
25             # Find all the named subroutines
26             my $sub_nodes = $Document->find(
27             sub { $_[1]->isa('PPI::Statement::Sub') and $_[1]->name }
28             );
29             my @sub_names = map { $_->name } @$sub_nodes;
30            
31             # Save the file
32             $Document->save('My/Module.pm.stripped');
33              
34             =head1 DESCRIPTION
35              
36             The C class represents a single Perl "document". A
37             C object acts as a root L, with some
38             additional methods for loading and saving, and working with
39             the line/column locations of Elements within a file.
40              
41             The exemption to its L-like behavior this is that a
42             C object can NEVER have a parent node, and is always
43             the root node in a tree.
44              
45             =head2 Storable Support
46              
47             C implements the necessary C and
48             C hooks to provide native support for L,
49             if you have it installed.
50              
51             However if you want to clone a Document, you are highly recommended
52             to use the C<$Document-Eclone> method rather than Storable's
53             C function (although C should still work).
54              
55             =head1 METHODS
56              
57             Most of the things you are likely to want to do with a Document are
58             probably going to involve the methods from L class, of which
59             this is a subclass.
60              
61             The methods listed here are the remaining few methods that are truly
62             Document-specific.
63              
64             =cut
65              
66 68     68   1256449 use strict;
  68         112  
  68         1991  
67 68     68   248 use Carp ();
  68         146  
  68         1271  
68 68     68   233 use List::Util 1.33 ();
  68         1130  
  68         1718  
69 68     68   5594 use Params::Util 1.00 qw{_SCALAR0 _ARRAY0 _INSTANCE};
  68         67244  
  68         3723  
70 68     68   357 use Digest::MD5 ();
  68         98  
  68         945  
71 68     68   6239 use PPI::Util ();
  68         115  
  68         1040  
72 68     68   7225 use PPI ();
  68         129  
  68         3378  
73 68     68   1011 use PPI::Node ();
  68         973  
  68         938  
74 68     68   32626 use YAML::PP ();
  68         4147531  
  68         2459  
75              
76 68     68   478 use overload 'bool' => \&PPI::Util::TRUE;
  68         113  
  68         324  
77 68     68   3350 use overload '""' => 'content';
  68         119  
  68         211  
78              
79             our $VERSION = '1.291';
80              
81             our ( $errstr, @ISA ) = ( "", "PPI::Node" );
82              
83 68     68   25535 use PPI::Document::Fragment ();
  68         158  
  68         1766  
84              
85             # Document cache
86             my $CACHE;
87              
88             # Convenience constants related to constants
89 68     68   294 use constant LOCATION_LINE => 0;
  68         101  
  68         4104  
90 68     68   280 use constant LOCATION_CHARACTER => 1;
  68         124  
  68         2504  
91 68     68   245 use constant LOCATION_COLUMN => 2;
  68         122  
  68         2023  
92 68     68   255 use constant LOCATION_LOGICAL_LINE => 3;
  68         114  
  68         2177  
93 68     68   252 use constant LOCATION_LOGICAL_FILE => 4;
  68         125  
  68         199543  
94              
95              
96              
97              
98              
99             #####################################################################
100             # Constructor and Static Methods
101              
102             =pod
103              
104             =head2 new
105              
106             # Simple construction
107             $doc = PPI::Document->new( $filename );
108             $doc = PPI::Document->new( \$source );
109            
110             # With the readonly attribute set
111             $doc = PPI::Document->new( $filename,
112             readonly => 1,
113             );
114              
115             The C constructor takes as argument a variety of different sources of
116             Perl code, and creates a single cohesive Perl C
117             for it.
118              
119             If passed a file name as a normal string, it will attempt to load the
120             document from the file.
121              
122             If passed a reference to a C, this is taken to be source code and
123             parsed directly to create the document.
124              
125             If passed zero arguments, a "blank" document will be created that contains
126             no content at all.
127              
128             In all cases, the document is considered to be "anonymous" and not tied back
129             to where it was created from. Specifically, if you create a PPI::Document from
130             a filename, the document will B remember where it was created from.
131              
132             Returns a C object, or C if parsing fails.
133             L objects can also be thrown if there are parsing problems.
134              
135             The constructor also takes attribute flags.
136              
137             =head3 readonly
138              
139             Setting C to true will allow various systems to provide additional
140             optimisations and caching. Note that because C is an optimisation
141             flag, it is off by default and you will need to explicitly enable it.
142              
143             =head3 feature_mods
144              
145             Setting feature_mods with a hashref allows defining perl parsing features to be
146             enabled for the whole document. (e.g. when the code is assumed to be run as a
147             oneliner)
148              
149             =head3 custom_feature_includes
150              
151             custom_feature_includes =>
152             { $my_custom_pragma_name => { $feature_name => $feature_provider } }
153            
154             # e.g.
155             custom_feature_includes =>
156             { MyStrict => { try => "Syntax::Keyword::Try" } }
157              
158             Setting custom_feature_includes with a hashref allows defining include names
159             which act like pragmas that enable parsing features within their scope. This
160             is mostly useful when your work project has its own boilerplate module. The
161             provider is either perl, or the name of a cpan module that implements the
162             feature.
163              
164             It can also be provided as JSON or YAML in the environment variable
165             PPI_CUSTOM_FEATURE_INCLUDES, like so:
166              
167             PPI_CUSTOM_FEATURE_INCLUDES='MyStrict: {signatures: perl}' \
168             perlcritic lib/OurModule.pm
169              
170             PPI_CUSTOM_FEATURE_INCLUDES='{"MyStrict":{"signatures":"perl"}}' \
171             perlcritic lib/OurModule.pm
172              
173             =head3 custom_feature_include_cb
174              
175             custom_feature_include_cb => sub {
176             my ($statement) = @_;
177             return $statement->module eq "MyStrict" ? { signatures => "perl" } : ();
178             },
179              
180             Setting custom_feature_include_cb with a code reference causes all inspections
181             on includes to call that sub before doing any other inspections. The sub can
182             decide to either return a hashref of features to be enabled or disabled, which
183             will be used for the scope the include was called in, or undef to continue with
184             the default inspections. The argument to the sub will be the
185             L object.
186              
187             This can be useful when your work project has a complex boilerplate module.
188              
189             =cut
190              
191             sub new {
192 33690     33690 1 33973832 local $_; # An extra one, just in case
193 33690 50       70464 my $class = ref $_[0] ? ref shift : shift;
194              
195 33690 100       68613 unless ( @_ ) {
196 16845         55178 my $self = $class->SUPER::new;
197 16845         41230 $self->{readonly} = ! 1;
198 16845         24705 $self->{tab_width} = 1;
199 16845         36832 return $self;
200             }
201              
202             # Check constructor attributes
203 16845         24142 my $source = shift;
204 16845         31976 my %attr = @_;
205              
206             # Check the data source
207 16845 50       78466 if ( ! defined $source ) {
    100          
    100          
    50          
208 0         0 $class->_error("An undefined value was passed to PPI::Document::new");
209              
210             } elsif ( ! ref $source ) {
211             # Catch people using the old API
212 520 50       3607 if ( $source =~ /(?:\012|\015)/ ) {
213 0         0 Carp::croak("API CHANGE: Source code should only be passed to PPI::Document->new as a SCALAR reference");
214             }
215              
216             # Save the filename
217 520   66     3366 $attr{filename} ||= $source;
218              
219             # When loading from a filename, use the caching layer if it exists.
220 520 100       1361 if ( $CACHE ) {
221 3         8 my $file_contents = PPI::Util::_slurp( $source );
222              
223             # Errors returned as plain string
224 3 50       8 return $class->_error($file_contents) if !ref $file_contents;
225              
226             # Retrieve the document from the cache
227 3         14 my $document = $CACHE->get_document($file_contents);
228 3 100       14 return $class->_setattr( $document, %attr ) if $document;
229              
230 1         12 $document = PPI::Lexer->lex_source( $$file_contents, %attr );
231 1 50       5 if ( $document ) {
232             # Save in the cache
233 1         4 $CACHE->store_document( $document );
234 1         53 return $document;
235             }
236             } else {
237 517         4563 my $document = PPI::Lexer->lex_file( $source, %attr );
238 517 100       3873 return $document if $document;
239             }
240              
241             } elsif ( _SCALAR0($source) ) {
242 16322         67038 my $document = PPI::Lexer->lex_source( $$source, %attr );
243 16322 100       79233 return $document if $document;
244              
245             } elsif ( _ARRAY0($source) ) {
246 3         13 my $document = PPI::Lexer->lex_file( $source, %attr );
247 3 50       16 return $document if $document;
248              
249             } else {
250 0         0 $class->_error("Unknown object or reference was passed to PPI::Document::new");
251             }
252              
253             # Pull and store the error from the lexer
254 3         4 my $errstr;
255 3 50       11 if ( PPI::Lexer->errstr ) {
    0          
    0          
256 3         6 $errstr = PPI::Lexer->errstr;
257             } elsif ( _INSTANCE($@, 'PPI::Exception') ) {
258 0         0 $errstr = $@->message;
259             } elsif ( $@ ) {
260 0         0 $errstr = $@;
261 0         0 $errstr =~ s/\sat line\s.+$//;
262             } else {
263 0         0 $errstr = "Unknown error parsing Perl document";
264             }
265 3         10 PPI::Lexer->_clear;
266 3         11 $class->_error( $errstr );
267             }
268              
269             sub load {
270 0     0 0 0 Carp::croak("API CHANGE: File names should now be passed to PPI::Document->new to load a file");
271             }
272              
273             sub _setattr {
274 16846     16846   28844 my ( $class, $document, %attr ) = @_;
275 16846         32253 $document->{readonly} = !!$attr{readonly};
276 16846         27048 $document->{filename} = $attr{filename};
277 16846         25203 $document->{feature_mods} = $attr{feature_mods};
278 16846         33423 $document->{custom_feature_includes} = $attr{custom_feature_includes};
279 16846         30744 $document->{custom_feature_include_cb} = $attr{custom_feature_include_cb};
280 16846 100       40064 if ( $ENV{PPI_CUSTOM_FEATURE_INCLUDES} ) {
281 1         5 my $includes = YAML::PP::Load $ENV{PPI_CUSTOM_FEATURE_INCLUDES};
282 1 50       5647 die "\$ENV{PPI_CUSTOM_FEATURE_INCLUDES} "
283             . "does not contain valid perl:\n"
284             . "val: '$ENV{PPI_CUSTOM_FEATURE_INCLUDES}'\nerr: $@"
285             if $@;
286             $document->{custom_feature_includes} =
287 1 50       2 { %{ $document->{custom_feature_includes} || {} }, %{$includes} };
  1         6  
  1         3  
288             }
289 16846         31706 return $document;
290             }
291              
292             =pod
293              
294             =head2 set_cache $cache
295              
296             As of L 1.100, C supports parser caching.
297              
298             The default cache class L provides a L-based
299             caching or the parsed document based on the MD5 hash of the document as
300             a string.
301              
302             The static C method is used to set the cache object for
303             C to use when loading documents. It takes as argument
304             a L object (or something that C the same).
305              
306             If passed C, this method will stop using the current cache, if any.
307              
308             For more information on caching, see L.
309              
310             Returns true on success, or C if not passed a valid param.
311              
312             =cut
313              
314             sub set_cache {
315 3 50   3 1 13 my $class = ref $_[0] ? ref shift : shift;
316              
317 3 100       4 if ( defined $_[0] ) {
318             # Enable the cache
319 2 50       15 my $object = _INSTANCE(shift, 'PPI::Cache') or return undef;
320 2         4 $CACHE = $object;
321             } else {
322             # Disable the cache
323 1         3 $CACHE = undef;
324             }
325              
326 3         8 1;
327             }
328              
329             =pod
330              
331             =head2 get_cache
332              
333             If a document cache is currently set, the C method will
334             return it.
335              
336             Returns a L object, or C if there is no cache
337             currently set for C.
338              
339             =cut
340              
341             sub get_cache {
342 7     7 1 1191 $CACHE;
343             }
344              
345              
346              
347              
348              
349             #####################################################################
350             # PPI::Document Instance Methods
351              
352             =pod
353              
354             =head2 filename
355              
356             The C accessor returns the name of the file in which the document
357             is stored.
358              
359             =cut
360              
361             sub filename {
362 270     270 1 1294 $_[0]->{filename};
363             }
364              
365             =pod
366              
367             =head2 readonly
368              
369             The C attribute indicates if the document is intended to be
370             read-only, and will never be modified. This is an advisory flag, that
371             writers of L-related systems may or may not use to enable
372             optimisations and caches for your document.
373              
374             Returns true if the document is read-only or false if not.
375              
376             =cut
377              
378             sub readonly {
379 4     4 1 4164 $_[0]->{readonly};
380             }
381              
382             =pod
383              
384             =head2 tab_width [ $width ]
385              
386             In order to handle support for C correctly, C
387             need to understand the concept of tabs and tab width. The C
388             method is used to get and set the size of the tab width.
389              
390             At the present time, PPI only supports "naive" (width 1) tabs, but we do
391             plan on supporting arbitrary, default and auto-sensing tab widths later.
392              
393             Returns the tab width as an integer, or Cs if you attempt to set the
394             tab width.
395              
396             =cut
397              
398             sub tab_width {
399 58178     58178 1 55949 my $self = shift;
400 58178 100       84826 return $self->{tab_width} unless @_;
401 2         8 $self->{tab_width} = shift;
402             }
403              
404             =head2 feature_mods { feature_name => $provider }
405              
406             =cut
407              
408             sub feature_mods {
409 16858     16858 1 18769 my $self = shift;
410 16858 50       52738 return $self->{feature_mods} unless @_;
411 0         0 $self->{feature_mods} = shift;
412             }
413              
414             =head2 custom_feature_includes { module_name => { feature_name => $provider } }
415              
416             =cut
417              
418             sub custom_feature_includes {
419 2267     2267 1 2914 my $self = shift;
420 2267 50       11450 return $self->{custom_feature_includes} unless @_;
421 0         0 $self->{custom_feature_includes} = shift;
422             }
423              
424             =head2 custom_feature_include_cb sub { ... }
425              
426             =cut
427              
428             sub custom_feature_include_cb {
429 3318     3318 1 4140 my $self = shift;
430 3318 50       22296 return $self->{custom_feature_include_cb} unless @_;
431 0         0 $self->{custom_feature_include_cb} = shift;
432             }
433              
434             =pod
435              
436             =head2 save
437              
438             $document->save( $file )
439            
440             The C method serializes the C object and saves the
441             resulting Perl document to a file. Returns C on failure to open
442             or write to the file.
443              
444             =cut
445              
446             sub save {
447 2     2 1 4 my $self = shift;
448 2         7 local *FILE;
449 2 50       270 open( FILE, '>', $_[0] ) or return undef;
450 2         8 binmode FILE;
451 2 50       8 print FILE $self->serialize or return undef;
452 2 50       184 close FILE or return undef;
453 2         23 return 1;
454             }
455              
456             =pod
457              
458             =head2 serialize
459              
460             Unlike the C method, which shows only the immediate content
461             within an element, Document objects also have to be able to be written
462             out to a file again.
463              
464             When doing this we need to take into account some additional factors.
465              
466             Primarily, we need to handle here-docs correctly, so that are written
467             to the file in the expected place.
468              
469             The C method generates the actual file content for a given
470             Document object. The resulting string can be written straight to a file.
471              
472             Returns the serialized document as a string.
473              
474             =cut
475              
476             sub serialize {
477 7230     7230 1 9666100 my $self = shift;
478 7230         27480 my @tokens = $self->tokens;
479              
480             # The here-doc content buffer
481 7230         12777 my $heredoc = '';
482              
483             # Start the main loop
484 7230         11090 my $output = '';
485 7230         20641 foreach my $i ( 0 .. $#tokens ) {
486 279934         265516 my $Token = $tokens[$i];
487              
488             # Handle normal tokens
489 279934 100       517506 unless ( $Token->isa('PPI::Token::HereDoc') ) {
490 279301         339855 my $content = $Token->content;
491              
492             # Handle the trivial cases
493 279301 100 100     380217 unless ( $heredoc ne '' and $content =~ /\n/ ) {
494 278855         247911 $output .= $content;
495 278855         288514 next;
496             }
497              
498             # We have pending here-doc content that needs to be
499             # inserted just after the first newline in the content.
500 446 100       752 if ( $content eq "\n" ) {
501             # Shortcut the most common case for speed
502 323         521 $output .= $content . $heredoc;
503             } else {
504             # Slower and more general version
505 123         591 $content =~ s/\n/\n$heredoc/;
506 123         219 $output .= $content;
507             }
508              
509 446         523 $heredoc = '';
510 446         695 next;
511             }
512              
513             # This token is a HereDoc.
514             # First, add the token content as normal, which in this
515             # case will definitely not contain a newline.
516 633         1560 $output .= $Token->content;
517              
518             # Pick up the indentation, which may be undef.
519 633   100     1785 my $indentation = $Token->indentation || '';
520              
521             # Now add all of the here-doc content to the heredoc buffer.
522 633         1317 foreach my $line ( $Token->heredoc ) {
523 1004 100       1877 $heredoc .= "\n" eq $line ? $line : $indentation . $line;
524             }
525              
526 633 100       1348 if ( $Token->{_damaged} ) {
527             # Special Case:
528             # There are a couple of warning/bug situations
529             # that can occur when a HereDoc content was read in
530             # from the end of a file that we silently allow.
531             #
532             # When writing back out to the file we have to
533             # auto-repair these problems if we aren't going back
534             # on to the end of the file.
535              
536             # When calculating $last_line, ignore the final token if
537             # and only if it has a single newline at the end.
538 459         567 my $last_index = $#tokens;
539 459 100       2374 if ( $tokens[$last_index]->{content} =~ /^[^\n]*\n$/ ) {
540 284         349 $last_index--;
541             }
542              
543             # This is a two part test.
544             # First, are we on the last line of the
545             # content part of the file
546             my $last_line = List::Util::none {
547 1232 50   1232   4293 $tokens[$_] and $tokens[$_]->{content} =~ /\n/
548 459         2771 } (($i + 1) .. $last_index);
549 459 50       1753 if ( ! defined $last_line ) {
550             # Handles the null list case
551 0         0 $last_line = 1;
552             }
553              
554             # Secondly, are their any more here-docs after us,
555             # (with content or a terminator)
556             my $any_after = List::Util::any {
557             $tokens[$_]->isa('PPI::Token::HereDoc')
558             and (
559 2         12 scalar(@{$tokens[$_]->{_heredoc}})
560             or
561             defined $tokens[$_]->{_terminator_line}
562             )
563 459 100 33 1516   1802 } (($i + 1) .. $#tokens);
  1516         4059  
564 459 50       1409 if ( ! defined $any_after ) {
565             # Handles the null list case
566 0         0 $any_after = '';
567             }
568              
569             # We don't need to repair the last here-doc on the
570             # last line. But we do need to repair anything else.
571 459 50 33     1553 unless ( $last_line and ! $any_after ) {
572             # Add a terminating string if it didn't have one
573 0 0       0 unless ( defined $Token->{_terminator_line} ) {
574 0         0 $Token->{_terminator_line} = $Token->{_terminator};
575             }
576              
577             # Add a trailing newline to the terminating
578             # string if it didn't have one.
579 0 0       0 unless ( $Token->{_terminator_line} =~ /\n$/ ) {
580 0         0 $Token->{_terminator_line} .= "\n";
581             }
582             }
583             }
584              
585             # Now add the termination line to the heredoc buffer
586 633 100       1381 if ( defined $Token->{_terminator_line} ) {
587 181         325 $heredoc .= $indentation . $Token->{_terminator_line};
588             }
589             }
590              
591             # End of tokens
592              
593 7230 50       13444 if ( $heredoc ne '' ) {
594             # If the file doesn't end in a newline, we need to add one
595             # so that the here-doc content starts on the next line.
596 0 0       0 unless ( $output =~ /\n$/ ) {
597 0         0 $output .= "\n";
598             }
599              
600             # Now we add the remaining here-doc content
601             # to the end of the file.
602 0         0 $output .= $heredoc;
603             }
604              
605 7230         36893 $output;
606             }
607              
608             =pod
609              
610             =head2 hex_id
611              
612             The C method generates an unique identifier for the Perl document.
613              
614             This identifier is basically just the serialized document, with
615             Unix-specific newlines, passed through MD5 to produce a hexadecimal string.
616              
617             This identifier is used by a variety of systems (such as L
618             and L) as a unique key against which to store or cache
619             information about a document (or indeed, to cache the document itself).
620              
621             Returns a 32 character hexadecimal string.
622              
623             =cut
624              
625             sub hex_id {
626 168     168 1 268739 PPI::Util::md5hex($_[0]->serialize);
627             }
628              
629             =pod
630              
631             =head2 index_locations
632              
633             Within a document, all L objects can be considered to have a
634             "location", a line/column position within the document when considered as a
635             file. This position is primarily useful for debugging type activities.
636              
637             The method for finding the position of a single Element is a bit laborious,
638             and very slow if you need to do it a lot. So the C method
639             will index and save the locations of every Element within the Document in
640             advance, making future calls to virtually free.
641              
642             Please note that this index should always be cleared using C
643             once you are finished with the locations. If content is added to or removed
644             from the file, these indexed locations will be B.
645              
646             =cut
647              
648             sub index_locations {
649 267     267 1 111783 my $self = shift;
650 267         898 my @tokens = $self->tokens;
651              
652             # Whenever we hit a heredoc we will need to increment by
653             # the number of lines in its content section when we
654             # encounter the next token with a newline in it.
655 267         1016 my $heredoc = 0;
656              
657             # Find the first Token without a location
658 267         675 my ($first, $location) = ();
659 267         955 foreach ( 0 .. $#tokens ) {
660 287         544 my $Token = $tokens[$_];
661 287 100       806 if ($Token->{_location}) {
662 20         19 $location = $Token->{_location};
663 20         18 next;
664             }
665              
666             # Found the first Token without a location
667             # Calculate the new location if needed.
668             $location =
669 267 100       972 $_
670             ? $self->_add_location( $location, $tokens[ $_ - 1 ], \$heredoc )
671             : $self->_default_location;
672 267         7115 $first = $_;
673 267         573 last;
674             }
675              
676             # Calculate locations for the rest
677 267 50       691 if ( defined $first ) {
678 267         660 foreach ( $first .. $#tokens ) {
679 69059         77741 my $Token = $tokens[$_];
680 69059         85319 $Token->{_location} = $location;
681 69059         81487 $location = $self->_add_location( $location, $Token, \$heredoc );
682              
683             # Add any here-doc lines to the counter
684 69059 100       150643 if ( $Token->isa('PPI::Token::HereDoc') ) {
685 34         86 $heredoc += $Token->heredoc + 1;
686             }
687             }
688             }
689              
690 267         5815 1;
691             }
692              
693             sub _default_location {
694 269     269   488 my ($self) = @_;
695 269 50       1578 my $logical_file = $self->can('filename') ? $self->filename : undef;
696 269         920 return [ 1, 1, 1, 1, $logical_file ];
697             }
698              
699             sub location {
700 3     3 1 518 my ($self) = @_;
701 3   33     12 return $self->SUPER::location || $self->_default_location;
702             }
703              
704             sub _add_location {
705 69060     69060   80193 my ($self, $start, $Token, $heredoc) = @_;
706 69060         85864 my $content = $Token->{content};
707              
708             # Does the content contain any newlines
709 69060         86444 my $newlines =()= $content =~ /\n/g;
710 69060         83861 my ($logical_line, $logical_file) =
711             $self->_logical_line_and_file($start, $Token, $newlines);
712              
713 69060 100       87456 unless ( $newlines ) {
714             # Handle the simple case
715             return [
716 57866         77297 $start->[LOCATION_LINE],
717             $start->[LOCATION_CHARACTER] + length($content),
718             $start->[LOCATION_COLUMN]
719             + $self->_visual_length(
720             $content,
721             $start->[LOCATION_COLUMN]
722             ),
723             $logical_line,
724             $logical_file,
725             ];
726             }
727              
728             # This is the more complex case where we hit or
729             # span a newline boundary.
730 11194         10749 my $physical_line = $start->[LOCATION_LINE] + $newlines;
731 11194         18277 my $location = [ $physical_line, 1, 1, $logical_line, $logical_file ];
732 11194 100 66     24180 if ( $heredoc and $$heredoc ) {
733 31         43 $location->[LOCATION_LINE] += $$heredoc;
734 31         45 $location->[LOCATION_LOGICAL_LINE] += $$heredoc;
735 31         37 $$heredoc = 0;
736             }
737              
738             # Does the token have additional characters
739             # after their last newline.
740 11194 100       24086 if ( $content =~ /\n([^\n]+?)\z/ ) {
741 309         698 $location->[LOCATION_CHARACTER] += length($1);
742 309         529 $location->[LOCATION_COLUMN] +=
743             $self->_visual_length(
744             $1, $location->[LOCATION_COLUMN],
745             );
746             }
747              
748 11194         14544 $location;
749             }
750              
751             sub _logical_line_and_file {
752 69060     69060   76604 my ($self, $start, $Token, $newlines) = @_;
753              
754             # Regex taken from perlsyn, with the correction that there's no space
755             # required between the line number and the file name.
756 69060 100       86428 if ($start->[LOCATION_CHARACTER] == 1) {
757 10911 100       31273 if ( $Token->isa('PPI::Token::Comment') ) {
    100          
758 1764 100       3405 if (
759             $Token->content =~ m<
760             \A
761             \# \s*
762             line \s+
763             (\d+) \s*
764             (?: (\"?) ([^\"]* [^\s\"]) \2 )?
765             \s*
766             \z
767             >xms
768             ) {
769 13   66     99 return $1, ($3 || $start->[LOCATION_LOGICAL_FILE]);
770             }
771             }
772             elsif ( $Token->isa('PPI::Token::Pod') ) {
773 351         904 my $content = $Token->content;
774 351         421 my $line;
775 351         421 my $file = $start->[LOCATION_LOGICAL_FILE];
776 351         361 my $end_of_directive;
777 351         1370 while (
778             $content =~ m<
779             ^
780             \# \s*?
781             line \s+?
782             (\d+) (?: (?! \n) \s)*
783             (?: (\"?) ([^\"]*? [^\s\"]) \2 )??
784             \s*?
785             $
786             >xmsg
787             ) {
788 6   66     28 ($line, $file) = ($1, ( $3 || $file ) );
789 6         11 $end_of_directive = pos $content;
790             }
791              
792 351 100       733 if (defined $line) {
793 6         9 pos $content = $end_of_directive;
794 6         20 my $post_directive_newlines =()= $content =~ m< \G [^\n]* \n >xmsg;
795 6         15 return $line + $post_directive_newlines - 1, $file;
796             }
797             }
798             }
799              
800             return
801 69041         99670 $start->[LOCATION_LOGICAL_LINE] + $newlines,
802             $start->[LOCATION_LOGICAL_FILE];
803             }
804              
805             sub _visual_length {
806 58175     58175   64546 my ($self, $content, $pos) = @_;
807              
808 58175         64413 my $tab_width = $self->tab_width;
809 58175         57921 my ($length, $vis_inc);
810              
811 58175 100       142677 return length $content if $content !~ /\t/;
812              
813             # Split the content in tab and non-tab parts and calculate the
814             # "visual increase" of each part.
815 4397         12493 for my $part ( split(/(\t)/, $content) ) {
816 16092 100       17307 if ($part eq "\t") {
817 8007         7932 $vis_inc = $tab_width - ($pos-1) % $tab_width;
818             }
819             else {
820 8085         6945 $vis_inc = length $part;
821             }
822 16092         14162 $length += $vis_inc;
823 16092         15026 $pos += $vis_inc;
824             }
825              
826 4397         10343 $length;
827             }
828              
829             =pod
830              
831             =head2 flush_locations
832              
833             When no longer needed, the C method clears all location data
834             from the tokens.
835              
836             =cut
837              
838             sub flush_locations {
839 1     1 1 422 shift->_flush_locations(@_);
840             }
841              
842             =pod
843              
844             =head2 normalized
845              
846             The C method is used to generate a "Layer 1"
847             L object for the current Document.
848              
849             A "normalized" Perl Document is an arbitrary structure that removes any
850             irrelevant parts of the document and refactors out variations in style,
851             to attempt to approach something that is closer to the "true meaning"
852             of the Document.
853              
854             See L for more information on document normalization and
855             the tasks for which it is useful.
856              
857             Returns a L object, or C on error.
858              
859             =cut
860              
861             sub normalized {
862             # The normalization process will utterly destroy and mangle
863             # anything passed to it, so we are going to only give it a
864             # clone of ourselves.
865 4     4 1 2227 PPI::Normal->process( $_[0]->clone );
866             }
867              
868             =pod
869              
870             =head1 complete
871              
872             The C method is used to determine if a document is cleanly
873             structured, all braces are closed, the final statement is
874             fully terminated and all heredocs are fully entered.
875              
876             Returns true if the document is complete or false if not.
877              
878             =cut
879              
880             sub complete {
881 2     2 0 2066 my $self = shift;
882              
883             # Every structure has to be complete
884             $self->find_any( sub {
885 15 50   15   52 $_[1]->isa('PPI::Structure')
886             and
887             ! $_[1]->complete
888             } )
889 2 50       14 and return '';
890              
891             # Strip anything that isn't a statement off the end
892 2         11 my @child = $self->children;
893 2   66     10 while ( @child and not $child[-1]->isa('PPI::Statement') ) {
894 2         7 pop @child;
895             }
896              
897             # We must have at least one statement
898 2 50       3 return '' unless @child;
899              
900             # Check the completeness of the last statement
901 2         9 return $child[-1]->_complete;
902             }
903              
904              
905              
906              
907              
908             #####################################################################
909             # PPI::Node Methods
910              
911             # We are a scope boundary
912             ### XS -> PPI/XS.xs:_PPI_Document__scope 0.903+
913             sub scope() { 1 }
914              
915              
916              
917              
918              
919             #####################################################################
920             # PPI::Element Methods
921              
922             sub insert_before {
923 0     0 1 0 return undef;
924             # die "Cannot insert_before a PPI::Document";
925             }
926              
927             sub insert_after {
928 0     0 1 0 return undef;
929             # die "Cannot insert_after a PPI::Document";
930             }
931              
932             sub replace {
933 0     0 1 0 return undef;
934             # die "Cannot replace a PPI::Document";
935             }
936              
937              
938              
939              
940              
941             #####################################################################
942             # Error Handling
943              
944             # Set the error message
945             sub _error {
946 3     3   6 $errstr = $_[1];
947 3         12 undef;
948             }
949              
950             # Clear the error message.
951             # Returns the object as a convenience.
952             sub _clear {
953 16785     16785   49482 $errstr = '';
954 16785         23648 $_[0];
955             }
956              
957             =pod
958              
959             =head2 errstr
960              
961             For error that occur when loading and saving documents, you can use
962             C, as either a static or object method, to access the error message.
963              
964             If a Document loads or saves without error, C will return false.
965              
966             =cut
967              
968             sub errstr {
969 16786     16786 1 79694 $errstr;
970             }
971              
972              
973              
974              
975              
976             #####################################################################
977             # Native Storable Support
978              
979             sub STORABLE_freeze {
980 4     4 0 2367 my $self = shift;
981 4         9 my $class = ref $self;
982 4         31 my %hash = %$self;
983 4         713 return ($class, \%hash);
984             }
985              
986             sub STORABLE_thaw {
987 6     6 0 615 my ($self, undef, $class, $hash) = @_;
988 6         14 bless $self, $class;
989 6         25 foreach ( keys %$hash ) {
990 42         86 $self->{$_} = delete $hash->{$_};
991             }
992 6         38 $self->__link_children;
993             }
994              
995             1;
996              
997             =pod
998              
999             =head1 TO DO
1000              
1001             - May need to overload some methods to forcefully prevent Document
1002             objects becoming children of another Node.
1003              
1004             =head1 SUPPORT
1005              
1006             See the L in the main module.
1007              
1008             =head1 AUTHOR
1009              
1010             Adam Kennedy Eadamk@cpan.orgE
1011              
1012             =head1 SEE ALSO
1013              
1014             L, L
1015              
1016             =head1 COPYRIGHT
1017              
1018             Copyright 2001 - 2011 Adam Kennedy.
1019              
1020             This program is free software; you can redistribute
1021             it and/or modify it under the same terms as Perl itself.
1022              
1023             The full text of the license can be found in the
1024             LICENSE file included with this module.
1025              
1026             =cut