File Coverage

blib/lib/Pod/Simple/PullParser.pm
Criterion Covered Total %
statement 209 232 90.0
branch 76 102 74.5
condition 42 67 62.6
subroutine 26 32 81.2
pod 13 17 76.4
total 366 450 81.3


line stmt bran cond sub pod time code
1             package Pod::Simple::PullParser;
2 10     10   168269 use strict;
  10         17  
  10         618  
3             our $VERSION = '3.48';
4 10     10   3970 use Pod::Simple ();
  10         23  
  10         411  
5 10     10   316 BEGIN {our @ISA = ('Pod::Simple')}
6              
7 10     10   46 use Carp ();
  10         14  
  10         156  
8              
9 10     10   4165 use Pod::Simple::PullParserStartToken;
  10         23  
  10         262  
10 10     10   3905 use Pod::Simple::PullParserEndToken;
  10         23  
  10         305  
11 10     10   3925 use Pod::Simple::PullParserTextToken;
  10         23  
  10         439  
12              
13 10 50   10   1740 BEGIN { *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG }
14              
15             __PACKAGE__->_accessorize(
16             'source_fh', # the filehandle we're reading from
17             'source_scalar_ref', # the scalarref we're reading from
18             'source_arrayref', # the arrayref we're reading from
19             );
20              
21             #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
22             #
23             # And here is how we implement a pull-parser on top of a push-parser...
24              
25             sub filter {
26 0     0 1 0 my($self, $source) = @_;
27 0 0       0 $self = $self->new unless ref $self;
28              
29 0 0       0 $source = *STDIN{IO} unless defined $source;
30 0         0 $self->set_source($source);
31 0         0 $self->output_fh(*STDOUT{IO});
32              
33 0         0 $self->run; # define run() in a subclass if you want to use filter()!
34 0         0 return $self;
35             }
36              
37             # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
38              
39             sub parse_string_document {
40 50     50 1 69 my $this = shift;
41 50         156 $this->set_source(\ $_[0]);
42 50         115 $this->run;
43             }
44              
45             sub parse_file {
46 13     13 1 22 my($this, $filename) = @_;
47 13         41 $this->set_source($filename);
48 13         38 $this->run;
49             }
50              
51             # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
52             # In case anyone tries to use them:
53              
54             sub run {
55 10     10   65 use Carp ();
  10         17  
  10         871  
56 0 0 0 0 0 0 if( __PACKAGE__ eq (ref($_[0]) || $_[0])) { # I'm not being subclassed!
57 0         0 Carp::croak "You can call run() only on subclasses of "
58             . __PACKAGE__;
59             } else {
60 0   0     0 Carp::croak join '',
61             "You can't call run() because ",
62             ref($_[0]) || $_[0], " didn't define a run() method";
63             }
64             }
65              
66             sub parse_lines {
67 10     10   42 use Carp ();
  10         14  
  10         338  
68 0     0 1 0 Carp::croak "Use set_source with ", __PACKAGE__,
69             " and subclasses, not parse_lines";
70             }
71              
72             sub parse_line {
73 10     10   67 use Carp ();
  10         14  
  10         27356  
74 0     0 0 0 Carp::croak "Use set_source with ", __PACKAGE__,
75             " and subclasses, not parse_line";
76             }
77              
78             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
79              
80             sub new {
81 90     90 1 277348 my $class = shift;
82 90         297 my $self = $class->SUPER::new(@_);
83 90 50       177 die "Couldn't construct for $class" unless $self;
84              
85 90   50     310 $self->{'token_buffer'} ||= [];
86 90   50     363 $self->{'start_token_class'} ||= 'Pod::Simple::PullParserStartToken';
87 90   50     282 $self->{'text_token_class'} ||= 'Pod::Simple::PullParserTextToken';
88 90   50     251 $self->{'end_token_class'} ||= 'Pod::Simple::PullParserEndToken';
89              
90 90         87 DEBUG > 1 and print STDERR "New pullparser object: $self\n";
91              
92 90         172 return $self;
93             }
94              
95             # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
96              
97             sub get_token {
98 1406     1406 1 2347 my $self = shift;
99 1406         1273 DEBUG > 1 and print STDERR "\nget_token starting up on $self.\n";
100             DEBUG > 2 and print STDERR " Items in token-buffer (",
101             scalar( @{ $self->{'token_buffer'} } ) ,
102             ") :\n", map(
103             " " . $_->dump . "\n", @{ $self->{'token_buffer'} }
104             ),
105 1406         1196 @{ $self->{'token_buffer'} } ? '' : ' (no tokens)',
106             "\n"
107             ;
108              
109 1406         1279 until( @{ $self->{'token_buffer'} } ) {
  1999         3033  
110 593         614 DEBUG > 3 and print STDERR "I need to get something into my empty token buffer...\n";
111 593 100       1320 if($self->{'source_dead'}) {
    100          
    100          
    50          
112 80         81 DEBUG and print STDERR "$self 's source is dead.\n";
113 80         85 push @{ $self->{'token_buffer'} }, undef;
  80         131  
114             } elsif(exists $self->{'source_fh'}) {
115 23         27 my @lines;
116 23   33     101 my $fh = $self->{'source_fh'}
117             || Carp::croak('You have to call set_source before you can call get_token');
118              
119 23         25 DEBUG and print STDERR "$self 's source is filehandle $fh.\n";
120             # Read those many lines at a time
121 23         48 for(my $i = Pod::Simple::MANY_LINES; $i--;) {
122 298         252 DEBUG > 3 and print STDERR " Fetching a line from source filehandle $fh...\n";
123 298         493 local $/ = $Pod::Simple::NL;
124 298         892 push @lines, scalar(<$fh>); # readline
125 298         312 DEBUG > 3 and print STDERR " Line is: ",
126             defined($lines[-1]) ? $lines[-1] : "\n";
127 298 100       586 unless( defined $lines[-1] ) {
128 17         16 DEBUG and print STDERR "That's it for that source fh! Killing.\n";
129 17         30 delete $self->{'source_fh'}; # so it can be GC'd
130 17         39 last;
131             }
132             # but pass thru the undef, which will set source_dead to true
133              
134             # TODO: look to see if $lines[-1] is =encoding, and if so,
135             # do horribly magic things
136              
137             }
138              
139 23         21 if(DEBUG > 8) {
140             print STDERR "* I've gotten ", scalar(@lines), " lines:\n";
141             foreach my $l (@lines) {
142             if(defined $l) {
143             print STDERR " line {$l}\n";
144             } else {
145             print STDERR " line undef\n";
146             }
147             }
148             print STDERR "* end of ", scalar(@lines), " lines\n";
149             }
150              
151 23         134 $self->SUPER::parse_lines(@lines);
152              
153             } elsif(exists $self->{'source_arrayref'}) {
154             DEBUG and print STDERR "$self 's source is arrayref $self->{'source_arrayref'}, with ",
155 2         2 scalar(@{$self->{'source_arrayref'}}), " items left in it.\n";
156              
157 2         2 DEBUG > 3 and print STDERR " Fetching ", Pod::Simple::MANY_LINES, " lines.\n";
158             $self->SUPER::parse_lines(
159 2         3 splice @{ $self->{'source_arrayref'} },
  2         10  
160             0,
161             Pod::Simple::MANY_LINES
162             );
163 2 50       4 unless( @{ $self->{'source_arrayref'} } ) {
  2         6  
164 2         3 DEBUG and print STDERR "That's it for that source arrayref! Killing.\n";
165 2         5 $self->SUPER::parse_lines(undef);
166 2         4 delete $self->{'source_arrayref'}; # so it can be GC'd
167             }
168             # to make sure that an undef is always sent to signal end-of-stream
169              
170             } elsif(exists $self->{'source_scalar_ref'}) {
171              
172             DEBUG and print STDERR "$self 's source is scalarref $self->{'source_scalar_ref'}, with ",
173             length(${ $self->{'source_scalar_ref'} }) -
174 488         457 (pos(${ $self->{'source_scalar_ref'} }) || 0),
175             " characters left to parse.\n";
176              
177 488         457 DEBUG > 3 and print STDERR " Fetching a line from source-string...\n";
178 488 100       470 if( ${ $self->{'source_scalar_ref'} } =~
  488         1791  
179             m/([^\n\r]*)((?:\r?\n)?)/g
180             ) {
181             #print(">> $1\n"),
182             $self->SUPER::parse_lines($1)
183             if length($1) or length($2)
184 57         85 or pos( ${ $self->{'source_scalar_ref'} })
185 431 100 100     1673 != length( ${ $self->{'source_scalar_ref'} });
  57   66     137  
186             # I.e., unless it's a zero-length "empty line" at the very
187             # end of "foo\nbar\n" (i.e., between the \n and the EOS).
188             } else { # that's the end. Byebye
189 57         157 $self->SUPER::parse_lines(undef);
190 57         114 delete $self->{'source_scalar_ref'};
191 57         72 DEBUG and print STDERR "That's it for that source scalarref! Killing.\n";
192             }
193              
194              
195             } else {
196 0         0 die "What source??";
197             }
198             }
199             DEBUG and print STDERR "get_token about to return ",
200             Pod::Simple::pretty( @{$self->{'token_buffer'}}
201 1406         1344 ? $self->{'token_buffer'}[-1] : undef
202             ), "\n";
203 1406         1310 return shift @{$self->{'token_buffer'}}; # that's an undef if empty
  1406         3112  
204             }
205              
206             sub unget_token {
207 96     96 1 113 my $self = shift;
208 96         87 DEBUG and print STDERR "Ungetting ", scalar(@_), " tokens: ",
209             @_ ? "@_\n" : "().\n";
210 96         129 foreach my $t (@_) {
211 656 50       795 Carp::croak "Can't unget that, because it's not a token -- it's undef!"
212             unless defined $t;
213 656 50       796 Carp::croak "Can't unget $t, because it's not a token -- it's a string!"
214             unless ref $t;
215 656 50       1087 Carp::croak "Can't unget $t, because it's not a token object!"
216             unless UNIVERSAL::can($t, 'type');
217             }
218              
219 96         103 unshift @{$self->{'token_buffer'}}, @_;
  96         185  
220             DEBUG > 1 and print STDERR "Token buffer now has ",
221 96         101 scalar(@{$self->{'token_buffer'}}), " items in it.\n";
222 96         159 return;
223             }
224              
225             #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
226              
227             # $self->{'source_filename'} = $source;
228              
229             sub set_source {
230 91     91 1 1002562 my $self = shift @_;
231 91 50       164 return $self->{'source_fh'} unless @_;
232             Carp::croak("Cannot assign new source to pull parser; create a new instance, instead")
233 91 100 66     578 if $self->{'source_fh'} || $self->{'source_scalar_ref'} || $self->{'source_arrayref'};
      66        
234 90         171 my $handle;
235 90 50       422 if(!defined $_[0]) {
    100          
    100          
    100          
    100          
    50          
236 0         0 Carp::croak("Can't use empty-string as a source for set_source");
237             } elsif(ref(\( $_[0] )) eq 'GLOB') {
238 1         7 $self->{'source_filename'} = '' . ($handle = $_[0]);
239 1         2 DEBUG and print STDERR "$self 's source is glob $_[0]\n";
240             # and fall thru
241             } elsif(ref( $_[0] ) eq 'SCALAR') {
242 71         116 $self->{'source_scalar_ref'} = $_[0];
243 71         66 DEBUG and print STDERR "$self 's source is scalar ref $_[0]\n";
244 71         116 return;
245             } elsif(ref( $_[0] ) eq 'ARRAY') {
246 2         3 $self->{'source_arrayref'} = $_[0];
247 2         4 DEBUG and print STDERR "$self 's source is array ref $_[0]\n";
248 2         3 return;
249             } elsif(ref $_[0]) {
250 2         9 $self->{'source_filename'} = '' . ($handle = $_[0]);
251 2         2 DEBUG and print STDERR "$self 's source is fh-obj $_[0]\n";
252             } elsif(!length $_[0]) {
253 0         0 Carp::croak("Can't use empty-string as a source for set_source");
254             } else { # It's a filename!
255 14         15 DEBUG and print STDERR "$self 's source is filename $_[0]\n";
256             {
257 14         16 local *PODSOURCE;
  14         36  
258 14 50       550 open(PODSOURCE, "<$_[0]") || Carp::croak "Can't open $_[0]: $!";
259 14         50 $handle = *PODSOURCE{IO};
260             }
261 14         38 $self->{'source_filename'} = $_[0];
262 14         17 DEBUG and print STDERR " Its name is $_[0].\n";
263              
264             # TODO: file-discipline things here!
265             }
266              
267 17         34 $self->{'source_fh'} = $handle;
268 17         33 DEBUG and print STDERR " Its handle is $handle\n";
269 17         32 return 1;
270             }
271              
272             # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
273              
274 0     0 0 0 sub get_title_short { shift->get_short_title(@_) } # alias
275              
276             sub get_short_title {
277 21     21 1 80 my $title = shift->get_title(@_);
278 21 100       99 $title = $1 if $title =~ m/^(\S{1,60})\s+--?\s+./s;
279             # turn "Foo::Bar -- bars for your foo" into "Foo::Bar"
280 21         46 return $title;
281             }
282              
283             sub get_title { shift->_get_titled_section(
284 39     39 1 736 'NAME', max_token => 50, desperate => 1, @_)
285             }
286             sub get_version { shift->_get_titled_section(
287 3     3 1 12 'VERSION',
288             max_token => 400,
289             accept_verbatim => 1,
290             max_content_length => 3_000,
291             @_,
292             );
293             }
294             sub get_description { shift->_get_titled_section(
295 7     7 1 657 'DESCRIPTION',
296             max_token => 400,
297             max_content_length => 3_000,
298             @_,
299             ) }
300              
301 0     0 0 0 sub get_authors { shift->get_author(@_) } # a harmless alias
302              
303             sub get_author {
304 2     2 1 560 my $this = shift;
305             # Max_token is so high because these are
306             # typically at the end of the document:
307 2 100       6 $this->_get_titled_section('AUTHOR' , max_token => 10_000, @_) ||
308             $this->_get_titled_section('AUTHORS', max_token => 10_000, @_);
309             }
310              
311             #--------------------------------------------------------------------------
312              
313             sub _get_titled_section {
314             # Based on a get_title originally contributed by Graham Barr
315 52     52   149 my($self, $titlename, %options) = (@_);
316              
317 52         97 my $max_token = delete $options{'max_token'};
318 52         69 my $desperate_for_title = delete $options{'desperate'};
319 52         93 my $accept_verbatim = delete $options{'accept_verbatim'};
320 52         84 my $max_content_length = delete $options{'max_content_length'};
321 52         57 my $nocase = delete $options{'nocase'};
322 52 100       100 $max_content_length = 120 unless defined $max_content_length;
323              
324 52 0       100 Carp::croak( "Unknown " . ((1 == keys %options) ? "option: " : "options: ")
    50          
325             . join " ", map "[$_]", sort keys %options
326             )
327             if keys %options;
328              
329 52         52 my %content_containers;
330 52         73 $content_containers{'Para'} = 1;
331 52 100       97 if($accept_verbatim) {
332 3         4 $content_containers{'Verbatim'} = 1;
333 3         5 $content_containers{'VerbatimFormatted'} = 1;
334             }
335              
336 52         63 my $token_count = 0;
337 52         58 my $title;
338             my @to_unget;
339 52         54 my $state = 0;
340 52         63 my $depth = 0;
341              
342 52 50 33     368 Carp::croak "What kind of titlename is \"$titlename\"?!" unless
343             defined $titlename and $titlename =~ m/^[A-Z ]{1,60}$/s; #sanity
344 52         109 my $titlename_re = quotemeta($titlename);
345              
346 52         96 my $head1_text_content;
347             my $para_text_content;
348 52         0 my $skipX;
349              
350 52   50     171 while(
      66        
351             ++$token_count <= ($max_token || 1_000_000)
352             and defined(my $token = $self->get_token)
353             ) {
354 563         605 push @to_unget, $token;
355              
356 563 100       799 if ($state == 0) { # seeking =head1
    100          
    100          
    50          
357 276 100 100     466 if( $token->is_start and $token->tagname eq 'head1' ) {
358 62         59 DEBUG and print STDERR " Found head1. Seeking content...\n";
359 62         61 ++$state;
360 62         151 $head1_text_content = '';
361             }
362             }
363              
364             elsif($state == 1) { # accumulating text until end of head1
365 131 100 100     224 if( $token->is_text ) {
    100          
    100          
366 65 100       89 unless ($skipX) {
367 64         72 DEBUG and print STDERR " Adding \"", $token->text, "\" to head1-content.\n";
368 64         122 $head1_text_content .= $token->text;
369             }
370             } elsif( $token->is_tagname('X') ) {
371             # We're going to want to ignore X<> stuff.
372 2         5 $skipX = $token->is_start;
373 2         5 DEBUG and print STDERR +($skipX ? 'Start' : 'End'), 'ing ignoring of X<> tag';
374             } elsif( $token->is_end and $token->tagname eq 'head1' ) {
375 62         64 DEBUG and print STDERR " Found end of head1. Considering content...\n";
376 62 100       106 $head1_text_content = uc $head1_text_content if $nocase;
377 62 50 100     576 if($head1_text_content eq $titlename
    100 66        
    100 100        
      100        
      66        
378             or $head1_text_content =~ m/\($titlename_re\)/s
379             # We accept "=head1 Nomen Modularis (NAME)" for sake of i18n
380             ) {
381 36         58 DEBUG and print STDERR " Yup, it was $titlename. Seeking next para-content...\n";
382 36         89 ++$state;
383             } elsif(
384             $desperate_for_title
385             # if we're so desperate we'll take the first
386             # =head1's content as a title
387             and $head1_text_content =~ m/\S/
388             and $head1_text_content !~ m/^[ A-Z]+$/s
389             and $head1_text_content !~
390             m/\((?:
391             NAME | TITLE | VERSION | AUTHORS? | DESCRIPTION | SYNOPSIS
392             | COPYRIGHT | LICENSE | NOTES? | FUNCTIONS? | METHODS?
393             | CAVEATS? | BUGS? | SEE\ ALSO | SWITCHES | ENVIRONMENT
394             )\)/sx
395             # avoid accepting things like =head1 Thingy Thongy (DESCRIPTION)
396             and ($max_content_length
397             ? (length($head1_text_content) <= $max_content_length) # sanity
398             : 1)
399             ) {
400             # Looks good; trim it
401 6         20 ($title = $head1_text_content) =~ s/\s+$//;
402 6         10 DEBUG and print STDERR " It looks titular: \"$title\".\n\n Using that.\n";
403 6         11 last;
404             } else {
405 20         28 --$state;
406 20         47 DEBUG and print STDERR " Didn't look titular ($head1_text_content).\n",
407             "\n Dropping back to seeking-head1-content mode...\n";
408             }
409             }
410             }
411              
412             elsif($state == 2) {
413             # seeking start of para (which must immediately follow)
414 36 50 33     68 if($token->is_start and $content_containers{ $token->tagname }) {
415 36         34 DEBUG and print STDERR " Found start of Para. Accumulating content...\n";
416 36         52 $para_text_content = '';
417 36         88 ++$state;
418             } else {
419 0         0 DEBUG and print
420             " Didn't see an immediately subsequent start-Para. Reseeking H1\n";
421 0         0 $state = 0;
422             }
423             }
424              
425             elsif($state == 3) {
426             # accumulating text until end of Para
427 120 100 100     157 if( $token->is_text ) {
    100          
428 60         82 DEBUG and print STDERR " Adding \"", $token->text, "\" to para-content.\n";
429 60         95 $para_text_content .= $token->text;
430             # and keep looking
431              
432             } elsif( $token->is_end and $content_containers{ $token->tagname } ) {
433 36         38 DEBUG and print STDERR " Found end of Para. Considering content: ",
434             $para_text_content, "\n";
435              
436 36 50 33     210 if( $para_text_content =~ m/\S/
    50          
437             and ($max_content_length
438             ? (length($para_text_content) <= $max_content_length)
439             : 1)
440             ) {
441             # Some minimal sanity constraints, I think.
442 36         40 DEBUG and print STDERR " It looks contentworthy, I guess. Using it.\n";
443 36         44 $title = $para_text_content;
444 36         66 last;
445             } else {
446 0         0 DEBUG and print STDERR " Doesn't look at all contentworthy!\n Giving up.\n";
447 0         0 undef $title;
448 0         0 last;
449             }
450             }
451             }
452              
453             else {
454 0         0 die "IMPOSSIBLE STATE $state!\n"; # should never happen
455             }
456              
457             }
458              
459             # Put it all back!
460 52         153 $self->unget_token(@to_unget);
461              
462 52         127 if(DEBUG) {
463             if(defined $title) { print STDERR " Returning title <$title>\n" }
464             else { print STDERR "Returning title <>\n" }
465             }
466              
467 52 100       124 return '' unless defined $title;
468 42         86 $title =~ s/^\s+//;
469 42         207 return $title;
470             }
471              
472             #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
473             #
474             # Methods that actually do work at parse-time:
475              
476             sub _handle_element_start {
477 349     349   433 my $self = shift; # leaving ($element_name, $attr_hash_r)
478 349         315 DEBUG > 2 and print STDERR "++ $_[0] (", map("<$_> ", %{$_[1]}), ")\n";
479              
480 349         1233 push @{ $self->{'token_buffer'} },
481 349         325 $self->{'start_token_class'}->new(@_);
482 349         517 return;
483             }
484              
485             sub _handle_text {
486 244     244   250 my $self = shift; # leaving ($text)
487 244         241 DEBUG > 2 and print STDERR "== $_[0]\n";
488 244         757 push @{ $self->{'token_buffer'} },
489 244         243 $self->{'text_token_class'}->new(@_);
490 244         498 return;
491             }
492              
493             sub _handle_element_end {
494 335     335   343 my $self = shift; # leaving ($element_name);
495 335         295 DEBUG > 2 and print STDERR "-- $_[0]\n";
496 335         861 push @{ $self->{'token_buffer'} },
497 335         323 $self->{'end_token_class'}->new(@_);
498 335         450 return;
499             }
500              
501             #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
502              
503             1;
504              
505              
506             __END__