File Coverage

blib/lib/Pod/Simple/PullParser.pm
Criterion Covered Total %
statement 209 232 90.0
branch 76 102 74.5
condition 42 67 62.6
subroutine 26 32 81.2
pod 13 17 76.4
total 366 450 81.3


line stmt bran cond sub pod time code
1             package Pod::Simple::PullParser;
2 10     10   243130 use strict;
  10         18  
  10         516  
3             our $VERSION = '3.47';
4 10     10   4273 use Pod::Simple ();
  10         28  
  10         452  
5 10     10   372 BEGIN {our @ISA = ('Pod::Simple')}
6              
7 10     10   88 use Carp ();
  10         17  
  10         224  
8              
9 10     10   5808 use Pod::Simple::PullParserStartToken;
  10         28  
  10         335  
10 10     10   5049 use Pod::Simple::PullParserEndToken;
  10         31  
  10         349  
11 10     10   5133 use Pod::Simple::PullParserTextToken;
  10         28  
  10         480  
12              
13 10 50   10   2378 BEGIN { *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG }
14              
15             __PACKAGE__->_accessorize(
16             'source_fh', # the filehandle we're reading from
17             'source_scalar_ref', # the scalarref we're reading from
18             'source_arrayref', # the arrayref we're reading from
19             );
20              
21             #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
22             #
23             # And here is how we implement a pull-parser on top of a push-parser...
24              
25             sub filter {
26 0     0 1 0 my($self, $source) = @_;
27 0 0       0 $self = $self->new unless ref $self;
28              
29 0 0       0 $source = *STDIN{IO} unless defined $source;
30 0         0 $self->set_source($source);
31 0         0 $self->output_fh(*STDOUT{IO});
32              
33 0         0 $self->run; # define run() in a subclass if you want to use filter()!
34 0         0 return $self;
35             }
36              
37             # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
38              
39             sub parse_string_document {
40 50     50 1 104 my $this = shift;
41 50         177 $this->set_source(\ $_[0]);
42 50         148 $this->run;
43             }
44              
45             sub parse_file {
46 13     13 1 38 my($this, $filename) = @_;
47 13         65 $this->set_source($filename);
48 13         57 $this->run;
49             }
50              
51             # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
52             # In case anyone tries to use them:
53              
54             sub run {
55 10     10   80 use Carp ();
  10         41  
  10         1163  
56 0 0 0 0 0 0 if( __PACKAGE__ eq (ref($_[0]) || $_[0])) { # I'm not being subclassed!
57 0         0 Carp::croak "You can call run() only on subclasses of "
58             . __PACKAGE__;
59             } else {
60 0   0     0 Carp::croak join '',
61             "You can't call run() because ",
62             ref($_[0]) || $_[0], " didn't define a run() method";
63             }
64             }
65              
66             sub parse_lines {
67 10     10   62 use Carp ();
  10         19  
  10         457  
68 0     0 1 0 Carp::croak "Use set_source with ", __PACKAGE__,
69             " and subclasses, not parse_lines";
70             }
71              
72             sub parse_line {
73 10     10   43 use Carp ();
  10         18  
  10         35828  
74 0     0 0 0 Carp::croak "Use set_source with ", __PACKAGE__,
75             " and subclasses, not parse_line";
76             }
77              
78             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
79              
80             sub new {
81 90     90 1 483359 my $class = shift;
82 90         475 my $self = $class->SUPER::new(@_);
83 90 50       238 die "Couldn't construct for $class" unless $self;
84              
85 90   50     474 $self->{'token_buffer'} ||= [];
86 90   50     498 $self->{'start_token_class'} ||= 'Pod::Simple::PullParserStartToken';
87 90   50     425 $self->{'text_token_class'} ||= 'Pod::Simple::PullParserTextToken';
88 90   50     417 $self->{'end_token_class'} ||= 'Pod::Simple::PullParserEndToken';
89              
90 90         123 DEBUG > 1 and print STDERR "New pullparser object: $self\n";
91              
92 90         208 return $self;
93             }
94              
95             # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
96              
97             sub get_token {
98 1406     1406 1 3695 my $self = shift;
99 1406         2543 DEBUG > 1 and print STDERR "\nget_token starting up on $self.\n";
100             DEBUG > 2 and print STDERR " Items in token-buffer (",
101             scalar( @{ $self->{'token_buffer'} } ) ,
102             ") :\n", map(
103             " " . $_->dump . "\n", @{ $self->{'token_buffer'} }
104             ),
105 1406         1838 @{ $self->{'token_buffer'} } ? '' : ' (no tokens)',
106             "\n"
107             ;
108              
109 1406         1963 until( @{ $self->{'token_buffer'} } ) {
  1999         4462  
110 593         903 DEBUG > 3 and print STDERR "I need to get something into my empty token buffer...\n";
111 593 100       1753 if($self->{'source_dead'}) {
    100          
    100          
    50          
112 80         99 DEBUG and print STDERR "$self 's source is dead.\n";
113 80         118 push @{ $self->{'token_buffer'} }, undef;
  80         175  
114             } elsif(exists $self->{'source_fh'}) {
115 23         37 my @lines;
116 23   33     129 my $fh = $self->{'source_fh'}
117             || Carp::croak('You have to call set_source before you can call get_token');
118              
119 23         41 DEBUG and print STDERR "$self 's source is filehandle $fh.\n";
120             # Read those many lines at a time
121 23         71 for(my $i = Pod::Simple::MANY_LINES; $i--;) {
122 298         360 DEBUG > 3 and print STDERR " Fetching a line from source filehandle $fh...\n";
123 298         800 local $/ = $Pod::Simple::NL;
124 298         1368 push @lines, scalar(<$fh>); # readline
125 298         483 DEBUG > 3 and print STDERR " Line is: ",
126             defined($lines[-1]) ? $lines[-1] : "\n";
127 298 100       891 unless( defined $lines[-1] ) {
128 17         45 DEBUG and print STDERR "That's it for that source fh! Killing.\n";
129 17         59 delete $self->{'source_fh'}; # so it can be GC'd
130 17         131 last;
131             }
132             # but pass thru the undef, which will set source_dead to true
133              
134             # TODO: look to see if $lines[-1] is =encoding, and if so,
135             # do horribly magic things
136              
137             }
138              
139 23         39 if(DEBUG > 8) {
140             print STDERR "* I've gotten ", scalar(@lines), " lines:\n";
141             foreach my $l (@lines) {
142             if(defined $l) {
143             print STDERR " line {$l}\n";
144             } else {
145             print STDERR " line undef\n";
146             }
147             }
148             print STDERR "* end of ", scalar(@lines), " lines\n";
149             }
150              
151 23         138 $self->SUPER::parse_lines(@lines);
152              
153             } elsif(exists $self->{'source_arrayref'}) {
154             DEBUG and print STDERR "$self 's source is arrayref $self->{'source_arrayref'}, with ",
155 2         5 scalar(@{$self->{'source_arrayref'}}), " items left in it.\n";
156              
157 2         5 DEBUG > 3 and print STDERR " Fetching ", Pod::Simple::MANY_LINES, " lines.\n";
158             $self->SUPER::parse_lines(
159 2         4 splice @{ $self->{'source_arrayref'} },
  2         40  
160             0,
161             Pod::Simple::MANY_LINES
162             );
163 2 50       5 unless( @{ $self->{'source_arrayref'} } ) {
  2         6  
164 2         2 DEBUG and print STDERR "That's it for that source arrayref! Killing.\n";
165 2         6 $self->SUPER::parse_lines(undef);
166 2         4 delete $self->{'source_arrayref'}; # so it can be GC'd
167             }
168             # to make sure that an undef is always sent to signal end-of-stream
169              
170             } elsif(exists $self->{'source_scalar_ref'}) {
171              
172             DEBUG and print STDERR "$self 's source is scalarref $self->{'source_scalar_ref'}, with ",
173             length(${ $self->{'source_scalar_ref'} }) -
174 488         558 (pos(${ $self->{'source_scalar_ref'} }) || 0),
175             " characters left to parse.\n";
176              
177 488         536 DEBUG > 3 and print STDERR " Fetching a line from source-string...\n";
178 488 100       571 if( ${ $self->{'source_scalar_ref'} } =~
  488         2365  
179             m/([^\n\r]*)((?:\r?\n)?)/g
180             ) {
181             #print(">> $1\n"),
182             $self->SUPER::parse_lines($1)
183             if length($1) or length($2)
184 57         126 or pos( ${ $self->{'source_scalar_ref'} })
185 431 100 100     2235 != length( ${ $self->{'source_scalar_ref'} });
  57   66     205  
186             # I.e., unless it's a zero-length "empty line" at the very
187             # end of "foo\nbar\n" (i.e., between the \n and the EOS).
188             } else { # that's the end. Byebye
189 57         165 $self->SUPER::parse_lines(undef);
190 57         130 delete $self->{'source_scalar_ref'};
191 57         98 DEBUG and print STDERR "That's it for that source scalarref! Killing.\n";
192             }
193              
194              
195             } else {
196 0         0 die "What source??";
197             }
198             }
199             DEBUG and print STDERR "get_token about to return ",
200             Pod::Simple::pretty( @{$self->{'token_buffer'}}
201 1406         1958 ? $self->{'token_buffer'}[-1] : undef
202             ), "\n";
203 1406         1906 return shift @{$self->{'token_buffer'}}; # that's an undef if empty
  1406         4833  
204             }
205              
206             sub unget_token {
207 96     96 1 158 my $self = shift;
208 96         163 DEBUG and print STDERR "Ungetting ", scalar(@_), " tokens: ",
209             @_ ? "@_\n" : "().\n";
210 96         294 foreach my $t (@_) {
211 656 50       1355 Carp::croak "Can't unget that, because it's not a token -- it's undef!"
212             unless defined $t;
213 656 50       1322 Carp::croak "Can't unget $t, because it's not a token -- it's a string!"
214             unless ref $t;
215 656 50       1991 Carp::croak "Can't unget $t, because it's not a token object!"
216             unless UNIVERSAL::can($t, 'type');
217             }
218              
219 96         183 unshift @{$self->{'token_buffer'}}, @_;
  96         367  
220             DEBUG > 1 and print STDERR "Token buffer now has ",
221 96         141 scalar(@{$self->{'token_buffer'}}), " items in it.\n";
222 96         319 return;
223             }
224              
225             #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
226              
227             # $self->{'source_filename'} = $source;
228              
229             sub set_source {
230 91     91 1 1004472 my $self = shift @_;
231 91 50       277 return $self->{'source_fh'} unless @_;
232             Carp::croak("Cannot assign new source to pull parser; create a new instance, instead")
233 91 100 66     869 if $self->{'source_fh'} || $self->{'source_scalar_ref'} || $self->{'source_arrayref'};
      66        
234 90         150 my $handle;
235 90 50       505 if(!defined $_[0]) {
    100          
    100          
    100          
    100          
    50          
236 0         0 Carp::croak("Can't use empty-string as a source for set_source");
237             } elsif(ref(\( $_[0] )) eq 'GLOB') {
238 1         8 $self->{'source_filename'} = '' . ($handle = $_[0]);
239 1         4 DEBUG and print STDERR "$self 's source is glob $_[0]\n";
240             # and fall thru
241             } elsif(ref( $_[0] ) eq 'SCALAR') {
242 71         206 $self->{'source_scalar_ref'} = $_[0];
243 71         160 DEBUG and print STDERR "$self 's source is scalar ref $_[0]\n";
244 71         156 return;
245             } elsif(ref( $_[0] ) eq 'ARRAY') {
246 2         6 $self->{'source_arrayref'} = $_[0];
247 2         5 DEBUG and print STDERR "$self 's source is array ref $_[0]\n";
248 2         6 return;
249             } elsif(ref $_[0]) {
250 2         13 $self->{'source_filename'} = '' . ($handle = $_[0]);
251 2         6 DEBUG and print STDERR "$self 's source is fh-obj $_[0]\n";
252             } elsif(!length $_[0]) {
253 0         0 Carp::croak("Can't use empty-string as a source for set_source");
254             } else { # It's a filename!
255 14         29 DEBUG and print STDERR "$self 's source is filename $_[0]\n";
256             {
257 14         25 local *PODSOURCE;
  14         43  
258 14 50       939 open(PODSOURCE, "<$_[0]") || Carp::croak "Can't open $_[0]: $!";
259 14         79 $handle = *PODSOURCE{IO};
260             }
261 14         56 $self->{'source_filename'} = $_[0];
262 14         26 DEBUG and print STDERR " Its name is $_[0].\n";
263              
264             # TODO: file-discipline things here!
265             }
266              
267 17         48 $self->{'source_fh'} = $handle;
268 17         61 DEBUG and print STDERR " Its handle is $handle\n";
269 17         48 return 1;
270             }
271              
272             # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
273              
274 0     0 0 0 sub get_title_short { shift->get_short_title(@_) } # alias
275              
276             sub get_short_title {
277 21     21 1 91 my $title = shift->get_title(@_);
278 21 100       940 $title = $1 if $title =~ m/^(\S{1,60})\s+--?\s+./s;
279             # turn "Foo::Bar -- bars for your foo" into "Foo::Bar"
280 21         81 return $title;
281             }
282              
283             sub get_title { shift->_get_titled_section(
284 39     39 1 1182 'NAME', max_token => 50, desperate => 1, @_)
285             }
286             sub get_version { shift->_get_titled_section(
287 3     3 1 22 'VERSION',
288             max_token => 400,
289             accept_verbatim => 1,
290             max_content_length => 3_000,
291             @_,
292             );
293             }
294             sub get_description { shift->_get_titled_section(
295 7     7 1 1071 'DESCRIPTION',
296             max_token => 400,
297             max_content_length => 3_000,
298             @_,
299             ) }
300              
301 0     0 0 0 sub get_authors { shift->get_author(@_) } # a harmless alias
302              
303             sub get_author {
304 2     2 1 1004 my $this = shift;
305             # Max_token is so high because these are
306             # typically at the end of the document:
307 2 100       8 $this->_get_titled_section('AUTHOR' , max_token => 10_000, @_) ||
308             $this->_get_titled_section('AUTHORS', max_token => 10_000, @_);
309             }
310              
311             #--------------------------------------------------------------------------
312              
313             sub _get_titled_section {
314             # Based on a get_title originally contributed by Graham Barr
315 52     52   263 my($self, $titlename, %options) = (@_);
316              
317 52         149 my $max_token = delete $options{'max_token'};
318 52         122 my $desperate_for_title = delete $options{'desperate'};
319 52         112 my $accept_verbatim = delete $options{'accept_verbatim'};
320 52         91 my $max_content_length = delete $options{'max_content_length'};
321 52         99 my $nocase = delete $options{'nocase'};
322 52 100       187 $max_content_length = 120 unless defined $max_content_length;
323              
324 52 0       162 Carp::croak( "Unknown " . ((1 == keys %options) ? "option: " : "options: ")
    50          
325             . join " ", map "[$_]", sort keys %options
326             )
327             if keys %options;
328              
329 52         85 my %content_containers;
330 52         110 $content_containers{'Para'} = 1;
331 52 100       130 if($accept_verbatim) {
332 3         9 $content_containers{'Verbatim'} = 1;
333 3         8 $content_containers{'VerbatimFormatted'} = 1;
334             }
335              
336 52         116 my $token_count = 0;
337 52         100 my $title;
338             my @to_unget;
339 52         87 my $state = 0;
340 52         83 my $depth = 0;
341              
342 52 50 33     488 Carp::croak "What kind of titlename is \"$titlename\"?!" unless
343             defined $titlename and $titlename =~ m/^[A-Z ]{1,60}$/s; #sanity
344 52         171 my $titlename_re = quotemeta($titlename);
345              
346 52         183 my $head1_text_content;
347             my $para_text_content;
348 52         0 my $skipX;
349              
350 52   50     325 while(
      66        
351             ++$token_count <= ($max_token || 1_000_000)
352             and defined(my $token = $self->get_token)
353             ) {
354 563         940 push @to_unget, $token;
355              
356 563 100       1383 if ($state == 0) { # seeking =head1
    100          
    100          
    50          
357 276 100 100     843 if( $token->is_start and $token->tagname eq 'head1' ) {
358 62         89 DEBUG and print STDERR " Found head1. Seeking content...\n";
359 62         107 ++$state;
360 62         259 $head1_text_content = '';
361             }
362             }
363              
364             elsif($state == 1) { # accumulating text until end of head1
365 131 100 100     372 if( $token->is_text ) {
    100          
    100          
366 65 100       167 unless ($skipX) {
367 64         111 DEBUG and print STDERR " Adding \"", $token->text, "\" to head1-content.\n";
368 64         194 $head1_text_content .= $token->text;
369             }
370             } elsif( $token->is_tagname('X') ) {
371             # We're going to want to ignore X<> stuff.
372 2         7 $skipX = $token->is_start;
373 2         8 DEBUG and print STDERR +($skipX ? 'Start' : 'End'), 'ing ignoring of X<> tag';
374             } elsif( $token->is_end and $token->tagname eq 'head1' ) {
375 62         112 DEBUG and print STDERR " Found end of head1. Considering content...\n";
376 62 100       147 $head1_text_content = uc $head1_text_content if $nocase;
377 62 50 100     1044 if($head1_text_content eq $titlename
    100 66        
    100 100        
      100        
      66        
378             or $head1_text_content =~ m/\($titlename_re\)/s
379             # We accept "=head1 Nomen Modularis (NAME)" for sake of i18n
380             ) {
381 36         63 DEBUG and print STDERR " Yup, it was $titlename. Seeking next para-content...\n";
382 36         188 ++$state;
383             } elsif(
384             $desperate_for_title
385             # if we're so desperate we'll take the first
386             # =head1's content as a title
387             and $head1_text_content =~ m/\S/
388             and $head1_text_content !~ m/^[ A-Z]+$/s
389             and $head1_text_content !~
390             m/\((?:
391             NAME | TITLE | VERSION | AUTHORS? | DESCRIPTION | SYNOPSIS
392             | COPYRIGHT | LICENSE | NOTES? | FUNCTIONS? | METHODS?
393             | CAVEATS? | BUGS? | SEE\ ALSO | SWITCHES | ENVIRONMENT
394             )\)/sx
395             # avoid accepting things like =head1 Thingy Thongy (DESCRIPTION)
396             and ($max_content_length
397             ? (length($head1_text_content) <= $max_content_length) # sanity
398             : 1)
399             ) {
400             # Looks good; trim it
401 6         29 ($title = $head1_text_content) =~ s/\s+$//;
402 6         10 DEBUG and print STDERR " It looks titular: \"$title\".\n\n Using that.\n";
403 6         15 last;
404             } else {
405 20         46 --$state;
406 20         100 DEBUG and print STDERR " Didn't look titular ($head1_text_content).\n",
407             "\n Dropping back to seeking-head1-content mode...\n";
408             }
409             }
410             }
411              
412             elsif($state == 2) {
413             # seeking start of para (which must immediately follow)
414 36 50 33     111 if($token->is_start and $content_containers{ $token->tagname }) {
415 36         63 DEBUG and print STDERR " Found start of Para. Accumulating content...\n";
416 36         72 $para_text_content = '';
417 36         175 ++$state;
418             } else {
419 0         0 DEBUG and print
420             " Didn't see an immediately subsequent start-Para. Reseeking H1\n";
421 0         0 $state = 0;
422             }
423             }
424              
425             elsif($state == 3) {
426             # accumulating text until end of Para
427 120 100 100     308 if( $token->is_text ) {
    100          
428 60         81 DEBUG and print STDERR " Adding \"", $token->text, "\" to para-content.\n";
429 60         169 $para_text_content .= $token->text;
430             # and keep looking
431              
432             } elsif( $token->is_end and $content_containers{ $token->tagname } ) {
433 36         79 DEBUG and print STDERR " Found end of Para. Considering content: ",
434             $para_text_content, "\n";
435              
436 36 50 33     361 if( $para_text_content =~ m/\S/
    50          
437             and ($max_content_length
438             ? (length($para_text_content) <= $max_content_length)
439             : 1)
440             ) {
441             # Some minimal sanity constraints, I think.
442 36         62 DEBUG and print STDERR " It looks contentworthy, I guess. Using it.\n";
443 36         77 $title = $para_text_content;
444 36         93 last;
445             } else {
446 0         0 DEBUG and print STDERR " Doesn't look at all contentworthy!\n Giving up.\n";
447 0         0 undef $title;
448 0         0 last;
449             }
450             }
451             }
452              
453             else {
454 0         0 die "IMPOSSIBLE STATE $state!\n"; # should never happen
455             }
456              
457             }
458              
459             # Put it all back!
460 52         255 $self->unget_token(@to_unget);
461              
462 52         81 if(DEBUG) {
463             if(defined $title) { print STDERR " Returning title <$title>\n" }
464             else { print STDERR "Returning title <>\n" }
465             }
466              
467 52 100       206 return '' unless defined $title;
468 42         156 $title =~ s/^\s+//;
469 42         335 return $title;
470             }
471              
472             #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
473             #
474             # Methods that actually do work at parse-time:
475              
476             sub _handle_element_start {
477 349     349   576 my $self = shift; # leaving ($element_name, $attr_hash_r)
478 349         477 DEBUG > 2 and print STDERR "++ $_[0] (", map("<$_> ", %{$_[1]}), ")\n";
479              
480 349         1853 push @{ $self->{'token_buffer'} },
481 349         478 $self->{'start_token_class'}->new(@_);
482 349         852 return;
483             }
484              
485             sub _handle_text {
486 244     244   385 my $self = shift; # leaving ($text)
487 244         344 DEBUG > 2 and print STDERR "== $_[0]\n";
488 244         1193 push @{ $self->{'token_buffer'} },
489 244         373 $self->{'text_token_class'}->new(@_);
490 244         766 return;
491             }
492              
493             sub _handle_element_end {
494 335     335   444 my $self = shift; # leaving ($element_name);
495 335         417 DEBUG > 2 and print STDERR "-- $_[0]\n";
496 335         1235 push @{ $self->{'token_buffer'} },
497 335         453 $self->{'end_token_class'}->new(@_);
498 335         695 return;
499             }
500              
501             #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
502              
503             1;
504              
505              
506             __END__