line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
require 5; |
2
|
|
|
|
|
|
|
package Pod::Simple::PullParser; |
3
|
|
|
|
|
|
|
$VERSION = '3.42'; |
4
|
10
|
|
|
10
|
|
13932
|
use Pod::Simple (); |
|
10
|
|
|
|
|
25
|
|
|
10
|
|
|
|
|
305
|
|
5
|
10
|
|
|
10
|
|
370
|
BEGIN {@ISA = ('Pod::Simple')} |
6
|
|
|
|
|
|
|
|
7
|
10
|
|
|
10
|
|
60
|
use strict; |
|
10
|
|
|
|
|
19
|
|
|
10
|
|
|
|
|
215
|
|
8
|
10
|
|
|
10
|
|
46
|
use Carp (); |
|
10
|
|
|
|
|
16
|
|
|
10
|
|
|
|
|
153
|
|
9
|
|
|
|
|
|
|
|
10
|
10
|
|
|
10
|
|
4670
|
use Pod::Simple::PullParserStartToken; |
|
10
|
|
|
|
|
25
|
|
|
10
|
|
|
|
|
311
|
|
11
|
10
|
|
|
10
|
|
4127
|
use Pod::Simple::PullParserEndToken; |
|
10
|
|
|
|
|
23
|
|
|
10
|
|
|
|
|
283
|
|
12
|
10
|
|
|
10
|
|
3706
|
use Pod::Simple::PullParserTextToken; |
|
10
|
|
|
|
|
25
|
|
|
10
|
|
|
|
|
388
|
|
13
|
|
|
|
|
|
|
|
14
|
10
|
50
|
|
10
|
|
1629
|
BEGIN { *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG } |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
__PACKAGE__->_accessorize( |
17
|
|
|
|
|
|
|
'source_fh', # the filehandle we're reading from |
18
|
|
|
|
|
|
|
'source_scalar_ref', # the scalarref we're reading from |
19
|
|
|
|
|
|
|
'source_arrayref', # the arrayref we're reading from |
20
|
|
|
|
|
|
|
); |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
23
|
|
|
|
|
|
|
# |
24
|
|
|
|
|
|
|
# And here is how we implement a pull-parser on top of a push-parser... |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
sub filter { |
27
|
0
|
|
|
0
|
1
|
0
|
my($self, $source) = @_; |
28
|
0
|
0
|
|
|
|
0
|
$self = $self->new unless ref $self; |
29
|
|
|
|
|
|
|
|
30
|
0
|
0
|
|
|
|
0
|
$source = *STDIN{IO} unless defined $source; |
31
|
0
|
|
|
|
|
0
|
$self->set_source($source); |
32
|
0
|
|
|
|
|
0
|
$self->output_fh(*STDOUT{IO}); |
33
|
|
|
|
|
|
|
|
34
|
0
|
|
|
|
|
0
|
$self->run; # define run() in a subclass if you want to use filter()! |
35
|
0
|
|
|
|
|
0
|
return $self; |
36
|
|
|
|
|
|
|
} |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
sub parse_string_document { |
41
|
49
|
|
|
49
|
1
|
78
|
my $this = shift; |
42
|
49
|
|
|
|
|
146
|
$this->set_source(\ $_[0]); |
43
|
49
|
|
|
|
|
145
|
$this->run; |
44
|
|
|
|
|
|
|
} |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
sub parse_file { |
47
|
13
|
|
|
13
|
1
|
37
|
my($this, $filename) = @_; |
48
|
13
|
|
|
|
|
89
|
$this->set_source($filename); |
49
|
13
|
|
|
|
|
51
|
$this->run; |
50
|
|
|
|
|
|
|
} |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
53
|
|
|
|
|
|
|
# In case anyone tries to use them: |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
sub run { |
56
|
10
|
|
|
10
|
|
70
|
use Carp (); |
|
10
|
|
|
|
|
18
|
|
|
10
|
|
|
|
|
1014
|
|
57
|
0
|
0
|
0
|
0
|
0
|
0
|
if( __PACKAGE__ eq ref($_[0]) || $_[0]) { # I'm not being subclassed! |
58
|
0
|
|
|
|
|
0
|
Carp::croak "You can call run() only on subclasses of " |
59
|
|
|
|
|
|
|
. __PACKAGE__; |
60
|
|
|
|
|
|
|
} else { |
61
|
0
|
|
0
|
|
|
0
|
Carp::croak join '', |
62
|
|
|
|
|
|
|
"You can't call run() because ", |
63
|
|
|
|
|
|
|
ref($_[0]) || $_[0], " didn't define a run() method"; |
64
|
|
|
|
|
|
|
} |
65
|
|
|
|
|
|
|
} |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
sub parse_lines { |
68
|
10
|
|
|
10
|
|
63
|
use Carp (); |
|
10
|
|
|
|
|
23
|
|
|
10
|
|
|
|
|
460
|
|
69
|
0
|
|
|
0
|
1
|
0
|
Carp::croak "Use set_source with ", __PACKAGE__, |
70
|
|
|
|
|
|
|
" and subclasses, not parse_lines"; |
71
|
|
|
|
|
|
|
} |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
sub parse_line { |
74
|
10
|
|
|
10
|
|
58
|
use Carp (); |
|
10
|
|
|
|
|
25
|
|
|
10
|
|
|
|
|
27220
|
|
75
|
0
|
|
|
0
|
0
|
0
|
Carp::croak "Use set_source with ", __PACKAGE__, |
76
|
|
|
|
|
|
|
" and subclasses, not parse_line"; |
77
|
|
|
|
|
|
|
} |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
sub new { |
82
|
89
|
|
|
89
|
1
|
1835
|
my $class = shift; |
83
|
89
|
|
|
|
|
368
|
my $self = $class->SUPER::new(@_); |
84
|
89
|
50
|
|
|
|
328
|
die "Couldn't construct for $class" unless $self; |
85
|
|
|
|
|
|
|
|
86
|
89
|
|
50
|
|
|
392
|
$self->{'token_buffer'} ||= []; |
87
|
89
|
|
50
|
|
|
357
|
$self->{'start_token_class'} ||= 'Pod::Simple::PullParserStartToken'; |
88
|
89
|
|
50
|
|
|
309
|
$self->{'text_token_class'} ||= 'Pod::Simple::PullParserTextToken'; |
89
|
89
|
|
50
|
|
|
400
|
$self->{'end_token_class'} ||= 'Pod::Simple::PullParserEndToken'; |
90
|
|
|
|
|
|
|
|
91
|
89
|
|
|
|
|
108
|
DEBUG > 1 and print STDERR "New pullparser object: $self\n"; |
92
|
|
|
|
|
|
|
|
93
|
89
|
|
|
|
|
185
|
return $self; |
94
|
|
|
|
|
|
|
} |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
# ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
sub get_token { |
99
|
1398
|
|
|
1398
|
1
|
2486
|
my $self = shift; |
100
|
1398
|
|
|
|
|
1464
|
DEBUG > 1 and print STDERR "\nget_token starting up on $self.\n"; |
101
|
|
|
|
|
|
|
DEBUG > 2 and print STDERR " Items in token-buffer (", |
102
|
|
|
|
|
|
|
scalar( @{ $self->{'token_buffer'} } ) , |
103
|
|
|
|
|
|
|
") :\n", map( |
104
|
|
|
|
|
|
|
" " . $_->dump . "\n", @{ $self->{'token_buffer'} } |
105
|
|
|
|
|
|
|
), |
106
|
1398
|
|
|
|
|
1531
|
@{ $self->{'token_buffer'} } ? '' : ' (no tokens)', |
107
|
|
|
|
|
|
|
"\n" |
108
|
|
|
|
|
|
|
; |
109
|
|
|
|
|
|
|
|
110
|
1398
|
|
|
|
|
1579
|
until( @{ $self->{'token_buffer'} } ) { |
|
1985
|
|
|
|
|
3706
|
|
111
|
587
|
|
|
|
|
1072
|
DEBUG > 3 and print STDERR "I need to get something into my empty token buffer...\n"; |
112
|
587
|
100
|
|
|
|
1520
|
if($self->{'source_dead'}) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
113
|
79
|
|
|
|
|
99
|
DEBUG and print STDERR "$self 's source is dead.\n"; |
114
|
79
|
|
|
|
|
114
|
push @{ $self->{'token_buffer'} }, undef; |
|
79
|
|
|
|
|
158
|
|
115
|
|
|
|
|
|
|
} elsif(exists $self->{'source_fh'}) { |
116
|
23
|
|
|
|
|
33
|
my @lines; |
117
|
23
|
|
33
|
|
|
66
|
my $fh = $self->{'source_fh'} |
118
|
|
|
|
|
|
|
|| Carp::croak('You have to call set_source before you can call get_token'); |
119
|
|
|
|
|
|
|
|
120
|
23
|
|
|
|
|
33
|
DEBUG and print STDERR "$self 's source is filehandle $fh.\n"; |
121
|
|
|
|
|
|
|
# Read those many lines at a time |
122
|
23
|
|
|
|
|
54
|
for(my $i = Pod::Simple::MANY_LINES; $i--;) { |
123
|
298
|
|
|
|
|
314
|
DEBUG > 3 and print STDERR " Fetching a line from source filehandle $fh...\n"; |
124
|
298
|
|
|
|
|
647
|
local $/ = $Pod::Simple::NL; |
125
|
298
|
|
|
|
|
1169
|
push @lines, scalar(<$fh>); # readline |
126
|
298
|
|
|
|
|
434
|
DEBUG > 3 and print STDERR " Line is: ", |
127
|
|
|
|
|
|
|
defined($lines[-1]) ? $lines[-1] : "\n"; |
128
|
298
|
100
|
|
|
|
727
|
unless( defined $lines[-1] ) { |
129
|
17
|
|
|
|
|
24
|
DEBUG and print STDERR "That's it for that source fh! Killing.\n"; |
130
|
17
|
|
|
|
|
45
|
delete $self->{'source_fh'}; # so it can be GC'd |
131
|
17
|
|
|
|
|
67
|
last; |
132
|
|
|
|
|
|
|
} |
133
|
|
|
|
|
|
|
# but pass thru the undef, which will set source_dead to true |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
# TODO: look to see if $lines[-1] is =encoding, and if so, |
136
|
|
|
|
|
|
|
# do horribly magic things |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
} |
139
|
|
|
|
|
|
|
|
140
|
23
|
|
|
|
|
31
|
if(DEBUG > 8) { |
141
|
|
|
|
|
|
|
print STDERR "* I've gotten ", scalar(@lines), " lines:\n"; |
142
|
|
|
|
|
|
|
foreach my $l (@lines) { |
143
|
|
|
|
|
|
|
if(defined $l) { |
144
|
|
|
|
|
|
|
print STDERR " line {$l}\n"; |
145
|
|
|
|
|
|
|
} else { |
146
|
|
|
|
|
|
|
print STDERR " line undef\n"; |
147
|
|
|
|
|
|
|
} |
148
|
|
|
|
|
|
|
} |
149
|
|
|
|
|
|
|
print STDERR "* end of ", scalar(@lines), " lines\n"; |
150
|
|
|
|
|
|
|
} |
151
|
|
|
|
|
|
|
|
152
|
23
|
|
|
|
|
126
|
$self->SUPER::parse_lines(@lines); |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
} elsif(exists $self->{'source_arrayref'}) { |
155
|
|
|
|
|
|
|
DEBUG and print STDERR "$self 's source is arrayref $self->{'source_arrayref'}, with ", |
156
|
2
|
|
|
|
|
3
|
scalar(@{$self->{'source_arrayref'}}), " items left in it.\n"; |
157
|
|
|
|
|
|
|
|
158
|
2
|
|
|
|
|
3
|
DEBUG > 3 and print STDERR " Fetching ", Pod::Simple::MANY_LINES, " lines.\n"; |
159
|
|
|
|
|
|
|
$self->SUPER::parse_lines( |
160
|
2
|
|
|
|
|
3
|
splice @{ $self->{'source_arrayref'} }, |
|
2
|
|
|
|
|
9
|
|
161
|
|
|
|
|
|
|
0, |
162
|
|
|
|
|
|
|
Pod::Simple::MANY_LINES |
163
|
|
|
|
|
|
|
); |
164
|
2
|
50
|
|
|
|
5
|
unless( @{ $self->{'source_arrayref'} } ) { |
|
2
|
|
|
|
|
4
|
|
165
|
2
|
|
|
|
|
4
|
DEBUG and print STDERR "That's it for that source arrayref! Killing.\n"; |
166
|
2
|
|
|
|
|
5
|
$self->SUPER::parse_lines(undef); |
167
|
2
|
|
|
|
|
5
|
delete $self->{'source_arrayref'}; # so it can be GC'd |
168
|
|
|
|
|
|
|
} |
169
|
|
|
|
|
|
|
# to make sure that an undef is always sent to signal end-of-stream |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
} elsif(exists $self->{'source_scalar_ref'}) { |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
DEBUG and print STDERR "$self 's source is scalarref $self->{'source_scalar_ref'}, with ", |
174
|
|
|
|
|
|
|
length(${ $self->{'source_scalar_ref'} }) - |
175
|
483
|
|
|
|
|
543
|
(pos(${ $self->{'source_scalar_ref'} }) || 0), |
176
|
|
|
|
|
|
|
" characters left to parse.\n"; |
177
|
|
|
|
|
|
|
|
178
|
483
|
|
|
|
|
556
|
DEBUG > 3 and print STDERR " Fetching a line from source-string...\n"; |
179
|
483
|
100
|
|
|
|
557
|
if( ${ $self->{'source_scalar_ref'} } =~ |
|
483
|
|
|
|
|
1936
|
|
180
|
|
|
|
|
|
|
m/([^\n\r]*)((?:\r?\n)?)/g |
181
|
|
|
|
|
|
|
) { |
182
|
|
|
|
|
|
|
#print(">> $1\n"), |
183
|
|
|
|
|
|
|
$self->SUPER::parse_lines($1) |
184
|
|
|
|
|
|
|
if length($1) or length($2) |
185
|
56
|
|
|
|
|
125
|
or pos( ${ $self->{'source_scalar_ref'} }) |
186
|
427
|
100
|
100
|
|
|
1970
|
!= length( ${ $self->{'source_scalar_ref'} }); |
|
56
|
|
66
|
|
|
180
|
|
187
|
|
|
|
|
|
|
# I.e., unless it's a zero-length "empty line" at the very |
188
|
|
|
|
|
|
|
# end of "foo\nbar\n" (i.e., between the \n and the EOS). |
189
|
|
|
|
|
|
|
} else { # that's the end. Byebye |
190
|
56
|
|
|
|
|
195
|
$self->SUPER::parse_lines(undef); |
191
|
56
|
|
|
|
|
110
|
delete $self->{'source_scalar_ref'}; |
192
|
56
|
|
|
|
|
93
|
DEBUG and print STDERR "That's it for that source scalarref! Killing.\n"; |
193
|
|
|
|
|
|
|
} |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
} else { |
197
|
0
|
|
|
|
|
0
|
die "What source??"; |
198
|
|
|
|
|
|
|
} |
199
|
|
|
|
|
|
|
} |
200
|
|
|
|
|
|
|
DEBUG and print STDERR "get_token about to return ", |
201
|
|
|
|
|
|
|
Pod::Simple::pretty( @{$self->{'token_buffer'}} |
202
|
1398
|
|
|
|
|
1636
|
? $self->{'token_buffer'}[-1] : undef |
203
|
|
|
|
|
|
|
), "\n"; |
204
|
1398
|
|
|
|
|
1524
|
return shift @{$self->{'token_buffer'}}; # that's an undef if empty |
|
1398
|
|
|
|
|
3861
|
|
205
|
|
|
|
|
|
|
} |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
sub unget_token { |
208
|
96
|
|
|
96
|
1
|
147
|
my $self = shift; |
209
|
96
|
|
|
|
|
114
|
DEBUG and print STDERR "Ungetting ", scalar(@_), " tokens: ", |
210
|
|
|
|
|
|
|
@_ ? "@_\n" : "().\n"; |
211
|
96
|
|
|
|
|
179
|
foreach my $t (@_) { |
212
|
656
|
50
|
|
|
|
976
|
Carp::croak "Can't unget that, because it's not a token -- it's undef!" |
213
|
|
|
|
|
|
|
unless defined $t; |
214
|
656
|
50
|
|
|
|
1075
|
Carp::croak "Can't unget $t, because it's not a token -- it's a string!" |
215
|
|
|
|
|
|
|
unless ref $t; |
216
|
656
|
50
|
|
|
|
1420
|
Carp::croak "Can't unget $t, because it's not a token object!" |
217
|
|
|
|
|
|
|
unless UNIVERSAL::can($t, 'type'); |
218
|
|
|
|
|
|
|
} |
219
|
|
|
|
|
|
|
|
220
|
96
|
|
|
|
|
122
|
unshift @{$self->{'token_buffer'}}, @_; |
|
96
|
|
|
|
|
242
|
|
221
|
|
|
|
|
|
|
DEBUG > 1 and print STDERR "Token buffer now has ", |
222
|
96
|
|
|
|
|
119
|
scalar(@{$self->{'token_buffer'}}), " items in it.\n"; |
223
|
96
|
|
|
|
|
191
|
return; |
224
|
|
|
|
|
|
|
} |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
# $self->{'source_filename'} = $source; |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
sub set_source { |
231
|
90
|
|
|
90
|
1
|
1000940
|
my $self = shift @_; |
232
|
90
|
50
|
|
|
|
195
|
return $self->{'source_fh'} unless @_; |
233
|
|
|
|
|
|
|
Carp::croak("Cannot assign new source to pull parser; create a new instance, instead") |
234
|
90
|
100
|
66
|
|
|
656
|
if $self->{'source_fh'} || $self->{'source_scalar_ref'} || $self->{'source_arrayref'}; |
|
|
|
66
|
|
|
|
|
235
|
89
|
|
|
|
|
134
|
my $handle; |
236
|
89
|
50
|
|
|
|
439
|
if(!defined $_[0]) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
237
|
0
|
|
|
|
|
0
|
Carp::croak("Can't use empty-string as a source for set_source"); |
238
|
|
|
|
|
|
|
} elsif(ref(\( $_[0] )) eq 'GLOB') { |
239
|
1
|
|
|
|
|
7
|
$self->{'source_filename'} = '' . ($handle = $_[0]); |
240
|
1
|
|
|
|
|
3
|
DEBUG and print STDERR "$self 's source is glob $_[0]\n"; |
241
|
|
|
|
|
|
|
# and fall thru |
242
|
|
|
|
|
|
|
} elsif(ref( $_[0] ) eq 'SCALAR') { |
243
|
70
|
|
|
|
|
115
|
$self->{'source_scalar_ref'} = $_[0]; |
244
|
70
|
|
|
|
|
90
|
DEBUG and print STDERR "$self 's source is scalar ref $_[0]\n"; |
245
|
70
|
|
|
|
|
123
|
return; |
246
|
|
|
|
|
|
|
} elsif(ref( $_[0] ) eq 'ARRAY') { |
247
|
2
|
|
|
|
|
3
|
$self->{'source_arrayref'} = $_[0]; |
248
|
2
|
|
|
|
|
3
|
DEBUG and print STDERR "$self 's source is array ref $_[0]\n"; |
249
|
2
|
|
|
|
|
5
|
return; |
250
|
|
|
|
|
|
|
} elsif(ref $_[0]) { |
251
|
2
|
|
|
|
|
8
|
$self->{'source_filename'} = '' . ($handle = $_[0]); |
252
|
2
|
|
|
|
|
3
|
DEBUG and print STDERR "$self 's source is fh-obj $_[0]\n"; |
253
|
|
|
|
|
|
|
} elsif(!length $_[0]) { |
254
|
0
|
|
|
|
|
0
|
Carp::croak("Can't use empty-string as a source for set_source"); |
255
|
|
|
|
|
|
|
} else { # It's a filename! |
256
|
14
|
|
|
|
|
24
|
DEBUG and print STDERR "$self 's source is filename $_[0]\n"; |
257
|
|
|
|
|
|
|
{ |
258
|
14
|
|
|
|
|
22
|
local *PODSOURCE; |
|
14
|
|
|
|
|
43
|
|
259
|
14
|
50
|
|
|
|
627
|
open(PODSOURCE, "<$_[0]") || Carp::croak "Can't open $_[0]: $!"; |
260
|
14
|
|
|
|
|
82
|
$handle = *PODSOURCE{IO}; |
261
|
|
|
|
|
|
|
} |
262
|
14
|
|
|
|
|
48
|
$self->{'source_filename'} = $_[0]; |
263
|
14
|
|
|
|
|
26
|
DEBUG and print STDERR " Its name is $_[0].\n"; |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
# TODO: file-discipline things here! |
266
|
|
|
|
|
|
|
} |
267
|
|
|
|
|
|
|
|
268
|
17
|
|
|
|
|
74
|
$self->{'source_fh'} = $handle; |
269
|
17
|
|
|
|
|
22
|
DEBUG and print STDERR " Its handle is $handle\n"; |
270
|
17
|
|
|
|
|
39
|
return 1; |
271
|
|
|
|
|
|
|
} |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
# ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ |
274
|
|
|
|
|
|
|
|
275
|
0
|
|
|
0
|
0
|
0
|
sub get_title_short { shift->get_short_title(@_) } # alias |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
sub get_short_title { |
278
|
21
|
|
|
21
|
1
|
77
|
my $title = shift->get_title(@_); |
279
|
21
|
100
|
|
|
|
133
|
$title = $1 if $title =~ m/^(\S{1,60})\s+--?\s+./s; |
280
|
|
|
|
|
|
|
# turn "Foo::Bar -- bars for your foo" into "Foo::Bar" |
281
|
21
|
|
|
|
|
78
|
return $title; |
282
|
|
|
|
|
|
|
} |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
sub get_title { shift->_get_titled_section( |
285
|
39
|
|
|
39
|
1
|
223
|
'NAME', max_token => 50, desperate => 1, @_) |
286
|
|
|
|
|
|
|
} |
287
|
|
|
|
|
|
|
sub get_version { shift->_get_titled_section( |
288
|
3
|
|
|
3
|
1
|
13
|
'VERSION', |
289
|
|
|
|
|
|
|
max_token => 400, |
290
|
|
|
|
|
|
|
accept_verbatim => 1, |
291
|
|
|
|
|
|
|
max_content_length => 3_000, |
292
|
|
|
|
|
|
|
@_, |
293
|
|
|
|
|
|
|
); |
294
|
|
|
|
|
|
|
} |
295
|
|
|
|
|
|
|
sub get_description { shift->_get_titled_section( |
296
|
7
|
|
|
7
|
1
|
146
|
'DESCRIPTION', |
297
|
|
|
|
|
|
|
max_token => 400, |
298
|
|
|
|
|
|
|
max_content_length => 3_000, |
299
|
|
|
|
|
|
|
@_, |
300
|
|
|
|
|
|
|
) } |
301
|
|
|
|
|
|
|
|
302
|
0
|
|
|
0
|
0
|
0
|
sub get_authors { shift->get_author(@_) } # a harmless alias |
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
sub get_author { |
305
|
2
|
|
|
2
|
1
|
88
|
my $this = shift; |
306
|
|
|
|
|
|
|
# Max_token is so high because these are |
307
|
|
|
|
|
|
|
# typically at the end of the document: |
308
|
2
|
100
|
|
|
|
7
|
$this->_get_titled_section('AUTHOR' , max_token => 10_000, @_) || |
309
|
|
|
|
|
|
|
$this->_get_titled_section('AUTHORS', max_token => 10_000, @_); |
310
|
|
|
|
|
|
|
} |
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
#-------------------------------------------------------------------------- |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
sub _get_titled_section { |
315
|
|
|
|
|
|
|
# Based on a get_title originally contributed by Graham Barr |
316
|
52
|
|
|
52
|
|
191
|
my($self, $titlename, %options) = (@_); |
317
|
|
|
|
|
|
|
|
318
|
52
|
|
|
|
|
109
|
my $max_token = delete $options{'max_token'}; |
319
|
52
|
|
|
|
|
101
|
my $desperate_for_title = delete $options{'desperate'}; |
320
|
52
|
|
|
|
|
83
|
my $accept_verbatim = delete $options{'accept_verbatim'}; |
321
|
52
|
|
|
|
|
71
|
my $max_content_length = delete $options{'max_content_length'}; |
322
|
52
|
|
|
|
|
71
|
my $nocase = delete $options{'nocase'}; |
323
|
52
|
100
|
|
|
|
115
|
$max_content_length = 120 unless defined $max_content_length; |
324
|
|
|
|
|
|
|
|
325
|
52
|
0
|
|
|
|
135
|
Carp::croak( "Unknown " . ((1 == keys %options) ? "option: " : "options: ") |
|
|
50
|
|
|
|
|
|
326
|
|
|
|
|
|
|
. join " ", map "[$_]", sort keys %options |
327
|
|
|
|
|
|
|
) |
328
|
|
|
|
|
|
|
if keys %options; |
329
|
|
|
|
|
|
|
|
330
|
52
|
|
|
|
|
66
|
my %content_containers; |
331
|
52
|
|
|
|
|
83
|
$content_containers{'Para'} = 1; |
332
|
52
|
100
|
|
|
|
117
|
if($accept_verbatim) { |
333
|
3
|
|
|
|
|
5
|
$content_containers{'Verbatim'} = 1; |
334
|
3
|
|
|
|
|
5
|
$content_containers{'VerbatimFormatted'} = 1; |
335
|
|
|
|
|
|
|
} |
336
|
|
|
|
|
|
|
|
337
|
52
|
|
|
|
|
80
|
my $token_count = 0; |
338
|
52
|
|
|
|
|
74
|
my $title; |
339
|
|
|
|
|
|
|
my @to_unget; |
340
|
52
|
|
|
|
|
63
|
my $state = 0; |
341
|
52
|
|
|
|
|
61
|
my $depth = 0; |
342
|
|
|
|
|
|
|
|
343
|
52
|
50
|
33
|
|
|
380
|
Carp::croak "What kind of titlename is \"$titlename\"?!" unless |
344
|
|
|
|
|
|
|
defined $titlename and $titlename =~ m/^[A-Z ]{1,60}$/s; #sanity |
345
|
52
|
|
|
|
|
100
|
my $titlename_re = quotemeta($titlename); |
346
|
|
|
|
|
|
|
|
347
|
52
|
|
|
|
|
115
|
my $head1_text_content; |
348
|
|
|
|
|
|
|
my $para_text_content; |
349
|
52
|
|
|
|
|
0
|
my $skipX; |
350
|
|
|
|
|
|
|
|
351
|
52
|
|
50
|
|
|
225
|
while( |
|
|
|
66
|
|
|
|
|
352
|
|
|
|
|
|
|
++$token_count <= ($max_token || 1_000_000) |
353
|
|
|
|
|
|
|
and defined(my $token = $self->get_token) |
354
|
|
|
|
|
|
|
) { |
355
|
563
|
|
|
|
|
723
|
push @to_unget, $token; |
356
|
|
|
|
|
|
|
|
357
|
563
|
100
|
|
|
|
966
|
if ($state == 0) { # seeking =head1 |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
358
|
276
|
100
|
100
|
|
|
552
|
if( $token->is_start and $token->tagname eq 'head1' ) { |
359
|
62
|
|
|
|
|
83
|
DEBUG and print STDERR " Found head1. Seeking content...\n"; |
360
|
62
|
|
|
|
|
74
|
++$state; |
361
|
62
|
|
|
|
|
181
|
$head1_text_content = ''; |
362
|
|
|
|
|
|
|
} |
363
|
|
|
|
|
|
|
} |
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
elsif($state == 1) { # accumulating text until end of head1 |
366
|
131
|
100
|
100
|
|
|
299
|
if( $token->is_text ) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
367
|
65
|
100
|
|
|
|
148
|
unless ($skipX) { |
368
|
64
|
|
|
|
|
75
|
DEBUG and print STDERR " Adding \"", $token->text, "\" to head1-content.\n"; |
369
|
64
|
|
|
|
|
152
|
$head1_text_content .= $token->text; |
370
|
|
|
|
|
|
|
} |
371
|
|
|
|
|
|
|
} elsif( $token->is_tagname('X') ) { |
372
|
|
|
|
|
|
|
# We're going to want to ignore X<> stuff. |
373
|
2
|
|
|
|
|
5
|
$skipX = $token->is_start; |
374
|
2
|
|
|
|
|
5
|
DEBUG and print STDERR +($skipX ? 'Start' : 'End'), 'ing ignoring of X<> tag'; |
375
|
|
|
|
|
|
|
} elsif( $token->is_end and $token->tagname eq 'head1' ) { |
376
|
62
|
|
|
|
|
75
|
DEBUG and print STDERR " Found end of head1. Considering content...\n"; |
377
|
62
|
100
|
|
|
|
103
|
$head1_text_content = uc $head1_text_content if $nocase; |
378
|
62
|
50
|
100
|
|
|
640
|
if($head1_text_content eq $titlename |
|
|
100
|
66
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
66
|
|
|
|
|
379
|
|
|
|
|
|
|
or $head1_text_content =~ m/\($titlename_re\)/s |
380
|
|
|
|
|
|
|
# We accept "=head1 Nomen Modularis (NAME)" for sake of i18n |
381
|
|
|
|
|
|
|
) { |
382
|
36
|
|
|
|
|
50
|
DEBUG and print STDERR " Yup, it was $titlename. Seeking next para-content...\n"; |
383
|
36
|
|
|
|
|
122
|
++$state; |
384
|
|
|
|
|
|
|
} elsif( |
385
|
|
|
|
|
|
|
$desperate_for_title |
386
|
|
|
|
|
|
|
# if we're so desperate we'll take the first |
387
|
|
|
|
|
|
|
# =head1's content as a title |
388
|
|
|
|
|
|
|
and $head1_text_content =~ m/\S/ |
389
|
|
|
|
|
|
|
and $head1_text_content !~ m/^[ A-Z]+$/s |
390
|
|
|
|
|
|
|
and $head1_text_content !~ |
391
|
|
|
|
|
|
|
m/\((?: |
392
|
|
|
|
|
|
|
NAME | TITLE | VERSION | AUTHORS? | DESCRIPTION | SYNOPSIS |
393
|
|
|
|
|
|
|
| COPYRIGHT | LICENSE | NOTES? | FUNCTIONS? | METHODS? |
394
|
|
|
|
|
|
|
| CAVEATS? | BUGS? | SEE\ ALSO | SWITCHES | ENVIRONMENT |
395
|
|
|
|
|
|
|
)\)/sx |
396
|
|
|
|
|
|
|
# avoid accepting things like =head1 Thingy Thongy (DESCRIPTION) |
397
|
|
|
|
|
|
|
and ($max_content_length |
398
|
|
|
|
|
|
|
? (length($head1_text_content) <= $max_content_length) # sanity |
399
|
|
|
|
|
|
|
: 1) |
400
|
|
|
|
|
|
|
) { |
401
|
|
|
|
|
|
|
# Looks good; trim it |
402
|
6
|
|
|
|
|
26
|
($title = $head1_text_content) =~ s/\s+$//; |
403
|
6
|
|
|
|
|
13
|
DEBUG and print STDERR " It looks titular: \"$title\".\n\n Using that.\n"; |
404
|
6
|
|
|
|
|
13
|
last; |
405
|
|
|
|
|
|
|
} else { |
406
|
20
|
|
|
|
|
35
|
--$state; |
407
|
20
|
|
|
|
|
68
|
DEBUG and print STDERR " Didn't look titular ($head1_text_content).\n", |
408
|
|
|
|
|
|
|
"\n Dropping back to seeking-head1-content mode...\n"; |
409
|
|
|
|
|
|
|
} |
410
|
|
|
|
|
|
|
} |
411
|
|
|
|
|
|
|
} |
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
elsif($state == 2) { |
414
|
|
|
|
|
|
|
# seeking start of para (which must immediately follow) |
415
|
36
|
50
|
33
|
|
|
85
|
if($token->is_start and $content_containers{ $token->tagname }) { |
416
|
36
|
|
|
|
|
59
|
DEBUG and print STDERR " Found start of Para. Accumulating content...\n"; |
417
|
36
|
|
|
|
|
47
|
$para_text_content = ''; |
418
|
36
|
|
|
|
|
114
|
++$state; |
419
|
|
|
|
|
|
|
} else { |
420
|
0
|
|
|
|
|
0
|
DEBUG and print |
421
|
|
|
|
|
|
|
" Didn't see an immediately subsequent start-Para. Reseeking H1\n"; |
422
|
0
|
|
|
|
|
0
|
$state = 0; |
423
|
|
|
|
|
|
|
} |
424
|
|
|
|
|
|
|
} |
425
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
elsif($state == 3) { |
427
|
|
|
|
|
|
|
# accumulating text until end of Para |
428
|
120
|
100
|
100
|
|
|
218
|
if( $token->is_text ) { |
|
|
100
|
|
|
|
|
|
429
|
60
|
|
|
|
|
76
|
DEBUG and print STDERR " Adding \"", $token->text, "\" to para-content.\n"; |
430
|
60
|
|
|
|
|
117
|
$para_text_content .= $token->text; |
431
|
|
|
|
|
|
|
# and keep looking |
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
} elsif( $token->is_end and $content_containers{ $token->tagname } ) { |
434
|
36
|
|
|
|
|
44
|
DEBUG and print STDERR " Found end of Para. Considering content: ", |
435
|
|
|
|
|
|
|
$para_text_content, "\n"; |
436
|
|
|
|
|
|
|
|
437
|
36
|
50
|
33
|
|
|
231
|
if( $para_text_content =~ m/\S/ |
|
|
50
|
|
|
|
|
|
438
|
|
|
|
|
|
|
and ($max_content_length |
439
|
|
|
|
|
|
|
? (length($para_text_content) <= $max_content_length) |
440
|
|
|
|
|
|
|
: 1) |
441
|
|
|
|
|
|
|
) { |
442
|
|
|
|
|
|
|
# Some minimal sanity constraints, I think. |
443
|
36
|
|
|
|
|
47
|
DEBUG and print STDERR " It looks contentworthy, I guess. Using it.\n"; |
444
|
36
|
|
|
|
|
57
|
$title = $para_text_content; |
445
|
36
|
|
|
|
|
65
|
last; |
446
|
|
|
|
|
|
|
} else { |
447
|
0
|
|
|
|
|
0
|
DEBUG and print STDERR " Doesn't look at all contentworthy!\n Giving up.\n"; |
448
|
0
|
|
|
|
|
0
|
undef $title; |
449
|
0
|
|
|
|
|
0
|
last; |
450
|
|
|
|
|
|
|
} |
451
|
|
|
|
|
|
|
} |
452
|
|
|
|
|
|
|
} |
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
else { |
455
|
0
|
|
|
|
|
0
|
die "IMPOSSIBLE STATE $state!\n"; # should never happen |
456
|
|
|
|
|
|
|
} |
457
|
|
|
|
|
|
|
|
458
|
|
|
|
|
|
|
} |
459
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
# Put it all back! |
461
|
52
|
|
|
|
|
189
|
$self->unget_token(@to_unget); |
462
|
|
|
|
|
|
|
|
463
|
52
|
|
|
|
|
56
|
if(DEBUG) { |
464
|
|
|
|
|
|
|
if(defined $title) { print STDERR " Returning title <$title>\n" } |
465
|
|
|
|
|
|
|
else { print STDERR "Returning title <>\n" } |
466
|
|
|
|
|
|
|
} |
467
|
|
|
|
|
|
|
|
468
|
52
|
100
|
|
|
|
157
|
return '' unless defined $title; |
469
|
42
|
|
|
|
|
112
|
$title =~ s/^\s+//; |
470
|
42
|
|
|
|
|
212
|
return $title; |
471
|
|
|
|
|
|
|
} |
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
474
|
|
|
|
|
|
|
# |
475
|
|
|
|
|
|
|
# Methods that actually do work at parse-time: |
476
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
sub _handle_element_start { |
478
|
346
|
|
|
346
|
|
494
|
my $self = shift; # leaving ($element_name, $attr_hash_r) |
479
|
346
|
|
|
|
|
404
|
DEBUG > 2 and print STDERR "++ $_[0] (", map("<$_> ", %{$_[1]}), ")\n"; |
480
|
|
|
|
|
|
|
|
481
|
346
|
|
|
|
|
1401
|
push @{ $self->{'token_buffer'} }, |
482
|
346
|
|
|
|
|
448
|
$self->{'start_token_class'}->new(@_); |
483
|
346
|
|
|
|
|
683
|
return; |
484
|
|
|
|
|
|
|
} |
485
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
sub _handle_text { |
487
|
243
|
|
|
243
|
|
337
|
my $self = shift; # leaving ($text) |
488
|
243
|
|
|
|
|
285
|
DEBUG > 2 and print STDERR "== $_[0]\n"; |
489
|
243
|
|
|
|
|
913
|
push @{ $self->{'token_buffer'} }, |
490
|
243
|
|
|
|
|
279
|
$self->{'text_token_class'}->new(@_); |
491
|
243
|
|
|
|
|
602
|
return; |
492
|
|
|
|
|
|
|
} |
493
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
sub _handle_element_end { |
495
|
332
|
|
|
332
|
|
418
|
my $self = shift; # leaving ($element_name); |
496
|
332
|
|
|
|
|
360
|
DEBUG > 2 and print STDERR "-- $_[0]\n"; |
497
|
332
|
|
|
|
|
1025
|
push @{ $self->{'token_buffer'} }, |
498
|
332
|
|
|
|
|
403
|
$self->{'end_token_class'}->new(@_); |
499
|
332
|
|
|
|
|
618
|
return; |
500
|
|
|
|
|
|
|
} |
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
503
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
1; |
505
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
__END__ |