line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Pod::Simple::PullParser; |
2
|
10
|
|
|
10
|
|
139158
|
use strict; |
|
10
|
|
|
|
|
35
|
|
|
10
|
|
|
|
|
396
|
|
3
|
|
|
|
|
|
|
our $VERSION = '3.45'; |
4
|
10
|
|
|
10
|
|
4770
|
use Pod::Simple (); |
|
10
|
|
|
|
|
24
|
|
|
10
|
|
|
|
|
375
|
|
5
|
10
|
|
|
10
|
|
359
|
BEGIN {our @ISA = ('Pod::Simple')} |
6
|
|
|
|
|
|
|
|
7
|
10
|
|
|
10
|
|
60
|
use Carp (); |
|
10
|
|
|
|
|
21
|
|
|
10
|
|
|
|
|
187
|
|
8
|
|
|
|
|
|
|
|
9
|
10
|
|
|
10
|
|
4520
|
use Pod::Simple::PullParserStartToken; |
|
10
|
|
|
|
|
26
|
|
|
10
|
|
|
|
|
303
|
|
10
|
10
|
|
|
10
|
|
4139
|
use Pod::Simple::PullParserEndToken; |
|
10
|
|
|
|
|
23
|
|
|
10
|
|
|
|
|
292
|
|
11
|
10
|
|
|
10
|
|
4205
|
use Pod::Simple::PullParserTextToken; |
|
10
|
|
|
|
|
24
|
|
|
10
|
|
|
|
|
414
|
|
12
|
|
|
|
|
|
|
|
13
|
10
|
50
|
|
10
|
|
2371
|
BEGIN { *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG } |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
__PACKAGE__->_accessorize( |
16
|
|
|
|
|
|
|
'source_fh', # the filehandle we're reading from |
17
|
|
|
|
|
|
|
'source_scalar_ref', # the scalarref we're reading from |
18
|
|
|
|
|
|
|
'source_arrayref', # the arrayref we're reading from |
19
|
|
|
|
|
|
|
); |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
22
|
|
|
|
|
|
|
# |
23
|
|
|
|
|
|
|
# And here is how we implement a pull-parser on top of a push-parser... |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
sub filter { |
26
|
0
|
|
|
0
|
1
|
0
|
my($self, $source) = @_; |
27
|
0
|
0
|
|
|
|
0
|
$self = $self->new unless ref $self; |
28
|
|
|
|
|
|
|
|
29
|
0
|
0
|
|
|
|
0
|
$source = *STDIN{IO} unless defined $source; |
30
|
0
|
|
|
|
|
0
|
$self->set_source($source); |
31
|
0
|
|
|
|
|
0
|
$self->output_fh(*STDOUT{IO}); |
32
|
|
|
|
|
|
|
|
33
|
0
|
|
|
|
|
0
|
$self->run; # define run() in a subclass if you want to use filter()! |
34
|
0
|
|
|
|
|
0
|
return $self; |
35
|
|
|
|
|
|
|
} |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
sub parse_string_document { |
40
|
49
|
|
|
49
|
1
|
89
|
my $this = shift; |
41
|
49
|
|
|
|
|
150
|
$this->set_source(\ $_[0]); |
42
|
49
|
|
|
|
|
149
|
$this->run; |
43
|
|
|
|
|
|
|
} |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
sub parse_file { |
46
|
13
|
|
|
13
|
1
|
40
|
my($this, $filename) = @_; |
47
|
13
|
|
|
|
|
51
|
$this->set_source($filename); |
48
|
13
|
|
|
|
|
50
|
$this->run; |
49
|
|
|
|
|
|
|
} |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
52
|
|
|
|
|
|
|
# In case anyone tries to use them: |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
sub run { |
55
|
10
|
|
|
10
|
|
70
|
use Carp (); |
|
10
|
|
|
|
|
20
|
|
|
10
|
|
|
|
|
1158
|
|
56
|
0
|
0
|
0
|
0
|
0
|
0
|
if( __PACKAGE__ eq (ref($_[0]) || $_[0])) { # I'm not being subclassed! |
57
|
0
|
|
|
|
|
0
|
Carp::croak "You can call run() only on subclasses of " |
58
|
|
|
|
|
|
|
. __PACKAGE__; |
59
|
|
|
|
|
|
|
} else { |
60
|
0
|
|
0
|
|
|
0
|
Carp::croak join '', |
61
|
|
|
|
|
|
|
"You can't call run() because ", |
62
|
|
|
|
|
|
|
ref($_[0]) || $_[0], " didn't define a run() method"; |
63
|
|
|
|
|
|
|
} |
64
|
|
|
|
|
|
|
} |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
sub parse_lines { |
67
|
10
|
|
|
10
|
|
99
|
use Carp (); |
|
10
|
|
|
|
|
19
|
|
|
10
|
|
|
|
|
494
|
|
68
|
0
|
|
|
0
|
1
|
0
|
Carp::croak "Use set_source with ", __PACKAGE__, |
69
|
|
|
|
|
|
|
" and subclasses, not parse_lines"; |
70
|
|
|
|
|
|
|
} |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
sub parse_line { |
73
|
10
|
|
|
10
|
|
57
|
use Carp (); |
|
10
|
|
|
|
|
17
|
|
|
10
|
|
|
|
|
29903
|
|
74
|
0
|
|
|
0
|
0
|
0
|
Carp::croak "Use set_source with ", __PACKAGE__, |
75
|
|
|
|
|
|
|
" and subclasses, not parse_line"; |
76
|
|
|
|
|
|
|
} |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
sub new { |
81
|
89
|
|
|
89
|
1
|
12999
|
my $class = shift; |
82
|
89
|
|
|
|
|
343
|
my $self = $class->SUPER::new(@_); |
83
|
89
|
50
|
|
|
|
238
|
die "Couldn't construct for $class" unless $self; |
84
|
|
|
|
|
|
|
|
85
|
89
|
|
50
|
|
|
417
|
$self->{'token_buffer'} ||= []; |
86
|
89
|
|
50
|
|
|
380
|
$self->{'start_token_class'} ||= 'Pod::Simple::PullParserStartToken'; |
87
|
89
|
|
50
|
|
|
343
|
$self->{'text_token_class'} ||= 'Pod::Simple::PullParserTextToken'; |
88
|
89
|
|
50
|
|
|
411
|
$self->{'end_token_class'} ||= 'Pod::Simple::PullParserEndToken'; |
89
|
|
|
|
|
|
|
|
90
|
89
|
|
|
|
|
113
|
DEBUG > 1 and print STDERR "New pullparser object: $self\n"; |
91
|
|
|
|
|
|
|
|
92
|
89
|
|
|
|
|
220
|
return $self; |
93
|
|
|
|
|
|
|
} |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
# ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
sub get_token { |
98
|
1398
|
|
|
1398
|
1
|
3473
|
my $self = shift; |
99
|
1398
|
|
|
|
|
1729
|
DEBUG > 1 and print STDERR "\nget_token starting up on $self.\n"; |
100
|
|
|
|
|
|
|
DEBUG > 2 and print STDERR " Items in token-buffer (", |
101
|
|
|
|
|
|
|
scalar( @{ $self->{'token_buffer'} } ) , |
102
|
|
|
|
|
|
|
") :\n", map( |
103
|
|
|
|
|
|
|
" " . $_->dump . "\n", @{ $self->{'token_buffer'} } |
104
|
|
|
|
|
|
|
), |
105
|
1398
|
|
|
|
|
1655
|
@{ $self->{'token_buffer'} } ? '' : ' (no tokens)', |
106
|
|
|
|
|
|
|
"\n" |
107
|
|
|
|
|
|
|
; |
108
|
|
|
|
|
|
|
|
109
|
1398
|
|
|
|
|
1800
|
until( @{ $self->{'token_buffer'} } ) { |
|
1985
|
|
|
|
|
4255
|
|
110
|
587
|
|
|
|
|
788
|
DEBUG > 3 and print STDERR "I need to get something into my empty token buffer...\n"; |
111
|
587
|
100
|
|
|
|
1721
|
if($self->{'source_dead'}) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
112
|
79
|
|
|
|
|
122
|
DEBUG and print STDERR "$self 's source is dead.\n"; |
113
|
79
|
|
|
|
|
156
|
push @{ $self->{'token_buffer'} }, undef; |
|
79
|
|
|
|
|
185
|
|
114
|
|
|
|
|
|
|
} elsif(exists $self->{'source_fh'}) { |
115
|
23
|
|
|
|
|
43
|
my @lines; |
116
|
23
|
|
33
|
|
|
127
|
my $fh = $self->{'source_fh'} |
117
|
|
|
|
|
|
|
|| Carp::croak('You have to call set_source before you can call get_token'); |
118
|
|
|
|
|
|
|
|
119
|
23
|
|
|
|
|
32
|
DEBUG and print STDERR "$self 's source is filehandle $fh.\n"; |
120
|
|
|
|
|
|
|
# Read those many lines at a time |
121
|
23
|
|
|
|
|
72
|
for(my $i = Pod::Simple::MANY_LINES; $i--;) { |
122
|
298
|
|
|
|
|
410
|
DEBUG > 3 and print STDERR " Fetching a line from source filehandle $fh...\n"; |
123
|
298
|
|
|
|
|
711
|
local $/ = $Pod::Simple::NL; |
124
|
298
|
|
|
|
|
1383
|
push @lines, scalar(<$fh>); # readline |
125
|
298
|
|
|
|
|
542
|
DEBUG > 3 and print STDERR " Line is: ", |
126
|
|
|
|
|
|
|
defined($lines[-1]) ? $lines[-1] : "\n"; |
127
|
298
|
100
|
|
|
|
875
|
unless( defined $lines[-1] ) { |
128
|
17
|
|
|
|
|
35
|
DEBUG and print STDERR "That's it for that source fh! Killing.\n"; |
129
|
17
|
|
|
|
|
46
|
delete $self->{'source_fh'}; # so it can be GC'd |
130
|
17
|
|
|
|
|
64
|
last; |
131
|
|
|
|
|
|
|
} |
132
|
|
|
|
|
|
|
# but pass thru the undef, which will set source_dead to true |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
# TODO: look to see if $lines[-1] is =encoding, and if so, |
135
|
|
|
|
|
|
|
# do horribly magic things |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
} |
138
|
|
|
|
|
|
|
|
139
|
23
|
|
|
|
|
40
|
if(DEBUG > 8) { |
140
|
|
|
|
|
|
|
print STDERR "* I've gotten ", scalar(@lines), " lines:\n"; |
141
|
|
|
|
|
|
|
foreach my $l (@lines) { |
142
|
|
|
|
|
|
|
if(defined $l) { |
143
|
|
|
|
|
|
|
print STDERR " line {$l}\n"; |
144
|
|
|
|
|
|
|
} else { |
145
|
|
|
|
|
|
|
print STDERR " line undef\n"; |
146
|
|
|
|
|
|
|
} |
147
|
|
|
|
|
|
|
} |
148
|
|
|
|
|
|
|
print STDERR "* end of ", scalar(@lines), " lines\n"; |
149
|
|
|
|
|
|
|
} |
150
|
|
|
|
|
|
|
|
151
|
23
|
|
|
|
|
127
|
$self->SUPER::parse_lines(@lines); |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
} elsif(exists $self->{'source_arrayref'}) { |
154
|
|
|
|
|
|
|
DEBUG and print STDERR "$self 's source is arrayref $self->{'source_arrayref'}, with ", |
155
|
2
|
|
|
|
|
3
|
scalar(@{$self->{'source_arrayref'}}), " items left in it.\n"; |
156
|
|
|
|
|
|
|
|
157
|
2
|
|
|
|
|
3
|
DEBUG > 3 and print STDERR " Fetching ", Pod::Simple::MANY_LINES, " lines.\n"; |
158
|
|
|
|
|
|
|
$self->SUPER::parse_lines( |
159
|
2
|
|
|
|
|
5
|
splice @{ $self->{'source_arrayref'} }, |
|
2
|
|
|
|
|
12
|
|
160
|
|
|
|
|
|
|
0, |
161
|
|
|
|
|
|
|
Pod::Simple::MANY_LINES |
162
|
|
|
|
|
|
|
); |
163
|
2
|
50
|
|
|
|
14
|
unless( @{ $self->{'source_arrayref'} } ) { |
|
2
|
|
|
|
|
7
|
|
164
|
2
|
|
|
|
|
3
|
DEBUG and print STDERR "That's it for that source arrayref! Killing.\n"; |
165
|
2
|
|
|
|
|
6
|
$self->SUPER::parse_lines(undef); |
166
|
2
|
|
|
|
|
5
|
delete $self->{'source_arrayref'}; # so it can be GC'd |
167
|
|
|
|
|
|
|
} |
168
|
|
|
|
|
|
|
# to make sure that an undef is always sent to signal end-of-stream |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
} elsif(exists $self->{'source_scalar_ref'}) { |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
DEBUG and print STDERR "$self 's source is scalarref $self->{'source_scalar_ref'}, with ", |
173
|
|
|
|
|
|
|
length(${ $self->{'source_scalar_ref'} }) - |
174
|
483
|
|
|
|
|
604
|
(pos(${ $self->{'source_scalar_ref'} }) || 0), |
175
|
|
|
|
|
|
|
" characters left to parse.\n"; |
176
|
|
|
|
|
|
|
|
177
|
483
|
|
|
|
|
584
|
DEBUG > 3 and print STDERR " Fetching a line from source-string...\n"; |
178
|
483
|
100
|
|
|
|
609
|
if( ${ $self->{'source_scalar_ref'} } =~ |
|
483
|
|
|
|
|
2159
|
|
179
|
|
|
|
|
|
|
m/([^\n\r]*)((?:\r?\n)?)/g |
180
|
|
|
|
|
|
|
) { |
181
|
|
|
|
|
|
|
#print(">> $1\n"), |
182
|
|
|
|
|
|
|
$self->SUPER::parse_lines($1) |
183
|
|
|
|
|
|
|
if length($1) or length($2) |
184
|
56
|
|
|
|
|
123
|
or pos( ${ $self->{'source_scalar_ref'} }) |
185
|
427
|
100
|
100
|
|
|
2191
|
!= length( ${ $self->{'source_scalar_ref'} }); |
|
56
|
|
66
|
|
|
188
|
|
186
|
|
|
|
|
|
|
# I.e., unless it's a zero-length "empty line" at the very |
187
|
|
|
|
|
|
|
# end of "foo\nbar\n" (i.e., between the \n and the EOS). |
188
|
|
|
|
|
|
|
} else { # that's the end. Byebye |
189
|
56
|
|
|
|
|
207
|
$self->SUPER::parse_lines(undef); |
190
|
56
|
|
|
|
|
127
|
delete $self->{'source_scalar_ref'}; |
191
|
56
|
|
|
|
|
114
|
DEBUG and print STDERR "That's it for that source scalarref! Killing.\n"; |
192
|
|
|
|
|
|
|
} |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
} else { |
196
|
0
|
|
|
|
|
0
|
die "What source??"; |
197
|
|
|
|
|
|
|
} |
198
|
|
|
|
|
|
|
} |
199
|
|
|
|
|
|
|
DEBUG and print STDERR "get_token about to return ", |
200
|
|
|
|
|
|
|
Pod::Simple::pretty( @{$self->{'token_buffer'}} |
201
|
1398
|
|
|
|
|
1871
|
? $self->{'token_buffer'}[-1] : undef |
202
|
|
|
|
|
|
|
), "\n"; |
203
|
1398
|
|
|
|
|
1779
|
return shift @{$self->{'token_buffer'}}; # that's an undef if empty |
|
1398
|
|
|
|
|
4453
|
|
204
|
|
|
|
|
|
|
} |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
sub unget_token { |
207
|
96
|
|
|
96
|
1
|
165
|
my $self = shift; |
208
|
96
|
|
|
|
|
122
|
DEBUG and print STDERR "Ungetting ", scalar(@_), " tokens: ", |
209
|
|
|
|
|
|
|
@_ ? "@_\n" : "().\n"; |
210
|
96
|
|
|
|
|
173
|
foreach my $t (@_) { |
211
|
656
|
50
|
|
|
|
1127
|
Carp::croak "Can't unget that, because it's not a token -- it's undef!" |
212
|
|
|
|
|
|
|
unless defined $t; |
213
|
656
|
50
|
|
|
|
1144
|
Carp::croak "Can't unget $t, because it's not a token -- it's a string!" |
214
|
|
|
|
|
|
|
unless ref $t; |
215
|
656
|
50
|
|
|
|
1607
|
Carp::croak "Can't unget $t, because it's not a token object!" |
216
|
|
|
|
|
|
|
unless UNIVERSAL::can($t, 'type'); |
217
|
|
|
|
|
|
|
} |
218
|
|
|
|
|
|
|
|
219
|
96
|
|
|
|
|
192
|
unshift @{$self->{'token_buffer'}}, @_; |
|
96
|
|
|
|
|
288
|
|
220
|
|
|
|
|
|
|
DEBUG > 1 and print STDERR "Token buffer now has ", |
221
|
96
|
|
|
|
|
137
|
scalar(@{$self->{'token_buffer'}}), " items in it.\n"; |
222
|
96
|
|
|
|
|
203
|
return; |
223
|
|
|
|
|
|
|
} |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
# $self->{'source_filename'} = $source; |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
sub set_source { |
230
|
90
|
|
|
90
|
1
|
1003015
|
my $self = shift @_; |
231
|
90
|
50
|
|
|
|
241
|
return $self->{'source_fh'} unless @_; |
232
|
|
|
|
|
|
|
Carp::croak("Cannot assign new source to pull parser; create a new instance, instead") |
233
|
90
|
100
|
66
|
|
|
694
|
if $self->{'source_fh'} || $self->{'source_scalar_ref'} || $self->{'source_arrayref'}; |
|
|
|
66
|
|
|
|
|
234
|
89
|
|
|
|
|
140
|
my $handle; |
235
|
89
|
50
|
|
|
|
479
|
if(!defined $_[0]) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
236
|
0
|
|
|
|
|
0
|
Carp::croak("Can't use empty-string as a source for set_source"); |
237
|
|
|
|
|
|
|
} elsif(ref(\( $_[0] )) eq 'GLOB') { |
238
|
1
|
|
|
|
|
7
|
$self->{'source_filename'} = '' . ($handle = $_[0]); |
239
|
1
|
|
|
|
|
3
|
DEBUG and print STDERR "$self 's source is glob $_[0]\n"; |
240
|
|
|
|
|
|
|
# and fall thru |
241
|
|
|
|
|
|
|
} elsif(ref( $_[0] ) eq 'SCALAR') { |
242
|
70
|
|
|
|
|
130
|
$self->{'source_scalar_ref'} = $_[0]; |
243
|
70
|
|
|
|
|
103
|
DEBUG and print STDERR "$self 's source is scalar ref $_[0]\n"; |
244
|
70
|
|
|
|
|
146
|
return; |
245
|
|
|
|
|
|
|
} elsif(ref( $_[0] ) eq 'ARRAY') { |
246
|
2
|
|
|
|
|
4
|
$self->{'source_arrayref'} = $_[0]; |
247
|
2
|
|
|
|
|
4
|
DEBUG and print STDERR "$self 's source is array ref $_[0]\n"; |
248
|
2
|
|
|
|
|
4
|
return; |
249
|
|
|
|
|
|
|
} elsif(ref $_[0]) { |
250
|
2
|
|
|
|
|
11
|
$self->{'source_filename'} = '' . ($handle = $_[0]); |
251
|
2
|
|
|
|
|
7
|
DEBUG and print STDERR "$self 's source is fh-obj $_[0]\n"; |
252
|
|
|
|
|
|
|
} elsif(!length $_[0]) { |
253
|
0
|
|
|
|
|
0
|
Carp::croak("Can't use empty-string as a source for set_source"); |
254
|
|
|
|
|
|
|
} else { # It's a filename! |
255
|
14
|
|
|
|
|
26
|
DEBUG and print STDERR "$self 's source is filename $_[0]\n"; |
256
|
|
|
|
|
|
|
{ |
257
|
14
|
|
|
|
|
27
|
local *PODSOURCE; |
|
14
|
|
|
|
|
49
|
|
258
|
14
|
50
|
|
|
|
722
|
open(PODSOURCE, "<$_[0]") || Carp::croak "Can't open $_[0]: $!"; |
259
|
14
|
|
|
|
|
90
|
$handle = *PODSOURCE{IO}; |
260
|
|
|
|
|
|
|
} |
261
|
14
|
|
|
|
|
46
|
$self->{'source_filename'} = $_[0]; |
262
|
14
|
|
|
|
|
31
|
DEBUG and print STDERR " Its name is $_[0].\n"; |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
# TODO: file-discipline things here! |
265
|
|
|
|
|
|
|
} |
266
|
|
|
|
|
|
|
|
267
|
17
|
|
|
|
|
37
|
$self->{'source_fh'} = $handle; |
268
|
17
|
|
|
|
|
28
|
DEBUG and print STDERR " Its handle is $handle\n"; |
269
|
17
|
|
|
|
|
37
|
return 1; |
270
|
|
|
|
|
|
|
} |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
# ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ |
273
|
|
|
|
|
|
|
|
274
|
0
|
|
|
0
|
0
|
0
|
sub get_title_short { shift->get_short_title(@_) } # alias |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
sub get_short_title { |
277
|
21
|
|
|
21
|
1
|
68
|
my $title = shift->get_title(@_); |
278
|
21
|
100
|
|
|
|
123
|
$title = $1 if $title =~ m/^(\S{1,60})\s+--?\s+./s; |
279
|
|
|
|
|
|
|
# turn "Foo::Bar -- bars for your foo" into "Foo::Bar" |
280
|
21
|
|
|
|
|
62
|
return $title; |
281
|
|
|
|
|
|
|
} |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
sub get_title { shift->_get_titled_section( |
284
|
39
|
|
|
39
|
1
|
732
|
'NAME', max_token => 50, desperate => 1, @_) |
285
|
|
|
|
|
|
|
} |
286
|
|
|
|
|
|
|
sub get_version { shift->_get_titled_section( |
287
|
3
|
|
|
3
|
1
|
22
|
'VERSION', |
288
|
|
|
|
|
|
|
max_token => 400, |
289
|
|
|
|
|
|
|
accept_verbatim => 1, |
290
|
|
|
|
|
|
|
max_content_length => 3_000, |
291
|
|
|
|
|
|
|
@_, |
292
|
|
|
|
|
|
|
); |
293
|
|
|
|
|
|
|
} |
294
|
|
|
|
|
|
|
sub get_description { shift->_get_titled_section( |
295
|
7
|
|
|
7
|
1
|
617
|
'DESCRIPTION', |
296
|
|
|
|
|
|
|
max_token => 400, |
297
|
|
|
|
|
|
|
max_content_length => 3_000, |
298
|
|
|
|
|
|
|
@_, |
299
|
|
|
|
|
|
|
) } |
300
|
|
|
|
|
|
|
|
301
|
0
|
|
|
0
|
0
|
0
|
sub get_authors { shift->get_author(@_) } # a harmless alias |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
sub get_author { |
304
|
2
|
|
|
2
|
1
|
647
|
my $this = shift; |
305
|
|
|
|
|
|
|
# Max_token is so high because these are |
306
|
|
|
|
|
|
|
# typically at the end of the document: |
307
|
2
|
100
|
|
|
|
9
|
$this->_get_titled_section('AUTHOR' , max_token => 10_000, @_) || |
308
|
|
|
|
|
|
|
$this->_get_titled_section('AUTHORS', max_token => 10_000, @_); |
309
|
|
|
|
|
|
|
} |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
#-------------------------------------------------------------------------- |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
sub _get_titled_section { |
314
|
|
|
|
|
|
|
# Based on a get_title originally contributed by Graham Barr |
315
|
52
|
|
|
52
|
|
180
|
my($self, $titlename, %options) = (@_); |
316
|
|
|
|
|
|
|
|
317
|
52
|
|
|
|
|
111
|
my $max_token = delete $options{'max_token'}; |
318
|
52
|
|
|
|
|
90
|
my $desperate_for_title = delete $options{'desperate'}; |
319
|
52
|
|
|
|
|
85
|
my $accept_verbatim = delete $options{'accept_verbatim'}; |
320
|
52
|
|
|
|
|
92
|
my $max_content_length = delete $options{'max_content_length'}; |
321
|
52
|
|
|
|
|
86
|
my $nocase = delete $options{'nocase'}; |
322
|
52
|
100
|
|
|
|
152
|
$max_content_length = 120 unless defined $max_content_length; |
323
|
|
|
|
|
|
|
|
324
|
52
|
0
|
|
|
|
129
|
Carp::croak( "Unknown " . ((1 == keys %options) ? "option: " : "options: ") |
|
|
50
|
|
|
|
|
|
325
|
|
|
|
|
|
|
. join " ", map "[$_]", sort keys %options |
326
|
|
|
|
|
|
|
) |
327
|
|
|
|
|
|
|
if keys %options; |
328
|
|
|
|
|
|
|
|
329
|
52
|
|
|
|
|
71
|
my %content_containers; |
330
|
52
|
|
|
|
|
135
|
$content_containers{'Para'} = 1; |
331
|
52
|
100
|
|
|
|
129
|
if($accept_verbatim) { |
332
|
3
|
|
|
|
|
12
|
$content_containers{'Verbatim'} = 1; |
333
|
3
|
|
|
|
|
9
|
$content_containers{'VerbatimFormatted'} = 1; |
334
|
|
|
|
|
|
|
} |
335
|
|
|
|
|
|
|
|
336
|
52
|
|
|
|
|
76
|
my $token_count = 0; |
337
|
52
|
|
|
|
|
93
|
my $title; |
338
|
|
|
|
|
|
|
my @to_unget; |
339
|
52
|
|
|
|
|
74
|
my $state = 0; |
340
|
52
|
|
|
|
|
77
|
my $depth = 0; |
341
|
|
|
|
|
|
|
|
342
|
52
|
50
|
33
|
|
|
402
|
Carp::croak "What kind of titlename is \"$titlename\"?!" unless |
343
|
|
|
|
|
|
|
defined $titlename and $titlename =~ m/^[A-Z ]{1,60}$/s; #sanity |
344
|
52
|
|
|
|
|
129
|
my $titlename_re = quotemeta($titlename); |
345
|
|
|
|
|
|
|
|
346
|
52
|
|
|
|
|
132
|
my $head1_text_content; |
347
|
|
|
|
|
|
|
my $para_text_content; |
348
|
52
|
|
|
|
|
0
|
my $skipX; |
349
|
|
|
|
|
|
|
|
350
|
52
|
|
50
|
|
|
252
|
while( |
|
|
|
66
|
|
|
|
|
351
|
|
|
|
|
|
|
++$token_count <= ($max_token || 1_000_000) |
352
|
|
|
|
|
|
|
and defined(my $token = $self->get_token) |
353
|
|
|
|
|
|
|
) { |
354
|
563
|
|
|
|
|
861
|
push @to_unget, $token; |
355
|
|
|
|
|
|
|
|
356
|
563
|
100
|
|
|
|
1091
|
if ($state == 0) { # seeking =head1 |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
357
|
276
|
100
|
100
|
|
|
691
|
if( $token->is_start and $token->tagname eq 'head1' ) { |
358
|
62
|
|
|
|
|
94
|
DEBUG and print STDERR " Found head1. Seeking content...\n"; |
359
|
62
|
|
|
|
|
80
|
++$state; |
360
|
62
|
|
|
|
|
217
|
$head1_text_content = ''; |
361
|
|
|
|
|
|
|
} |
362
|
|
|
|
|
|
|
} |
363
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
elsif($state == 1) { # accumulating text until end of head1 |
365
|
131
|
100
|
100
|
|
|
380
|
if( $token->is_text ) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
366
|
65
|
100
|
|
|
|
136
|
unless ($skipX) { |
367
|
64
|
|
|
|
|
87
|
DEBUG and print STDERR " Adding \"", $token->text, "\" to head1-content.\n"; |
368
|
64
|
|
|
|
|
190
|
$head1_text_content .= $token->text; |
369
|
|
|
|
|
|
|
} |
370
|
|
|
|
|
|
|
} elsif( $token->is_tagname('X') ) { |
371
|
|
|
|
|
|
|
# We're going to want to ignore X<> stuff. |
372
|
2
|
|
|
|
|
6
|
$skipX = $token->is_start; |
373
|
2
|
|
|
|
|
7
|
DEBUG and print STDERR +($skipX ? 'Start' : 'End'), 'ing ignoring of X<> tag'; |
374
|
|
|
|
|
|
|
} elsif( $token->is_end and $token->tagname eq 'head1' ) { |
375
|
62
|
|
|
|
|
87
|
DEBUG and print STDERR " Found end of head1. Considering content...\n"; |
376
|
62
|
100
|
|
|
|
147
|
$head1_text_content = uc $head1_text_content if $nocase; |
377
|
62
|
50
|
100
|
|
|
778
|
if($head1_text_content eq $titlename |
|
|
100
|
66
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
66
|
|
|
|
|
378
|
|
|
|
|
|
|
or $head1_text_content =~ m/\($titlename_re\)/s |
379
|
|
|
|
|
|
|
# We accept "=head1 Nomen Modularis (NAME)" for sake of i18n |
380
|
|
|
|
|
|
|
) { |
381
|
36
|
|
|
|
|
59
|
DEBUG and print STDERR " Yup, it was $titlename. Seeking next para-content...\n"; |
382
|
36
|
|
|
|
|
130
|
++$state; |
383
|
|
|
|
|
|
|
} elsif( |
384
|
|
|
|
|
|
|
$desperate_for_title |
385
|
|
|
|
|
|
|
# if we're so desperate we'll take the first |
386
|
|
|
|
|
|
|
# =head1's content as a title |
387
|
|
|
|
|
|
|
and $head1_text_content =~ m/\S/ |
388
|
|
|
|
|
|
|
and $head1_text_content !~ m/^[ A-Z]+$/s |
389
|
|
|
|
|
|
|
and $head1_text_content !~ |
390
|
|
|
|
|
|
|
m/\((?: |
391
|
|
|
|
|
|
|
NAME | TITLE | VERSION | AUTHORS? | DESCRIPTION | SYNOPSIS |
392
|
|
|
|
|
|
|
| COPYRIGHT | LICENSE | NOTES? | FUNCTIONS? | METHODS? |
393
|
|
|
|
|
|
|
| CAVEATS? | BUGS? | SEE\ ALSO | SWITCHES | ENVIRONMENT |
394
|
|
|
|
|
|
|
)\)/sx |
395
|
|
|
|
|
|
|
# avoid accepting things like =head1 Thingy Thongy (DESCRIPTION) |
396
|
|
|
|
|
|
|
and ($max_content_length |
397
|
|
|
|
|
|
|
? (length($head1_text_content) <= $max_content_length) # sanity |
398
|
|
|
|
|
|
|
: 1) |
399
|
|
|
|
|
|
|
) { |
400
|
|
|
|
|
|
|
# Looks good; trim it |
401
|
6
|
|
|
|
|
27
|
($title = $head1_text_content) =~ s/\s+$//; |
402
|
6
|
|
|
|
|
10
|
DEBUG and print STDERR " It looks titular: \"$title\".\n\n Using that.\n"; |
403
|
6
|
|
|
|
|
18
|
last; |
404
|
|
|
|
|
|
|
} else { |
405
|
20
|
|
|
|
|
35
|
--$state; |
406
|
20
|
|
|
|
|
79
|
DEBUG and print STDERR " Didn't look titular ($head1_text_content).\n", |
407
|
|
|
|
|
|
|
"\n Dropping back to seeking-head1-content mode...\n"; |
408
|
|
|
|
|
|
|
} |
409
|
|
|
|
|
|
|
} |
410
|
|
|
|
|
|
|
} |
411
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
elsif($state == 2) { |
413
|
|
|
|
|
|
|
# seeking start of para (which must immediately follow) |
414
|
36
|
50
|
33
|
|
|
102
|
if($token->is_start and $content_containers{ $token->tagname }) { |
415
|
36
|
|
|
|
|
54
|
DEBUG and print STDERR " Found start of Para. Accumulating content...\n"; |
416
|
36
|
|
|
|
|
66
|
$para_text_content = ''; |
417
|
36
|
|
|
|
|
134
|
++$state; |
418
|
|
|
|
|
|
|
} else { |
419
|
0
|
|
|
|
|
0
|
DEBUG and print |
420
|
|
|
|
|
|
|
" Didn't see an immediately subsequent start-Para. Reseeking H1\n"; |
421
|
0
|
|
|
|
|
0
|
$state = 0; |
422
|
|
|
|
|
|
|
} |
423
|
|
|
|
|
|
|
} |
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
elsif($state == 3) { |
426
|
|
|
|
|
|
|
# accumulating text until end of Para |
427
|
120
|
100
|
100
|
|
|
251
|
if( $token->is_text ) { |
|
|
100
|
|
|
|
|
|
428
|
60
|
|
|
|
|
74
|
DEBUG and print STDERR " Adding \"", $token->text, "\" to para-content.\n"; |
429
|
60
|
|
|
|
|
147
|
$para_text_content .= $token->text; |
430
|
|
|
|
|
|
|
# and keep looking |
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
} elsif( $token->is_end and $content_containers{ $token->tagname } ) { |
433
|
36
|
|
|
|
|
56
|
DEBUG and print STDERR " Found end of Para. Considering content: ", |
434
|
|
|
|
|
|
|
$para_text_content, "\n"; |
435
|
|
|
|
|
|
|
|
436
|
36
|
50
|
33
|
|
|
256
|
if( $para_text_content =~ m/\S/ |
|
|
50
|
|
|
|
|
|
437
|
|
|
|
|
|
|
and ($max_content_length |
438
|
|
|
|
|
|
|
? (length($para_text_content) <= $max_content_length) |
439
|
|
|
|
|
|
|
: 1) |
440
|
|
|
|
|
|
|
) { |
441
|
|
|
|
|
|
|
# Some minimal sanity constraints, I think. |
442
|
36
|
|
|
|
|
51
|
DEBUG and print STDERR " It looks contentworthy, I guess. Using it.\n"; |
443
|
36
|
|
|
|
|
64
|
$title = $para_text_content; |
444
|
36
|
|
|
|
|
77
|
last; |
445
|
|
|
|
|
|
|
} else { |
446
|
0
|
|
|
|
|
0
|
DEBUG and print STDERR " Doesn't look at all contentworthy!\n Giving up.\n"; |
447
|
0
|
|
|
|
|
0
|
undef $title; |
448
|
0
|
|
|
|
|
0
|
last; |
449
|
|
|
|
|
|
|
} |
450
|
|
|
|
|
|
|
} |
451
|
|
|
|
|
|
|
} |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
else { |
454
|
0
|
|
|
|
|
0
|
die "IMPOSSIBLE STATE $state!\n"; # should never happen |
455
|
|
|
|
|
|
|
} |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
} |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
# Put it all back! |
460
|
52
|
|
|
|
|
213
|
$self->unget_token(@to_unget); |
461
|
|
|
|
|
|
|
|
462
|
52
|
|
|
|
|
64
|
if(DEBUG) { |
463
|
|
|
|
|
|
|
if(defined $title) { print STDERR " Returning title <$title>\n" } |
464
|
|
|
|
|
|
|
else { print STDERR "Returning title <>\n" } |
465
|
|
|
|
|
|
|
} |
466
|
|
|
|
|
|
|
|
467
|
52
|
100
|
|
|
|
154
|
return '' unless defined $title; |
468
|
42
|
|
|
|
|
142
|
$title =~ s/^\s+//; |
469
|
42
|
|
|
|
|
330
|
return $title; |
470
|
|
|
|
|
|
|
} |
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
473
|
|
|
|
|
|
|
# |
474
|
|
|
|
|
|
|
# Methods that actually do work at parse-time: |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
sub _handle_element_start { |
477
|
346
|
|
|
346
|
|
558
|
my $self = shift; # leaving ($element_name, $attr_hash_r) |
478
|
346
|
|
|
|
|
455
|
DEBUG > 2 and print STDERR "++ $_[0] (", map("<$_> ", %{$_[1]}), ")\n"; |
479
|
|
|
|
|
|
|
|
480
|
346
|
|
|
|
|
1498
|
push @{ $self->{'token_buffer'} }, |
481
|
346
|
|
|
|
|
483
|
$self->{'start_token_class'}->new(@_); |
482
|
346
|
|
|
|
|
774
|
return; |
483
|
|
|
|
|
|
|
} |
484
|
|
|
|
|
|
|
|
485
|
|
|
|
|
|
|
sub _handle_text { |
486
|
243
|
|
|
243
|
|
346
|
my $self = shift; # leaving ($text) |
487
|
243
|
|
|
|
|
301
|
DEBUG > 2 and print STDERR "== $_[0]\n"; |
488
|
243
|
|
|
|
|
938
|
push @{ $self->{'token_buffer'} }, |
489
|
243
|
|
|
|
|
336
|
$self->{'text_token_class'}->new(@_); |
490
|
243
|
|
|
|
|
722
|
return; |
491
|
|
|
|
|
|
|
} |
492
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
sub _handle_element_end { |
494
|
332
|
|
|
332
|
|
485
|
my $self = shift; # leaving ($element_name); |
495
|
332
|
|
|
|
|
430
|
DEBUG > 2 and print STDERR "-- $_[0]\n"; |
496
|
332
|
|
|
|
|
1117
|
push @{ $self->{'token_buffer'} }, |
497
|
332
|
|
|
|
|
420
|
$self->{'end_token_class'}->new(@_); |
498
|
332
|
|
|
|
|
706
|
return; |
499
|
|
|
|
|
|
|
} |
500
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
1; |
504
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
__END__ |