line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Pod::Simple::BlackBox; |
2
|
|
|
|
|
|
|
# |
3
|
|
|
|
|
|
|
# "What's in the box?" "Pain." |
4
|
|
|
|
|
|
|
# |
5
|
|
|
|
|
|
|
########################################################################### |
6
|
|
|
|
|
|
|
# |
7
|
|
|
|
|
|
|
# This is where all the scary things happen: parsing lines into |
8
|
|
|
|
|
|
|
# paragraphs; and then into directives, verbatims, and then also |
9
|
|
|
|
|
|
|
# turning formatting sequences into treelets. |
10
|
|
|
|
|
|
|
# |
11
|
|
|
|
|
|
|
# Are you really sure you want to read this code? |
12
|
|
|
|
|
|
|
# |
13
|
|
|
|
|
|
|
#----------------------------------------------------------------------------- |
14
|
|
|
|
|
|
|
# |
15
|
|
|
|
|
|
|
# The basic work of this module Pod::Simple::BlackBox is doing the dirty work |
16
|
|
|
|
|
|
|
# of parsing Pod into treelets (generally one per non-verbatim paragraph), and |
17
|
|
|
|
|
|
|
# to call the proper callbacks on the treelets. |
18
|
|
|
|
|
|
|
# |
19
|
|
|
|
|
|
|
# Every node in a treelet is a ['name', {attrhash}, ...children...] |
20
|
|
|
|
|
|
|
|
21
|
68
|
|
|
68
|
|
2997
|
use integer; # vroom! |
|
68
|
|
|
|
|
278
|
|
|
68
|
|
|
|
|
429
|
|
22
|
68
|
|
|
68
|
|
2028
|
use strict; |
|
68
|
|
|
|
|
136
|
|
|
68
|
|
|
|
|
1243
|
|
23
|
68
|
|
|
68
|
|
300
|
use warnings; |
|
68
|
|
|
|
|
122
|
|
|
68
|
|
|
|
|
1444
|
|
24
|
68
|
|
|
68
|
|
331
|
use Carp (); |
|
68
|
|
|
|
|
138
|
|
|
68
|
|
|
|
|
12931
|
|
25
|
|
|
|
|
|
|
our $VERSION = '3.45'; |
26
|
|
|
|
|
|
|
#use constant DEBUG => 7; |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
sub my_qr ($$) { |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
# $1 is a pattern to compile and return. Older perls compile any |
31
|
|
|
|
|
|
|
# syntactically valid property, even if it isn't legal. To cope with |
32
|
|
|
|
|
|
|
# this, return an empty string unless the compiled pattern also |
33
|
|
|
|
|
|
|
# successfully matches $2, which the caller furnishes. |
34
|
|
|
|
|
|
|
|
35
|
624
|
|
|
624
|
0
|
1733
|
my ($input_re, $should_match) = @_; |
36
|
|
|
|
|
|
|
# XXX could have a third parameter $shouldnt_match for extra safety |
37
|
|
|
|
|
|
|
|
38
|
624
|
50
|
|
|
|
4013
|
my $use_utf8 = ($] le 5.006002) ? 'use utf8;' : ""; |
39
|
|
|
|
|
|
|
|
40
|
68
|
|
|
68
|
|
569
|
my $re = eval "no warnings; $use_utf8 qr/$input_re/"; |
|
68
|
|
|
68
|
|
158
|
|
|
68
|
|
|
68
|
|
3001
|
|
|
68
|
|
|
68
|
|
447
|
|
|
68
|
|
|
68
|
|
149
|
|
|
68
|
|
|
68
|
|
4596
|
|
|
68
|
|
|
68
|
|
45390
|
|
|
68
|
|
|
68
|
|
2485
|
|
|
68
|
|
|
68
|
|
1186
|
|
|
68
|
|
|
68
|
|
1732
|
|
|
68
|
|
|
3
|
|
202
|
|
|
68
|
|
|
3
|
|
2400
|
|
|
68
|
|
|
3
|
|
418
|
|
|
68
|
|
|
3
|
|
140
|
|
|
68
|
|
|
|
|
2319
|
|
|
68
|
|
|
|
|
445
|
|
|
68
|
|
|
|
|
139
|
|
|
68
|
|
|
|
|
2413
|
|
|
68
|
|
|
|
|
417
|
|
|
68
|
|
|
|
|
138
|
|
|
68
|
|
|
|
|
2548
|
|
|
68
|
|
|
|
|
414
|
|
|
68
|
|
|
|
|
136
|
|
|
68
|
|
|
|
|
2315
|
|
|
68
|
|
|
|
|
401
|
|
|
68
|
|
|
|
|
134
|
|
|
68
|
|
|
|
|
2357
|
|
|
68
|
|
|
|
|
468
|
|
|
68
|
|
|
|
|
139
|
|
|
68
|
|
|
|
|
3826
|
|
|
3
|
|
|
|
|
40
|
|
|
3
|
|
|
|
|
8
|
|
|
3
|
|
|
|
|
147
|
|
|
3
|
|
|
|
|
17
|
|
|
3
|
|
|
|
|
7
|
|
|
3
|
|
|
|
|
126
|
|
|
3
|
|
|
|
|
17
|
|
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
106
|
|
|
3
|
|
|
|
|
15
|
|
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
100
|
|
|
624
|
|
|
|
|
36824
|
|
41
|
|
|
|
|
|
|
#print STDERR __LINE__, ": $input_re: $@\n" if $@; |
42
|
624
|
50
|
|
|
|
2444
|
return "" if $@; |
43
|
|
|
|
|
|
|
|
44
|
68
|
|
|
68
|
|
905
|
my $matches = eval "no warnings; $use_utf8 '$should_match' =~ /$re/"; |
|
68
|
|
|
68
|
|
151
|
|
|
68
|
|
|
68
|
|
2934
|
|
|
68
|
|
|
68
|
|
545
|
|
|
68
|
|
|
68
|
|
146
|
|
|
68
|
|
|
68
|
|
3363
|
|
|
68
|
|
|
68
|
|
464
|
|
|
68
|
|
|
68
|
|
157
|
|
|
68
|
|
|
68
|
|
2599
|
|
|
68
|
|
|
3
|
|
458
|
|
|
68
|
|
|
3
|
|
134
|
|
|
68
|
|
|
3
|
|
2313
|
|
|
68
|
|
|
3
|
|
435
|
|
|
68
|
|
|
|
|
133
|
|
|
68
|
|
|
|
|
2547
|
|
|
68
|
|
|
|
|
425
|
|
|
68
|
|
|
|
|
150
|
|
|
68
|
|
|
|
|
2383
|
|
|
68
|
|
|
|
|
422
|
|
|
68
|
|
|
|
|
129
|
|
|
68
|
|
|
|
|
2574
|
|
|
68
|
|
|
|
|
419
|
|
|
68
|
|
|
|
|
132
|
|
|
68
|
|
|
|
|
2399
|
|
|
68
|
|
|
|
|
414
|
|
|
68
|
|
|
|
|
138
|
|
|
68
|
|
|
|
|
2988
|
|
|
3
|
|
|
|
|
19
|
|
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
124
|
|
|
3
|
|
|
|
|
22
|
|
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
127
|
|
|
3
|
|
|
|
|
25
|
|
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
104
|
|
|
3
|
|
|
|
|
19
|
|
|
3
|
|
|
|
|
10
|
|
|
3
|
|
|
|
|
110
|
|
|
624
|
|
|
|
|
34876
|
|
45
|
|
|
|
|
|
|
#print STDERR __LINE__, ": $input_re: $@\n" if $@; |
46
|
624
|
50
|
|
|
|
2417
|
return "" if $@; |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
#print STDERR __LINE__, ": SUCCESS: $re\n" if $matches; |
49
|
624
|
50
|
|
|
|
2180
|
return $re if $matches; |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
#print STDERR __LINE__, ": $re: didn't match\n"; |
52
|
0
|
|
|
|
|
0
|
return ""; |
53
|
|
|
|
|
|
|
} |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
BEGIN { |
56
|
68
|
|
|
68
|
|
1933
|
require Pod::Simple; |
57
|
68
|
50
|
|
|
|
5946
|
*DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG |
58
|
|
|
|
|
|
|
} |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
# Matches a character iff the character will have a different meaning |
61
|
|
|
|
|
|
|
# if we choose CP1252 vs UTF-8 if there is no =encoding line. |
62
|
|
|
|
|
|
|
# This is broken for early Perls on non-ASCII platforms. |
63
|
|
|
|
|
|
|
my $non_ascii_re = my_qr('[[:^ascii:]]', "\xB6"); |
64
|
|
|
|
|
|
|
$non_ascii_re = qr/[\x80-\xFF]/ unless $non_ascii_re; |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
# Use patterns understandable by Perl 5.6, if possible |
67
|
68
|
|
|
68
|
|
508
|
my $cs_re = do { no warnings; my_qr('\p{IsCs}', "\x{D800}") }; |
|
68
|
|
|
|
|
190
|
|
|
68
|
|
|
|
|
68600
|
|
68
|
|
|
|
|
|
|
my $cn_re = my_qr('\p{IsCn}', "\x{09E4}"); # code point unlikely |
69
|
|
|
|
|
|
|
# to get assigned |
70
|
|
|
|
|
|
|
my $rare_blocks_re = my_qr('[\p{InIPAExtensions}\p{InSpacingModifierLetters}]', |
71
|
|
|
|
|
|
|
"\x{250}"); |
72
|
|
|
|
|
|
|
$rare_blocks_re = my_qr('[\x{0250}-\x{02FF}]', "\x{250}") unless $rare_blocks_re; |
73
|
|
|
|
|
|
|
|
74
|
68
|
|
|
68
|
|
428
|
my $script_run_re = eval 'no warnings "experimental::script_run"; |
|
68
|
|
|
|
|
129
|
|
|
68
|
|
|
|
|
16734
|
|
75
|
|
|
|
|
|
|
qr/(*script_run: ^ .* $ )/x'; |
76
|
|
|
|
|
|
|
my $latin_re = my_qr('[\p{IsLatin}\p{IsInherited}\p{IsCommon}]', "\x{100}"); |
77
|
|
|
|
|
|
|
unless ($latin_re) { |
78
|
|
|
|
|
|
|
# This was machine generated to be the ranges of the union of the above |
79
|
|
|
|
|
|
|
# three properties, with things that were undefined by Unicode 4.1 filling |
80
|
|
|
|
|
|
|
# gaps. That is the version in use when Perl advanced enough to |
81
|
|
|
|
|
|
|
# successfully compile and execute the above pattern. |
82
|
|
|
|
|
|
|
$latin_re = my_qr('[\x00-\x{02E9}\x{02EC}-\x{0374}\x{037E}\x{0385}\x{0387}\x{0485}\x{0486}\x{0589}\x{060C}\x{061B}\x{061F}\x{0640}\x{064B}-\x{0655}\x{0670}\x{06DD}\x{0951}-\x{0954}\x{0964}\x{0965}\x{0E3F}\x{10FB}\x{16EB}-\x{16ED}\x{1735}\x{1736}\x{1802}\x{1803}\x{1805}\x{1D00}-\x{1D25}\x{1D2C}-\x{1D5C}\x{1D62}-\x{1D65}\x{1D6B}-\x{1D77}\x{1D79}-\x{1DBE}\x{1DC0}-\x{1EF9}\x{2000}-\x{2125}\x{2127}-\x{27FF}\x{2900}-\x{2B13}\x{2E00}-\x{2E1D}\x{2FF0}-\x{3004}\x{3006}\x{3008}-\x{3020}\x{302A}-\x{302D}\x{3030}-\x{3037}\x{303C}-\x{303F}\x{3099}-\x{309C}\x{30A0}\x{30FB}\x{30FC}\x{3190}-\x{319F}\x{31C0}-\x{31CF}\x{3220}-\x{325F}\x{327F}-\x{32CF}\x{3358}-\x{33FF}\x{4DC0}-\x{4DFF}\x{A700}-\x{A716}\x{FB00}-\x{FB06}\x{FD3E}\x{FD3F}\x{FE00}-\x{FE6B}\x{FEFF}-\x{FF65}\x{FF70}\x{FF9E}\x{FF9F}\x{FFE0}-\x{FFFD}\x{10100}-\x{1013F}\x{1D000}-\x{1D1DD}\x{1D300}-\x{1D7FF}]', "\x{100}"); |
83
|
|
|
|
|
|
|
} |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
my $every_char_is_latin_re = my_qr("^(?:$latin_re)*\\z", "A"); |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
# Latin script code points not in the first release of Unicode |
88
|
|
|
|
|
|
|
my $later_latin_re = my_qr('[^\P{IsLatin}\p{IsAge=1.1}]', "\x{1F6}"); |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
# If this perl doesn't have the Deprecated property, there's only one code |
91
|
|
|
|
|
|
|
# point in it that we need be concerned with. |
92
|
|
|
|
|
|
|
my $deprecated_re = my_qr('\p{IsDeprecated}', "\x{149}"); |
93
|
|
|
|
|
|
|
$deprecated_re = qr/\x{149}/ unless $deprecated_re; |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
my $utf8_bom; |
96
|
|
|
|
|
|
|
if (($] ge 5.007_003)) { |
97
|
|
|
|
|
|
|
$utf8_bom = "\x{FEFF}"; |
98
|
|
|
|
|
|
|
utf8::encode($utf8_bom); |
99
|
|
|
|
|
|
|
} else { |
100
|
|
|
|
|
|
|
$utf8_bom = "\xEF\xBB\xBF"; # No EBCDIC BOM detection for early Perls. |
101
|
|
|
|
|
|
|
} |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
# This is used so that the 'content_seen' method doesn't return true on a |
104
|
|
|
|
|
|
|
# file that just happens to have a line that matches /^=[a-zA-z]/. Only if |
105
|
|
|
|
|
|
|
# there is a valid =foo line will we return that content was seen. |
106
|
|
|
|
|
|
|
my $seen_legal_directive = 0; |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
109
|
|
|
|
|
|
|
|
110
|
0
|
|
|
0
|
0
|
0
|
sub parse_line { shift->parse_lines(@_) } # alias |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
# - - - Turn back now! Run away! - - - |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
sub parse_lines { # Usage: $parser->parse_lines(@lines) |
115
|
|
|
|
|
|
|
# an undef means end-of-stream |
116
|
9477
|
|
|
9477
|
0
|
14576
|
my $self = shift; |
117
|
|
|
|
|
|
|
|
118
|
9477
|
|
|
|
|
14416
|
my $code_handler = $self->{'code_handler'}; |
119
|
9477
|
|
|
|
|
13969
|
my $cut_handler = $self->{'cut_handler'}; |
120
|
9477
|
|
|
|
|
12926
|
my $wl_handler = $self->{'whiteline_handler'}; |
121
|
9477
|
|
100
|
|
|
19384
|
$self->{'line_count'} ||= 0; |
122
|
|
|
|
|
|
|
|
123
|
9477
|
|
|
|
|
12029
|
my $scratch; |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
DEBUG > 4 and |
126
|
9477
|
|
|
|
|
11683
|
print STDERR "# Parsing starting at line ", $self->{'line_count'}, ".\n"; |
127
|
|
|
|
|
|
|
|
128
|
9477
|
|
|
|
|
11440
|
DEBUG > 5 and |
129
|
|
|
|
|
|
|
print STDERR "# About to parse lines: ", |
130
|
|
|
|
|
|
|
join(' ', map defined($_) ? "[$_]" : "EOF", @_), "\n"; |
131
|
|
|
|
|
|
|
|
132
|
9477
|
|
100
|
|
|
20197
|
my $paras = ($self->{'paras'} ||= []); |
133
|
|
|
|
|
|
|
# paragraph buffer. Because we need to defer processing of =over |
134
|
|
|
|
|
|
|
# directives and verbatim paragraphs. We call _ponder_paragraph_buffer |
135
|
|
|
|
|
|
|
# to process this. |
136
|
|
|
|
|
|
|
|
137
|
9477
|
|
100
|
|
|
19034
|
$self->{'pod_para_count'} ||= 0; |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
# An attempt to match the pod portions of a line. This is not fool proof, |
140
|
|
|
|
|
|
|
# but is good enough to serve as part of the heuristic for guessing the pod |
141
|
|
|
|
|
|
|
# encoding if not specified. |
142
|
9477
|
|
|
|
|
12786
|
my $codes = join '', grep { / ^ [A-Za-z] $/x } sort keys %{$self->{accept_codes}}; |
|
90736
|
|
|
|
|
187603
|
|
|
9477
|
|
|
|
|
47050
|
|
143
|
9477
|
|
|
|
|
46599
|
my $pod_chars_re = qr/ ^ = [A-Za-z]+ | [\Q$codes\E] < /x; |
144
|
|
|
|
|
|
|
|
145
|
9477
|
|
|
|
|
15128
|
my $line; |
146
|
9477
|
|
|
|
|
16761
|
foreach my $source_line (@_) { |
147
|
19029
|
50
|
|
|
|
39996
|
if( $self->{'source_dead'} ) { |
148
|
0
|
|
|
|
|
0
|
DEBUG > 4 and print STDERR "# Source is dead.\n"; |
149
|
0
|
|
|
|
|
0
|
last; |
150
|
|
|
|
|
|
|
} |
151
|
|
|
|
|
|
|
|
152
|
19029
|
100
|
|
|
|
37396
|
unless( defined $source_line ) { |
153
|
890
|
|
|
|
|
1300
|
DEBUG > 4 and print STDERR "# Undef-line seen.\n"; |
154
|
|
|
|
|
|
|
|
155
|
890
|
|
|
|
|
2896
|
push @$paras, ['~end', {'start_line' => $self->{'line_count'}}]; |
156
|
890
|
|
|
|
|
2511
|
push @$paras, $paras->[-1], $paras->[-1]; |
157
|
|
|
|
|
|
|
# So that it definitely fills the buffer. |
158
|
890
|
|
|
|
|
1697
|
$self->{'source_dead'} = 1; |
159
|
890
|
|
|
|
|
2376
|
$self->_ponder_paragraph_buffer; |
160
|
890
|
|
|
|
|
1717
|
next; |
161
|
|
|
|
|
|
|
} |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
|
164
|
18139
|
100
|
|
|
|
33846
|
if( $self->{'line_count'}++ ) { |
165
|
17253
|
|
|
|
|
37011
|
($line = $source_line) =~ tr/\n\r//d; |
166
|
|
|
|
|
|
|
# If we don't have two vars, we'll end up with that there |
167
|
|
|
|
|
|
|
# tr/// modding the (potentially read-only) original source line! |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
} else { |
170
|
886
|
|
|
|
|
1220
|
DEBUG > 2 and print STDERR "First line: [$source_line]\n"; |
171
|
|
|
|
|
|
|
|
172
|
886
|
50
|
|
|
|
5694
|
if( ($line = $source_line) =~ s/^$utf8_bom//s ) { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
173
|
0
|
|
|
|
|
0
|
DEBUG and print STDERR "UTF-8 BOM seen. Faking a '=encoding utf8'.\n"; |
174
|
0
|
|
|
|
|
0
|
$self->_handle_encoding_line( "=encoding utf8" ); |
175
|
0
|
|
|
|
|
0
|
delete $self->{'_processed_encoding'}; |
176
|
0
|
|
|
|
|
0
|
$line =~ tr/\n\r//d; |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
} elsif( $line =~ s/^\xFE\xFF//s ) { |
179
|
0
|
|
|
|
|
0
|
DEBUG and print STDERR "Big-endian UTF-16 BOM seen. Aborting parsing.\n"; |
180
|
|
|
|
|
|
|
$self->scream( |
181
|
0
|
|
|
|
|
0
|
$self->{'line_count'}, |
182
|
|
|
|
|
|
|
"UTF16-BE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet." |
183
|
|
|
|
|
|
|
); |
184
|
0
|
|
|
|
|
0
|
splice @_; |
185
|
0
|
|
|
|
|
0
|
push @_, undef; |
186
|
0
|
|
|
|
|
0
|
next; |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
# TODO: implement somehow? |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
} elsif( $line =~ s/^\xFF\xFE//s ) { |
191
|
0
|
|
|
|
|
0
|
DEBUG and print STDERR "Little-endian UTF-16 BOM seen. Aborting parsing.\n"; |
192
|
|
|
|
|
|
|
$self->scream( |
193
|
0
|
|
|
|
|
0
|
$self->{'line_count'}, |
194
|
|
|
|
|
|
|
"UTF16-LE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet." |
195
|
|
|
|
|
|
|
); |
196
|
0
|
|
|
|
|
0
|
splice @_; |
197
|
0
|
|
|
|
|
0
|
push @_, undef; |
198
|
0
|
|
|
|
|
0
|
next; |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
# TODO: implement somehow? |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
} else { |
203
|
886
|
|
|
|
|
1723
|
DEBUG > 2 and print STDERR "First line is BOM-less.\n"; |
204
|
886
|
|
|
|
|
2465
|
($line = $source_line) =~ tr/\n\r//d; |
205
|
|
|
|
|
|
|
} |
206
|
|
|
|
|
|
|
} |
207
|
|
|
|
|
|
|
|
208
|
18139
|
100
|
100
|
|
|
124359
|
if(!$self->{'parse_characters'} && !$self->{'encoding'} |
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
209
|
|
|
|
|
|
|
&& ($self->{'in_pod'} || $line =~ /^=/s) |
210
|
|
|
|
|
|
|
&& $line =~ /$non_ascii_re/ |
211
|
|
|
|
|
|
|
) { |
212
|
|
|
|
|
|
|
|
213
|
21
|
|
|
|
|
40
|
my $encoding; |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
# No =encoding line, and we are at the first pod line in the input that |
216
|
|
|
|
|
|
|
# contains a non-ascii byte, that is, one whose meaning varies depending |
217
|
|
|
|
|
|
|
# on whether the file is encoded in UTF-8 or CP1252, which are the two |
218
|
|
|
|
|
|
|
# possibilities permitted by the pod spec. (ASCII is assumed if the |
219
|
|
|
|
|
|
|
# file only contains ASCII bytes.) In order to process this line, we |
220
|
|
|
|
|
|
|
# need to figure out what encoding we will use for the file. |
221
|
|
|
|
|
|
|
# |
222
|
|
|
|
|
|
|
# Strictly speaking ISO 8859-1 (Latin 1) refers to the code points |
223
|
|
|
|
|
|
|
# 160-255, but it is used here, as it often colloquially is, to refer to |
224
|
|
|
|
|
|
|
# the complete set of code points 0-255, including ASCII (0-127), the C1 |
225
|
|
|
|
|
|
|
# controls (128-159), and strict Latin 1 (160-255). |
226
|
|
|
|
|
|
|
# |
227
|
|
|
|
|
|
|
# CP1252 is effectively a superset of Latin 1, because it differs only |
228
|
|
|
|
|
|
|
# from colloquial 8859-1 in the C1 controls, which are very unlikely to |
229
|
|
|
|
|
|
|
# actually be present in 8859-1 files, so can be used for other purposes |
230
|
|
|
|
|
|
|
# without conflict. CP 1252 uses most of them for graphic characters. |
231
|
|
|
|
|
|
|
# |
232
|
|
|
|
|
|
|
# Note that all ASCII-range bytes represent their corresponding code |
233
|
|
|
|
|
|
|
# points in both CP1252 and UTF-8. In ASCII platform UTF-8, all other |
234
|
|
|
|
|
|
|
# code points require multiple (non-ASCII) bytes to represent. (A |
235
|
|
|
|
|
|
|
# separate paragraph for EBCDIC is below.) The multi-byte |
236
|
|
|
|
|
|
|
# representation is quite structured. If we find an isolated byte that |
237
|
|
|
|
|
|
|
# would require multiple bytes to represent in UTF-8, we know that the |
238
|
|
|
|
|
|
|
# encoding is not UTF-8. If we find a sequence of bytes that violates |
239
|
|
|
|
|
|
|
# the UTF-8 structure, we also can presume the encoding isn't UTF-8, and |
240
|
|
|
|
|
|
|
# hence must be 1252. |
241
|
|
|
|
|
|
|
# |
242
|
|
|
|
|
|
|
# But there are ambiguous cases where we could guess wrong. If so, the |
243
|
|
|
|
|
|
|
# user will end up having to supply an =encoding line. We use all |
244
|
|
|
|
|
|
|
# readily available information to improve our chances of guessing |
245
|
|
|
|
|
|
|
# right. The odds of something not being UTF-8, but still passing a |
246
|
|
|
|
|
|
|
# UTF-8 validity test go down very rapidly with increasing length of the |
247
|
|
|
|
|
|
|
# sequence. Therefore we look at all non-ascii sequences on the line. |
248
|
|
|
|
|
|
|
# If any of the sequences can't be UTF-8, we quit there and choose |
249
|
|
|
|
|
|
|
# CP1252. If all could be UTF-8, we see if any of the code points |
250
|
|
|
|
|
|
|
# represented are unlikely to be in pod. If so, we guess CP1252. If |
251
|
|
|
|
|
|
|
# not, we check if the line is all in the same script; if not guess |
252
|
|
|
|
|
|
|
# CP1252; otherwise UTF-8. For perls that don't have convenient script |
253
|
|
|
|
|
|
|
# run testing, see if there is both Latin and non-Latin. If so, CP1252, |
254
|
|
|
|
|
|
|
# otherwise UTF-8. |
255
|
|
|
|
|
|
|
# |
256
|
|
|
|
|
|
|
# On EBCDIC platforms, the situation is somewhat different. In |
257
|
|
|
|
|
|
|
# UTF-EBCDIC, not only do ASCII-range bytes represent their code points, |
258
|
|
|
|
|
|
|
# but so do the bytes that are for the C1 controls. Recall that these |
259
|
|
|
|
|
|
|
# correspond to the unused portion of 8859-1 that 1252 mostly takes |
260
|
|
|
|
|
|
|
# over. That means that there are fewer code points that are |
261
|
|
|
|
|
|
|
# represented by multi-bytes. But, note that the these controls are |
262
|
|
|
|
|
|
|
# very unlikely to be in pod text. So if we encounter one of them, it |
263
|
|
|
|
|
|
|
# means that it is quite likely CP1252 and not UTF-8. The net result is |
264
|
|
|
|
|
|
|
# the same code below is used for both platforms. |
265
|
|
|
|
|
|
|
# |
266
|
|
|
|
|
|
|
# XXX probably if the line has E that evaluates to illegal CP1252, |
267
|
|
|
|
|
|
|
# then it is UTF-8. But we haven't processed E<> yet. |
268
|
|
|
|
|
|
|
|
269
|
21
|
50
|
|
|
|
151
|
goto set_1252 if $] lt 5.006_000; # No UTF-8 on very early perls |
270
|
|
|
|
|
|
|
|
271
|
21
|
|
|
|
|
45
|
my $copy; |
272
|
|
|
|
|
|
|
|
273
|
68
|
|
|
68
|
|
534
|
no warnings 'utf8'; |
|
68
|
|
|
|
|
156
|
|
|
68
|
|
|
|
|
6545
|
|
274
|
|
|
|
|
|
|
|
275
|
21
|
50
|
|
|
|
81
|
if ($] ge 5.007_003) { |
276
|
21
|
|
|
|
|
50
|
$copy = $line; |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
# On perls that have this function, we can use it to easily see if the |
279
|
|
|
|
|
|
|
# sequence is valid UTF-8 or not; if valid it turns on the UTF-8 flag |
280
|
|
|
|
|
|
|
# needed below for script run detection |
281
|
21
|
100
|
|
|
|
344
|
goto set_1252 if ! utf8::decode($copy); |
282
|
|
|
|
|
|
|
} |
283
|
|
|
|
|
|
|
elsif (ord("A") != 65) { # Early EBCDIC, assume UTF-8. What's a windows |
284
|
|
|
|
|
|
|
# code page doing here anyway? |
285
|
|
|
|
|
|
|
goto set_utf8; |
286
|
|
|
|
|
|
|
} |
287
|
|
|
|
|
|
|
else { # ASCII, no decode(): do it ourselves using the fundamental |
288
|
|
|
|
|
|
|
# characteristics of UTF-8 |
289
|
68
|
|
|
68
|
|
45073
|
use if $] le 5.006002, 'utf8'; |
|
68
|
|
|
|
|
943
|
|
|
68
|
|
|
|
|
1130
|
|
290
|
|
|
|
|
|
|
|
291
|
0
|
|
|
|
|
0
|
my $char_ord; |
292
|
|
|
|
|
|
|
my $needed; # How many continuation bytes to gobble up |
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
# Initialize the translated line with a dummy character that will be |
295
|
|
|
|
|
|
|
# deleted after everything else is done. This dummy makes sure that |
296
|
|
|
|
|
|
|
# $copy will be in UTF-8. Doing it now avoids the bugs in early perls |
297
|
|
|
|
|
|
|
# with upgrading in the middle |
298
|
0
|
|
|
|
|
0
|
$copy = chr(0x100); |
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
# Parse through the line |
301
|
0
|
|
|
|
|
0
|
for (my $i = 0; $i < length $line; $i++) { |
302
|
0
|
|
|
|
|
0
|
my $byte = substr($line, $i, 1); |
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
# ASCII bytes are trivially dealt with |
305
|
0
|
0
|
|
|
|
0
|
if ($byte !~ $non_ascii_re) { |
306
|
0
|
|
|
|
|
0
|
$copy .= $byte; |
307
|
0
|
|
|
|
|
0
|
next; |
308
|
|
|
|
|
|
|
} |
309
|
|
|
|
|
|
|
|
310
|
0
|
|
|
|
|
0
|
my $b_ord = ord $byte; |
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
# Now figure out what this code point would be if the input is |
313
|
|
|
|
|
|
|
# actually in UTF-8. If, in the process, we discover that it isn't |
314
|
|
|
|
|
|
|
# well-formed UTF-8, we guess CP1252. |
315
|
|
|
|
|
|
|
# |
316
|
|
|
|
|
|
|
# Start the process. If it is UTF-8, we are at the first, start |
317
|
|
|
|
|
|
|
# byte, of a multi-byte sequence. We look at this byte to figure |
318
|
|
|
|
|
|
|
# out how many continuation bytes are needed, and to initialize the |
319
|
|
|
|
|
|
|
# code point accumulator with the data from this byte. |
320
|
|
|
|
|
|
|
# |
321
|
|
|
|
|
|
|
# Normally the minimum continuation byte is 0x80, but in certain |
322
|
|
|
|
|
|
|
# instances the minimum is a higher number. So the code below |
323
|
|
|
|
|
|
|
# overrides this for those instances. |
324
|
0
|
|
|
|
|
0
|
my $min_cont = 0x80; |
325
|
|
|
|
|
|
|
|
326
|
0
|
0
|
|
|
|
0
|
if ($b_ord < 0xC2) { # A start byte < C2 is malformed |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
327
|
0
|
|
|
|
|
0
|
goto set_1252; |
328
|
|
|
|
|
|
|
} |
329
|
|
|
|
|
|
|
elsif ($b_ord <= 0xDF) { |
330
|
0
|
|
|
|
|
0
|
$needed = 1; |
331
|
0
|
|
|
|
|
0
|
$char_ord = $b_ord & 0x1F; |
332
|
|
|
|
|
|
|
} |
333
|
|
|
|
|
|
|
elsif ($b_ord <= 0xEF) { |
334
|
0
|
0
|
|
|
|
0
|
$min_cont = 0xA0 if $b_ord == 0xE0; |
335
|
0
|
|
|
|
|
0
|
$needed = 2; |
336
|
0
|
|
|
|
|
0
|
$char_ord = $b_ord & (0x1F >> 1); |
337
|
|
|
|
|
|
|
} |
338
|
|
|
|
|
|
|
elsif ($b_ord <= 0xF4) { |
339
|
0
|
0
|
|
|
|
0
|
$min_cont = 0x90 if $b_ord == 0xF0; |
340
|
0
|
|
|
|
|
0
|
$needed = 3; |
341
|
0
|
|
|
|
|
0
|
$char_ord = $b_ord & (0x1F >> 2); |
342
|
|
|
|
|
|
|
} |
343
|
|
|
|
|
|
|
else { # F4 is the highest start byte for legal Unicode; higher is |
344
|
|
|
|
|
|
|
# unlikely to be in pod. |
345
|
0
|
|
|
|
|
0
|
goto set_1252; |
346
|
|
|
|
|
|
|
} |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
# ? not enough continuation bytes available |
349
|
0
|
0
|
|
|
|
0
|
goto set_1252 if $i + $needed >= length $line; |
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
# Accumulate the ordinal of the character from the remaining |
352
|
|
|
|
|
|
|
# (continuation) bytes. |
353
|
0
|
|
|
|
|
0
|
while ($needed-- > 0) { |
354
|
0
|
|
|
|
|
0
|
my $cont = substr($line, ++$i, 1); |
355
|
0
|
|
|
|
|
0
|
$b_ord = ord $cont; |
356
|
0
|
0
|
0
|
|
|
0
|
goto set_1252 if $b_ord < $min_cont || $b_ord > 0xBF; |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
# In all cases, any next continuation bytes all have the same |
359
|
|
|
|
|
|
|
# minimum legal value |
360
|
0
|
|
|
|
|
0
|
$min_cont = 0x80; |
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
# Accumulate this byte's contribution to the code point |
363
|
0
|
|
|
|
|
0
|
$char_ord <<= 6; |
364
|
0
|
|
|
|
|
0
|
$char_ord |= ($b_ord & 0x3F); |
365
|
|
|
|
|
|
|
} |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
# Here, the sequence that formed this code point was valid UTF-8, |
368
|
|
|
|
|
|
|
# so add the completed character to the output |
369
|
0
|
|
|
|
|
0
|
$copy .= chr $char_ord; |
370
|
|
|
|
|
|
|
} # End of loop through line |
371
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
# Delete the dummy first character |
373
|
0
|
|
|
|
|
0
|
$copy = substr($copy, 1); |
374
|
|
|
|
|
|
|
} |
375
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
# Here, $copy is legal UTF-8. |
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
# If it can't be legal CP1252, no need to look further. (These bytes |
379
|
|
|
|
|
|
|
# aren't valid in CP1252.) This test could have been placed higher in |
380
|
|
|
|
|
|
|
# the code, but it seemed wrong to set the encoding to UTF-8 without |
381
|
|
|
|
|
|
|
# making sure that the very first instance is well-formed. But what if |
382
|
|
|
|
|
|
|
# it isn't legal CP1252 either? We have to choose one or the other, and |
383
|
|
|
|
|
|
|
# It seems safer to favor the single-byte encoding over the multi-byte. |
384
|
6
|
50
|
|
|
|
19
|
goto set_utf8 if ord("A") == 65 && $line =~ /[\x81\x8D\x8F\x90\x9D]/; |
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
# The C1 controls are not likely to appear in pod |
387
|
6
|
50
|
|
|
|
27
|
goto set_1252 if ord("A") == 65 && $copy =~ /[\x80-\x9F]/; |
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
# Nor are surrogates nor unassigned, nor deprecated. |
390
|
6
|
50
|
|
|
|
38
|
DEBUG > 8 and print STDERR __LINE__, ": $copy: surrogate\n" if $copy =~ $cs_re; |
391
|
6
|
50
|
33
|
|
|
51
|
goto set_1252 if $cs_re && $copy =~ $cs_re; |
392
|
6
|
50
|
33
|
|
|
56
|
DEBUG > 8 and print STDERR __LINE__, ": $copy: unassigned\n" if $cn_re && $copy =~ $cn_re; |
393
|
6
|
50
|
33
|
|
|
37
|
goto set_1252 if $cn_re && $copy =~ $cn_re; |
394
|
6
|
50
|
|
|
|
34
|
DEBUG > 8 and print STDERR __LINE__, ": $copy: deprecated\n" if $copy =~ $deprecated_re; |
395
|
6
|
50
|
|
|
|
24
|
goto set_1252 if $copy =~ $deprecated_re; |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
# Nor are rare code points. But this is hard to determine. khw |
398
|
|
|
|
|
|
|
# believes that IPA characters and the modifier letters are unlikely to |
399
|
|
|
|
|
|
|
# be in pod (and certainly very unlikely to be the in the first line in |
400
|
|
|
|
|
|
|
# the pod containing non-ASCII) |
401
|
6
|
100
|
|
|
|
44
|
DEBUG > 8 and print STDERR __LINE__, ": $copy: rare\n" if $copy =~ $rare_blocks_re; |
402
|
6
|
100
|
66
|
|
|
37
|
goto set_1252 if $rare_blocks_re && $copy =~ $rare_blocks_re; |
403
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
# The first Unicode version included essentially every Latin character |
405
|
|
|
|
|
|
|
# in modern usage. So, a Latin character not in the first release will |
406
|
|
|
|
|
|
|
# unlikely be in pod. |
407
|
5
|
50
|
33
|
|
|
43
|
DEBUG > 8 and print STDERR __LINE__, ": $copy: later_latin\n" if $later_latin_re && $copy =~ $later_latin_re; |
408
|
5
|
50
|
33
|
|
|
30
|
goto set_1252 if $later_latin_re && $copy =~ $later_latin_re; |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
# On perls that handle script runs, if the UTF-8 interpretation yields |
411
|
|
|
|
|
|
|
# a single script, we guess UTF-8, otherwise just having a mixture of |
412
|
|
|
|
|
|
|
# scripts is suspicious, so guess CP1252. We first strip off, as best |
413
|
|
|
|
|
|
|
# we can, the ASCII characters that look like they are pod directives, |
414
|
|
|
|
|
|
|
# as these would always show as mixed with non-Latin text. |
415
|
5
|
|
|
|
|
75
|
$copy =~ s/$pod_chars_re//g; |
416
|
|
|
|
|
|
|
|
417
|
5
|
50
|
|
|
|
19
|
if ($script_run_re) { |
418
|
0
|
0
|
|
|
|
0
|
goto set_utf8 if $copy =~ $script_run_re; |
419
|
0
|
|
|
|
|
0
|
DEBUG > 8 and print STDERR __LINE__, ": not script run\n"; |
420
|
0
|
|
|
|
|
0
|
goto set_1252; |
421
|
|
|
|
|
|
|
} |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
# Even without script runs, but on recent enough perls and Unicodes, we |
424
|
|
|
|
|
|
|
# can check if there is a mixture of both Latin and non-Latin. Again, |
425
|
|
|
|
|
|
|
# having a mixture of scripts is suspicious, so assume CP1252 |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
# If it's all non-Latin, there is no CP1252, as that is Latin |
428
|
|
|
|
|
|
|
# characters and punct, etc. |
429
|
5
|
50
|
|
|
|
38
|
DEBUG > 8 and print STDERR __LINE__, ": $copy: not latin\n" if $copy !~ $latin_re; |
430
|
5
|
50
|
|
|
|
33
|
goto set_utf8 if $copy !~ $latin_re; |
431
|
|
|
|
|
|
|
|
432
|
5
|
100
|
|
|
|
70
|
DEBUG > 8 and print STDERR __LINE__, ": $copy: all latin\n" if $copy =~ $every_char_is_latin_re; |
433
|
5
|
100
|
|
|
|
44
|
goto set_utf8 if $copy =~ $every_char_is_latin_re; |
434
|
|
|
|
|
|
|
|
435
|
1
|
|
|
|
|
2
|
DEBUG > 8 and print STDERR __LINE__, ": $copy: mixed\n"; |
436
|
|
|
|
|
|
|
|
437
|
17
|
|
|
|
|
24
|
set_1252: |
438
|
|
|
|
|
|
|
DEBUG > 9 and print STDERR __LINE__, ": $copy: is 1252\n"; |
439
|
17
|
|
|
|
|
35
|
$encoding = 'CP1252'; |
440
|
17
|
|
|
|
|
47
|
goto done_set; |
441
|
|
|
|
|
|
|
|
442
|
4
|
|
|
|
|
5
|
set_utf8: |
443
|
|
|
|
|
|
|
DEBUG > 9 and print STDERR __LINE__, ": $copy: is UTF-8\n"; |
444
|
4
|
|
|
|
|
9
|
$encoding = 'UTF-8'; |
445
|
|
|
|
|
|
|
|
446
|
21
|
|
|
|
|
128
|
done_set: |
447
|
|
|
|
|
|
|
$self->_handle_encoding_line( "=encoding $encoding" ); |
448
|
21
|
|
|
|
|
46
|
delete $self->{'_processed_encoding'}; |
449
|
21
|
50
|
|
|
|
101
|
$self->{'_transcoder'} && $self->{'_transcoder'}->($line); |
450
|
|
|
|
|
|
|
|
451
|
21
|
|
|
|
|
400
|
my ($word) = $line =~ /(\S*$non_ascii_re\S*)/; |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
$self->whine( |
454
|
21
|
|
|
|
|
171
|
$self->{'line_count'}, |
455
|
|
|
|
|
|
|
"Non-ASCII character seen before =encoding in '$word'. Assuming $encoding" |
456
|
|
|
|
|
|
|
); |
457
|
|
|
|
|
|
|
} |
458
|
|
|
|
|
|
|
|
459
|
18139
|
|
|
|
|
26444
|
DEBUG > 5 and print STDERR "# Parsing line: [$line]\n"; |
460
|
|
|
|
|
|
|
|
461
|
18139
|
100
|
|
|
|
33122
|
if(!$self->{'in_pod'}) { |
462
|
1700
|
100
|
|
|
|
4884
|
if($line =~ m/^=([a-zA-Z][a-zA-Z0-9]*)(?:\s|$)/s) { |
463
|
941
|
100
|
|
|
|
2659
|
if($1 eq 'cut') { |
464
|
|
|
|
|
|
|
$self->scream( |
465
|
4
|
|
|
|
|
40
|
$self->{'line_count'}, |
466
|
|
|
|
|
|
|
"=cut found outside a pod block. Skipping to next block." |
467
|
|
|
|
|
|
|
); |
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
## Before there were errata sections in the world, it was |
470
|
|
|
|
|
|
|
## least-pessimal to abort processing the file. But now we can |
471
|
|
|
|
|
|
|
## just barrel on thru (but still not start a pod block). |
472
|
|
|
|
|
|
|
#splice @_; |
473
|
|
|
|
|
|
|
#push @_, undef; |
474
|
|
|
|
|
|
|
|
475
|
4
|
|
|
|
|
10
|
next; |
476
|
|
|
|
|
|
|
} else { |
477
|
|
|
|
|
|
|
$self->{'in_pod'} = $self->{'start_of_pod_block'} |
478
|
937
|
|
|
|
|
2586
|
= $self->{'last_was_blank'} = 1; |
479
|
|
|
|
|
|
|
# And fall thru to the pod-mode block further down |
480
|
|
|
|
|
|
|
} |
481
|
|
|
|
|
|
|
} else { |
482
|
759
|
|
|
|
|
972
|
DEBUG > 5 and print STDERR "# It's a code-line.\n"; |
483
|
759
|
100
|
|
|
|
1481
|
$code_handler->(map $_, $line, $self->{'line_count'}, $self) |
484
|
|
|
|
|
|
|
if $code_handler; |
485
|
|
|
|
|
|
|
# Note: this may cause code to be processed out of order relative |
486
|
|
|
|
|
|
|
# to pods, but in order relative to cuts. |
487
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
# Note also that we haven't yet applied the transcoding to $line |
489
|
|
|
|
|
|
|
# by time we call $code_handler! |
490
|
|
|
|
|
|
|
|
491
|
759
|
50
|
|
|
|
1572
|
if( $line =~ m/^#\s*line\s+(\d+)\s*(?:\s"([^"]+)")?\s*$/ ) { |
492
|
|
|
|
|
|
|
# That RE is from perlsyn, section "Plain Old Comments (Not!)", |
493
|
|
|
|
|
|
|
#$fname = $2 if defined $2; |
494
|
|
|
|
|
|
|
#DEBUG > 1 and defined $2 and print STDERR "# Setting fname to \"$fname\"\n"; |
495
|
0
|
|
|
|
|
0
|
DEBUG > 1 and print STDERR "# Setting nextline to $1\n"; |
496
|
0
|
|
|
|
|
0
|
$self->{'line_count'} = $1 - 1; |
497
|
|
|
|
|
|
|
} |
498
|
|
|
|
|
|
|
|
499
|
759
|
|
|
|
|
1540
|
next; |
500
|
|
|
|
|
|
|
} |
501
|
|
|
|
|
|
|
} |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . |
504
|
|
|
|
|
|
|
# Else we're in pod mode: |
505
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
# Apply any necessary transcoding: |
507
|
17376
|
100
|
|
|
|
32515
|
$self->{'_transcoder'} && $self->{'_transcoder'}->($line); |
508
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
# HERE WE CATCH =encoding EARLY! |
510
|
17376
|
100
|
|
|
|
32270
|
if( $line =~ m/^=encoding\s+\S+\s*$/s ) { |
511
|
39
|
100
|
|
|
|
189
|
next if $self->parse_characters; # Ignore this line |
512
|
38
|
|
|
|
|
142
|
$line = $self->_handle_encoding_line( $line ); |
513
|
|
|
|
|
|
|
} |
514
|
|
|
|
|
|
|
|
515
|
17375
|
100
|
|
|
|
61699
|
if($line =~ m/^=cut/s) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
516
|
|
|
|
|
|
|
# here ends the pod block, and therefore the previous pod para |
517
|
166
|
|
|
|
|
282
|
DEBUG > 1 and print STDERR "Noting =cut at line ${$self}{'line_count'}\n"; |
518
|
166
|
|
|
|
|
384
|
$self->{'in_pod'} = 0; |
519
|
|
|
|
|
|
|
# ++$self->{'pod_para_count'}; |
520
|
166
|
|
|
|
|
489
|
$self->_ponder_paragraph_buffer(); |
521
|
|
|
|
|
|
|
# by now it's safe to consider the previous paragraph as done. |
522
|
166
|
|
|
|
|
244
|
DEBUG > 6 and print STDERR "Processing any cut handler, line ${$self}{'line_count'}\n"; |
523
|
166
|
100
|
|
|
|
566
|
$cut_handler->(map $_, $line, $self->{'line_count'}, $self) |
524
|
|
|
|
|
|
|
if $cut_handler; |
525
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
# TODO: add to docs: Note: this may cause cuts to be processed out |
527
|
|
|
|
|
|
|
# of order relative to pods, but in order relative to code. |
528
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
} elsif($line =~ m/^(\s*)$/s) { # it's a blank line |
530
|
5731
|
100
|
66
|
|
|
21489
|
if (defined $1 and $1 =~ /[^\S\r\n]/) { # it's a white line |
531
|
22
|
100
|
|
|
|
82
|
$wl_handler->(map $_, $line, $self->{'line_count'}, $self) |
532
|
|
|
|
|
|
|
if $wl_handler; |
533
|
|
|
|
|
|
|
} |
534
|
|
|
|
|
|
|
|
535
|
5731
|
100
|
66
|
|
|
26387
|
if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') { |
|
|
|
100
|
|
|
|
|
536
|
632
|
|
|
|
|
941
|
DEBUG > 1 and print STDERR "Saving blank line at line ${$self}{'line_count'}\n"; |
537
|
632
|
|
|
|
|
819
|
push @{$paras->[-1]}, $line; |
|
632
|
|
|
|
|
1482
|
|
538
|
|
|
|
|
|
|
} # otherwise it's not interesting |
539
|
|
|
|
|
|
|
|
540
|
5731
|
100
|
100
|
|
|
18434
|
if(!$self->{'start_of_pod_block'} and !$self->{'last_was_blank'}) { |
541
|
5636
|
|
|
|
|
7208
|
DEBUG > 1 and print STDERR "Noting para ends with blank line at ${$self}{'line_count'}\n"; |
542
|
|
|
|
|
|
|
} |
543
|
|
|
|
|
|
|
|
544
|
5731
|
|
|
|
|
10028
|
$self->{'last_was_blank'} = 1; |
545
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
} elsif($self->{'last_was_blank'}) { # A non-blank line starting a new para... |
547
|
|
|
|
|
|
|
|
548
|
6341
|
100
|
|
|
|
19133
|
if($line =~ m/^(=[a-zA-Z][a-zA-Z0-9]*)(\s+|$)(.*)/s) { |
|
|
100
|
|
|
|
|
|
549
|
|
|
|
|
|
|
# THIS IS THE ONE PLACE WHERE WE CONSTRUCT NEW DIRECTIVE OBJECTS |
550
|
2855
|
|
|
|
|
11865
|
my $new = [$1, {'start_line' => $self->{'line_count'}}, $3]; |
551
|
2855
|
100
|
100
|
|
|
11617
|
$new->[1]{'~orig_spacer'} = $2 if $2 && $2 ne " "; |
552
|
|
|
|
|
|
|
# Note that in "=head1 foo", the WS is lost. |
553
|
|
|
|
|
|
|
# Example: ['=head1', {'start_line' => 123}, ' foo'] |
554
|
|
|
|
|
|
|
|
555
|
2855
|
|
|
|
|
4530
|
++$self->{'pod_para_count'}; |
556
|
|
|
|
|
|
|
|
557
|
2855
|
|
|
|
|
8597
|
$self->_ponder_paragraph_buffer(); |
558
|
|
|
|
|
|
|
# by now it's safe to consider the previous paragraph as done. |
559
|
|
|
|
|
|
|
|
560
|
2855
|
|
|
|
|
5375
|
push @$paras, $new; # the new incipient paragraph |
561
|
2855
|
|
|
|
|
4335
|
DEBUG > 1 and print STDERR "Starting new ${$paras}[-1][0] para at line ${$self}{'line_count'}\n"; |
562
|
|
|
|
|
|
|
|
563
|
|
|
|
|
|
|
} elsif($line =~ m/^\s/s) { |
564
|
|
|
|
|
|
|
|
565
|
717
|
100
|
33
|
|
|
3904
|
if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') { |
|
|
|
66
|
|
|
|
|
566
|
173
|
|
|
|
|
244
|
DEBUG > 1 and print STDERR "Resuming verbatim para at line ${$self}{'line_count'}\n"; |
567
|
173
|
|
|
|
|
254
|
push @{$paras->[-1]}, $line; |
|
173
|
|
|
|
|
476
|
|
568
|
|
|
|
|
|
|
} else { |
569
|
544
|
|
|
|
|
855
|
++$self->{'pod_para_count'}; |
570
|
544
|
|
|
|
|
1437
|
$self->_ponder_paragraph_buffer(); |
571
|
|
|
|
|
|
|
# by now it's safe to consider the previous paragraph as done. |
572
|
544
|
|
|
|
|
757
|
DEBUG > 1 and print STDERR "Starting verbatim para at line ${$self}{'line_count'}\n"; |
573
|
544
|
|
|
|
|
2110
|
push @$paras, ['~Verbatim', {'start_line' => $self->{'line_count'}}, $line]; |
574
|
|
|
|
|
|
|
} |
575
|
|
|
|
|
|
|
} else { |
576
|
2769
|
|
|
|
|
4226
|
++$self->{'pod_para_count'}; |
577
|
2769
|
|
|
|
|
7229
|
$self->_ponder_paragraph_buffer(); |
578
|
|
|
|
|
|
|
# by now it's safe to consider the previous paragraph as done. |
579
|
2769
|
|
|
|
|
9864
|
push @$paras, ['~Para', {'start_line' => $self->{'line_count'}}, $line]; |
580
|
2769
|
|
|
|
|
4415
|
DEBUG > 1 and print STDERR "Starting plain para at line ${$self}{'line_count'}\n"; |
581
|
|
|
|
|
|
|
} |
582
|
6341
|
|
|
|
|
13755
|
$self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0; |
583
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
} else { |
585
|
|
|
|
|
|
|
# It's a non-blank line /continuing/ the current para |
586
|
5137
|
50
|
|
|
|
9434
|
if(@$paras) { |
587
|
5137
|
|
|
|
|
6209
|
DEBUG > 2 and print STDERR "Line ${$self}{'line_count'} continues current paragraph\n"; |
588
|
5137
|
|
|
|
|
6284
|
push @{$paras->[-1]}, $line; |
|
5137
|
|
|
|
|
11370
|
|
589
|
|
|
|
|
|
|
} else { |
590
|
|
|
|
|
|
|
# Unexpected case! |
591
|
0
|
|
|
|
|
0
|
die "Continuing a paragraph but \@\$paras is empty?"; |
592
|
|
|
|
|
|
|
} |
593
|
5137
|
|
|
|
|
9883
|
$self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0; |
594
|
|
|
|
|
|
|
} |
595
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
} # ends the big while loop |
597
|
|
|
|
|
|
|
|
598
|
9477
|
|
|
|
|
12486
|
DEBUG > 1 and print STDERR (pretty(@$paras), "\n"); |
599
|
9477
|
|
|
|
|
46774
|
return $self; |
600
|
|
|
|
|
|
|
} |
601
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
603
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
sub _handle_encoding_line { |
605
|
59
|
|
|
59
|
|
194
|
my($self, $line) = @_; |
606
|
|
|
|
|
|
|
|
607
|
59
|
50
|
|
|
|
183
|
return if $self->parse_characters; |
608
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
# The point of this routine is to set $self->{'_transcoder'} as indicated. |
610
|
|
|
|
|
|
|
|
611
|
59
|
50
|
|
|
|
330
|
return $line unless $line =~ m/^=encoding\s+(\S+)\s*$/s; |
612
|
59
|
|
|
|
|
104
|
DEBUG > 1 and print STDERR "Found an encoding line \"=encoding $1\"\n"; |
613
|
|
|
|
|
|
|
|
614
|
59
|
|
|
|
|
170
|
my $e = $1; |
615
|
59
|
|
|
|
|
104
|
my $orig = $e; |
616
|
59
|
|
|
|
|
115
|
push @{ $self->{'encoding_command_reqs'} }, "=encoding $orig"; |
|
59
|
|
|
|
|
267
|
|
617
|
|
|
|
|
|
|
|
618
|
59
|
|
|
|
|
106
|
my $enc_error; |
619
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
# Cf. perldoc Encode and perldoc Encode::Supported |
621
|
|
|
|
|
|
|
|
622
|
59
|
|
|
|
|
7537
|
require Pod::Simple::Transcode; |
623
|
|
|
|
|
|
|
|
624
|
59
|
100
|
33
|
|
|
226
|
if( $self->{'encoding'} ) { |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
625
|
5
|
|
|
|
|
24
|
my $norm_current = $self->{'encoding'}; |
626
|
5
|
|
|
|
|
12
|
my $norm_e = $e; |
627
|
5
|
|
|
|
|
15
|
foreach my $that ($norm_current, $norm_e) { |
628
|
10
|
|
|
|
|
23
|
$that = lc($that); |
629
|
10
|
|
|
|
|
49
|
$that =~ s/[-_]//g; |
630
|
|
|
|
|
|
|
} |
631
|
5
|
100
|
|
|
|
21
|
if($norm_current eq $norm_e) { |
632
|
3
|
|
|
|
|
4
|
DEBUG > 1 and print STDERR "The '=encoding $orig' line is ", |
633
|
|
|
|
|
|
|
"redundant. ($norm_current eq $norm_e). Ignoring.\n"; |
634
|
3
|
|
|
|
|
9
|
$enc_error = ''; |
635
|
|
|
|
|
|
|
# But that doesn't necessarily mean that the earlier one went okay |
636
|
|
|
|
|
|
|
} else { |
637
|
2
|
|
|
|
|
27
|
$enc_error = "Encoding is already set to " . $self->{'encoding'}; |
638
|
2
|
|
|
|
|
4
|
DEBUG > 1 and print STDERR $enc_error; |
639
|
|
|
|
|
|
|
} |
640
|
|
|
|
|
|
|
} elsif ( |
641
|
|
|
|
|
|
|
# OK, let's turn on the encoding |
642
|
|
|
|
|
|
|
do { |
643
|
54
|
|
|
|
|
92
|
DEBUG > 1 and print STDERR " Setting encoding to $e\n"; |
644
|
54
|
|
|
|
|
147
|
$self->{'encoding'} = $e; |
645
|
54
|
|
|
|
|
631
|
1; |
646
|
|
|
|
|
|
|
} |
647
|
|
|
|
|
|
|
and $e eq 'HACKRAW' |
648
|
|
|
|
|
|
|
) { |
649
|
0
|
|
|
|
|
0
|
DEBUG and print STDERR " Putting in HACKRAW (no-op) encoding mode.\n"; |
650
|
|
|
|
|
|
|
|
651
|
|
|
|
|
|
|
} elsif( Pod::Simple::Transcode::->encoding_is_available($e) ) { |
652
|
|
|
|
|
|
|
|
653
|
|
|
|
|
|
|
die($enc_error = "WHAT? _transcoder is already set?!") |
654
|
52
|
50
|
|
|
|
42757
|
if $self->{'_transcoder'}; # should never happen |
655
|
52
|
|
|
|
|
248
|
require Pod::Simple::Transcode; |
656
|
52
|
|
|
|
|
279
|
$self->{'_transcoder'} = Pod::Simple::Transcode::->make_transcoder($e); |
657
|
52
|
|
|
|
|
120
|
eval { |
658
|
52
|
|
|
|
|
160
|
my @x = ('', "abc", "123"); |
659
|
52
|
|
|
|
|
239
|
$self->{'_transcoder'}->(@x); |
660
|
|
|
|
|
|
|
}; |
661
|
52
|
50
|
|
|
|
185
|
$@ && die( $enc_error = |
662
|
|
|
|
|
|
|
"Really unexpected error setting up encoding $e: $@\nAborting" |
663
|
|
|
|
|
|
|
); |
664
|
52
|
|
|
|
|
143
|
$self->{'detected_encoding'} = $e; |
665
|
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
} else { |
667
|
2
|
|
|
|
|
1875
|
my @supported = Pod::Simple::Transcode::->all_encodings; |
668
|
|
|
|
|
|
|
|
669
|
|
|
|
|
|
|
# Note unsupported, and complain |
670
|
2
|
|
|
|
|
2104
|
DEBUG and print STDERR " Encoding [$e] is unsupported.", |
671
|
|
|
|
|
|
|
"\nSupporteds: @supported\n"; |
672
|
2
|
|
|
|
|
13
|
my $suggestion = ''; |
673
|
|
|
|
|
|
|
|
674
|
|
|
|
|
|
|
# Look for a near match: |
675
|
2
|
|
|
|
|
10
|
my $norm = lc($e); |
676
|
2
|
|
|
|
|
7
|
$norm =~ tr[-_][]d; |
677
|
2
|
|
|
|
|
5
|
my $n; |
678
|
2
|
|
|
|
|
7
|
foreach my $enc (@supported) { |
679
|
248
|
|
|
|
|
323
|
$n = lc($enc); |
680
|
248
|
|
|
|
|
310
|
$n =~ tr[-_][]d; |
681
|
248
|
50
|
|
|
|
419
|
next unless $n eq $norm; |
682
|
0
|
|
|
|
|
0
|
$suggestion = " (Maybe \"$e\" should be \"$enc\"?)"; |
683
|
0
|
|
|
|
|
0
|
last; |
684
|
|
|
|
|
|
|
} |
685
|
2
|
|
|
|
|
25
|
my $encmodver = Pod::Simple::Transcode::->encmodver; |
686
|
2
|
|
|
|
|
37
|
$enc_error = join '' => |
687
|
|
|
|
|
|
|
"This document probably does not appear as it should, because its ", |
688
|
|
|
|
|
|
|
"\"=encoding $e\" line calls for an unsupported encoding.", |
689
|
|
|
|
|
|
|
$suggestion, " [$encmodver\'s supported encodings are: @supported]" |
690
|
|
|
|
|
|
|
; |
691
|
|
|
|
|
|
|
|
692
|
2
|
|
|
|
|
28
|
$self->scream( $self->{'line_count'}, $enc_error ); |
693
|
|
|
|
|
|
|
} |
694
|
59
|
|
|
|
|
115
|
push @{ $self->{'encoding_command_statuses'} }, $enc_error; |
|
59
|
|
|
|
|
181
|
|
695
|
59
|
100
|
|
|
|
191
|
if (defined($self->{'_processed_encoding'})) { |
696
|
|
|
|
|
|
|
# Double declaration. |
697
|
1
|
|
|
|
|
8
|
$self->scream( $self->{'line_count'}, 'Cannot have multiple =encoding directives'); |
698
|
|
|
|
|
|
|
} |
699
|
59
|
|
|
|
|
122
|
$self->{'_processed_encoding'} = $orig; |
700
|
|
|
|
|
|
|
|
701
|
59
|
|
|
|
|
127
|
return $line; |
702
|
|
|
|
|
|
|
} |
703
|
|
|
|
|
|
|
|
704
|
|
|
|
|
|
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
705
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
sub _handle_encoding_second_level { |
707
|
|
|
|
|
|
|
# By time this is called, the encoding (if well formed) will already |
708
|
|
|
|
|
|
|
# have been acted on. |
709
|
38
|
|
|
38
|
|
123
|
my($self, $para) = @_; |
710
|
38
|
|
|
|
|
117
|
my @x = @$para; |
711
|
38
|
|
|
|
|
138
|
my $content = join ' ', splice @x, 2; |
712
|
38
|
|
|
|
|
112
|
$content =~ s/^\s+//s; |
713
|
38
|
|
|
|
|
107
|
$content =~ s/\s+$//s; |
714
|
|
|
|
|
|
|
|
715
|
38
|
|
|
|
|
66
|
DEBUG > 2 and print STDERR "Ogling encoding directive: =encoding $content\n"; |
716
|
|
|
|
|
|
|
|
717
|
38
|
100
|
|
|
|
103
|
if (defined($self->{'_processed_encoding'})) { |
718
|
|
|
|
|
|
|
#if($content ne $self->{'_processed_encoding'}) { |
719
|
|
|
|
|
|
|
# Could it happen? |
720
|
|
|
|
|
|
|
#} |
721
|
37
|
|
|
|
|
98
|
delete $self->{'_processed_encoding'}; |
722
|
|
|
|
|
|
|
# It's already been handled. Check for errors. |
723
|
37
|
50
|
|
|
|
184
|
if(! $self->{'encoding_command_statuses'} ) { |
|
|
100
|
|
|
|
|
|
724
|
0
|
|
|
|
|
0
|
DEBUG > 2 and print STDERR " CRAZY ERROR: It wasn't really handled?!\n"; |
725
|
|
|
|
|
|
|
} elsif( $self->{'encoding_command_statuses'}[-1] ) { |
726
|
|
|
|
|
|
|
$self->whine( $para->[1]{'start_line'}, |
727
|
|
|
|
|
|
|
sprintf "Couldn't do %s: %s", |
728
|
|
|
|
|
|
|
$self->{'encoding_command_reqs' }[-1], |
729
|
4
|
|
|
|
|
73
|
$self->{'encoding_command_statuses'}[-1], |
730
|
|
|
|
|
|
|
); |
731
|
|
|
|
|
|
|
} else { |
732
|
33
|
|
|
|
|
55
|
DEBUG > 2 and print STDERR " (Yup, it was successfully handled already.)\n"; |
733
|
|
|
|
|
|
|
} |
734
|
|
|
|
|
|
|
|
735
|
|
|
|
|
|
|
} else { |
736
|
|
|
|
|
|
|
# Otherwise it's a syntax error |
737
|
1
|
|
|
|
|
20
|
$self->whine( $para->[1]{'start_line'}, |
738
|
|
|
|
|
|
|
"Invalid =encoding syntax: $content" |
739
|
|
|
|
|
|
|
); |
740
|
|
|
|
|
|
|
} |
741
|
|
|
|
|
|
|
|
742
|
38
|
|
|
|
|
83
|
return; |
743
|
|
|
|
|
|
|
} |
744
|
|
|
|
|
|
|
|
745
|
|
|
|
|
|
|
#~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~` |
746
|
|
|
|
|
|
|
|
747
|
|
|
|
|
|
|
{ |
748
|
|
|
|
|
|
|
my $m = -321; # magic line number |
749
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
sub _gen_errata { |
751
|
890
|
|
|
890
|
|
1416
|
my $self = $_[0]; |
752
|
|
|
|
|
|
|
# Return 0 or more fake-o paragraphs explaining the accumulated |
753
|
|
|
|
|
|
|
# errors on this document. |
754
|
|
|
|
|
|
|
|
755
|
890
|
100
|
66
|
|
|
3006
|
return() unless $self->{'errata'} and keys %{$self->{'errata'}}; |
|
40
|
|
|
|
|
235
|
|
756
|
|
|
|
|
|
|
|
757
|
40
|
|
|
|
|
95
|
my @out; |
758
|
|
|
|
|
|
|
|
759
|
40
|
|
|
|
|
79
|
foreach my $line (sort {$a <=> $b} keys %{$self->{'errata'}}) { |
|
26
|
|
|
|
|
74
|
|
|
40
|
|
|
|
|
204
|
|
760
|
|
|
|
|
|
|
push @out, |
761
|
|
|
|
|
|
|
['=item', {'start_line' => $m}, "Around line $line:"], |
762
|
|
|
|
|
|
|
map( ['~Para', {'start_line' => $m, '~cooked' => 1}, |
763
|
|
|
|
|
|
|
#['~Top', {'start_line' => $m}, |
764
|
|
|
|
|
|
|
$_ |
765
|
|
|
|
|
|
|
#] |
766
|
|
|
|
|
|
|
], |
767
|
59
|
|
|
|
|
291
|
@{$self->{'errata'}{$line}} |
|
59
|
|
|
|
|
372
|
|
768
|
|
|
|
|
|
|
) |
769
|
|
|
|
|
|
|
; |
770
|
|
|
|
|
|
|
} |
771
|
|
|
|
|
|
|
|
772
|
|
|
|
|
|
|
# TODO: report of unknown entities? unrenderable characters? |
773
|
|
|
|
|
|
|
|
774
|
40
|
|
|
|
|
469
|
unshift @out, |
775
|
|
|
|
|
|
|
['=head1', {'start_line' => $m, 'errata' => 1}, 'POD ERRORS'], |
776
|
|
|
|
|
|
|
['~Para', {'start_line' => $m, '~cooked' => 1, 'errata' => 1}, |
777
|
|
|
|
|
|
|
"Hey! ", |
778
|
|
|
|
|
|
|
['B', {}, |
779
|
|
|
|
|
|
|
'The above document had some coding errors, which are explained below:' |
780
|
|
|
|
|
|
|
] |
781
|
|
|
|
|
|
|
], |
782
|
|
|
|
|
|
|
['=over', {'start_line' => $m, 'errata' => 1}, ''], |
783
|
|
|
|
|
|
|
; |
784
|
|
|
|
|
|
|
|
785
|
40
|
|
|
|
|
189
|
push @out, |
786
|
|
|
|
|
|
|
['=back', {'start_line' => $m, 'errata' => 1}, ''], |
787
|
|
|
|
|
|
|
; |
788
|
|
|
|
|
|
|
|
789
|
40
|
|
|
|
|
86
|
DEBUG and print STDERR "\n<<\n", pretty(\@out), "\n>>\n\n"; |
790
|
|
|
|
|
|
|
|
791
|
40
|
|
|
|
|
159
|
return @out; |
792
|
|
|
|
|
|
|
} |
793
|
|
|
|
|
|
|
|
794
|
|
|
|
|
|
|
} |
795
|
|
|
|
|
|
|
|
796
|
|
|
|
|
|
|
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
797
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
############################################################################## |
799
|
|
|
|
|
|
|
## |
800
|
|
|
|
|
|
|
## stop reading now stop reading now stop reading now stop reading now stop |
801
|
|
|
|
|
|
|
## |
802
|
|
|
|
|
|
|
## HERE IT BECOMES REALLY SCARY |
803
|
|
|
|
|
|
|
## |
804
|
|
|
|
|
|
|
## stop reading now stop reading now stop reading now stop reading now stop |
805
|
|
|
|
|
|
|
## |
806
|
|
|
|
|
|
|
############################################################################## |
807
|
|
|
|
|
|
|
|
808
|
|
|
|
|
|
|
sub _ponder_paragraph_buffer { |
809
|
|
|
|
|
|
|
|
810
|
|
|
|
|
|
|
# Para-token types as found in the buffer. |
811
|
|
|
|
|
|
|
# ~Verbatim, ~Para, ~end, =head1..4, =for, =begin, =end, |
812
|
|
|
|
|
|
|
# =over, =back, =item |
813
|
|
|
|
|
|
|
# and the null =pod (to be complained about if over one line) |
814
|
|
|
|
|
|
|
# |
815
|
|
|
|
|
|
|
# "~data" paragraphs are something we generate at this level, depending on |
816
|
|
|
|
|
|
|
# a currently open =over region |
817
|
|
|
|
|
|
|
|
818
|
|
|
|
|
|
|
# Events fired: Begin and end for: |
819
|
|
|
|
|
|
|
# directivename (like head1 .. head4), item, extend, |
820
|
|
|
|
|
|
|
# for (from =begin...=end, =for), |
821
|
|
|
|
|
|
|
# over-bullet, over-number, over-text, over-block, |
822
|
|
|
|
|
|
|
# item-bullet, item-number, item-text, |
823
|
|
|
|
|
|
|
# Document, |
824
|
|
|
|
|
|
|
# Data, Para, Verbatim |
825
|
|
|
|
|
|
|
# B, C, longdirname (TODO -- wha?), etc. for all directives |
826
|
|
|
|
|
|
|
# |
827
|
|
|
|
|
|
|
|
828
|
7224
|
|
|
7224
|
|
10817
|
my $self = $_[0]; |
829
|
7224
|
|
|
|
|
9573
|
my $paras; |
830
|
7224
|
100
|
|
|
|
9119
|
return unless @{$paras = $self->{'paras'}}; |
|
7224
|
|
|
|
|
16797
|
|
831
|
6290
|
|
100
|
|
|
16328
|
my $curr_open = ($self->{'curr_open'} ||= []); |
832
|
|
|
|
|
|
|
|
833
|
6290
|
|
|
|
|
8542
|
my $scratch; |
834
|
|
|
|
|
|
|
|
835
|
6290
|
|
|
|
|
7845
|
DEBUG > 10 and print STDERR "# Paragraph buffer: <<", pretty($paras), ">>\n"; |
836
|
|
|
|
|
|
|
|
837
|
|
|
|
|
|
|
# We have something in our buffer. So apparently the document has started. |
838
|
6290
|
100
|
|
|
|
11846
|
unless($self->{'doc_has_started'}) { |
839
|
904
|
|
|
|
|
2302
|
$self->{'doc_has_started'} = 1; |
840
|
|
|
|
|
|
|
|
841
|
904
|
|
|
|
|
1270
|
my $starting_contentless; |
842
|
904
|
|
66
|
|
|
5347
|
$starting_contentless = |
843
|
|
|
|
|
|
|
( |
844
|
|
|
|
|
|
|
!@$curr_open |
845
|
|
|
|
|
|
|
and @$paras and ! grep $_->[0] ne '~end', @$paras |
846
|
|
|
|
|
|
|
# i.e., if the paras is all ~ends |
847
|
|
|
|
|
|
|
) |
848
|
|
|
|
|
|
|
; |
849
|
904
|
|
|
|
|
1314
|
DEBUG and print STDERR "# Starting ", |
850
|
|
|
|
|
|
|
$starting_contentless ? 'contentless' : 'contentful', |
851
|
|
|
|
|
|
|
" document\n" |
852
|
|
|
|
|
|
|
; |
853
|
|
|
|
|
|
|
|
854
|
|
|
|
|
|
|
$self->_handle_element_start( |
855
|
|
|
|
|
|
|
($scratch = 'Document'), |
856
|
|
|
|
|
|
|
{ |
857
|
904
|
100
|
|
|
|
4710
|
'start_line' => $paras->[0][1]{'start_line'}, |
858
|
|
|
|
|
|
|
$starting_contentless ? ( 'contentless' => 1 ) : (), |
859
|
|
|
|
|
|
|
}, |
860
|
|
|
|
|
|
|
); |
861
|
|
|
|
|
|
|
} |
862
|
|
|
|
|
|
|
|
863
|
6290
|
|
|
|
|
10917
|
my($para, $para_type); |
864
|
6290
|
|
|
|
|
12276
|
while(@$paras) { |
865
|
|
|
|
|
|
|
|
866
|
|
|
|
|
|
|
# If a directive, assume it's legal; subtract below if found not to be |
867
|
8981
|
100
|
|
|
|
25179
|
$seen_legal_directive++ if $paras->[0][0] =~ /^=/; |
868
|
|
|
|
|
|
|
|
869
|
|
|
|
|
|
|
last if @$paras == 1 |
870
|
|
|
|
|
|
|
and ( $paras->[0][0] eq '=over' |
871
|
|
|
|
|
|
|
or $paras->[0][0] eq '=item' |
872
|
8981
|
100
|
100
|
|
|
37861
|
or ($paras->[0][0] eq '~Verbatim' and $self->{'in_pod'})); |
|
|
|
100
|
|
|
|
|
873
|
|
|
|
|
|
|
# Those're the three kinds of paragraphs that require lookahead. |
874
|
|
|
|
|
|
|
# Actually, an "=item Foo" inside an region |
875
|
|
|
|
|
|
|
# and any =item inside an region (rare) |
876
|
|
|
|
|
|
|
# don't require any lookahead, but all others (bullets |
877
|
|
|
|
|
|
|
# and numbers) do. |
878
|
|
|
|
|
|
|
# The verbatim is different from the other two, because those might be |
879
|
|
|
|
|
|
|
# like: |
880
|
|
|
|
|
|
|
# |
881
|
|
|
|
|
|
|
# =item |
882
|
|
|
|
|
|
|
# ... |
883
|
|
|
|
|
|
|
# =cut |
884
|
|
|
|
|
|
|
# ... |
885
|
|
|
|
|
|
|
# =item |
886
|
|
|
|
|
|
|
# |
887
|
|
|
|
|
|
|
# The =cut here finishes the paragraph but doesn't terminate the =over |
888
|
|
|
|
|
|
|
# they should be in. (khw apologizes that he didn't comment at the time |
889
|
|
|
|
|
|
|
# why the 'in_pod' works, and no longer remembers why, and doesn't think |
890
|
|
|
|
|
|
|
# it is currently worth the effort to re-figure it out.) |
891
|
|
|
|
|
|
|
|
892
|
|
|
|
|
|
|
# TODO: whinge about many kinds of directives in non-resolving =for regions? |
893
|
|
|
|
|
|
|
# TODO: many? like what? =head1 etc? |
894
|
|
|
|
|
|
|
|
895
|
7466
|
|
|
|
|
14801
|
$para = shift @$paras; |
896
|
7466
|
|
|
|
|
12976
|
$para_type = $para->[0]; |
897
|
|
|
|
|
|
|
|
898
|
7466
|
|
|
|
|
9519
|
DEBUG > 1 and print STDERR "Pondering a $para_type paragraph, given the stack: (", |
899
|
|
|
|
|
|
|
$self->_dump_curr_open(), ")\n"; |
900
|
|
|
|
|
|
|
|
901
|
7466
|
100
|
|
|
|
23502
|
if($para_type eq '=for') { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
902
|
50
|
50
|
|
|
|
140
|
next if $self->_ponder_for($para,$curr_open,$paras); |
903
|
|
|
|
|
|
|
|
904
|
|
|
|
|
|
|
} elsif($para_type eq '=begin') { |
905
|
127
|
50
|
|
|
|
321
|
next if $self->_ponder_begin($para,$curr_open,$paras); |
906
|
|
|
|
|
|
|
|
907
|
|
|
|
|
|
|
} elsif($para_type eq '=end') { |
908
|
130
|
50
|
|
|
|
335
|
next if $self->_ponder_end($para,$curr_open,$paras); |
909
|
|
|
|
|
|
|
|
910
|
|
|
|
|
|
|
} elsif($para_type eq '~end') { # The virtual end-document signal |
911
|
940
|
50
|
|
|
|
2806
|
next if $self->_ponder_doc_end($para,$curr_open,$paras); |
912
|
|
|
|
|
|
|
} |
913
|
|
|
|
|
|
|
|
914
|
|
|
|
|
|
|
|
915
|
|
|
|
|
|
|
# ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ |
916
|
|
|
|
|
|
|
#~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ |
917
|
6219
|
100
|
|
|
|
15507
|
if(grep $_->[1]{'~ignore'}, @$curr_open) { |
918
|
42
|
|
|
|
|
58
|
DEBUG > 1 and |
919
|
|
|
|
|
|
|
print STDERR "Skipping $para_type paragraph because in ignore mode.\n"; |
920
|
42
|
|
|
|
|
83
|
next; |
921
|
|
|
|
|
|
|
} |
922
|
|
|
|
|
|
|
#~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ |
923
|
|
|
|
|
|
|
# ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ |
924
|
|
|
|
|
|
|
|
925
|
6177
|
100
|
|
|
|
15371
|
if($para_type eq '=pod') { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
926
|
578
|
|
|
|
|
1669
|
$self->_ponder_pod($para,$curr_open,$paras); |
927
|
|
|
|
|
|
|
|
928
|
|
|
|
|
|
|
} elsif($para_type eq '=over') { |
929
|
213
|
100
|
|
|
|
840
|
next if $self->_ponder_over($para,$curr_open,$paras); |
930
|
|
|
|
|
|
|
|
931
|
|
|
|
|
|
|
} elsif($para_type eq '=back') { |
932
|
209
|
100
|
|
|
|
808
|
next if $self->_ponder_back($para,$curr_open,$paras); |
933
|
|
|
|
|
|
|
|
934
|
|
|
|
|
|
|
} else { |
935
|
|
|
|
|
|
|
|
936
|
|
|
|
|
|
|
# All non-magical codes!!! |
937
|
|
|
|
|
|
|
|
938
|
|
|
|
|
|
|
# Here we start using $para_type for our own twisted purposes, to |
939
|
|
|
|
|
|
|
# mean how it should get treated, not as what the element name |
940
|
|
|
|
|
|
|
# should be. |
941
|
|
|
|
|
|
|
|
942
|
5177
|
|
|
|
|
6722
|
DEBUG > 1 and print STDERR "Pondering non-magical $para_type\n"; |
943
|
|
|
|
|
|
|
|
944
|
5177
|
|
|
|
|
6683
|
my $i; |
945
|
|
|
|
|
|
|
|
946
|
|
|
|
|
|
|
# Enforce some =headN discipline |
947
|
5177
|
100
|
66
|
|
|
14525
|
if($para_type =~ m/^=head\d$/s |
|
|
|
100
|
|
|
|
|
|
|
|
66
|
|
|
|
|
948
|
|
|
|
|
|
|
and ! $self->{'accept_heads_anywhere'} |
949
|
|
|
|
|
|
|
and @$curr_open |
950
|
|
|
|
|
|
|
and $curr_open->[-1][0] eq '=over' |
951
|
|
|
|
|
|
|
) { |
952
|
6
|
|
|
|
|
10
|
DEBUG > 2 and print STDERR "'=$para_type' inside an '=over'!\n"; |
953
|
|
|
|
|
|
|
$self->whine( |
954
|
6
|
|
|
|
|
32
|
$para->[1]{'start_line'}, |
955
|
|
|
|
|
|
|
"You forgot a '=back' before '$para_type'" |
956
|
|
|
|
|
|
|
); |
957
|
6
|
|
|
|
|
31
|
unshift @$paras, ['=back', {}, ''], $para; # close the =over |
958
|
6
|
|
|
|
|
16
|
next; |
959
|
|
|
|
|
|
|
} |
960
|
|
|
|
|
|
|
|
961
|
|
|
|
|
|
|
|
962
|
5171
|
100
|
66
|
|
|
19466
|
if($para_type eq '=item') { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
963
|
|
|
|
|
|
|
|
964
|
1022
|
|
|
|
|
1444
|
my $over; |
965
|
1022
|
50
|
33
|
|
|
2715
|
unless(@$curr_open and |
966
|
1130
|
|
|
|
|
4301
|
$over = (grep { $_->[0] eq '=over' } @$curr_open)[-1]) { |
967
|
|
|
|
|
|
|
$self->whine( |
968
|
0
|
|
|
|
|
0
|
$para->[1]{'start_line'}, |
969
|
|
|
|
|
|
|
"'=item' outside of any '=over'" |
970
|
|
|
|
|
|
|
); |
971
|
|
|
|
|
|
|
unshift @$paras, |
972
|
0
|
|
|
|
|
0
|
['=over', {'start_line' => $para->[1]{'start_line'}}, ''], |
973
|
|
|
|
|
|
|
$para |
974
|
|
|
|
|
|
|
; |
975
|
0
|
|
|
|
|
0
|
next; |
976
|
|
|
|
|
|
|
} |
977
|
|
|
|
|
|
|
|
978
|
|
|
|
|
|
|
|
979
|
1022
|
|
|
|
|
2087
|
my $over_type = $over->[1]{'~type'}; |
980
|
|
|
|
|
|
|
|
981
|
1022
|
50
|
|
|
|
3583
|
if(!$over_type) { |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
982
|
|
|
|
|
|
|
# Shouldn't happen1 |
983
|
|
|
|
|
|
|
die "Typeless over in stack, starting at line " |
984
|
0
|
|
|
|
|
0
|
. $over->[1]{'start_line'}; |
985
|
|
|
|
|
|
|
|
986
|
|
|
|
|
|
|
} elsif($over_type eq 'block') { |
987
|
0
|
0
|
|
|
|
0
|
unless($curr_open->[-1][1]{'~bitched_about'}) { |
988
|
0
|
|
|
|
|
0
|
$curr_open->[-1][1]{'~bitched_about'} = 1; |
989
|
|
|
|
|
|
|
$self->whine( |
990
|
|
|
|
|
|
|
$curr_open->[-1][1]{'start_line'}, |
991
|
|
|
|
|
|
|
"You can't have =items (as at line " |
992
|
0
|
|
|
|
|
0
|
. $para->[1]{'start_line'} |
993
|
|
|
|
|
|
|
. ") unless the first thing after the =over is an =item" |
994
|
|
|
|
|
|
|
); |
995
|
|
|
|
|
|
|
} |
996
|
|
|
|
|
|
|
# Just turn it into a paragraph and reconsider it |
997
|
0
|
|
|
|
|
0
|
$para->[0] = '~Para'; |
998
|
0
|
|
|
|
|
0
|
unshift @$paras, $para; |
999
|
0
|
|
|
|
|
0
|
next; |
1000
|
|
|
|
|
|
|
|
1001
|
|
|
|
|
|
|
} elsif($over_type eq 'text') { |
1002
|
835
|
|
|
|
|
2428
|
my $item_type = $self->_get_item_type($para); |
1003
|
|
|
|
|
|
|
# That kills the content of the item if it's a number or bullet. |
1004
|
835
|
|
|
|
|
1307
|
DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n"; |
1005
|
|
|
|
|
|
|
|
1006
|
835
|
100
|
66
|
|
|
1878
|
if($item_type eq 'text') { |
|
|
50
|
|
|
|
|
|
1007
|
|
|
|
|
|
|
# Nothing special needs doing for 'text' |
1008
|
|
|
|
|
|
|
} elsif($item_type eq 'number' or $item_type eq 'bullet') { |
1009
|
|
|
|
|
|
|
$self->whine( |
1010
|
4
|
|
|
|
|
41
|
$para->[1]{'start_line'}, |
1011
|
|
|
|
|
|
|
"Expected text after =item, not a $item_type" |
1012
|
|
|
|
|
|
|
); |
1013
|
|
|
|
|
|
|
# Undo our clobbering: |
1014
|
4
|
|
|
|
|
14
|
push @$para, $para->[1]{'~orig_content'}; |
1015
|
4
|
|
|
|
|
8
|
delete $para->[1]{'number'}; |
1016
|
|
|
|
|
|
|
# Only a PROPER item-number element is allowed |
1017
|
|
|
|
|
|
|
# to have a number attribute. |
1018
|
|
|
|
|
|
|
} else { |
1019
|
0
|
|
|
|
|
0
|
die "Unhandled item type $item_type"; # should never happen |
1020
|
|
|
|
|
|
|
} |
1021
|
|
|
|
|
|
|
|
1022
|
|
|
|
|
|
|
# =item-text thingies don't need any assimilation, it seems. |
1023
|
|
|
|
|
|
|
|
1024
|
|
|
|
|
|
|
} elsif($over_type eq 'number') { |
1025
|
28
|
|
|
|
|
109
|
my $item_type = $self->_get_item_type($para); |
1026
|
|
|
|
|
|
|
# That kills the content of the item if it's a number or bullet. |
1027
|
28
|
|
|
|
|
46
|
DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n"; |
1028
|
|
|
|
|
|
|
|
1029
|
28
|
|
|
|
|
74
|
my $expected_value = ++ $curr_open->[-1][1]{'~counter'}; |
1030
|
|
|
|
|
|
|
|
1031
|
28
|
50
|
|
|
|
179
|
if($item_type eq 'bullet') { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
1032
|
|
|
|
|
|
|
# Hm, it's not numeric. Correct for this. |
1033
|
0
|
|
|
|
|
0
|
$para->[1]{'number'} = $expected_value; |
1034
|
|
|
|
|
|
|
$self->whine( |
1035
|
0
|
|
|
|
|
0
|
$para->[1]{'start_line'}, |
1036
|
|
|
|
|
|
|
"Expected '=item $expected_value'" |
1037
|
|
|
|
|
|
|
); |
1038
|
0
|
|
|
|
|
0
|
push @$para, $para->[1]{'~orig_content'}; |
1039
|
|
|
|
|
|
|
# restore the bullet, blocking the assimilation of next para |
1040
|
|
|
|
|
|
|
|
1041
|
|
|
|
|
|
|
} elsif($item_type eq 'text') { |
1042
|
|
|
|
|
|
|
# Hm, it's not numeric. Correct for this. |
1043
|
0
|
|
|
|
|
0
|
$para->[1]{'number'} = $expected_value; |
1044
|
|
|
|
|
|
|
$self->whine( |
1045
|
0
|
|
|
|
|
0
|
$para->[1]{'start_line'}, |
1046
|
|
|
|
|
|
|
"Expected '=item $expected_value'" |
1047
|
|
|
|
|
|
|
); |
1048
|
|
|
|
|
|
|
# Text content will still be there and will block next ~Para |
1049
|
|
|
|
|
|
|
|
1050
|
|
|
|
|
|
|
} elsif($item_type ne 'number') { |
1051
|
0
|
|
|
|
|
0
|
die "Unknown item type $item_type"; # should never happen |
1052
|
|
|
|
|
|
|
|
1053
|
|
|
|
|
|
|
} elsif($expected_value == $para->[1]{'number'}) { |
1054
|
28
|
|
|
|
|
46
|
DEBUG > 1 and print STDERR " Numeric item has the expected value of $expected_value\n"; |
1055
|
|
|
|
|
|
|
|
1056
|
|
|
|
|
|
|
} else { |
1057
|
0
|
|
|
|
|
0
|
DEBUG > 1 and print STDERR " Numeric item has ", $para->[1]{'number'}, |
1058
|
|
|
|
|
|
|
" instead of the expected value of $expected_value\n"; |
1059
|
|
|
|
|
|
|
$self->whine( |
1060
|
|
|
|
|
|
|
$para->[1]{'start_line'}, |
1061
|
0
|
|
|
|
|
0
|
"You have '=item " . $para->[1]{'number'} . |
1062
|
|
|
|
|
|
|
"' instead of the expected '=item $expected_value'" |
1063
|
|
|
|
|
|
|
); |
1064
|
0
|
|
|
|
|
0
|
$para->[1]{'number'} = $expected_value; # correcting!! |
1065
|
|
|
|
|
|
|
} |
1066
|
|
|
|
|
|
|
|
1067
|
28
|
50
|
|
|
|
86
|
if(@$para == 2) { |
1068
|
|
|
|
|
|
|
# For the cases where we /didn't/ push to @$para |
1069
|
28
|
100
|
|
|
|
64
|
if($paras->[0][0] eq '~Para') { |
1070
|
25
|
|
|
|
|
37
|
DEBUG and print STDERR "Assimilating following ~Para content into $over_type item\n"; |
1071
|
25
|
|
|
|
|
39
|
push @$para, splice @{shift @$paras},2; |
|
25
|
|
|
|
|
70
|
|
1072
|
|
|
|
|
|
|
} else { |
1073
|
3
|
|
|
|
|
6
|
DEBUG and print STDERR "Can't assimilate following ", $paras->[0][0], "\n"; |
1074
|
3
|
|
|
|
|
5
|
push @$para, ''; # Just so it's not contentless |
1075
|
|
|
|
|
|
|
} |
1076
|
|
|
|
|
|
|
} |
1077
|
|
|
|
|
|
|
|
1078
|
|
|
|
|
|
|
|
1079
|
|
|
|
|
|
|
} elsif($over_type eq 'bullet') { |
1080
|
159
|
|
|
|
|
475
|
my $item_type = $self->_get_item_type($para); |
1081
|
|
|
|
|
|
|
# That kills the content of the item if it's a number or bullet. |
1082
|
159
|
|
|
|
|
263
|
DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n"; |
1083
|
|
|
|
|
|
|
|
1084
|
159
|
50
|
|
|
|
350
|
if($item_type eq 'bullet') { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1085
|
|
|
|
|
|
|
# as expected! |
1086
|
|
|
|
|
|
|
|
1087
|
159
|
100
|
|
|
|
367
|
if( $para->[1]{'~_freaky_para_hack'} ) { |
1088
|
101
|
|
|
|
|
131
|
DEBUG and print STDERR "Accomodating '=item * Foo' tolerance hack.\n"; |
1089
|
101
|
|
|
|
|
197
|
push @$para, $para->[1]{'~_freaky_para_hack'}; |
1090
|
|
|
|
|
|
|
} |
1091
|
|
|
|
|
|
|
|
1092
|
|
|
|
|
|
|
} elsif($item_type eq 'number') { |
1093
|
|
|
|
|
|
|
$self->whine( |
1094
|
0
|
|
|
|
|
0
|
$para->[1]{'start_line'}, |
1095
|
|
|
|
|
|
|
"Expected '=item *'" |
1096
|
|
|
|
|
|
|
); |
1097
|
0
|
|
|
|
|
0
|
push @$para, $para->[1]{'~orig_content'}; |
1098
|
|
|
|
|
|
|
# and block assimilation of the next paragraph |
1099
|
0
|
|
|
|
|
0
|
delete $para->[1]{'number'}; |
1100
|
|
|
|
|
|
|
# Only a PROPER item-number element is allowed |
1101
|
|
|
|
|
|
|
# to have a number attribute. |
1102
|
|
|
|
|
|
|
} elsif($item_type eq 'text') { |
1103
|
|
|
|
|
|
|
$self->whine( |
1104
|
0
|
|
|
|
|
0
|
$para->[1]{'start_line'}, |
1105
|
|
|
|
|
|
|
"Expected '=item *'" |
1106
|
|
|
|
|
|
|
); |
1107
|
|
|
|
|
|
|
# But doesn't need processing. But it'll block assimilation |
1108
|
|
|
|
|
|
|
# of the next para. |
1109
|
|
|
|
|
|
|
} else { |
1110
|
0
|
|
|
|
|
0
|
die "Unhandled item type $item_type"; # should never happen |
1111
|
|
|
|
|
|
|
} |
1112
|
|
|
|
|
|
|
|
1113
|
159
|
100
|
|
|
|
398
|
if(@$para == 2) { |
1114
|
|
|
|
|
|
|
# For the cases where we /didn't/ push to @$para |
1115
|
58
|
50
|
|
|
|
152
|
if($paras->[0][0] eq '~Para') { |
1116
|
58
|
|
|
|
|
88
|
DEBUG and print STDERR "Assimilating following ~Para content into $over_type item\n"; |
1117
|
58
|
|
|
|
|
92
|
push @$para, splice @{shift @$paras},2; |
|
58
|
|
|
|
|
160
|
|
1118
|
|
|
|
|
|
|
} else { |
1119
|
0
|
|
|
|
|
0
|
DEBUG and print STDERR "Can't assimilate following ", $paras->[0][0], "\n"; |
1120
|
0
|
|
|
|
|
0
|
push @$para, ''; # Just so it's not contentless |
1121
|
|
|
|
|
|
|
} |
1122
|
|
|
|
|
|
|
} |
1123
|
|
|
|
|
|
|
|
1124
|
|
|
|
|
|
|
} else { |
1125
|
0
|
|
|
|
|
0
|
die "Unhandled =over type \"$over_type\"?"; |
1126
|
|
|
|
|
|
|
# Shouldn't happen! |
1127
|
|
|
|
|
|
|
} |
1128
|
|
|
|
|
|
|
|
1129
|
1022
|
|
|
|
|
1701
|
$para_type = 'Plain'; |
1130
|
1022
|
|
|
|
|
2771
|
$para->[0] .= '-' . $over_type; |
1131
|
|
|
|
|
|
|
# Whew. Now fall thru and process it. |
1132
|
|
|
|
|
|
|
|
1133
|
|
|
|
|
|
|
|
1134
|
|
|
|
|
|
|
} elsif($para_type eq '=extend') { |
1135
|
|
|
|
|
|
|
# Well, might as well implement it here. |
1136
|
21
|
|
|
|
|
76
|
$self->_ponder_extend($para); |
1137
|
21
|
|
|
|
|
50
|
next; # and skip |
1138
|
|
|
|
|
|
|
} elsif($para_type eq '=encoding') { |
1139
|
|
|
|
|
|
|
# Not actually acted on here, but we catch errors here. |
1140
|
38
|
|
|
|
|
205
|
$self->_handle_encoding_second_level($para); |
1141
|
38
|
100
|
|
|
|
130
|
next unless $self->keep_encoding_directive; |
1142
|
35
|
|
|
|
|
77
|
$para_type = 'Plain'; |
1143
|
|
|
|
|
|
|
} elsif($para_type eq '~Verbatim') { |
1144
|
538
|
|
|
|
|
972
|
$para->[0] = 'Verbatim'; |
1145
|
538
|
|
|
|
|
810
|
$para_type = '?Verbatim'; |
1146
|
|
|
|
|
|
|
} elsif($para_type eq '~Para') { |
1147
|
2772
|
|
|
|
|
4592
|
$para->[0] = 'Para'; |
1148
|
2772
|
|
|
|
|
4243
|
$para_type = '?Plain'; |
1149
|
|
|
|
|
|
|
} elsif($para_type eq 'Data') { |
1150
|
30
|
|
|
|
|
56
|
$para->[0] = 'Data'; |
1151
|
30
|
|
|
|
|
45
|
$para_type = '?Data'; |
1152
|
|
|
|
|
|
|
} elsif( $para_type =~ s/^=//s |
1153
|
|
|
|
|
|
|
and defined( $para_type = $self->{'accept_directives'}{$para_type} ) |
1154
|
|
|
|
|
|
|
) { |
1155
|
738
|
|
|
|
|
1323
|
DEBUG > 1 and print STDERR " Pondering known directive ${$para}[0] as $para_type\n"; |
1156
|
|
|
|
|
|
|
} else { |
1157
|
|
|
|
|
|
|
# An unknown directive! |
1158
|
12
|
|
|
|
|
37
|
$seen_legal_directive--; |
1159
|
|
|
|
|
|
|
DEBUG > 1 and printf STDERR "Unhandled directive %s (Handled: %s)\n", |
1160
|
12
|
|
|
|
|
22
|
$para->[0], join(' ', sort keys %{$self->{'accept_directives'}} ) |
1161
|
|
|
|
|
|
|
; |
1162
|
|
|
|
|
|
|
$self->whine( |
1163
|
12
|
|
|
|
|
129
|
$para->[1]{'start_line'}, |
1164
|
|
|
|
|
|
|
"Unknown directive: $para->[0]" |
1165
|
|
|
|
|
|
|
); |
1166
|
|
|
|
|
|
|
|
1167
|
|
|
|
|
|
|
# And maybe treat it as text instead of just letting it go? |
1168
|
12
|
|
|
|
|
51
|
next; |
1169
|
|
|
|
|
|
|
} |
1170
|
|
|
|
|
|
|
|
1171
|
5135
|
100
|
|
|
|
17627
|
if($para_type =~ s/^\?//s) { |
1172
|
3340
|
100
|
|
|
|
6775
|
if(! @$curr_open) { # usual case |
1173
|
2066
|
|
|
|
|
2841
|
DEBUG and print STDERR "Treating $para_type paragraph as such because stack is empty.\n"; |
1174
|
|
|
|
|
|
|
} else { |
1175
|
1274
|
|
|
|
|
3373
|
my @fors = grep $_->[0] eq '=for', @$curr_open; |
1176
|
|
|
|
|
|
|
DEBUG > 1 and print STDERR "Containing fors: ", |
1177
|
1274
|
|
|
|
|
1783
|
join(',', map $_->[1]{'target'}, @fors), "\n"; |
1178
|
|
|
|
|
|
|
|
1179
|
1274
|
100
|
|
|
|
2590
|
if(! @fors) { |
|
|
100
|
|
|
|
|
|
1180
|
1150
|
|
|
|
|
1718
|
DEBUG and print STDERR "Treating $para_type paragraph as such because stack has no =for's\n"; |
1181
|
|
|
|
|
|
|
|
1182
|
|
|
|
|
|
|
#} elsif(grep $_->[1]{'~resolve'}, @fors) { |
1183
|
|
|
|
|
|
|
#} elsif(not grep !$_->[1]{'~resolve'}, @fors) { |
1184
|
|
|
|
|
|
|
} elsif( $fors[-1][1]{'~resolve'} ) { |
1185
|
|
|
|
|
|
|
# Look to the immediately containing for |
1186
|
|
|
|
|
|
|
|
1187
|
73
|
100
|
|
|
|
154
|
if($para_type eq 'Data') { |
1188
|
18
|
|
|
|
|
24
|
DEBUG and print STDERR "Treating Data paragraph as Plain/Verbatim because the containing =for ($fors[-1][1]{'target'}) is a resolver\n"; |
1189
|
18
|
|
|
|
|
29
|
$para->[0] = 'Para'; |
1190
|
18
|
|
|
|
|
40
|
$para_type = 'Plain'; |
1191
|
|
|
|
|
|
|
} else { |
1192
|
55
|
|
|
|
|
85
|
DEBUG and print STDERR "Treating $para_type paragraph as such because the containing =for ($fors[-1][1]{'target'}) is a resolver\n"; |
1193
|
|
|
|
|
|
|
} |
1194
|
|
|
|
|
|
|
} else { |
1195
|
51
|
|
|
|
|
88
|
DEBUG and print STDERR "Treating $para_type paragraph as Data because the containing =for ($fors[-1][1]{'target'}) is a non-resolver\n"; |
1196
|
51
|
|
|
|
|
136
|
$para->[0] = $para_type = 'Data'; |
1197
|
|
|
|
|
|
|
} |
1198
|
|
|
|
|
|
|
} |
1199
|
|
|
|
|
|
|
} |
1200
|
|
|
|
|
|
|
|
1201
|
|
|
|
|
|
|
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
1202
|
5135
|
100
|
|
|
|
10340
|
if($para_type eq 'Plain') { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
1203
|
4547
|
|
|
|
|
10516
|
$self->_ponder_Plain($para); |
1204
|
|
|
|
|
|
|
} elsif($para_type eq 'Verbatim') { |
1205
|
534
|
|
|
|
|
1429
|
$self->_ponder_Verbatim($para); |
1206
|
|
|
|
|
|
|
} elsif($para_type eq 'Data') { |
1207
|
54
|
|
|
|
|
174
|
$self->_ponder_Data($para); |
1208
|
|
|
|
|
|
|
} else { |
1209
|
0
|
|
|
|
|
0
|
die "\$para type is $para_type -- how did that happen?"; |
1210
|
|
|
|
|
|
|
# Shouldn't happen. |
1211
|
|
|
|
|
|
|
} |
1212
|
|
|
|
|
|
|
|
1213
|
|
|
|
|
|
|
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
1214
|
5135
|
|
|
|
|
13931
|
$para->[0] =~ s/^[~=]//s; |
1215
|
|
|
|
|
|
|
|
1216
|
5135
|
|
|
|
|
7329
|
DEBUG and print STDERR "\n", pretty($para), "\n"; |
1217
|
|
|
|
|
|
|
|
1218
|
|
|
|
|
|
|
# traverse the treelet (which might well be just one string scalar) |
1219
|
|
|
|
|
|
|
$self->{'content_seen'} ||= 1 if $seen_legal_directive |
1220
|
5135
|
100
|
100
|
|
|
22584
|
&& ! $self->{'~tried_gen_errata'}; |
|
|
|
100
|
|
|
|
|
1221
|
5135
|
|
|
|
|
12610
|
$self->_traverse_treelet_bit(@$para); |
1222
|
|
|
|
|
|
|
} |
1223
|
|
|
|
|
|
|
} |
1224
|
|
|
|
|
|
|
|
1225
|
6290
|
|
|
|
|
17108
|
return; |
1226
|
|
|
|
|
|
|
} |
1227
|
|
|
|
|
|
|
|
1228
|
|
|
|
|
|
|
########################################################################### |
1229
|
|
|
|
|
|
|
# The sub-ponderers... |
1230
|
|
|
|
|
|
|
|
1231
|
|
|
|
|
|
|
|
1232
|
|
|
|
|
|
|
|
1233
|
|
|
|
|
|
|
sub _ponder_for { |
1234
|
50
|
|
|
50
|
|
124
|
my ($self,$para,$curr_open,$paras) = @_; |
1235
|
|
|
|
|
|
|
|
1236
|
|
|
|
|
|
|
# Fake it out as a begin/end |
1237
|
50
|
|
|
|
|
70
|
my $target; |
1238
|
|
|
|
|
|
|
|
1239
|
50
|
50
|
|
|
|
128
|
if(grep $_->[1]{'~ignore'}, @$curr_open) { |
1240
|
0
|
|
|
|
|
0
|
DEBUG > 1 and print STDERR "Ignoring ignorable =for\n"; |
1241
|
0
|
|
|
|
|
0
|
return 1; |
1242
|
|
|
|
|
|
|
} |
1243
|
|
|
|
|
|
|
|
1244
|
50
|
|
|
|
|
151
|
for(my $i = 2; $i < @$para; ++$i) { |
1245
|
50
|
50
|
|
|
|
234
|
if($para->[$i] =~ s/^\s*(\S+)\s*//s) { |
1246
|
50
|
|
|
|
|
107
|
$target = $1; |
1247
|
50
|
|
|
|
|
83
|
last; |
1248
|
|
|
|
|
|
|
} |
1249
|
|
|
|
|
|
|
} |
1250
|
50
|
50
|
|
|
|
115
|
unless(defined $target) { |
1251
|
|
|
|
|
|
|
$self->whine( |
1252
|
0
|
|
|
|
|
0
|
$para->[1]{'start_line'}, |
1253
|
|
|
|
|
|
|
"=for without a target?" |
1254
|
|
|
|
|
|
|
); |
1255
|
0
|
|
|
|
|
0
|
return 1; |
1256
|
|
|
|
|
|
|
} |
1257
|
50
|
|
|
|
|
70
|
DEBUG > 1 and |
1258
|
|
|
|
|
|
|
print STDERR "Faking out a =for $target as a =begin $target / =end $target\n"; |
1259
|
|
|
|
|
|
|
|
1260
|
50
|
|
|
|
|
115
|
$para->[0] = 'Data'; |
1261
|
|
|
|
|
|
|
|
1262
|
|
|
|
|
|
|
unshift @$paras, |
1263
|
|
|
|
|
|
|
['=begin', |
1264
|
|
|
|
|
|
|
{'start_line' => $para->[1]{'start_line'}, '~really' => '=for'}, |
1265
|
|
|
|
|
|
|
$target, |
1266
|
|
|
|
|
|
|
], |
1267
|
|
|
|
|
|
|
$para, |
1268
|
|
|
|
|
|
|
['=end', |
1269
|
50
|
|
|
|
|
320
|
{'start_line' => $para->[1]{'start_line'}, '~really' => '=for'}, |
1270
|
|
|
|
|
|
|
$target, |
1271
|
|
|
|
|
|
|
], |
1272
|
|
|
|
|
|
|
; |
1273
|
|
|
|
|
|
|
|
1274
|
50
|
|
|
|
|
185
|
return 1; |
1275
|
|
|
|
|
|
|
} |
1276
|
|
|
|
|
|
|
|
1277
|
|
|
|
|
|
|
sub _ponder_begin { |
1278
|
127
|
|
|
127
|
|
253
|
my ($self,$para,$curr_open,$paras) = @_; |
1279
|
127
|
|
|
|
|
333
|
my $content = join ' ', splice @$para, 2; |
1280
|
127
|
|
|
|
|
310
|
$content =~ s/^\s+//s; |
1281
|
127
|
|
|
|
|
283
|
$content =~ s/\s+$//s; |
1282
|
127
|
50
|
|
|
|
279
|
unless(length($content)) { |
1283
|
|
|
|
|
|
|
$self->whine( |
1284
|
0
|
|
|
|
|
0
|
$para->[1]{'start_line'}, |
1285
|
|
|
|
|
|
|
"=begin without a target?" |
1286
|
|
|
|
|
|
|
); |
1287
|
0
|
|
|
|
|
0
|
DEBUG and print STDERR "Ignoring targetless =begin\n"; |
1288
|
0
|
|
|
|
|
0
|
return 1; |
1289
|
|
|
|
|
|
|
} |
1290
|
|
|
|
|
|
|
|
1291
|
127
|
|
|
|
|
562
|
my ($target, $title) = $content =~ m/^(\S+)\s*(.*)$/; |
1292
|
127
|
100
|
|
|
|
330
|
$para->[1]{'title'} = $title if ($title); |
1293
|
127
|
|
|
|
|
330
|
$para->[1]{'target'} = $target; # without any ':' |
1294
|
127
|
|
|
|
|
183
|
$content = $target; # strip off the title |
1295
|
|
|
|
|
|
|
|
1296
|
127
|
|
|
|
|
208
|
$content =~ s/^:!/!:/s; |
1297
|
127
|
|
|
|
|
173
|
my $neg; # whether this is a negation-match |
1298
|
127
|
100
|
|
|
|
395
|
$neg = 1 if $content =~ s/^!//s; |
1299
|
127
|
|
|
|
|
229
|
my $to_resolve; # whether to process formatting codes |
1300
|
127
|
100
|
|
|
|
322
|
$to_resolve = 1 if $content =~ s/^://s; |
1301
|
|
|
|
|
|
|
|
1302
|
127
|
|
|
|
|
189
|
my $dont_ignore; # whether this target matches us |
1303
|
|
|
|
|
|
|
|
1304
|
127
|
100
|
|
|
|
447
|
foreach my $target_name ( |
1305
|
|
|
|
|
|
|
split(',', $content, -1), |
1306
|
|
|
|
|
|
|
$neg ? () : '*' |
1307
|
|
|
|
|
|
|
) { |
1308
|
249
|
|
|
|
|
316
|
DEBUG > 2 and |
1309
|
|
|
|
|
|
|
print STDERR " Considering whether =begin $content matches $target_name\n"; |
1310
|
249
|
100
|
|
|
|
570
|
next unless $self->{'accept_targets'}{$target_name}; |
1311
|
|
|
|
|
|
|
|
1312
|
66
|
|
|
|
|
91
|
DEBUG > 2 and |
1313
|
|
|
|
|
|
|
print STDERR " It DOES match the acceptable target $target_name!\n"; |
1314
|
|
|
|
|
|
|
$to_resolve = 1 |
1315
|
66
|
100
|
|
|
|
179
|
if $self->{'accept_targets'}{$target_name} eq 'force_resolve'; |
1316
|
66
|
|
|
|
|
105
|
$dont_ignore = 1; |
1317
|
66
|
|
|
|
|
155
|
$para->[1]{'target_matching'} = $target_name; |
1318
|
66
|
|
|
|
|
103
|
last; # stop looking at other target names |
1319
|
|
|
|
|
|
|
} |
1320
|
|
|
|
|
|
|
|
1321
|
127
|
100
|
|
|
|
304
|
if($neg) { |
1322
|
33
|
100
|
|
|
|
67
|
if( $dont_ignore ) { |
1323
|
6
|
|
|
|
|
12
|
$dont_ignore = ''; |
1324
|
6
|
|
|
|
|
11
|
delete $para->[1]{'target_matching'}; |
1325
|
6
|
|
|
|
|
9
|
DEBUG > 2 and print STDERR " But the leading ! means that this is a NON-match!\n"; |
1326
|
|
|
|
|
|
|
} else { |
1327
|
27
|
|
|
|
|
44
|
$dont_ignore = 1; |
1328
|
27
|
|
|
|
|
55
|
$para->[1]{'target_matching'} = '!'; |
1329
|
27
|
|
|
|
|
41
|
DEBUG > 2 and print STDERR " But the leading ! means that this IS a match!\n"; |
1330
|
|
|
|
|
|
|
} |
1331
|
|
|
|
|
|
|
} |
1332
|
|
|
|
|
|
|
|
1333
|
127
|
|
|
|
|
198
|
$para->[0] = '=for'; # Just what we happen to call these, internally |
1334
|
127
|
|
100
|
|
|
448
|
$para->[1]{'~really'} ||= '=begin'; |
1335
|
127
|
|
100
|
|
|
397
|
$para->[1]{'~ignore'} = (! $dont_ignore) || 0; |
1336
|
127
|
|
100
|
|
|
326
|
$para->[1]{'~resolve'} = $to_resolve || 0; |
1337
|
|
|
|
|
|
|
|
1338
|
127
|
|
|
|
|
152
|
DEBUG > 1 and print STDERR " Making note to ", $dont_ignore ? 'not ' : '', |
1339
|
|
|
|
|
|
|
"ignore contents of this region\n"; |
1340
|
127
|
|
|
|
|
170
|
DEBUG > 1 and $dont_ignore and print STDERR " Making note to treat contents as ", |
1341
|
|
|
|
|
|
|
($to_resolve ? 'verbatim/plain' : 'data'), " paragraphs\n"; |
1342
|
127
|
|
|
|
|
176
|
DEBUG > 1 and print STDERR " (Stack now: ", $self->_dump_curr_open(), ")\n"; |
1343
|
|
|
|
|
|
|
|
1344
|
127
|
|
|
|
|
244
|
push @$curr_open, $para; |
1345
|
127
|
100
|
100
|
|
|
505
|
if(!$dont_ignore or scalar grep $_->[1]{'~ignore'}, @$curr_open) { |
1346
|
40
|
|
|
|
|
54
|
DEBUG > 1 and print STDERR "Ignoring ignorable =begin\n"; |
1347
|
|
|
|
|
|
|
} else { |
1348
|
87
|
50
|
100
|
|
|
281
|
$self->{'content_seen'} ||= 1 unless $self->{'~tried_gen_errata'}; |
1349
|
87
|
|
|
|
|
291
|
$self->_handle_element_start((my $scratch='for'), $para->[1]); |
1350
|
|
|
|
|
|
|
} |
1351
|
|
|
|
|
|
|
|
1352
|
127
|
|
|
|
|
449
|
return 1; |
1353
|
|
|
|
|
|
|
} |
1354
|
|
|
|
|
|
|
|
1355
|
|
|
|
|
|
|
sub _ponder_end { |
1356
|
130
|
|
|
130
|
|
293
|
my ($self,$para,$curr_open,$paras) = @_; |
1357
|
130
|
|
|
|
|
373
|
my $content = join ' ', splice @$para, 2; |
1358
|
130
|
|
|
|
|
331
|
$content =~ s/^\s+//s; |
1359
|
130
|
|
|
|
|
292
|
$content =~ s/\s+$//s; |
1360
|
130
|
|
|
|
|
182
|
DEBUG and print STDERR "Ogling '=end $content' directive\n"; |
1361
|
|
|
|
|
|
|
|
1362
|
130
|
50
|
|
|
|
281
|
unless(length($content)) { |
1363
|
|
|
|
|
|
|
$self->whine( |
1364
|
|
|
|
|
|
|
$para->[1]{'start_line'}, |
1365
|
|
|
|
|
|
|
"'=end' without a target?" . ( |
1366
|
|
|
|
|
|
|
( @$curr_open and $curr_open->[-1][0] eq '=for' ) |
1367
|
0
|
0
|
0
|
|
|
0
|
? ( " (Should be \"=end " . $curr_open->[-1][1]{'target'} . '")' ) |
1368
|
|
|
|
|
|
|
: '' |
1369
|
|
|
|
|
|
|
) |
1370
|
|
|
|
|
|
|
); |
1371
|
0
|
|
|
|
|
0
|
DEBUG and print STDERR "Ignoring targetless =end\n"; |
1372
|
0
|
|
|
|
|
0
|
return 1; |
1373
|
|
|
|
|
|
|
} |
1374
|
|
|
|
|
|
|
|
1375
|
130
|
50
|
|
|
|
404
|
unless($content =~ m/^\S+$/) { # i.e., unless it's one word |
1376
|
|
|
|
|
|
|
$self->whine( |
1377
|
0
|
|
|
|
|
0
|
$para->[1]{'start_line'}, |
1378
|
|
|
|
|
|
|
"'=end $content' is invalid. (Stack: " |
1379
|
|
|
|
|
|
|
. $self->_dump_curr_open() . ')' |
1380
|
|
|
|
|
|
|
); |
1381
|
0
|
|
|
|
|
0
|
DEBUG and print STDERR "Ignoring mistargetted =end $content\n"; |
1382
|
0
|
|
|
|
|
0
|
return 1; |
1383
|
|
|
|
|
|
|
} |
1384
|
|
|
|
|
|
|
|
1385
|
130
|
50
|
33
|
|
|
496
|
unless(@$curr_open and $curr_open->[-1][0] eq '=for') { |
1386
|
|
|
|
|
|
|
$self->whine( |
1387
|
0
|
|
|
|
|
0
|
$para->[1]{'start_line'}, |
1388
|
|
|
|
|
|
|
"=end $content without matching =begin. (Stack: " |
1389
|
|
|
|
|
|
|
. $self->_dump_curr_open() . ')' |
1390
|
|
|
|
|
|
|
); |
1391
|
0
|
|
|
|
|
0
|
DEBUG and print STDERR "Ignoring mistargetted =end $content\n"; |
1392
|
0
|
|
|
|
|
0
|
return 1; |
1393
|
|
|
|
|
|
|
} |
1394
|
|
|
|
|
|
|
|
1395
|
130
|
100
|
|
|
|
324
|
unless($content eq $curr_open->[-1][1]{'target'}) { |
1396
|
|
|
|
|
|
|
$self->whine( |
1397
|
|
|
|
|
|
|
$para->[1]{'start_line'}, |
1398
|
|
|
|
|
|
|
"=end $content doesn't match =begin " |
1399
|
3
|
|
|
|
|
20
|
. $curr_open->[-1][1]{'target'} |
1400
|
|
|
|
|
|
|
. ". (Stack: " |
1401
|
|
|
|
|
|
|
. $self->_dump_curr_open() . ')' |
1402
|
|
|
|
|
|
|
); |
1403
|
3
|
|
|
|
|
4
|
DEBUG and print STDERR "Ignoring mistargetted =end $content at line $para->[1]{'start_line'}\n"; |
1404
|
3
|
|
|
|
|
13
|
return 1; |
1405
|
|
|
|
|
|
|
} |
1406
|
|
|
|
|
|
|
|
1407
|
|
|
|
|
|
|
# Else it's okay to close... |
1408
|
127
|
100
|
|
|
|
376
|
if(grep $_->[1]{'~ignore'}, @$curr_open) { |
1409
|
40
|
|
|
|
|
52
|
DEBUG > 1 and print STDERR "Not firing any event for this =end $content because in an ignored region\n"; |
1410
|
|
|
|
|
|
|
# And that may be because of this to-be-closed =for region, or some |
1411
|
|
|
|
|
|
|
# other one, but it doesn't matter. |
1412
|
|
|
|
|
|
|
} else { |
1413
|
87
|
|
|
|
|
165
|
$curr_open->[-1][1]{'start_line'} = $para->[1]{'start_line'}; |
1414
|
|
|
|
|
|
|
# what's that for? |
1415
|
|
|
|
|
|
|
|
1416
|
87
|
50
|
50
|
|
|
266
|
$self->{'content_seen'} ||= 1 unless $self->{'~tried_gen_errata'}; |
1417
|
87
|
|
|
|
|
274
|
$self->_handle_element_end( my $scratch = 'for', $para->[1]); |
1418
|
|
|
|
|
|
|
} |
1419
|
127
|
|
|
|
|
208
|
DEBUG > 1 and print STDERR "Popping $curr_open->[-1][0] $curr_open->[-1][1]{'target'} because of =end $content\n"; |
1420
|
127
|
|
|
|
|
203
|
pop @$curr_open; |
1421
|
|
|
|
|
|
|
|
1422
|
127
|
|
|
|
|
602
|
return 1; |
1423
|
|
|
|
|
|
|
} |
1424
|
|
|
|
|
|
|
|
1425
|
|
|
|
|
|
|
sub _ponder_doc_end { |
1426
|
940
|
|
|
940
|
|
2277
|
my ($self,$para,$curr_open,$paras) = @_; |
1427
|
940
|
100
|
|
|
|
1966
|
if(@$curr_open) { # Deal with things left open |
1428
|
10
|
|
|
|
|
16
|
DEBUG and print STDERR "Stack is nonempty at end-document: (", |
1429
|
|
|
|
|
|
|
$self->_dump_curr_open(), ")\n"; |
1430
|
|
|
|
|
|
|
|
1431
|
10
|
|
|
|
|
19
|
DEBUG > 9 and print STDERR "Stack: ", pretty($curr_open), "\n"; |
1432
|
10
|
|
|
|
|
56
|
unshift @$paras, $self->_closers_for_all_curr_open; |
1433
|
|
|
|
|
|
|
# Make sure there is exactly one ~end in the parastack, at the end: |
1434
|
10
|
|
|
|
|
56
|
@$paras = grep $_->[0] ne '~end', @$paras; |
1435
|
10
|
|
|
|
|
27
|
push @$paras, $para, $para; |
1436
|
|
|
|
|
|
|
# We need two -- once for the next cycle where we |
1437
|
|
|
|
|
|
|
# generate errata, and then another to be at the end |
1438
|
|
|
|
|
|
|
# when that loop back around to process the errata. |
1439
|
10
|
|
|
|
|
44
|
return 1; |
1440
|
|
|
|
|
|
|
|
1441
|
|
|
|
|
|
|
} else { |
1442
|
930
|
|
|
|
|
1318
|
DEBUG and print STDERR "Okay, stack is empty now.\n"; |
1443
|
|
|
|
|
|
|
} |
1444
|
|
|
|
|
|
|
|
1445
|
|
|
|
|
|
|
# Try generating errata section, if applicable |
1446
|
930
|
100
|
|
|
|
2177
|
unless($self->{'~tried_gen_errata'}) { |
1447
|
890
|
|
|
|
|
1728
|
$self->{'~tried_gen_errata'} = 1; |
1448
|
890
|
|
|
|
|
2292
|
my @extras = $self->_gen_errata(); |
1449
|
890
|
100
|
|
|
|
2119
|
if(@extras) { |
1450
|
40
|
|
|
|
|
118
|
unshift @$paras, @extras; |
1451
|
40
|
|
|
|
|
82
|
DEBUG and print STDERR "Generated errata... relooping...\n"; |
1452
|
40
|
|
|
|
|
199
|
return 1; # I.e., loop around again to process these fake-o paragraphs |
1453
|
|
|
|
|
|
|
} |
1454
|
|
|
|
|
|
|
} |
1455
|
|
|
|
|
|
|
|
1456
|
890
|
|
|
|
|
1716
|
splice @$paras; # Well, that's that for this paragraph buffer. |
1457
|
890
|
|
|
|
|
1258
|
DEBUG and print STDERR "Throwing end-document event.\n"; |
1458
|
|
|
|
|
|
|
|
1459
|
890
|
|
|
|
|
2790
|
$self->_handle_element_end( my $scratch = 'Document' ); |
1460
|
890
|
|
|
|
|
3175
|
return 1; # Hasta la byebye |
1461
|
|
|
|
|
|
|
} |
1462
|
|
|
|
|
|
|
|
1463
|
|
|
|
|
|
|
sub _ponder_pod { |
1464
|
578
|
|
|
578
|
|
1287
|
my ($self,$para,$curr_open,$paras) = @_; |
1465
|
|
|
|
|
|
|
$self->whine( |
1466
|
578
|
50
|
|
|
|
1309
|
$para->[1]{'start_line'}, |
1467
|
|
|
|
|
|
|
"=pod directives shouldn't be over one line long! Ignoring all " |
1468
|
|
|
|
|
|
|
. (@$para - 2) . " lines of content" |
1469
|
|
|
|
|
|
|
) if @$para > 3; |
1470
|
|
|
|
|
|
|
|
1471
|
|
|
|
|
|
|
# Content ignored unless 'pod_handler' is set |
1472
|
578
|
100
|
|
|
|
1304
|
if (my $pod_handler = $self->{'pod_handler'}) { |
1473
|
6
|
|
|
|
|
22
|
my ($line_num, $line) = map $_, $para->[1]{'start_line'}, $para->[2]; |
1474
|
6
|
100
|
|
|
|
18
|
$line = $line eq '' ? "=pod" : "=pod $line"; # imitate cut_handler output |
1475
|
6
|
|
|
|
|
15
|
$pod_handler->($line, $line_num, $self); |
1476
|
|
|
|
|
|
|
} |
1477
|
|
|
|
|
|
|
|
1478
|
|
|
|
|
|
|
# The surrounding methods set content_seen, so let us remain consistent. |
1479
|
|
|
|
|
|
|
# I do not know why it was not here before -- should it not be here? |
1480
|
|
|
|
|
|
|
# $self->{'content_seen'} ||= 1 unless $self->{'~tried_gen_errata'}; |
1481
|
|
|
|
|
|
|
|
1482
|
578
|
|
|
|
|
1614
|
return; |
1483
|
|
|
|
|
|
|
} |
1484
|
|
|
|
|
|
|
|
1485
|
|
|
|
|
|
|
sub _ponder_over { |
1486
|
213
|
|
|
213
|
|
569
|
my ($self,$para,$curr_open,$paras) = @_; |
1487
|
213
|
50
|
|
|
|
497
|
return 1 unless @$paras; |
1488
|
213
|
|
|
|
|
345
|
my $list_type; |
1489
|
|
|
|
|
|
|
|
1490
|
213
|
100
|
|
|
|
666
|
if($paras->[0][0] eq '=item') { # most common case |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
1491
|
185
|
|
|
|
|
717
|
$list_type = $self->_get_initial_item_type($paras->[0]); |
1492
|
|
|
|
|
|
|
|
1493
|
|
|
|
|
|
|
} elsif($paras->[0][0] eq '=back') { |
1494
|
|
|
|
|
|
|
# Ignore empty lists by default |
1495
|
6
|
100
|
|
|
|
19
|
if ($self->{'parse_empty_lists'}) { |
1496
|
2
|
|
|
|
|
4
|
$list_type = 'empty'; |
1497
|
|
|
|
|
|
|
} else { |
1498
|
4
|
|
|
|
|
8
|
shift @$paras; |
1499
|
4
|
|
|
|
|
19
|
return 1; |
1500
|
|
|
|
|
|
|
} |
1501
|
|
|
|
|
|
|
} elsif($paras->[0][0] eq '~end') { |
1502
|
|
|
|
|
|
|
$self->whine( |
1503
|
0
|
|
|
|
|
0
|
$para->[1]{'start_line'}, |
1504
|
|
|
|
|
|
|
"=over is the last thing in the document?!" |
1505
|
|
|
|
|
|
|
); |
1506
|
0
|
|
|
|
|
0
|
return 1; # But feh, ignore it. |
1507
|
|
|
|
|
|
|
} else { |
1508
|
22
|
|
|
|
|
36
|
$list_type = 'block'; |
1509
|
|
|
|
|
|
|
} |
1510
|
209
|
|
|
|
|
484
|
$para->[1]{'~type'} = $list_type; |
1511
|
209
|
|
|
|
|
473
|
push @$curr_open, $para; |
1512
|
|
|
|
|
|
|
# yes, we reuse the paragraph as a stack item |
1513
|
|
|
|
|
|
|
|
1514
|
209
|
|
|
|
|
719
|
my $content = join ' ', splice @$para, 2; |
1515
|
209
|
|
|
|
|
527
|
$para->[1]{'~orig_content'} = $content; |
1516
|
209
|
|
|
|
|
307
|
my $overness; |
1517
|
209
|
100
|
|
|
|
1081
|
if($content =~ m/^\s*$/s) { |
|
|
50
|
|
|
|
|
|
1518
|
119
|
|
|
|
|
352
|
$para->[1]{'indent'} = 4; |
1519
|
|
|
|
|
|
|
} elsif($content =~ m/^\s*((?:\d*\.)?\d+)\s*$/s) { |
1520
|
68
|
|
|
68
|
|
496223
|
no integer; |
|
68
|
|
|
|
|
216
|
|
|
68
|
|
|
|
|
472
|
|
1521
|
90
|
|
|
|
|
310
|
$para->[1]{'indent'} = $1; |
1522
|
90
|
50
|
|
|
|
279
|
if($1 == 0) { |
1523
|
|
|
|
|
|
|
$self->whine( |
1524
|
0
|
|
|
|
|
0
|
$para->[1]{'start_line'}, |
1525
|
|
|
|
|
|
|
"Can't have a 0 in =over $content" |
1526
|
|
|
|
|
|
|
); |
1527
|
0
|
|
|
|
|
0
|
$para->[1]{'indent'} = 4; |
1528
|
|
|
|
|
|
|
} |
1529
|
|
|
|
|
|
|
} else { |
1530
|
|
|
|
|
|
|
$self->whine( |
1531
|
0
|
|
|
|
|
0
|
$para->[1]{'start_line'}, |
1532
|
|
|
|
|
|
|
"=over should be: '=over' or '=over positive_number'" |
1533
|
|
|
|
|
|
|
); |
1534
|
0
|
|
|
|
|
0
|
$para->[1]{'indent'} = 4; |
1535
|
|
|
|
|
|
|
} |
1536
|
209
|
|
|
|
|
336
|
DEBUG > 1 and print STDERR "=over found of type $list_type\n"; |
1537
|
|
|
|
|
|
|
|
1538
|
209
|
100
|
100
|
|
|
812
|
$self->{'content_seen'} ||= 1 unless $self->{'~tried_gen_errata'}; |
1539
|
209
|
|
|
|
|
944
|
$self->_handle_element_start((my $scratch = 'over-' . $list_type), $para->[1]); |
1540
|
|
|
|
|
|
|
|
1541
|
209
|
|
|
|
|
776
|
return; |
1542
|
|
|
|
|
|
|
} |
1543
|
|
|
|
|
|
|
|
1544
|
|
|
|
|
|
|
sub _ponder_back { |
1545
|
209
|
|
|
209
|
|
557
|
my ($self,$para,$curr_open,$paras) = @_; |
1546
|
|
|
|
|
|
|
# TODO: fire off or or ?? |
1547
|
|
|
|
|
|
|
|
1548
|
209
|
|
|
|
|
582
|
my $content = join ' ', splice @$para, 2; |
1549
|
209
|
50
|
|
|
|
561
|
if($content =~ m/\S/) { |
1550
|
|
|
|
|
|
|
$self->whine( |
1551
|
0
|
|
|
|
|
0
|
$para->[1]{'start_line'}, |
1552
|
|
|
|
|
|
|
"=back doesn't take any parameters, but you said =back $content" |
1553
|
|
|
|
|
|
|
); |
1554
|
|
|
|
|
|
|
} |
1555
|
|
|
|
|
|
|
|
1556
|
209
|
50
|
33
|
|
|
939
|
if(@$curr_open and $curr_open->[-1][0] eq '=over') { |
1557
|
209
|
|
|
|
|
322
|
DEBUG > 1 and print STDERR "=back happily closes matching =over\n"; |
1558
|
|
|
|
|
|
|
# Expected case: we're closing the most recently opened thing |
1559
|
|
|
|
|
|
|
#my $over = pop @$curr_open; |
1560
|
209
|
100
|
50
|
|
|
702
|
$self->{'content_seen'} ||= 1 unless $self->{'~tried_gen_errata'}; |
1561
|
|
|
|
|
|
|
$self->_handle_element_end( my $scratch = |
1562
|
209
|
|
|
|
|
933
|
'over-' . ( (pop @$curr_open)->[1]{'~type'} ), $para->[1] |
1563
|
|
|
|
|
|
|
); |
1564
|
|
|
|
|
|
|
} else { |
1565
|
0
|
|
|
|
|
0
|
DEBUG > 1 and print STDERR "=back found without a matching =over. Stack: (", |
1566
|
|
|
|
|
|
|
join(', ', map $_->[0], @$curr_open), ").\n"; |
1567
|
|
|
|
|
|
|
$self->whine( |
1568
|
0
|
|
|
|
|
0
|
$para->[1]{'start_line'}, |
1569
|
|
|
|
|
|
|
'=back without =over' |
1570
|
|
|
|
|
|
|
); |
1571
|
0
|
|
|
|
|
0
|
return 1; # and ignore it |
1572
|
|
|
|
|
|
|
} |
1573
|
|
|
|
|
|
|
} |
1574
|
|
|
|
|
|
|
|
1575
|
|
|
|
|
|
|
sub _ponder_item { |
1576
|
0
|
|
|
0
|
|
0
|
my ($self,$para,$curr_open,$paras) = @_; |
1577
|
0
|
|
|
|
|
0
|
my $over; |
1578
|
0
|
0
|
0
|
|
|
0
|
unless(@$curr_open and |
1579
|
0
|
|
|
|
|
0
|
$over = (grep { $_->[0] eq '=over' } @$curr_open)[-1]) { |
1580
|
|
|
|
|
|
|
$self->whine( |
1581
|
0
|
|
|
|
|
0
|
$para->[1]{'start_line'}, |
1582
|
|
|
|
|
|
|
"'=item' outside of any '=over'" |
1583
|
|
|
|
|
|
|
); |
1584
|
|
|
|
|
|
|
unshift @$paras, |
1585
|
0
|
|
|
|
|
0
|
['=over', {'start_line' => $para->[1]{'start_line'}}, ''], |
1586
|
|
|
|
|
|
|
$para |
1587
|
|
|
|
|
|
|
; |
1588
|
0
|
|
|
|
|
0
|
return 1; |
1589
|
|
|
|
|
|
|
} |
1590
|
|
|
|
|
|
|
|
1591
|
|
|
|
|
|
|
|
1592
|
0
|
|
|
|
|
0
|
my $over_type = $over->[1]{'~type'}; |
1593
|
|
|
|
|
|
|
|
1594
|
0
|
0
|
|
|
|
0
|
if(!$over_type) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1595
|
|
|
|
|
|
|
# Shouldn't happen1 |
1596
|
|
|
|
|
|
|
die "Typeless over in stack, starting at line " |
1597
|
0
|
|
|
|
|
0
|
. $over->[1]{'start_line'}; |
1598
|
|
|
|
|
|
|
|
1599
|
|
|
|
|
|
|
} elsif($over_type eq 'block') { |
1600
|
0
|
0
|
|
|
|
0
|
unless($curr_open->[-1][1]{'~bitched_about'}) { |
1601
|
0
|
|
|
|
|
0
|
$curr_open->[-1][1]{'~bitched_about'} = 1; |
1602
|
|
|
|
|
|
|
$self->whine( |
1603
|
|
|
|
|
|
|
$curr_open->[-1][1]{'start_line'}, |
1604
|
|
|
|
|
|
|
"You can't have =items (as at line " |
1605
|
0
|
|
|
|
|
0
|
. $para->[1]{'start_line'} |
1606
|
|
|
|
|
|
|
. ") unless the first thing after the =over is an =item" |
1607
|
|
|
|
|
|
|
); |
1608
|
|
|
|
|
|
|
} |
1609
|
|
|
|
|
|
|
# Just turn it into a paragraph and reconsider it |
1610
|
0
|
|
|
|
|
0
|
$para->[0] = '~Para'; |
1611
|
0
|
|
|
|
|
0
|
unshift @$paras, $para; |
1612
|
0
|
|
|
|
|
0
|
return 1; |
1613
|
|
|
|
|
|
|
|
1614
|
|
|
|
|
|
|
} elsif($over_type eq 'text') { |
1615
|
0
|
|
|
|
|
0
|
my $item_type = $self->_get_item_type($para); |
1616
|
|
|
|
|
|
|
# That kills the content of the item if it's a number or bullet. |
1617
|
0
|
|
|
|
|
0
|
DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n"; |
1618
|
|
|
|
|
|
|
|
1619
|
0
|
0
|
0
|
|
|
0
|
if($item_type eq 'text') { |
|
|
0
|
|
|
|
|
|
1620
|
|
|
|
|
|
|
# Nothing special needs doing for 'text' |
1621
|
|
|
|
|
|
|
} elsif($item_type eq 'number' or $item_type eq 'bullet') { |
1622
|
|
|
|
|
|
|
$self->whine( |
1623
|
0
|
|
|
|
|
0
|
$para->[1]{'start_line'}, |
1624
|
|
|
|
|
|
|
"Expected text after =item, not a $item_type" |
1625
|
|
|
|
|
|
|
); |
1626
|
|
|
|
|
|
|
# Undo our clobbering: |
1627
|
0
|
|
|
|
|
0
|
push @$para, $para->[1]{'~orig_content'}; |
1628
|
0
|
|
|
|
|
0
|
delete $para->[1]{'number'}; |
1629
|
|
|
|
|
|
|
# Only a PROPER item-number element is allowed |
1630
|
|
|
|
|
|
|
# to have a number attribute. |
1631
|
|
|
|
|
|
|
} else { |
1632
|
0
|
|
|
|
|
0
|
die "Unhandled item type $item_type"; # should never happen |
1633
|
|
|
|
|
|
|
} |
1634
|
|
|
|
|
|
|
|
1635
|
|
|
|
|
|
|
# =item-text thingies don't need any assimilation, it seems. |
1636
|
|
|
|
|
|
|
|
1637
|
|
|
|
|
|
|
} elsif($over_type eq 'number') { |
1638
|
0
|
|
|
|
|
0
|
my $item_type = $self->_get_item_type($para); |
1639
|
|
|
|
|
|
|
# That kills the content of the item if it's a number or bullet. |
1640
|
0
|
|
|
|
|
0
|
DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n"; |
1641
|
|
|
|
|
|
|
|
1642
|
0
|
|
|
|
|
0
|
my $expected_value = ++ $curr_open->[-1][1]{'~counter'}; |
1643
|
|
|
|
|
|
|
|
1644
|
0
|
0
|
|
|
|
0
|
if($item_type eq 'bullet') { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1645
|
|
|
|
|
|
|
# Hm, it's not numeric. Correct for this. |
1646
|
0
|
|
|
|
|
0
|
$para->[1]{'number'} = $expected_value; |
1647
|
|
|
|
|
|
|
$self->whine( |
1648
|
0
|
|
|
|
|
0
|
$para->[1]{'start_line'}, |
1649
|
|
|
|
|
|
|
"Expected '=item $expected_value'" |
1650
|
|
|
|
|
|
|
); |
1651
|
0
|
|
|
|
|
0
|
push @$para, $para->[1]{'~orig_content'}; |
1652
|
|
|
|
|
|
|
# restore the bullet, blocking the assimilation of next para |
1653
|
|
|
|
|
|
|
|
1654
|
|
|
|
|
|
|
} elsif($item_type eq 'text') { |
1655
|
|
|
|
|
|
|
# Hm, it's not numeric. Correct for this. |
1656
|
0
|
|
|
|
|
0
|
$para->[1]{'number'} = $expected_value; |
1657
|
|
|
|
|
|
|
$self->whine( |
1658
|
0
|
|
|
|
|
0
|
$para->[1]{'start_line'}, |
1659
|
|
|
|
|
|
|
"Expected '=item $expected_value'" |
1660
|
|
|
|
|
|
|
); |
1661
|
|
|
|
|
|
|
# Text content will still be there and will block next ~Para |
1662
|
|
|
|
|
|
|
|
1663
|
|
|
|
|
|
|
} elsif($item_type ne 'number') { |
1664
|
0
|
|
|
|
|
0
|
die "Unknown item type $item_type"; # should never happen |
1665
|
|
|
|
|
|
|
|
1666
|
|
|
|
|
|
|
} elsif($expected_value == $para->[1]{'number'}) { |
1667
|
0
|
|
|
|
|
0
|
DEBUG > 1 and print STDERR " Numeric item has the expected value of $expected_value\n"; |
1668
|
|
|
|
|
|
|
|
1669
|
|
|
|
|
|
|
} else { |
1670
|
0
|
|
|
|
|
0
|
DEBUG > 1 and print STDERR " Numeric item has ", $para->[1]{'number'}, |
1671
|
|
|
|
|
|
|
" instead of the expected value of $expected_value\n"; |
1672
|
|
|
|
|
|
|
$self->whine( |
1673
|
|
|
|
|
|
|
$para->[1]{'start_line'}, |
1674
|
0
|
|
|
|
|
0
|
"You have '=item " . $para->[1]{'number'} . |
1675
|
|
|
|
|
|
|
"' instead of the expected '=item $expected_value'" |
1676
|
|
|
|
|
|
|
); |
1677
|
0
|
|
|
|
|
0
|
$para->[1]{'number'} = $expected_value; # correcting!! |
1678
|
|
|
|
|
|
|
} |
1679
|
|
|
|
|
|
|
|
1680
|
0
|
0
|
|
|
|
0
|
if(@$para == 2) { |
1681
|
|
|
|
|
|
|
# For the cases where we /didn't/ push to @$para |
1682
|
0
|
0
|
|
|
|
0
|
if($paras->[0][0] eq '~Para') { |
1683
|
0
|
|
|
|
|
0
|
DEBUG and print STDERR "Assimilating following ~Para content into $over_type item\n"; |
1684
|
0
|
|
|
|
|
0
|
push @$para, splice @{shift @$paras},2; |
|
0
|
|
|
|
|
0
|
|
1685
|
|
|
|
|
|
|
} else { |
1686
|
0
|
|
|
|
|
0
|
DEBUG and print STDERR "Can't assimilate following ", $paras->[0][0], "\n"; |
1687
|
0
|
|
|
|
|
0
|
push @$para, ''; # Just so it's not contentless |
1688
|
|
|
|
|
|
|
} |
1689
|
|
|
|
|
|
|
} |
1690
|
|
|
|
|
|
|
|
1691
|
|
|
|
|
|
|
|
1692
|
|
|
|
|
|
|
} elsif($over_type eq 'bullet') { |
1693
|
0
|
|
|
|
|
0
|
my $item_type = $self->_get_item_type($para); |
1694
|
|
|
|
|
|
|
# That kills the content of the item if it's a number or bullet. |
1695
|
0
|
|
|
|
|
0
|
DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n"; |
1696
|
|
|
|
|
|
|
|
1697
|
0
|
0
|
|
|
|
0
|
if($item_type eq 'bullet') { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1698
|
|
|
|
|
|
|
# as expected! |
1699
|
|
|
|
|
|
|
|
1700
|
0
|
0
|
|
|
|
0
|
if( $para->[1]{'~_freaky_para_hack'} ) { |
1701
|
0
|
|
|
|
|
0
|
DEBUG and print STDERR "Accomodating '=item * Foo' tolerance hack.\n"; |
1702
|
0
|
|
|
|
|
0
|
push @$para, $para->[1]{'~_freaky_para_hack'}; |
1703
|
|
|
|
|
|
|
} |
1704
|
|
|
|
|
|
|
|
1705
|
|
|
|
|
|
|
} elsif($item_type eq 'number') { |
1706
|
|
|
|
|
|
|
$self->whine( |
1707
|
0
|
|
|
|
|
0
|
$para->[1]{'start_line'}, |
1708
|
|
|
|
|
|
|
"Expected '=item *'" |
1709
|
|
|
|
|
|
|
); |
1710
|
0
|
|
|
|
|
0
|
push @$para, $para->[1]{'~orig_content'}; |
1711
|
|
|
|
|
|
|
# and block assimilation of the next paragraph |
1712
|
0
|
|
|
|
|
0
|
delete $para->[1]{'number'}; |
1713
|
|
|
|
|
|
|
# Only a PROPER item-number element is allowed |
1714
|
|
|
|
|
|
|
# to have a number attribute. |
1715
|
|
|
|
|
|
|
} elsif($item_type eq 'text') { |
1716
|
|
|
|
|
|
|
$self->whine( |
1717
|
0
|
|
|
|
|
0
|
$para->[1]{'start_line'}, |
1718
|
|
|
|
|
|
|
"Expected '=item *'" |
1719
|
|
|
|
|
|
|
); |
1720
|
|
|
|
|
|
|
# But doesn't need processing. But it'll block assimilation |
1721
|
|
|
|
|
|
|
# of the next para. |
1722
|
|
|
|
|
|
|
} else { |
1723
|
0
|
|
|
|
|
0
|
die "Unhandled item type $item_type"; # should never happen |
1724
|
|
|
|
|
|
|
} |
1725
|
|
|
|
|
|
|
|
1726
|
0
|
0
|
|
|
|
0
|
if(@$para == 2) { |
1727
|
|
|
|
|
|
|
# For the cases where we /didn't/ push to @$para |
1728
|
0
|
0
|
|
|
|
0
|
if($paras->[0][0] eq '~Para') { |
1729
|
0
|
|
|
|
|
0
|
DEBUG and print STDERR "Assimilating following ~Para content into $over_type item\n"; |
1730
|
0
|
|
|
|
|
0
|
push @$para, splice @{shift @$paras},2; |
|
0
|
|
|
|
|
0
|
|
1731
|
|
|
|
|
|
|
} else { |
1732
|
0
|
|
|
|
|
0
|
DEBUG and print STDERR "Can't assimilate following ", $paras->[0][0], "\n"; |
1733
|
0
|
|
|
|
|
0
|
push @$para, ''; # Just so it's not contentless |
1734
|
|
|
|
|
|
|
} |
1735
|
|
|
|
|
|
|
} |
1736
|
|
|
|
|
|
|
|
1737
|
|
|
|
|
|
|
} else { |
1738
|
0
|
|
|
|
|
0
|
die "Unhandled =over type \"$over_type\"?"; |
1739
|
|
|
|
|
|
|
# Shouldn't happen! |
1740
|
|
|
|
|
|
|
} |
1741
|
0
|
|
|
|
|
0
|
$para->[0] .= '-' . $over_type; |
1742
|
|
|
|
|
|
|
|
1743
|
0
|
|
|
|
|
0
|
return; |
1744
|
|
|
|
|
|
|
} |
1745
|
|
|
|
|
|
|
|
1746
|
|
|
|
|
|
|
sub _ponder_Plain { |
1747
|
4547
|
|
|
4547
|
|
8598
|
my ($self,$para) = @_; |
1748
|
4547
|
|
|
|
|
6540
|
DEBUG and print STDERR " giving plain treatment...\n"; |
1749
|
4547
|
100
|
100
|
|
|
25638
|
unless( @$para == 2 or ( @$para == 3 and $para->[2] eq '' ) |
|
|
|
66
|
|
|
|
|
|
|
|
100
|
|
|
|
|
1750
|
|
|
|
|
|
|
or $para->[1]{'~cooked'} |
1751
|
|
|
|
|
|
|
) { |
1752
|
|
|
|
|
|
|
push @$para, |
1753
|
4442
|
|
|
|
|
6911
|
@{$self->_make_treelet( |
1754
|
|
|
|
|
|
|
join("\n", splice(@$para, 2)), |
1755
|
4442
|
|
|
|
|
20743
|
$para->[1]{'start_line'} |
1756
|
|
|
|
|
|
|
)}; |
1757
|
|
|
|
|
|
|
} |
1758
|
|
|
|
|
|
|
# Empty paragraphs don't need a treelet for any reason I can see. |
1759
|
|
|
|
|
|
|
# And precooked paragraphs already have a treelet. |
1760
|
4547
|
|
|
|
|
9488
|
return; |
1761
|
|
|
|
|
|
|
} |
1762
|
|
|
|
|
|
|
|
1763
|
|
|
|
|
|
|
sub _ponder_Verbatim { |
1764
|
534
|
|
|
534
|
|
1088
|
my ($self,$para) = @_; |
1765
|
534
|
|
|
|
|
734
|
DEBUG and print STDERR " giving verbatim treatment...\n"; |
1766
|
|
|
|
|
|
|
|
1767
|
534
|
|
|
|
|
1148
|
$para->[1]{'xml:space'} = 'preserve'; |
1768
|
|
|
|
|
|
|
|
1769
|
534
|
100
|
|
|
|
1232
|
unless ($self->{'_output_is_for_JustPod'}) { |
1770
|
|
|
|
|
|
|
# Fix illegal settings for expand_verbatim_tabs() |
1771
|
|
|
|
|
|
|
# This is because this module doesn't do input error checking, but khw |
1772
|
|
|
|
|
|
|
# doesn't want to add yet another instance of that. |
1773
|
453
|
|
|
|
|
1312
|
my $tab_width = $self->expand_verbatim_tabs; |
1774
|
453
|
100
|
100
|
|
|
2014
|
$tab_width = $self->expand_verbatim_tabs(8) |
1775
|
|
|
|
|
|
|
if ! defined $tab_width |
1776
|
|
|
|
|
|
|
|| $tab_width =~ /\D/; |
1777
|
|
|
|
|
|
|
|
1778
|
453
|
|
|
|
|
1136
|
my $indent = $self->strip_verbatim_indent; |
1779
|
453
|
100
|
100
|
|
|
1091
|
if ($indent && ref $indent eq 'CODE') { |
1780
|
10
|
|
|
|
|
14
|
my @shifted = (shift @{$para}, shift @{$para}); |
|
10
|
|
|
|
|
18
|
|
|
10
|
|
|
|
|
31
|
|
1781
|
10
|
|
|
|
|
32
|
$indent = $indent->($para); |
1782
|
10
|
|
|
|
|
106
|
unshift @{$para}, @shifted; |
|
10
|
|
|
|
|
30
|
|
1783
|
|
|
|
|
|
|
} |
1784
|
|
|
|
|
|
|
|
1785
|
453
|
|
|
|
|
1144
|
for(my $i = 2; $i < @$para; $i++) { |
1786
|
2077
|
|
|
|
|
3227
|
foreach my $line ($para->[$i]) { # just for aliasing |
1787
|
|
|
|
|
|
|
# Strip indentation. |
1788
|
2077
|
100
|
|
|
|
3491
|
$line =~ s/^\Q$indent// if $indent; |
1789
|
2077
|
100
|
|
|
|
3315
|
next unless $tab_width; |
1790
|
|
|
|
|
|
|
|
1791
|
|
|
|
|
|
|
# This is commented out because of github issue #85, and the |
1792
|
|
|
|
|
|
|
# current maintainers don't know why it was there in the first |
1793
|
|
|
|
|
|
|
# place. |
1794
|
|
|
|
|
|
|
#&& !($self->{accept_codes} && $self->{accept_codes}{VerbatimFormatted}); |
1795
|
2076
|
|
|
|
|
5882
|
while( $line =~ |
1796
|
|
|
|
|
|
|
# Sort of adapted from Text::Tabs. |
1797
|
189
|
|
|
|
|
1253
|
s/^([^\t]*)(\t+)/$1.(" " x ((length($2) |
1798
|
|
|
|
|
|
|
* $tab_width) |
1799
|
|
|
|
|
|
|
-(length($1) % $tab_width)))/e |
1800
|
|
|
|
|
|
|
) {} |
1801
|
|
|
|
|
|
|
|
1802
|
|
|
|
|
|
|
# TODO: whinge about (or otherwise treat) unindented or overlong lines |
1803
|
|
|
|
|
|
|
|
1804
|
|
|
|
|
|
|
} |
1805
|
|
|
|
|
|
|
} |
1806
|
|
|
|
|
|
|
} |
1807
|
|
|
|
|
|
|
|
1808
|
|
|
|
|
|
|
# Now the VerbatimFormatted hoodoo... |
1809
|
534
|
100
|
66
|
|
|
2251
|
if( $self->{'accept_codes'} and |
|
|
100
|
|
|
|
|
|
1810
|
|
|
|
|
|
|
$self->{'accept_codes'}{'VerbatimFormatted'} |
1811
|
|
|
|
|
|
|
) { |
1812
|
103
|
|
100
|
|
|
443
|
while(@$para > 3 and $para->[-1] !~ m/\S/) { pop @$para } |
|
76
|
|
|
|
|
285
|
|
1813
|
|
|
|
|
|
|
# Kill any number of terminal newlines |
1814
|
103
|
|
|
|
|
293
|
$self->_verbatim_format($para); |
1815
|
|
|
|
|
|
|
} elsif ($self->{'codes_in_verbatim'}) { |
1816
|
|
|
|
|
|
|
push @$para, |
1817
|
2
|
|
|
|
|
10
|
@{$self->_make_treelet( |
1818
|
|
|
|
|
|
|
join("\n", splice(@$para, 2)), |
1819
|
2
|
|
|
|
|
22
|
$para->[1]{'start_line'}, $para->[1]{'xml:space'} |
1820
|
|
|
|
|
|
|
)}; |
1821
|
2
|
|
|
|
|
8
|
$para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines |
1822
|
|
|
|
|
|
|
} else { |
1823
|
429
|
100
|
|
|
|
1939
|
push @$para, join "\n", splice(@$para, 2) if @$para > 3; |
1824
|
429
|
|
|
|
|
2691
|
$para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines |
1825
|
|
|
|
|
|
|
} |
1826
|
534
|
|
|
|
|
1134
|
return; |
1827
|
|
|
|
|
|
|
} |
1828
|
|
|
|
|
|
|
|
1829
|
|
|
|
|
|
|
sub _ponder_Data { |
1830
|
54
|
|
|
54
|
|
113
|
my ($self,$para) = @_; |
1831
|
54
|
|
|
|
|
76
|
DEBUG and print STDERR " giving data treatment...\n"; |
1832
|
54
|
|
|
|
|
107
|
$para->[1]{'xml:space'} = 'preserve'; |
1833
|
54
|
100
|
|
|
|
164
|
push @$para, join "\n", splice(@$para, 2) if @$para > 3; |
1834
|
54
|
|
|
|
|
102
|
return; |
1835
|
|
|
|
|
|
|
} |
1836
|
|
|
|
|
|
|
|
1837
|
|
|
|
|
|
|
|
1838
|
|
|
|
|
|
|
|
1839
|
|
|
|
|
|
|
|
1840
|
|
|
|
|
|
|
########################################################################### |
1841
|
|
|
|
|
|
|
|
1842
|
|
|
|
|
|
|
sub _traverse_treelet_bit { # for use only by the routine above |
1843
|
8010
|
|
|
8010
|
|
17045
|
my($self, $name) = splice @_,0,2; |
1844
|
|
|
|
|
|
|
|
1845
|
8010
|
|
|
|
|
10339
|
my $scratch; |
1846
|
8010
|
|
|
|
|
24844
|
$self->_handle_element_start(($scratch=$name), shift @_); |
1847
|
|
|
|
|
|
|
|
1848
|
8010
|
|
|
|
|
17130
|
while (@_) { |
1849
|
12846
|
|
|
|
|
19857
|
my $x = shift; |
1850
|
12846
|
100
|
|
|
|
21384
|
if (ref($x)) { |
1851
|
2881
|
|
|
|
|
5926
|
&_traverse_treelet_bit($self, @$x); |
1852
|
|
|
|
|
|
|
} else { |
1853
|
9965
|
|
100
|
|
|
25237
|
$x .= shift while @_ && !ref($_[0]); |
1854
|
9965
|
|
|
|
|
20973
|
$self->_handle_text($x); |
1855
|
|
|
|
|
|
|
} |
1856
|
|
|
|
|
|
|
} |
1857
|
|
|
|
|
|
|
|
1858
|
8010
|
|
|
|
|
23415
|
$self->_handle_element_end($scratch=$name); |
1859
|
8010
|
|
|
|
|
21314
|
return; |
1860
|
|
|
|
|
|
|
} |
1861
|
|
|
|
|
|
|
|
1862
|
|
|
|
|
|
|
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
1863
|
|
|
|
|
|
|
|
1864
|
|
|
|
|
|
|
sub _closers_for_all_curr_open { |
1865
|
10
|
|
|
10
|
|
23
|
my $self = $_[0]; |
1866
|
10
|
|
|
|
|
18
|
my @closers; |
1867
|
10
|
50
|
|
|
|
16
|
foreach my $still_open (@{ $self->{'curr_open'} || return }) { |
|
10
|
|
|
|
|
73
|
|
1868
|
15
|
|
|
|
|
43
|
my @copy = @$still_open; |
1869
|
15
|
|
|
|
|
26
|
$copy[1] = {%{ $copy[1] }}; |
|
15
|
|
|
|
|
79
|
|
1870
|
|
|
|
|
|
|
#$copy[1]{'start_line'} = -1; |
1871
|
15
|
100
|
|
|
|
63
|
if($copy[0] eq '=for') { |
|
|
50
|
|
|
|
|
|
1872
|
9
|
|
|
|
|
18
|
$copy[0] = '=end'; |
1873
|
|
|
|
|
|
|
} elsif($copy[0] eq '=over') { |
1874
|
|
|
|
|
|
|
$self->whine( |
1875
|
|
|
|
|
|
|
$still_open->[1]{start_line} , |
1876
|
6
|
|
|
|
|
43
|
"=over without closing =back" |
1877
|
|
|
|
|
|
|
); |
1878
|
|
|
|
|
|
|
|
1879
|
6
|
|
|
|
|
12
|
$copy[0] = '=back'; |
1880
|
|
|
|
|
|
|
} else { |
1881
|
0
|
|
|
|
|
0
|
die "I don't know how to auto-close an open $copy[0] region"; |
1882
|
|
|
|
|
|
|
} |
1883
|
|
|
|
|
|
|
|
1884
|
15
|
50
|
|
|
|
55
|
unless( @copy > 2 ) { |
1885
|
15
|
|
|
|
|
37
|
push @copy, $copy[1]{'target'}; |
1886
|
15
|
100
|
|
|
|
46
|
$copy[-1] = '' unless defined $copy[-1]; |
1887
|
|
|
|
|
|
|
# since =over's don't have targets |
1888
|
|
|
|
|
|
|
} |
1889
|
|
|
|
|
|
|
|
1890
|
15
|
|
|
|
|
29
|
$copy[1]{'fake-closer'} = 1; |
1891
|
|
|
|
|
|
|
|
1892
|
15
|
|
|
|
|
18
|
DEBUG and print STDERR "Queuing up fake-o event: ", pretty(\@copy), "\n"; |
1893
|
15
|
|
|
|
|
60
|
unshift @closers, \@copy; |
1894
|
|
|
|
|
|
|
} |
1895
|
10
|
|
|
|
|
37
|
return @closers; |
1896
|
|
|
|
|
|
|
} |
1897
|
|
|
|
|
|
|
|
1898
|
|
|
|
|
|
|
#-------------------------------------------------------------------------- |
1899
|
|
|
|
|
|
|
|
1900
|
|
|
|
|
|
|
sub _verbatim_format { |
1901
|
103
|
|
|
103
|
|
212
|
my($it, $p) = @_; |
1902
|
|
|
|
|
|
|
|
1903
|
103
|
|
|
|
|
121
|
my $formatting; |
1904
|
|
|
|
|
|
|
|
1905
|
103
|
|
|
|
|
241
|
for(my $i = 2; $i < @$p; $i++) { # work backwards over the lines |
1906
|
559
|
|
|
|
|
664
|
DEBUG and print STDERR "_verbatim_format appends a newline to $i: $p->[$i]\n"; |
1907
|
559
|
|
|
|
|
1283
|
$p->[$i] .= "\n"; |
1908
|
|
|
|
|
|
|
# Unlike with simple Verbatim blocks, we don't end up just doing |
1909
|
|
|
|
|
|
|
# a join("\n", ...) on the contents, so we have to append a |
1910
|
|
|
|
|
|
|
# newline to every line, and then nix the last one later. |
1911
|
|
|
|
|
|
|
} |
1912
|
|
|
|
|
|
|
|
1913
|
103
|
|
|
|
|
153
|
if( DEBUG > 4 ) { |
1914
|
|
|
|
|
|
|
print STDERR "<<\n"; |
1915
|
|
|
|
|
|
|
for(my $i = $#$p; $i >= 2; $i--) { # work backwards over the lines |
1916
|
|
|
|
|
|
|
print STDERR "_verbatim_format $i: $p->[$i]"; |
1917
|
|
|
|
|
|
|
} |
1918
|
|
|
|
|
|
|
print STDERR ">>\n"; |
1919
|
|
|
|
|
|
|
} |
1920
|
|
|
|
|
|
|
|
1921
|
103
|
|
|
|
|
301
|
for(my $i = $#$p; $i > 2; $i--) { |
1922
|
|
|
|
|
|
|
# work backwards over the lines, except the first (#2) |
1923
|
|
|
|
|
|
|
|
1924
|
|
|
|
|
|
|
#next unless $p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s |
1925
|
|
|
|
|
|
|
# and $p->[$i-1] !~ m{^#:[ \^\/\%]*\n?$}s; |
1926
|
|
|
|
|
|
|
# look at a formatty line preceding a nonformatty one |
1927
|
455
|
|
|
|
|
532
|
DEBUG > 5 and print STDERR "Scrutinizing line $i: $$p[$i]\n"; |
1928
|
455
|
100
|
|
|
|
747
|
if($p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s) { |
1929
|
11
|
|
|
|
|
14
|
DEBUG > 5 and print STDERR " It's a formatty line. ", |
1930
|
|
|
|
|
|
|
"Peeking at previous line ", $i-1, ": $$p[$i-1]: \n"; |
1931
|
|
|
|
|
|
|
|
1932
|
11
|
100
|
|
|
|
24
|
if( $p->[$i-1] =~ m{^#:[ \^\/\%]*\n?$}s ) { |
1933
|
1
|
|
|
|
|
2
|
DEBUG > 5 and print STDERR " Previous line is formatty! Skipping this one.\n"; |
1934
|
1
|
|
|
|
|
3
|
next; |
1935
|
|
|
|
|
|
|
} else { |
1936
|
10
|
|
|
|
|
13
|
DEBUG > 5 and print STDERR " Previous line is non-formatty! Yay!\n"; |
1937
|
|
|
|
|
|
|
} |
1938
|
|
|
|
|
|
|
} else { |
1939
|
444
|
|
|
|
|
553
|
DEBUG > 5 and print STDERR " It's not a formatty line. Ignoring\n"; |
1940
|
444
|
|
|
|
|
782
|
next; |
1941
|
|
|
|
|
|
|
} |
1942
|
|
|
|
|
|
|
|
1943
|
|
|
|
|
|
|
# A formatty line has to have #: in the first two columns, and uses |
1944
|
|
|
|
|
|
|
# "^" to mean bold, "/" to mean underline, and "%" to mean bold italic. |
1945
|
|
|
|
|
|
|
# Example: |
1946
|
|
|
|
|
|
|
# What do you want? i like pie. [or whatever] |
1947
|
|
|
|
|
|
|
# #:^^^^^^^^^^^^^^^^^ ///////////// |
1948
|
|
|
|
|
|
|
|
1949
|
|
|
|
|
|
|
|
1950
|
10
|
|
|
|
|
14
|
DEBUG > 4 and print STDERR "_verbatim_format considers:\n<$p->[$i-1]>\n<$p->[$i]>\n"; |
1951
|
|
|
|
|
|
|
|
1952
|
10
|
|
|
|
|
27
|
$formatting = ' ' . $1; |
1953
|
10
|
|
|
|
|
33
|
$formatting =~ s/\s+$//s; # nix trailing whitespace |
1954
|
10
|
50
|
33
|
|
|
47
|
unless(length $formatting and $p->[$i-1] =~ m/\S/) { # no-op |
1955
|
0
|
|
|
|
|
0
|
splice @$p,$i,1; # remove this line |
1956
|
0
|
|
|
|
|
0
|
$i--; # don't consider next line |
1957
|
0
|
|
|
|
|
0
|
next; |
1958
|
|
|
|
|
|
|
} |
1959
|
|
|
|
|
|
|
|
1960
|
10
|
100
|
|
|
|
24
|
if( length($formatting) >= length($p->[$i-1]) ) { |
1961
|
3
|
|
|
|
|
8
|
$formatting = substr($formatting, 0, length($p->[$i-1]) - 1) . ' '; |
1962
|
|
|
|
|
|
|
} else { |
1963
|
7
|
|
|
|
|
25
|
$formatting .= ' ' x (length($p->[$i-1]) - length($formatting)); |
1964
|
|
|
|
|
|
|
} |
1965
|
|
|
|
|
|
|
# Make $formatting and the previous line be exactly the same length, |
1966
|
|
|
|
|
|
|
# with $formatting having a " " as the last character. |
1967
|
|
|
|
|
|
|
|
1968
|
10
|
|
|
|
|
14
|
DEBUG > 4 and print STDERR "Formatting <$formatting> on <", $p->[$i-1], ">\n"; |
1969
|
|
|
|
|
|
|
|
1970
|
|
|
|
|
|
|
|
1971
|
10
|
|
|
|
|
16
|
my @new_line; |
1972
|
10
|
|
|
|
|
44
|
while( $formatting =~ m{\G(( +)|(\^+)|(\/+)|(\%+))}g ) { |
1973
|
|
|
|
|
|
|
#print STDERR "Format matches $1\n"; |
1974
|
|
|
|
|
|
|
|
1975
|
54
|
100
|
|
|
|
120
|
if($2) { |
1976
|
|
|
|
|
|
|
#print STDERR "SKIPPING <$2>\n"; |
1977
|
32
|
|
|
|
|
139
|
push @new_line, |
1978
|
|
|
|
|
|
|
substr($p->[$i-1], pos($formatting)-length($1), length($1)); |
1979
|
|
|
|
|
|
|
} else { |
1980
|
|
|
|
|
|
|
#print STDERR "SNARING $+\n"; |
1981
|
22
|
50
|
|
|
|
157
|
push @new_line, [ |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
1982
|
|
|
|
|
|
|
( |
1983
|
|
|
|
|
|
|
$3 ? 'VerbatimB' : |
1984
|
|
|
|
|
|
|
$4 ? 'VerbatimI' : |
1985
|
|
|
|
|
|
|
$5 ? 'VerbatimBI' : die("Should never get called") |
1986
|
|
|
|
|
|
|
), {}, |
1987
|
|
|
|
|
|
|
substr($p->[$i-1], pos($formatting)-length($1), length($1)) |
1988
|
|
|
|
|
|
|
]; |
1989
|
|
|
|
|
|
|
#print STDERR "Formatting <$new_line[-1][-1]> as $new_line[-1][0]\n"; |
1990
|
|
|
|
|
|
|
} |
1991
|
|
|
|
|
|
|
} |
1992
|
10
|
|
|
|
|
40
|
my @nixed = |
1993
|
|
|
|
|
|
|
splice @$p, $i-1, 2, @new_line; # replace myself and the next line |
1994
|
10
|
|
|
|
|
15
|
DEBUG > 10 and print STDERR "Nixed count: ", scalar(@nixed), "\n"; |
1995
|
|
|
|
|
|
|
|
1996
|
10
|
|
|
|
|
14
|
DEBUG > 6 and print STDERR "New version of the above line is these tokens (", |
1997
|
|
|
|
|
|
|
scalar(@new_line), "):", |
1998
|
|
|
|
|
|
|
map( ref($_)?"<@$_> ":"<$_>", @new_line ), "\n"; |
1999
|
10
|
|
|
|
|
31
|
$i--; # So the next line we scrutinize is the line before the one |
2000
|
|
|
|
|
|
|
# that we just went and formatted |
2001
|
|
|
|
|
|
|
} |
2002
|
|
|
|
|
|
|
|
2003
|
103
|
|
|
|
|
195
|
$p->[0] = 'VerbatimFormatted'; |
2004
|
|
|
|
|
|
|
|
2005
|
|
|
|
|
|
|
# Collapse adjacent text nodes, just for kicks. |
2006
|
103
|
|
|
|
|
271
|
for( my $i = 2; $i > $#$p; $i++ ) { # work forwards over the tokens except for the last |
2007
|
0
|
0
|
0
|
|
|
0
|
if( !ref($p->[$i]) and !ref($p->[$i + 1]) ) { |
2008
|
0
|
|
|
|
|
0
|
DEBUG > 5 and print STDERR "_verbatim_format merges {$p->[$i]} and {$p->[$i+1]}\n"; |
2009
|
0
|
|
|
|
|
0
|
$p->[$i] .= splice @$p, $i+1, 1; # merge |
2010
|
0
|
|
|
|
|
0
|
--$i; # and back up |
2011
|
|
|
|
|
|
|
} |
2012
|
|
|
|
|
|
|
} |
2013
|
|
|
|
|
|
|
|
2014
|
|
|
|
|
|
|
# Now look for the last text token, and remove the terminal newline |
2015
|
103
|
|
|
|
|
286
|
for( my $i = $#$p; $i >= 2; $i-- ) { |
2016
|
|
|
|
|
|
|
# work backwards over the tokens, even the first |
2017
|
103
|
50
|
|
|
|
236
|
if( !ref($p->[$i]) ) { |
2018
|
103
|
50
|
|
|
|
537
|
if($p->[$i] =~ s/\n$//s) { |
2019
|
103
|
|
|
|
|
223
|
DEBUG > 5 and print STDERR "_verbatim_format killed the terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]}\n"; |
2020
|
|
|
|
|
|
|
} else { |
2021
|
0
|
|
|
|
|
0
|
DEBUG > 5 and print STDERR |
2022
|
|
|
|
|
|
|
"No terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]} !?\n"; |
2023
|
|
|
|
|
|
|
} |
2024
|
103
|
|
|
|
|
209
|
last; # we only want the next one |
2025
|
|
|
|
|
|
|
} |
2026
|
|
|
|
|
|
|
} |
2027
|
|
|
|
|
|
|
|
2028
|
103
|
|
|
|
|
202
|
return; |
2029
|
|
|
|
|
|
|
} |
2030
|
|
|
|
|
|
|
|
2031
|
|
|
|
|
|
|
|
2032
|
|
|
|
|
|
|
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
2033
|
|
|
|
|
|
|
|
2034
|
|
|
|
|
|
|
|
2035
|
|
|
|
|
|
|
sub _treelet_from_formatting_codes { |
2036
|
|
|
|
|
|
|
# Given a paragraph, returns a treelet. Full of scary tokenizing code. |
2037
|
|
|
|
|
|
|
# Like [ '~Top', {'start_line' => $start_line}, |
2038
|
|
|
|
|
|
|
# "I like ", |
2039
|
|
|
|
|
|
|
# [ 'B', {}, "pie" ], |
2040
|
|
|
|
|
|
|
# "!" |
2041
|
|
|
|
|
|
|
# ] |
2042
|
|
|
|
|
|
|
# This illustrates the general format of a treelet. It is an array: |
2043
|
|
|
|
|
|
|
# [0] is a scalar indicating its type. In the example above, the |
2044
|
|
|
|
|
|
|
# types are '~Top' and 'B' |
2045
|
|
|
|
|
|
|
# [1] is a hash of various flags about it, possibly empty |
2046
|
|
|
|
|
|
|
# [2] - [N] are an ordered list of the subcomponents of the treelet. |
2047
|
|
|
|
|
|
|
# Scalars are literal text, refs are sub-treelets, to |
2048
|
|
|
|
|
|
|
# arbitrary levels. Stringifying a treelet will recursively |
2049
|
|
|
|
|
|
|
# stringify the sub-treelets, concatentating everything |
2050
|
|
|
|
|
|
|
# together to form the exact text of the treelet. |
2051
|
|
|
|
|
|
|
|
2052
|
4444
|
|
|
4444
|
|
9790
|
my($self, $para, $start_line, $preserve_space) = @_; |
2053
|
|
|
|
|
|
|
|
2054
|
4444
|
|
|
|
|
13114
|
my $treelet = ['~Top', {'start_line' => $start_line},]; |
2055
|
|
|
|
|
|
|
|
2056
|
4444
|
100
|
100
|
|
|
15852
|
unless ($preserve_space || $self->{'preserve_whitespace'}) { |
2057
|
3577
|
|
|
|
|
28119
|
$para =~ s/\s+/ /g; # collapse and trim all whitespace first. |
2058
|
3577
|
|
|
|
|
6766
|
$para =~ s/ $//; |
2059
|
3577
|
|
|
|
|
5257
|
$para =~ s/^ //; |
2060
|
|
|
|
|
|
|
} |
2061
|
|
|
|
|
|
|
|
2062
|
|
|
|
|
|
|
# Only apparent problem the above code is that N<< >> turns into |
2063
|
|
|
|
|
|
|
# N<< >>. But then, word wrapping does that too! So don't do that! |
2064
|
|
|
|
|
|
|
|
2065
|
|
|
|
|
|
|
|
2066
|
|
|
|
|
|
|
# As a Start-code is encountered, the number of opening bracket '<' |
2067
|
|
|
|
|
|
|
# characters minus 1 is pushed onto @stack (so 0 means a single bracket, |
2068
|
|
|
|
|
|
|
# etc). When closing brackets are found in the text, at least this number |
2069
|
|
|
|
|
|
|
# (plus the 1) will be required to mean the Start-code is terminated. When |
2070
|
|
|
|
|
|
|
# those are found, @stack is popped. |
2071
|
4444
|
|
|
|
|
6308
|
my @stack; |
2072
|
|
|
|
|
|
|
|
2073
|
4444
|
|
|
|
|
8098
|
my @lineage = ($treelet); |
2074
|
4444
|
|
|
|
|
6615
|
my $raw = ''; # raw content of L<> fcode before splitting/processing |
2075
|
|
|
|
|
|
|
# XXX 'raw' is not 100% accurate: all surrounding whitespace is condensed |
2076
|
|
|
|
|
|
|
# into just 1 ' '. Is this the regex's doing or 'raw's? Answer is it's |
2077
|
|
|
|
|
|
|
# the 'collapse and trim all whitespace first' lines just above. |
2078
|
4444
|
|
|
|
|
6063
|
my $inL = 0; |
2079
|
|
|
|
|
|
|
|
2080
|
4444
|
|
|
|
|
5748
|
DEBUG > 4 and print STDERR "Paragraph:\n$para\n\n"; |
2081
|
|
|
|
|
|
|
|
2082
|
|
|
|
|
|
|
# Here begins our frightening tokenizer RE. The following regex matches |
2083
|
|
|
|
|
|
|
# text in four main parts: |
2084
|
|
|
|
|
|
|
# |
2085
|
|
|
|
|
|
|
# * Start-codes. The first alternative matches C< or C<<, the latter |
2086
|
|
|
|
|
|
|
# followed by some whitespace. $1 will hold the entire start code |
2087
|
|
|
|
|
|
|
# (including any space following a multiple-angle-bracket delimiter), |
2088
|
|
|
|
|
|
|
# and $2 will hold only the additional brackets past the first in a |
2089
|
|
|
|
|
|
|
# multiple-bracket delimiter. length($2) + 1 will be the number of |
2090
|
|
|
|
|
|
|
# closing brackets we have to find. |
2091
|
|
|
|
|
|
|
# |
2092
|
|
|
|
|
|
|
# * Closing brackets. Match some amount of whitespace followed by |
2093
|
|
|
|
|
|
|
# multiple close brackets. The logic to see if this closes anything |
2094
|
|
|
|
|
|
|
# is down below. Note that in order to parse C<< >> correctly, we |
2095
|
|
|
|
|
|
|
# have to use look-behind (?<=\s\s), since the match of the starting |
2096
|
|
|
|
|
|
|
# code will have consumed the whitespace. |
2097
|
|
|
|
|
|
|
# |
2098
|
|
|
|
|
|
|
# * A single closing bracket, to close a simple code like C<>. |
2099
|
|
|
|
|
|
|
# |
2100
|
|
|
|
|
|
|
# * Something that isn't a start or end code. We have to be careful |
2101
|
|
|
|
|
|
|
# about accepting whitespace, since perlpodspec says that any whitespace |
2102
|
|
|
|
|
|
|
# before a multiple-bracket closing delimiter should be ignored. |
2103
|
|
|
|
|
|
|
# |
2104
|
4444
|
|
|
|
|
43781
|
while($para =~ |
2105
|
|
|
|
|
|
|
m/\G |
2106
|
|
|
|
|
|
|
(?: |
2107
|
|
|
|
|
|
|
# Match starting codes, including the whitespace following a |
2108
|
|
|
|
|
|
|
# multiple-delimiter start code. $1 gets the whole start code and |
2109
|
|
|
|
|
|
|
# $2 gets all but one of the
|
2110
|
|
|
|
|
|
|
([A-Z]<(?:(<+)\s+)?) |
2111
|
|
|
|
|
|
|
| |
2112
|
|
|
|
|
|
|
# Match multiple-bracket end codes. $3 gets the whitespace that |
2113
|
|
|
|
|
|
|
# should be discarded before an end bracket but kept in other cases |
2114
|
|
|
|
|
|
|
# and $4 gets the end brackets themselves. ($3 can be empty if the |
2115
|
|
|
|
|
|
|
# construct is empty, like C<< >>, and all the white-space has been |
2116
|
|
|
|
|
|
|
# gobbled up already, considered to be space after the opening |
2117
|
|
|
|
|
|
|
# bracket. In this case we use look-behind to verify that there are |
2118
|
|
|
|
|
|
|
# at least 2 spaces in a row before the ">".) |
2119
|
|
|
|
|
|
|
(\s+|(?<=\s\s))(>{2,}) |
2120
|
|
|
|
|
|
|
| |
2121
|
|
|
|
|
|
|
(\s?>) # $5: simple end-codes |
2122
|
|
|
|
|
|
|
| |
2123
|
|
|
|
|
|
|
( # $6: stuff containing no start-codes or end-codes |
2124
|
|
|
|
|
|
|
(?: |
2125
|
|
|
|
|
|
|
[^A-Z\s>] |
2126
|
|
|
|
|
|
|
| |
2127
|
|
|
|
|
|
|
(?: |
2128
|
|
|
|
|
|
|
[A-Z](?!<) |
2129
|
|
|
|
|
|
|
) |
2130
|
|
|
|
|
|
|
| |
2131
|
|
|
|
|
|
|
# whitespace is ok, but we don't want to eat the whitespace before |
2132
|
|
|
|
|
|
|
# a multiple-bracket end code. |
2133
|
|
|
|
|
|
|
# NOTE: we may still have problems with e.g. S<< >> |
2134
|
|
|
|
|
|
|
(?: |
2135
|
|
|
|
|
|
|
\s(?!\s*>{2,}) |
2136
|
|
|
|
|
|
|
) |
2137
|
|
|
|
|
|
|
)+ |
2138
|
|
|
|
|
|
|
) |
2139
|
|
|
|
|
|
|
) |
2140
|
|
|
|
|
|
|
/xgo |
2141
|
|
|
|
|
|
|
) { |
2142
|
15883
|
|
|
|
|
21863
|
DEBUG > 4 and print STDERR "\nParagraphic tokenstack = (@stack)\n"; |
2143
|
15883
|
100
|
|
|
|
46732
|
if(defined $1) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
2144
|
3078
|
|
|
|
|
4179
|
my $bracket_count; # How many '<<<' in a row this has. Needed for |
2145
|
|
|
|
|
|
|
# Pod::Simple::JustPod |
2146
|
3078
|
100
|
|
|
|
5301
|
if(defined $2) { |
2147
|
115
|
|
|
|
|
171
|
DEBUG > 3 and print STDERR "Found complex start-text code \"$1\"\n"; |
2148
|
115
|
|
|
|
|
254
|
$bracket_count = length($2) + 1; |
2149
|
115
|
|
|
|
|
212
|
push @stack, $bracket_count; # length of the necessary complex |
2150
|
|
|
|
|
|
|
# end-code string |
2151
|
|
|
|
|
|
|
} else { |
2152
|
2963
|
|
|
|
|
3547
|
DEBUG > 3 and print STDERR "Found simple start-text code \"$1\"\n"; |
2153
|
2963
|
|
|
|
|
4307
|
push @stack, 0; # signal that we're looking for simple |
2154
|
2963
|
|
|
|
|
3935
|
$bracket_count = 1; |
2155
|
|
|
|
|
|
|
} |
2156
|
3078
|
|
|
|
|
6282
|
my $code = substr($1,0,1); |
2157
|
3078
|
100
|
|
|
|
5357
|
if ('L' eq $code) { |
2158
|
433
|
100
|
|
|
|
781
|
if ($inL) { |
2159
|
1
|
|
|
|
|
2
|
$raw .= $1; |
2160
|
1
|
|
|
|
|
44
|
$self->scream( $start_line, |
2161
|
|
|
|
|
|
|
'Nested L<> are illegal. Pretending inner one is ' |
2162
|
|
|
|
|
|
|
. 'X<...> so can continue looking for other errors.'); |
2163
|
1
|
|
|
|
|
2
|
$code = "X"; |
2164
|
|
|
|
|
|
|
} |
2165
|
|
|
|
|
|
|
else { |
2166
|
432
|
|
|
|
|
631
|
$raw = ""; # reset raw content accumulator |
2167
|
432
|
|
|
|
|
667
|
$inL = @stack; |
2168
|
|
|
|
|
|
|
} |
2169
|
|
|
|
|
|
|
} else { |
2170
|
2645
|
100
|
|
|
|
4659
|
$raw .= $1 if $inL; |
2171
|
|
|
|
|
|
|
} |
2172
|
3078
|
|
|
|
|
6776
|
push @lineage, [ $code, {}, ]; # new node object |
2173
|
|
|
|
|
|
|
|
2174
|
|
|
|
|
|
|
# Tell Pod::Simple::JustPod how many brackets there were, but to save |
2175
|
|
|
|
|
|
|
# space, not in the most usual case of there was just 1. It can be |
2176
|
|
|
|
|
|
|
# inferred by the absence of this element. Similarly, if there is more |
2177
|
|
|
|
|
|
|
# than one bracket, extract the white space between the final bracket |
2178
|
|
|
|
|
|
|
# and the real beginning of the interior. Save that if it isn't just a |
2179
|
|
|
|
|
|
|
# single space |
2180
|
3078
|
100
|
100
|
|
|
7794
|
if ($self->{'_output_is_for_JustPod'} && $bracket_count > 1) { |
2181
|
18
|
|
|
|
|
48
|
$lineage[-1][1]{'~bracket_count'} = $bracket_count; |
2182
|
18
|
|
|
|
|
43
|
my $lspacer = substr($1, 1 + $bracket_count); |
2183
|
18
|
100
|
|
|
|
50
|
$lineage[-1][1]{'~lspacer'} = $lspacer if $lspacer ne " "; |
2184
|
|
|
|
|
|
|
} |
2185
|
3078
|
|
|
|
|
3938
|
push @{ $lineage[-2] }, $lineage[-1]; |
|
3078
|
|
|
|
|
16802
|
|
2186
|
|
|
|
|
|
|
} elsif(defined $4) { |
2187
|
126
|
|
|
|
|
187
|
DEBUG > 3 and print STDERR "Found apparent complex end-text code \"$3$4\"\n"; |
2188
|
|
|
|
|
|
|
# This is where it gets messy... |
2189
|
126
|
100
|
|
|
|
564
|
if(! @stack) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
2190
|
|
|
|
|
|
|
# We saw " >>>>" but needed nothing. This is ALL just stuff then. |
2191
|
1
|
|
|
|
|
4
|
DEBUG > 4 and print STDERR " But it's really just stuff.\n"; |
2192
|
1
|
|
|
|
|
8
|
push @{ $lineage[-1] }, $3, $4; |
|
1
|
|
|
|
|
3
|
|
2193
|
1
|
|
|
|
|
11
|
next; |
2194
|
|
|
|
|
|
|
} elsif(!$stack[-1]) { |
2195
|
|
|
|
|
|
|
# We saw " >>>>" but needed only ">". Back pos up. |
2196
|
3
|
|
|
|
|
5
|
DEBUG > 4 and print STDERR " And that's more than we needed to close simple.\n"; |
2197
|
3
|
|
|
|
|
5
|
push @{ $lineage[-1] }, $3; # That was a for-real space, too. |
|
3
|
|
|
|
|
7
|
|
2198
|
3
|
|
|
|
|
12
|
pos($para) = pos($para) - length($4) + 1; |
2199
|
|
|
|
|
|
|
} elsif($stack[-1] == length($4)) { |
2200
|
|
|
|
|
|
|
# We found " >>>>", and it was exactly what we needed. Commonest case. |
2201
|
115
|
|
|
|
|
174
|
DEBUG > 4 and print STDERR " And that's exactly what we needed to close complex.\n"; |
2202
|
|
|
|
|
|
|
} elsif($stack[-1] < length($4)) { |
2203
|
|
|
|
|
|
|
# We saw " >>>>" but needed only " >>". Back pos up. |
2204
|
0
|
|
|
|
|
0
|
DEBUG > 4 and print STDERR " And that's more than we needed to close complex.\n"; |
2205
|
0
|
|
|
|
|
0
|
pos($para) = pos($para) - length($4) + $stack[-1]; |
2206
|
|
|
|
|
|
|
} else { |
2207
|
|
|
|
|
|
|
# We saw " >>>>" but needed " >>>>>>". So this is all just stuff! |
2208
|
7
|
|
|
|
|
12
|
DEBUG > 4 and print STDERR " But it's really just stuff, because we needed more.\n"; |
2209
|
7
|
|
|
|
|
14
|
push @{ $lineage[-1] }, $3, $4; |
|
7
|
|
|
|
|
26
|
|
2210
|
7
|
|
|
|
|
37
|
next; |
2211
|
|
|
|
|
|
|
} |
2212
|
|
|
|
|
|
|
#print STDERR "\nHOOBOY ", scalar(@{$lineage[-1]}), "!!!\n"; |
2213
|
|
|
|
|
|
|
|
2214
|
118
|
50
|
66
|
|
|
306
|
if ($3 ne " " && $self->{'_output_is_for_JustPod'}) { |
2215
|
3
|
100
|
|
|
|
29
|
if ($3 ne "") { |
|
|
100
|
|
|
|
|
|
2216
|
1
|
|
|
|
|
3
|
$lineage[-1][1]{'~rspacer'} = $3; |
2217
|
|
|
|
|
|
|
} |
2218
|
|
|
|
|
|
|
elsif ($lineage[-1][1]{'~lspacer'} eq " ") { |
2219
|
|
|
|
|
|
|
|
2220
|
|
|
|
|
|
|
# Here we had something like C<< >> which was a false positive |
2221
|
1
|
|
|
|
|
3
|
delete $lineage[-1][1]{'~lspacer'}; |
2222
|
|
|
|
|
|
|
} |
2223
|
|
|
|
|
|
|
else { |
2224
|
|
|
|
|
|
|
$lineage[-1][1]{'~rspacer'} |
2225
|
1
|
|
|
|
|
15
|
= substr($lineage[-1][1]{'~lspacer'}, -1, 1); |
2226
|
1
|
|
|
|
|
3
|
chop $lineage[-1][1]{'~lspacer'}; |
2227
|
|
|
|
|
|
|
} |
2228
|
|
|
|
|
|
|
} |
2229
|
|
|
|
|
|
|
|
2230
|
118
|
100
|
|
|
|
235
|
push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] }; |
|
2
|
|
|
|
|
5
|
|
|
118
|
|
|
|
|
256
|
|
2231
|
|
|
|
|
|
|
# Keep the element from being childless |
2232
|
|
|
|
|
|
|
|
2233
|
118
|
100
|
|
|
|
244
|
if ($inL == @stack) { |
2234
|
22
|
|
|
|
|
54
|
$lineage[-1][1]{'raw'} = $raw; |
2235
|
22
|
|
|
|
|
32
|
$inL = 0; |
2236
|
|
|
|
|
|
|
} |
2237
|
|
|
|
|
|
|
|
2238
|
118
|
|
|
|
|
181
|
pop @stack; |
2239
|
118
|
|
|
|
|
152
|
pop @lineage; |
2240
|
|
|
|
|
|
|
|
2241
|
118
|
100
|
|
|
|
646
|
$raw .= $3.$4 if $inL; |
2242
|
|
|
|
|
|
|
|
2243
|
|
|
|
|
|
|
} elsif(defined $5) { |
2244
|
3056
|
|
|
|
|
3821
|
DEBUG > 3 and print STDERR "Found apparent simple end-text code \"$5\"\n"; |
2245
|
|
|
|
|
|
|
|
2246
|
3056
|
100
|
100
|
|
|
9836
|
if(@stack and ! $stack[-1]) { |
2247
|
|
|
|
|
|
|
# We're indeed expecting a simple end-code |
2248
|
2952
|
|
|
|
|
3758
|
DEBUG > 4 and print STDERR " It's indeed an end-code.\n"; |
2249
|
|
|
|
|
|
|
|
2250
|
2952
|
50
|
|
|
|
5726
|
if(length($5) == 2) { # There was a space there: " >" |
|
|
100
|
|
|
|
|
|
2251
|
0
|
|
|
|
|
0
|
push @{ $lineage[-1] }, ' '; |
|
0
|
|
|
|
|
0
|
|
2252
|
2952
|
|
|
|
|
5879
|
} elsif( 2 == @{ $lineage[-1] } ) { # Closing a childless element |
2253
|
41
|
|
|
|
|
64
|
push @{ $lineage[-1] }, ''; # keep it from being really childless |
|
41
|
|
|
|
|
109
|
|
2254
|
|
|
|
|
|
|
} |
2255
|
|
|
|
|
|
|
|
2256
|
2952
|
100
|
|
|
|
6001
|
if ($inL == @stack) { |
2257
|
410
|
|
|
|
|
995
|
$lineage[-1][1]{'raw'} = $raw; |
2258
|
410
|
|
|
|
|
554
|
$inL = 0; |
2259
|
|
|
|
|
|
|
} |
2260
|
|
|
|
|
|
|
|
2261
|
2952
|
|
|
|
|
4074
|
pop @stack; |
2262
|
2952
|
|
|
|
|
3983
|
pop @lineage; |
2263
|
|
|
|
|
|
|
} else { |
2264
|
104
|
|
|
|
|
170
|
DEBUG > 4 and print STDERR " It's just stuff.\n"; |
2265
|
104
|
|
|
|
|
197
|
push @{ $lineage[-1] }, $5; |
|
104
|
|
|
|
|
273
|
|
2266
|
|
|
|
|
|
|
} |
2267
|
|
|
|
|
|
|
|
2268
|
3056
|
100
|
|
|
|
21772
|
$raw .= $5 if $inL; |
2269
|
|
|
|
|
|
|
|
2270
|
|
|
|
|
|
|
} elsif(defined $6) { |
2271
|
9623
|
|
|
|
|
11775
|
DEBUG > 3 and print STDERR "Found stuff \"$6\"\n"; |
2272
|
9623
|
|
|
|
|
12129
|
push @{ $lineage[-1] }, $6; |
|
9623
|
|
|
|
|
24960
|
|
2273
|
9623
|
100
|
|
|
|
35449
|
$raw .= $6 if $inL; |
2274
|
|
|
|
|
|
|
# XXX does not capture multiplace whitespaces -- 'raw' ends up with |
2275
|
|
|
|
|
|
|
# at most 1 leading/trailing whitespace, why not all of it? |
2276
|
|
|
|
|
|
|
# Answer, because we deliberately trimmed it above |
2277
|
|
|
|
|
|
|
|
2278
|
|
|
|
|
|
|
} else { |
2279
|
|
|
|
|
|
|
# should never ever ever ever happen |
2280
|
0
|
|
|
|
|
0
|
DEBUG and print STDERR "AYYAYAAAAA at line ", __LINE__, "\n"; |
2281
|
0
|
|
|
|
|
0
|
die "SPORK 512512!"; |
2282
|
|
|
|
|
|
|
} |
2283
|
|
|
|
|
|
|
} |
2284
|
|
|
|
|
|
|
|
2285
|
4444
|
100
|
|
|
|
8701
|
if(@stack) { # Uhoh, some sequences weren't closed. |
2286
|
8
|
|
|
|
|
38
|
my $x= "..."; |
2287
|
8
|
|
|
|
|
20
|
while(@stack) { |
2288
|
8
|
50
|
|
|
|
14
|
push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] }; |
|
0
|
|
|
|
|
0
|
|
|
8
|
|
|
|
|
22
|
|
2289
|
|
|
|
|
|
|
# Hmmmmm! |
2290
|
|
|
|
|
|
|
|
2291
|
8
|
|
|
|
|
20
|
my $code = (pop @lineage)->[0]; |
2292
|
8
|
|
|
|
|
42
|
my $ender_length = pop @stack; |
2293
|
8
|
50
|
|
|
|
18
|
if($ender_length) { |
2294
|
0
|
|
|
|
|
0
|
--$ender_length; |
2295
|
0
|
|
|
|
|
0
|
$x = $code . ("<" x $ender_length) . " $x " . (">" x $ender_length); |
2296
|
|
|
|
|
|
|
} else { |
2297
|
8
|
|
|
|
|
29
|
$x = $code . "<$x>"; |
2298
|
|
|
|
|
|
|
} |
2299
|
|
|
|
|
|
|
} |
2300
|
8
|
|
|
|
|
12
|
DEBUG > 1 and print STDERR "Unterminated $x sequence\n"; |
2301
|
8
|
|
|
|
|
39
|
$self->whine($start_line, |
2302
|
|
|
|
|
|
|
"Unterminated $x sequence", |
2303
|
|
|
|
|
|
|
); |
2304
|
|
|
|
|
|
|
} |
2305
|
|
|
|
|
|
|
|
2306
|
4444
|
|
|
|
|
13291
|
return $treelet; |
2307
|
|
|
|
|
|
|
} |
2308
|
|
|
|
|
|
|
|
2309
|
|
|
|
|
|
|
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
2310
|
|
|
|
|
|
|
|
2311
|
|
|
|
|
|
|
sub text_content_of_treelet { # method: $parser->text_content_of_treelet($lol) |
2312
|
0
|
|
|
0
|
0
|
0
|
return stringify_lol($_[1]); |
2313
|
|
|
|
|
|
|
} |
2314
|
|
|
|
|
|
|
|
2315
|
|
|
|
|
|
|
sub stringify_lol { # function: stringify_lol($lol) |
2316
|
2587
|
|
|
2587
|
0
|
3865
|
my $string_form = ''; |
2317
|
2587
|
|
|
|
|
5470
|
_stringify_lol( $_[0] => \$string_form ); |
2318
|
2587
|
|
|
|
|
7461
|
return $string_form; |
2319
|
|
|
|
|
|
|
} |
2320
|
|
|
|
|
|
|
|
2321
|
|
|
|
|
|
|
sub _stringify_lol { # the real recursor |
2322
|
2967
|
|
|
2967
|
|
4646
|
my($lol, $to) = @_; |
2323
|
2967
|
|
|
|
|
5765
|
for(my $i = 2; $i < @$lol; ++$i) { |
2324
|
3963
|
100
|
100
|
|
|
9457
|
if( ref($lol->[$i] || '') and UNIVERSAL::isa($lol->[$i], 'ARRAY') ) { |
|
|
|
66
|
|
|
|
|
2325
|
380
|
|
|
|
|
667
|
_stringify_lol( $lol->[$i], $to); # recurse! |
2326
|
|
|
|
|
|
|
} else { |
2327
|
3583
|
|
|
|
|
7879
|
$$to .= $lol->[$i]; |
2328
|
|
|
|
|
|
|
} |
2329
|
|
|
|
|
|
|
} |
2330
|
2967
|
|
|
|
|
4596
|
return; |
2331
|
|
|
|
|
|
|
} |
2332
|
|
|
|
|
|
|
|
2333
|
|
|
|
|
|
|
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
2334
|
|
|
|
|
|
|
|
2335
|
|
|
|
|
|
|
sub _dump_curr_open { # return a string representation of the stack |
2336
|
3
|
|
|
3
|
|
7
|
my $curr_open = $_[0]{'curr_open'}; |
2337
|
|
|
|
|
|
|
|
2338
|
3
|
50
|
|
|
|
6
|
return '[empty]' unless @$curr_open; |
2339
|
|
|
|
|
|
|
return join '; ', |
2340
|
3
|
|
|
|
|
6
|
map {; |
2341
|
|
|
|
|
|
|
($_->[0] eq '=for') |
2342
|
|
|
|
|
|
|
? ( ($_->[1]{'~really'} || '=over') |
2343
|
4
|
50
|
50
|
|
|
45
|
. ' ' . $_->[1]{'target'}) |
2344
|
|
|
|
|
|
|
: $_->[0] |
2345
|
|
|
|
|
|
|
} |
2346
|
|
|
|
|
|
|
@$curr_open |
2347
|
|
|
|
|
|
|
; |
2348
|
|
|
|
|
|
|
} |
2349
|
|
|
|
|
|
|
|
2350
|
|
|
|
|
|
|
########################################################################### |
2351
|
|
|
|
|
|
|
my %pretty_form = ( |
2352
|
|
|
|
|
|
|
"\a" => '\a', # ding! |
2353
|
|
|
|
|
|
|
"\b" => '\b', # BS |
2354
|
|
|
|
|
|
|
"\e" => '\e', # ESC |
2355
|
|
|
|
|
|
|
"\f" => '\f', # FF |
2356
|
|
|
|
|
|
|
"\t" => '\t', # tab |
2357
|
|
|
|
|
|
|
"\cm" => '\cm', |
2358
|
|
|
|
|
|
|
"\cj" => '\cj', |
2359
|
|
|
|
|
|
|
"\n" => '\n', # probably overrides one of either \cm or \cj |
2360
|
|
|
|
|
|
|
'"' => '\"', |
2361
|
|
|
|
|
|
|
'\\' => '\\\\', |
2362
|
|
|
|
|
|
|
'$' => '\\$', |
2363
|
|
|
|
|
|
|
'@' => '\\@', |
2364
|
|
|
|
|
|
|
'%' => '\\%', |
2365
|
|
|
|
|
|
|
'#' => '\\#', |
2366
|
|
|
|
|
|
|
); |
2367
|
|
|
|
|
|
|
|
2368
|
|
|
|
|
|
|
sub pretty { # adopted from Class::Classless |
2369
|
|
|
|
|
|
|
# Not the most brilliant routine, but passable. |
2370
|
|
|
|
|
|
|
# Don't give it a cyclic data structure! |
2371
|
502
|
|
|
502
|
0
|
5726
|
my @stuff = @_; # copy |
2372
|
502
|
|
|
|
|
667
|
my $x; |
2373
|
|
|
|
|
|
|
my $out = |
2374
|
|
|
|
|
|
|
# join ",\n" . |
2375
|
|
|
|
|
|
|
join ", ", |
2376
|
502
|
|
|
|
|
778
|
map {; |
2377
|
592
|
50
|
100
|
|
|
4392
|
if(!defined($_)) { |
|
|
100
|
66
|
|
|
|
|
|
|
50
|
33
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
2378
|
0
|
|
|
|
|
0
|
"undef"; |
2379
|
|
|
|
|
|
|
} elsif(ref($_) eq 'ARRAY' or ref($_) eq 'Pod::Simple::LinkSection') { |
2380
|
58
|
|
|
|
|
146
|
$x = "[ " . pretty(@$_) . " ]" ; |
2381
|
58
|
|
|
|
|
148
|
$x; |
2382
|
|
|
|
|
|
|
} elsif(ref($_) eq 'SCALAR') { |
2383
|
0
|
|
|
|
|
0
|
$x = "\\" . pretty($$_) ; |
2384
|
0
|
|
|
|
|
0
|
$x; |
2385
|
|
|
|
|
|
|
} elsif(ref($_) eq 'HASH') { |
2386
|
48
|
|
|
|
|
86
|
my $hr = $_; |
2387
|
|
|
|
|
|
|
$x = "{" . join(", ", |
2388
|
48
|
|
|
|
|
470
|
map(pretty($_) . '=>' . pretty($hr->{$_}), |
2389
|
|
|
|
|
|
|
sort keys %$hr ) ) . "}" ; |
2390
|
48
|
|
|
|
|
176
|
$x; |
2391
|
2
|
|
|
|
|
5
|
} elsif(!length($_)) { q{''} # empty string |
2392
|
|
|
|
|
|
|
} elsif( |
2393
|
|
|
|
|
|
|
$_ eq '0' # very common case |
2394
|
|
|
|
|
|
|
or( |
2395
|
|
|
|
|
|
|
m/^-?(?:[123456789]\d*|0)(?:\.\d+)?$/s |
2396
|
|
|
|
|
|
|
and $_ ne '-0' # the strange case that RE lets thru |
2397
|
|
|
|
|
|
|
) |
2398
|
26
|
|
|
|
|
82
|
) { $_; |
2399
|
|
|
|
|
|
|
} else { |
2400
|
|
|
|
|
|
|
# Yes, explicitly name every character desired. There are shorcuts one |
2401
|
|
|
|
|
|
|
# could make, but I (Karl Williamson) was afraid that some Perl |
2402
|
|
|
|
|
|
|
# releases would have bugs in some of them. For example [A-Z] works |
2403
|
|
|
|
|
|
|
# even on EBCDIC platforms to match exactly the 26 uppercase English |
2404
|
|
|
|
|
|
|
# letters, but I don't know if it has always worked without bugs. It |
2405
|
|
|
|
|
|
|
# seemed safest just to list the characters. |
2406
|
|
|
|
|
|
|
# s<([^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E])> |
2407
|
458
|
0
|
|
|
|
1058
|
s<([^ !"#'()*+,\-./0123456789:;\<=\>?ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\]^_`abcdefghijklmnopqrstuvwxyz{|}~])> |
|
0
|
|
|
|
|
0
|
|
2408
|
|
|
|
|
|
|
<$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg; |
2409
|
458
|
|
|
|
|
1379
|
#<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg; |
2410
|
|
|
|
|
|
|
qq{"$_"}; |
2411
|
|
|
|
|
|
|
} |
2412
|
|
|
|
|
|
|
} @stuff; |
2413
|
502
|
|
|
|
|
2401
|
# $out =~ s/\n */ /g if length($out) < 75; |
2414
|
|
|
|
|
|
|
return $out; |
2415
|
|
|
|
|
|
|
} |
2416
|
|
|
|
|
|
|
|
2417
|
|
|
|
|
|
|
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
2418
|
|
|
|
|
|
|
|
2419
|
|
|
|
|
|
|
# A rather unsubtle method of blowing away all the state information |
2420
|
|
|
|
|
|
|
# from a parser object so it can be reused. Provided as a utility for |
2421
|
|
|
|
|
|
|
# backward compatibility in Pod::Man, etc. but not recommended for |
2422
|
|
|
|
|
|
|
# general use. |
2423
|
|
|
|
|
|
|
|
2424
|
5
|
|
|
5
|
0
|
10348
|
sub reinit { |
2425
|
5
|
|
|
|
|
20
|
my $self = shift; |
2426
|
|
|
|
|
|
|
foreach (qw(source_dead source_filename doc_has_started |
2427
|
|
|
|
|
|
|
start_of_pod_block content_seen last_was_blank paras curr_open |
2428
|
|
|
|
|
|
|
line_count pod_para_count in_pod ~tried_gen_errata all_errata errata errors_seen |
2429
|
|
|
|
|
|
|
Title)) { |
2430
|
80
|
|
|
|
|
142
|
|
2431
|
|
|
|
|
|
|
delete $self->{$_}; |
2432
|
|
|
|
|
|
|
} |
2433
|
|
|
|
|
|
|
} |
2434
|
|
|
|
|
|
|
|
2435
|
|
|
|
|
|
|
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
2436
|
|
|
|
|
|
|
1; |
2437
|
|
|
|
|
|
|
|