| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
|
|
2
|
|
|
|
|
|
|
#Time-stamp: "2004-12-29 18:34:27 AST" |
|
3
|
|
|
|
|
|
|
#TODO: for xml2pod: |
|
4
|
|
|
|
|
|
|
# Make utf8/Latin-1 an option (default utf8?) |
|
5
|
|
|
|
|
|
|
# Make E<>ification an option (default to all) |
|
6
|
|
|
|
|
|
|
# Option for whether to delete highbit things in codeblocks (default: no?) |
|
7
|
|
|
|
|
|
|
#TODO: for pod2xml: |
|
8
|
|
|
|
|
|
|
# Option: choice of XML encoding (Latin-1 or UTF-8) |
|
9
|
|
|
|
|
|
|
# Option: whether to represent things as literals, or as numeric entities. |
|
10
|
|
|
|
|
|
|
# (and whether to use decimal entities, or hex??) |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
require 5; |
|
13
|
|
|
|
|
|
|
package Pod::PXML; |
|
14
|
2
|
|
|
2
|
|
14321
|
use strict; |
|
|
2
|
|
|
|
|
5
|
|
|
|
2
|
|
|
|
|
91
|
|
|
15
|
2
|
|
|
|
|
386
|
use vars qw($VERSION $XMLNS %Char2podent %Char2xmlent |
|
16
|
|
|
|
|
|
|
$LATIN_1 $XML_VALIDATE $LINK_TEXT_INFER $FUSE_ADJACENT_PRES |
|
17
|
|
|
|
|
|
|
$HIGH_BIT_OK |
|
18
|
2
|
|
|
2
|
|
10
|
); |
|
|
2
|
|
|
|
|
3
|
|
|
19
|
|
|
|
|
|
|
$XMLNS = 'http://www.perl.com/CPAN/authors/id/S/SB/SBURKE/pxml_0.01.dtd'; |
|
20
|
|
|
|
|
|
|
$VERSION = '0.12'; |
|
21
|
|
|
|
|
|
|
# I'm going to try to keep the major version numbers in the DTD and the |
|
22
|
|
|
|
|
|
|
# module in synch. I dunno about the fractional part, tho. |
|
23
|
|
|
|
|
|
|
$LATIN_1 = 1; |
|
24
|
|
|
|
|
|
|
$XML_VALIDATE = 1; |
|
25
|
|
|
|
|
|
|
$HIGH_BIT_OK = 0; |
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
$LINK_TEXT_INFER = 0; |
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
$FUSE_ADJACENT_PRES = 1; |
|
30
|
|
|
|
|
|
|
# Whether to make " foo\n\n bar" as a single PRE, |
|
31
|
|
|
|
|
|
|
# as if it were from " foo\n \n bar\n\n" |
|
32
|
|
|
|
|
|
|
# TODO: set to 1 |
|
33
|
|
|
|
|
|
|
|
|
34
|
2
|
50
|
|
2
|
|
72
|
BEGIN { *DEBUG = sub () {0} unless defined &DEBUG } |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
my $nil = []; |
|
37
|
|
|
|
|
|
|
|
|
38
|
2
|
|
|
2
|
|
20
|
use Carp; |
|
|
2
|
|
|
|
|
3
|
|
|
|
2
|
|
|
|
|
138
|
|
|
39
|
2
|
|
|
2
|
|
2023
|
use utf8; |
|
|
2
|
|
|
|
|
20
|
|
|
|
2
|
|
|
|
|
11
|
|
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
# POD entities are just HTML entities plus verbar and sol |
|
42
|
|
|
|
|
|
|
#------------------------------------------------------------------------ |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
# Fill out Char2podent, Char2xmlent. |
|
45
|
|
|
|
|
|
|
{ |
|
46
|
2
|
|
|
2
|
|
1689
|
use HTML::Entities (); |
|
|
2
|
|
|
|
|
13813
|
|
|
|
2
|
|
|
|
|
7573
|
|
|
47
|
|
|
|
|
|
|
die "\%HTML::Entities::char2entity is empty?" |
|
48
|
|
|
|
|
|
|
unless keys %HTML::Entities::char2entity; |
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
my($c,$e); |
|
51
|
|
|
|
|
|
|
while(($c,$e) = each(%HTML::Entities::char2entity)) { |
|
52
|
|
|
|
|
|
|
if($e =~ m{^(\d+);$}s) { |
|
53
|
|
|
|
|
|
|
$Char2podent{ord $c} = "E<$1>"; |
|
54
|
|
|
|
|
|
|
#print "num $e => E<$1>\n"; |
|
55
|
|
|
|
|
|
|
# { => E<123> |
|
56
|
|
|
|
|
|
|
# $Char2xmlent{ord $c} = $e; |
|
57
|
|
|
|
|
|
|
} elsif($e =~ m{^&([^;]+);$}s) { |
|
58
|
|
|
|
|
|
|
$Char2podent{ord $c} = "E<$1>"; |
|
59
|
|
|
|
|
|
|
#print "eng $e => E<$1>\n"; |
|
60
|
|
|
|
|
|
|
# é => E |
|
61
|
|
|
|
|
|
|
# $Char2xmlent{ord $c} = $e; |
|
62
|
|
|
|
|
|
|
} else { |
|
63
|
|
|
|
|
|
|
warn "Unknown thingy in %HTML::Entities::char2entity: $c => $e" |
|
64
|
|
|
|
|
|
|
# if $^W; |
|
65
|
|
|
|
|
|
|
} |
|
66
|
|
|
|
|
|
|
} |
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
# Points of difference between HTML entities and POD entities: |
|
69
|
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
$Char2podent{ord "\xA0"} = "E<160>"; # there is no E |
|
71
|
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
$Char2podent{ord "\xAB"} = "E"; |
|
73
|
|
|
|
|
|
|
$Char2podent{ord "\xBB"} = "E"; |
|
74
|
|
|
|
|
|
|
# Altho new POD processors also know E and E |
|
75
|
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
# Old POD processors don't know these two -- so leave numeric |
|
77
|
|
|
|
|
|
|
# $Char2podent{ord '/'} = 'E'; |
|
78
|
|
|
|
|
|
|
# $Char2podent{ord '|'} = 'E'; |
|
79
|
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
# And a few that we have to make completely sure are present. |
|
81
|
|
|
|
|
|
|
$Char2xmlent{ord '"'} = '"' ; |
|
82
|
|
|
|
|
|
|
$Char2xmlent{ord '<'} = '<' ; |
|
83
|
|
|
|
|
|
|
$Char2xmlent{ord '>'} = '>' ; |
|
84
|
|
|
|
|
|
|
$Char2podent{ord '<'} = 'E' ; |
|
85
|
|
|
|
|
|
|
$Char2podent{ord '>'} = 'E' ; |
|
86
|
|
|
|
|
|
|
} |
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
#print STDERR "Sanity: 214 is ", $Char2podent{214}, "\n"; |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
#------------------------------------------------------------------------ |
|
91
|
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
sub pod2xml ($) { |
|
93
|
0
|
|
|
0
|
1
|
|
require Pod::Tree; |
|
94
|
|
|
|
|
|
|
|
|
95
|
0
|
|
|
|
|
|
my $content = $_[0]; |
|
96
|
|
|
|
|
|
|
|
|
97
|
0
|
|
|
|
|
|
my $tree = Pod::Tree->new; |
|
98
|
0
|
0
|
|
|
|
|
if(ref($content) eq 'SCALAR') { |
|
99
|
0
|
|
|
|
|
|
$tree->load_string($$content); |
|
100
|
|
|
|
|
|
|
} else { |
|
101
|
0
|
|
|
|
|
|
$tree->load_file($content); |
|
102
|
|
|
|
|
|
|
} |
|
103
|
0
|
0
|
|
|
|
|
unless($tree->loaded) { croak("Couldn't load pod") } |
|
|
0
|
|
|
|
|
|
|
|
104
|
0
|
|
|
|
|
|
return _pod_tree_as_xml($tree); |
|
105
|
|
|
|
|
|
|
} |
|
106
|
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
#------------------------------------------------------------------------ |
|
108
|
|
|
|
|
|
|
# Real work: |
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
sub _pod_tree_as_xml { |
|
111
|
0
|
|
|
0
|
|
|
my $root = $_[0]->get_root; |
|
112
|
0
|
|
|
|
|
|
DEBUG > 2 and print "TREE DUMP: <<\n", $_[0]->dump, ">>\n\n"; |
|
113
|
|
|
|
|
|
|
|
|
114
|
0
|
0
|
|
|
|
|
return "\n\n" unless $root; |
|
115
|
0
|
|
|
|
|
|
my $out = ''; |
|
116
|
|
|
|
|
|
|
|
|
117
|
0
|
|
|
|
|
|
my $trav; |
|
118
|
|
|
|
|
|
|
my $x; # scratch |
|
119
|
|
|
|
|
|
|
$trav = sub { |
|
120
|
0
|
|
|
0
|
|
|
my $it = $_[0]; |
|
121
|
0
|
|
|
|
|
|
my $type = $it->get_type; |
|
122
|
0
|
|
|
|
|
|
my $post = ''; |
|
123
|
0
|
|
|
|
|
|
DEBUG and print "Hitting $type\n"; |
|
124
|
0
|
0
|
|
|
|
|
if($type eq 'root') { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
125
|
0
|
|
|
|
|
|
$out .= join "\n", |
|
126
|
|
|
|
|
|
|
qq{}, |
|
127
|
|
|
|
|
|
|
qq{
|
|
128
|
|
|
|
|
|
|
qq{ "$XMLNS">}, |
|
129
|
|
|
|
|
|
|
qq{}, |
|
130
|
|
|
|
|
|
|
"", |
|
131
|
|
|
|
|
|
|
'', |
|
132
|
|
|
|
|
|
|
'', |
|
133
|
|
|
|
|
|
|
; |
|
134
|
|
|
|
|
|
|
|
|
135
|
0
|
|
|
|
|
|
$post = "\n"; # harmless newline, I figure. |
|
136
|
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
} elsif($type eq 'for') { |
|
138
|
0
|
|
|
|
|
|
$out .= "get_arg) . "\">"; |
|
139
|
0
|
|
|
|
|
|
$out .= xml_escape_maybe_cdata($it->get_text); |
|
140
|
0
|
|
|
|
|
|
$out .= "\n\n"; |
|
141
|
0
|
|
|
|
|
|
return; |
|
142
|
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
} elsif($type eq 'sequence') { |
|
144
|
0
|
|
|
|
|
|
$type = lc($it->get_letter); |
|
145
|
0
|
|
|
|
|
|
DEBUG and print "Sequence type \"$type\"\n"; |
|
146
|
0
|
0
|
|
|
|
|
if($type eq 'e') { |
|
|
|
0
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
# An unresolved entity. |
|
148
|
0
|
|
|
|
|
|
$x = $it->get_children; |
|
149
|
0
|
0
|
0
|
|
|
|
if($x and @$x ==1 and $x->[0]->get_type eq 'text') { |
|
|
|
|
0
|
|
|
|
|
|
150
|
0
|
|
|
|
|
|
$x = $x->[0]->get_text; |
|
151
|
0
|
0
|
|
|
|
|
die "Impossible entity name \"$x\"" if $x =~ m/[ \t<>]/s; |
|
152
|
|
|
|
|
|
|
# minimal sanity |
|
153
|
0
|
|
|
|
|
|
$out .= '&' . $x . ';'; |
|
154
|
|
|
|
|
|
|
} else { |
|
155
|
|
|
|
|
|
|
# $out .= '&WHAT;'; |
|
156
|
0
|
|
|
|
|
|
die "Aberrant E<..> content \"", $it->get_deep_text, "\""; |
|
157
|
|
|
|
|
|
|
} |
|
158
|
0
|
|
|
|
|
|
return; |
|
159
|
|
|
|
|
|
|
} elsif($type eq 'l') { |
|
160
|
|
|
|
|
|
|
# At time of writing, Pod::Tree is less than sterling in its |
|
161
|
|
|
|
|
|
|
# treatment of L<...> sequences. |
|
162
|
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
#use Data::Dumper; |
|
164
|
|
|
|
|
|
|
#print "LINK DUMP: {{\n", Dumper($it), "}}\n"; |
|
165
|
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
# Some special treatment... |
|
167
|
0
|
|
0
|
|
|
|
my $target = $it->get_target || die 'targetless link?'; |
|
168
|
|
|
|
|
|
|
|
|
169
|
0
|
|
|
|
|
|
my($page, $section); |
|
170
|
0
|
|
|
|
|
|
$out .= "
|
|
171
|
0
|
|
|
|
|
|
$page = xml_attr_escape( $target->get_page ); |
|
172
|
0
|
0
|
|
|
|
|
$out .= " page=\"$page\"" if length $page; |
|
173
|
0
|
|
|
|
|
|
$section = xml_attr_escape( $target->get_section ); |
|
174
|
0
|
0
|
|
|
|
|
$out .= " section=\"$section\"" if length $section; |
|
175
|
0
|
|
|
|
|
|
$out .= ">"; |
|
176
|
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
#if(!$LINK_TEXT_INFER and not(($x = $target->get_children) and @$x)) { |
|
178
|
0
|
0
|
0
|
|
|
|
unless(($x = $target->get_children) and @$x) { |
|
179
|
|
|
|
|
|
|
# There was no gloss (i.e., the bit after the "|"). |
|
180
|
0
|
0
|
|
|
|
|
if(! $LINK_TEXT_INFER) { |
|
181
|
|
|
|
|
|
|
# subvert the normal processing of children of this sequence. |
|
182
|
0
|
|
|
|
|
|
$out .= ""; |
|
183
|
0
|
|
|
|
|
|
return; |
|
184
|
|
|
|
|
|
|
} else { |
|
185
|
|
|
|
|
|
|
# Infer the text instead. |
|
186
|
0
|
|
|
|
|
|
my $ch; |
|
187
|
0
|
0
|
0
|
|
|
|
if(($ch = $it->get_children) and @$ch == 1 |
|
|
|
|
0
|
|
|
|
|
|
188
|
|
|
|
|
|
|
and $ch->[0]->get_type eq 'text' |
|
189
|
|
|
|
|
|
|
) { |
|
190
|
|
|
|
|
|
|
# So this /is/ just some text bit that Pod::Tree implicated. |
|
191
|
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
# To replicate Pod::Text's inscrutible weirdness as |
|
193
|
|
|
|
|
|
|
# best we can, for sake of continuity if not actual |
|
194
|
|
|
|
|
|
|
# good sense or clarity. |
|
195
|
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
# The moral of the story is to always have L !!! |
|
197
|
|
|
|
|
|
|
|
|
198
|
0
|
|
|
|
|
|
$x = ''; |
|
199
|
0
|
0
|
|
|
|
|
if (!length $section) { |
|
|
|
0
|
|
|
|
|
|
|
200
|
0
|
0
|
|
|
|
|
$x = "the $page manpage" if length $page; |
|
201
|
|
|
|
|
|
|
} elsif ($section =~ m/^[:\w]+(?:\(\))?/) { |
|
202
|
0
|
|
|
|
|
|
$x .= "the $section entry"; |
|
203
|
0
|
0
|
|
|
|
|
$x .= (length $page) ? " in the $page manpage" |
|
204
|
|
|
|
|
|
|
: " elsewhere in this document"; |
|
205
|
|
|
|
|
|
|
} else { |
|
206
|
0
|
|
|
|
|
|
$section =~ s/^\"\s*//; |
|
207
|
0
|
|
|
|
|
|
$section =~ s/\s*\"$//; |
|
208
|
0
|
|
|
|
|
|
$x .= 'the section on "' . $section . '"'; |
|
209
|
0
|
0
|
|
|
|
|
$x .= " in the $page manpage" if length $page; |
|
210
|
|
|
|
|
|
|
} |
|
211
|
0
|
|
|
|
|
|
$out .= "$x"; |
|
212
|
0
|
|
|
|
|
|
return; # subvert the usual processing. |
|
213
|
|
|
|
|
|
|
} |
|
214
|
|
|
|
|
|
|
# Else it's complicated and scary. Fall thru. |
|
215
|
|
|
|
|
|
|
} |
|
216
|
|
|
|
|
|
|
} |
|
217
|
0
|
|
|
|
|
|
$post = ''; |
|
218
|
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
} else { |
|
220
|
|
|
|
|
|
|
# Unknown sequence. Ahwell, pass thru. |
|
221
|
0
|
|
|
|
|
|
$out .= "<$type>"; |
|
222
|
0
|
|
|
|
|
|
$post = "$type>"; |
|
223
|
|
|
|
|
|
|
} |
|
224
|
|
|
|
|
|
|
} elsif($type eq 'list') { |
|
225
|
0
|
|
|
|
|
|
$x = xml_attr_escape($it->get_arg); |
|
226
|
0
|
0
|
|
|
|
|
$out .= length($x) ? "\n\n" : "\n\n"; |
|
227
|
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
# used to have: |
|
229
|
|
|
|
|
|
|
# sprintf "\n\n", |
|
230
|
|
|
|
|
|
|
# xml_attr_escape($it->get_list_type), |
|
231
|
|
|
|
|
|
|
# xml_attr_escape($it->get_arg) ; |
|
232
|
|
|
|
|
|
|
|
|
233
|
0
|
|
|
|
|
|
$post = "\n\n"; |
|
234
|
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
} elsif($type eq 'ordinary') { |
|
236
|
0
|
|
|
|
|
|
$out .= " "; |
|
237
|
0
|
|
|
|
|
|
$post = "\n\n"; |
|
238
|
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
} elsif($type eq 'command') { |
|
240
|
0
|
|
|
|
|
|
$x = $it->get_command(); |
|
241
|
0
|
0
|
|
|
|
|
if($x =~ m/^head[1234]$/is) { |
|
242
|
0
|
|
|
|
|
|
$x = lc($x); |
|
243
|
0
|
|
|
|
|
|
$out .= "<$x>"; |
|
244
|
0
|
|
|
|
|
|
$post = "$x>\n\n"; |
|
245
|
|
|
|
|
|
|
} else { |
|
246
|
0
|
|
|
|
|
|
die "Unknown POD command \"$x\""; |
|
247
|
|
|
|
|
|
|
} |
|
248
|
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
} elsif($type eq 'item') { |
|
250
|
|
|
|
|
|
|
# Needs special recursion! |
|
251
|
0
|
|
|
|
|
|
$out .= '- ';
|
|
252
|
|
|
|
|
|
|
# used to have: sprintf '- ',
|
|
253
|
|
|
|
|
|
|
# xml_attr_escape($it->get_item_type); |
|
254
|
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
# Recurse for the item's children: |
|
256
|
0
|
0
|
|
|
|
|
foreach my $c (@{ $it->get_children || $nil }) { $trav->($c) } |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
257
|
0
|
|
|
|
|
|
$out .= "\n\n"; |
|
258
|
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
# Then recurse for the bastards further down... |
|
260
|
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
} elsif($type eq 'verbatim') { |
|
262
|
0
|
0
|
0
|
|
|
|
( $FUSE_ADJACENT_PRES and $out =~ s/<\/pre>\n\n$//s ) |
|
263
|
|
|
|
|
|
|
or $out .= ""; |
|
264
|
|
|
|
|
|
|
# possibly combine adjacent verbatims into a single 'pre' |
|
265
|
0
|
|
|
|
|
|
$out .= xml_escape_maybe_cdata("\n" . $it->get_text . "\n"); |
|
266
|
0
|
0
|
|
|
|
|
$out =~ s/]]>$/s; |
|
267
|
|
|
|
|
|
|
# combining adjacent CDATA sections is nice, and always harmless |
|
268
|
0
|
|
|
|
|
|
$out .= "\n\n"; |
|
269
|
0
|
|
|
|
|
|
return; |
|
270
|
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
} elsif($type eq 'text') { |
|
272
|
0
|
|
|
|
|
|
$out .= xml_escape($it->get_text); |
|
273
|
0
|
|
|
|
|
|
return; |
|
274
|
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
} else { |
|
276
|
0
|
|
|
|
|
|
$out .= "\n\n"; |
|
277
|
0
|
|
|
|
|
|
return; |
|
278
|
|
|
|
|
|
|
} |
|
279
|
|
|
|
|
|
|
|
|
280
|
0
|
0
|
|
|
|
|
foreach my $c (@{ # Recurse... |
|
|
0
|
0
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
(($type eq 'item') ? $it->get_siblings() : $it->get_children()) |
|
282
|
|
|
|
|
|
|
|| $nil |
|
283
|
0
|
|
|
|
|
|
}) { $trav->($c) } |
|
284
|
|
|
|
|
|
|
|
|
285
|
0
|
|
|
|
|
|
$out .= $post; |
|
286
|
0
|
|
|
|
|
|
return; |
|
287
|
0
|
|
|
|
|
|
}; |
|
288
|
0
|
|
|
|
|
|
$trav->($root); |
|
289
|
0
|
|
|
|
|
|
undef $trav; # break cyclicity |
|
290
|
0
|
|
|
|
|
|
print "\n\n" if DEBUG; |
|
291
|
|
|
|
|
|
|
|
|
292
|
0
|
|
|
|
|
|
sanitize_newlines($out); |
|
293
|
|
|
|
|
|
|
|
|
294
|
0
|
|
|
|
|
|
return $out; |
|
295
|
|
|
|
|
|
|
} |
|
296
|
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
298
|
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
sub xml_escape_maybe_cdata { # not destructive |
|
300
|
0
|
|
|
0
|
0
|
|
my $x; |
|
301
|
0
|
0
|
|
|
|
|
$x = '' unless defined($x = $_[0]); |
|
302
|
0
|
0
|
0
|
|
|
|
if($x =~ m/[&<>]/ and not $x =~ m/[^\x00-\x80]/) { |
|
303
|
|
|
|
|
|
|
# CDATA only if uses those [&<>], and does not use anything highbit. |
|
304
|
0
|
|
|
|
|
|
$x =~ s/]]>/]]>]]>
|
|
305
|
0
|
|
|
|
|
|
$x = ""; |
|
306
|
|
|
|
|
|
|
} else { |
|
307
|
|
|
|
|
|
|
# Otherwise escape things. |
|
308
|
0
|
|
|
|
|
|
$x =~ s/&/&/g; |
|
309
|
0
|
|
|
|
|
|
$x =~ s/</g; |
|
310
|
0
|
|
|
|
|
|
$x =~ s/>/>/g; |
|
311
|
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
#$x =~ s/([^\x00-\x7E])/$Char2xmlent{ord $1} or "".ord($1).";"/eg; |
|
313
|
0
|
0
|
|
|
|
|
$x =~ s/([^\x00-\x7E])/"".ord($1).";"/eg unless $HIGH_BIT_OK; |
|
|
0
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
# Why care about highbittyness? Even tho we're declaring this content |
|
316
|
|
|
|
|
|
|
# to be in UTF8, might as well entitify what we can. |
|
317
|
|
|
|
|
|
|
} |
|
318
|
0
|
|
|
|
|
|
return $x; |
|
319
|
|
|
|
|
|
|
} |
|
320
|
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
sub xml_escape { # not destructive |
|
322
|
0
|
|
|
0
|
0
|
|
my $x; |
|
323
|
0
|
0
|
|
|
|
|
return '' unless defined($x = $_[0]); |
|
324
|
0
|
0
|
|
|
|
|
if($HIGH_BIT_OK) { |
|
325
|
0
|
0
|
|
|
|
|
$x =~ s/([&<>])/$Char2xmlent{ord $1} or "".ord($1).";"/eg; |
|
|
0
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
# Encode '&', and '<' and '>' |
|
327
|
|
|
|
|
|
|
} else { |
|
328
|
0
|
0
|
|
|
|
|
$x =~ s/([^\cm\cj\f\t !-%'-;=?-~])/$Char2xmlent{ord $1} or "".ord($1).";"/eg; |
|
|
0
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
# Encode control chars, high bit chars, '&', and '<' and '>' |
|
330
|
|
|
|
|
|
|
} |
|
331
|
0
|
|
|
|
|
|
return $x; |
|
332
|
|
|
|
|
|
|
} |
|
333
|
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
sub xml_attr_escape { # not destructive |
|
335
|
0
|
|
|
0
|
0
|
|
my $x; |
|
336
|
0
|
0
|
|
|
|
|
return '' unless defined($x = $_[0]); |
|
337
|
|
|
|
|
|
|
|
|
338
|
0
|
0
|
|
|
|
|
if($HIGH_BIT_OK) { |
|
339
|
0
|
0
|
|
|
|
|
$x =~ s/([&<>"])/$Char2xmlent{ord $1} or "".ord($1).";"/eg; |
|
|
0
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
# Encode '&', '"', and '<' and '>' |
|
341
|
|
|
|
|
|
|
} else { |
|
342
|
0
|
0
|
|
|
|
|
$x =~ s/([^\cm\cj\f\t !\#-\%'-;=?-~])/$Char2xmlent{ord $1} or "".ord($1).";"/eg; |
|
|
0
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
# Encode control chars, high bit chars, '"', '&', and '<' and '>' |
|
344
|
|
|
|
|
|
|
} |
|
345
|
0
|
|
|
|
|
|
return $x; |
|
346
|
|
|
|
|
|
|
} |
|
347
|
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
349
|
|
|
|
|
|
|
sub sanitize_newlines { # DESTRUCTIVE |
|
350
|
0
|
|
|
0
|
0
|
|
if("\n" eq "\cm") { |
|
351
|
|
|
|
|
|
|
$_[0] =~ s/\cm?\cj/\n/g; # turn \cj and \cm\cj into \n |
|
352
|
|
|
|
|
|
|
} elsif("\n" eq "\cj") { |
|
353
|
0
|
|
|
|
|
|
$_[0] =~ s/\cm\cj/\n/g; # turn \cm and \cm\cj into \n |
|
354
|
|
|
|
|
|
|
} else { |
|
355
|
|
|
|
|
|
|
$_[0] =~ s/(?:(?:\cm?\cj)|\cm)/\n/g; |
|
356
|
|
|
|
|
|
|
# turn \cm\cj, \cj, or \cm into \n |
|
357
|
|
|
|
|
|
|
} |
|
358
|
0
|
|
|
|
|
|
return; |
|
359
|
|
|
|
|
|
|
} |
|
360
|
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
########################################################################### |
|
362
|
|
|
|
|
|
|
########################################################################### |
|
363
|
|
|
|
|
|
|
|
|
364
|
2
|
|
|
2
|
|
22
|
use vars qw(%Acceptable_children); |
|
|
2
|
|
|
|
|
5
|
|
|
|
2
|
|
|
|
|
5855
|
|
|
365
|
|
|
|
|
|
|
{ |
|
366
|
|
|
|
|
|
|
# This just recapitulates what's in the DTD: |
|
367
|
|
|
|
|
|
|
my $style = {map{;$_,1} qw(b i c x f s link)}; |
|
368
|
|
|
|
|
|
|
my $pstyle = {'#PCDATA',1, %$style}; |
|
369
|
|
|
|
|
|
|
my $pcdata = {'#PCDATA',1}; |
|
370
|
|
|
|
|
|
|
%Acceptable_children = ( |
|
371
|
|
|
|
|
|
|
'pod' => {map{;$_,1} qw(head1 head2 head3 head4 p pre list for)}, |
|
372
|
|
|
|
|
|
|
map(($_=>$pstyle), qw(head1 head2 head3 head4 p)), |
|
373
|
|
|
|
|
|
|
'pre' => $pcdata, |
|
374
|
|
|
|
|
|
|
'list' => {map{;$_,1} qw(item p pre list for)}, |
|
375
|
|
|
|
|
|
|
'item' => $pstyle, |
|
376
|
|
|
|
|
|
|
'for' => $pcdata, |
|
377
|
|
|
|
|
|
|
map(($_=>$pstyle), qw(link b i c f x s)), |
|
378
|
|
|
|
|
|
|
); |
|
379
|
|
|
|
|
|
|
} |
|
380
|
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
sub xml2pod ($) { |
|
382
|
0
|
|
|
0
|
1
|
|
my $content = $_[0]; |
|
383
|
0
|
|
|
|
|
|
require XML::Parser; |
|
384
|
|
|
|
|
|
|
|
|
385
|
0
|
|
|
|
|
|
my $out; |
|
386
|
0
|
|
|
|
|
|
my($gi, %attr, $text, $cm_set); # scratch |
|
387
|
|
|
|
|
|
|
|
|
388
|
0
|
|
|
|
|
|
my(@stack); |
|
389
|
0
|
|
|
|
|
|
my @paragraph_stack; |
|
390
|
|
|
|
|
|
|
# pop/pushed only by paragraph-containing elements, and link |
|
391
|
0
|
|
|
|
|
|
my @for_stack; # kept by 'for' elements |
|
392
|
0
|
|
|
|
|
|
my @link_stack; # kept by 'link' elements |
|
393
|
|
|
|
|
|
|
my $xml = XML::Parser->new( 'Handlers' => { |
|
394
|
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
## |
|
396
|
|
|
|
|
|
|
## |
|
397
|
|
|
|
|
|
|
## On the way in... |
|
398
|
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
'Start' => sub { |
|
400
|
0
|
|
|
0
|
|
|
(undef, $gi, %attr) = @_; |
|
401
|
0
|
|
|
|
|
|
push @stack, $gi; |
|
402
|
0
|
|
|
|
|
|
DEBUG > 1 and print ' ', join('.', @stack), "+\n"; |
|
403
|
|
|
|
|
|
|
|
|
404
|
0
|
0
|
|
|
|
|
if($XML_VALIDATE) { |
|
405
|
0
|
0
|
|
|
|
|
if(@stack < 2) { |
|
|
|
0
|
|
|
|
|
|
|
406
|
0
|
0
|
|
|
|
|
unless($gi eq 'pod') { |
|
407
|
|
|
|
|
|
|
# I think XML::Parser would catch this, but anyway. |
|
408
|
0
|
|
|
|
|
|
die "Can't have a childless \"$gi\" element, in $content"; |
|
409
|
|
|
|
|
|
|
} |
|
410
|
|
|
|
|
|
|
} elsif(defined($cm_set = $Acceptable_children{$stack[-2]})) { |
|
411
|
0
|
0
|
|
|
|
|
die "Can't have a \"$gi\" in a \"$stack[-2]\", in $content (stack @stack)" |
|
412
|
|
|
|
|
|
|
unless $cm_set->{$gi}; |
|
413
|
|
|
|
|
|
|
} else { |
|
414
|
0
|
|
|
|
|
|
die "Unknown element \"$gi\""; |
|
415
|
|
|
|
|
|
|
} |
|
416
|
|
|
|
|
|
|
# TODO: attribute validation! |
|
417
|
|
|
|
|
|
|
} |
|
418
|
|
|
|
|
|
|
|
|
419
|
0
|
0
|
0
|
|
|
|
if($gi =~ m/^[bicxfs]$/s) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
420
|
0
|
|
|
|
|
|
$paragraph_stack[-1] .= "\U$gi<"; |
|
421
|
|
|
|
|
|
|
} elsif($gi eq 'p' or $gi eq 'pre') { |
|
422
|
0
|
|
|
|
|
|
push @paragraph_stack, ''; |
|
423
|
|
|
|
|
|
|
} elsif($gi eq 'for') { |
|
424
|
0
|
|
0
|
|
|
|
$text = $attr{'target'} || '????'; |
|
425
|
0
|
|
|
|
|
|
push @for_stack, $text; |
|
426
|
0
|
|
|
|
|
|
push @paragraph_stack, ''; |
|
427
|
|
|
|
|
|
|
} elsif($gi eq 'list') { |
|
428
|
0
|
|
|
|
|
|
$text = $attr{'indent'}; |
|
429
|
0
|
0
|
0
|
|
|
|
$out .= (defined($text) && length($text)) |
|
430
|
|
|
|
|
|
|
? "=over $text\n\n" : "=over\n\n"; |
|
431
|
|
|
|
|
|
|
} elsif($gi eq 'item') { |
|
432
|
0
|
|
|
|
|
|
$out .= '=item '; |
|
433
|
0
|
|
|
|
|
|
push @paragraph_stack, ''; |
|
434
|
|
|
|
|
|
|
} elsif($gi =~ m/^head[1234]$/s) { |
|
435
|
0
|
|
|
|
|
|
push @paragraph_stack, '=' . $gi . ' '; |
|
436
|
|
|
|
|
|
|
} elsif($gi eq 'link') { # a hack |
|
437
|
0
|
|
|
|
|
|
push @link_stack, [$attr{'page'}, $attr{'section'}]; |
|
438
|
0
|
|
|
|
|
|
push @paragraph_stack, ''; |
|
439
|
|
|
|
|
|
|
} elsif($gi eq 'pod') { |
|
440
|
0
|
|
0
|
|
|
|
my $text = $attr{'xmlns'} || $XMLNS; |
|
441
|
0
|
0
|
|
|
|
|
die "pod has a foreign namespace: \"$text\" instead of \"$XMLNS\"" |
|
442
|
|
|
|
|
|
|
unless $text eq $XMLNS; |
|
443
|
|
|
|
|
|
|
} else { |
|
444
|
0
|
|
|
|
|
|
DEBUG and print "Opening unknown element \"$gi\"\n"; |
|
445
|
|
|
|
|
|
|
} |
|
446
|
0
|
|
|
|
|
|
return; |
|
447
|
|
|
|
|
|
|
}, |
|
448
|
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
## |
|
450
|
|
|
|
|
|
|
## |
|
451
|
|
|
|
|
|
|
## And on the way out... |
|
452
|
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
'End' => sub { |
|
454
|
0
|
|
|
0
|
|
|
$gi = $_[1]; |
|
455
|
0
|
|
|
|
|
|
DEBUG > 1 and print ' ', join('.', @stack), "-\n"; |
|
456
|
0
|
0
|
|
|
|
|
die "INSANE! Stack mismatch! $text ne $gi" |
|
457
|
|
|
|
|
|
|
unless $gi eq ($text = pop @stack); |
|
458
|
|
|
|
|
|
|
|
|
459
|
0
|
0
|
|
|
|
|
if($gi =~ m/^[bicxfs]$/s) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
460
|
0
|
|
|
|
|
|
$paragraph_stack[-1] .= ">"; |
|
461
|
|
|
|
|
|
|
} elsif($gi eq 'p') { |
|
462
|
|
|
|
|
|
|
# A paragraph must start with non-WS, non-=, and must contain |
|
463
|
|
|
|
|
|
|
# no \n\n's until its very end. |
|
464
|
|
|
|
|
|
|
|
|
465
|
0
|
|
|
|
|
|
$text = pop @paragraph_stack; |
|
466
|
0
|
|
|
|
|
|
$text =~ s/^(\s)/Z<>$1/s; # make sure we're NOT indented |
|
467
|
0
|
|
|
|
|
|
$text =~ s/^=/Z<>=/s; # make sure we're NOT =-initial |
|
468
|
0
|
|
|
|
|
|
$text =~ s/\n+$//s; # nix terminal newlines! |
|
469
|
0
|
|
|
|
|
|
$text =~ s/\n(?=\n)/\n /g; # separate double-newlines |
|
470
|
0
|
0
|
|
|
|
|
unless(length $text) { |
|
471
|
0
|
|
|
|
|
|
DEBUG and print "Odd, null p-paragraph\n"; |
|
472
|
0
|
|
|
|
|
|
return; |
|
473
|
|
|
|
|
|
|
} |
|
474
|
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
# These don't beautify /everything/ beautifiable, but they try. |
|
476
|
0
|
|
|
|
|
|
while($text =~ s/([^a-zA-Z<])E/$1
|
|
|
0
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
# Turn E's that obviously don't need escaping, back into <'s |
|
478
|
0
|
|
|
|
|
|
while($text =~ s/^([^<]*)E/$1>/) {1} |
|
|
0
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
# Turn obviously harmless E's back into ">"'s. |
|
480
|
|
|
|
|
|
|
|
|
481
|
0
|
|
|
|
|
|
$text .= "\n\n"; |
|
482
|
0
|
|
|
|
|
|
$out .= $text; |
|
483
|
|
|
|
|
|
|
} elsif($gi eq 'pre') { |
|
484
|
|
|
|
|
|
|
# A verbatim paragraph must start with WS, and must contain |
|
485
|
|
|
|
|
|
|
# no \n\n's until its very end. |
|
486
|
|
|
|
|
|
|
|
|
487
|
0
|
|
|
|
|
|
$text = pop @paragraph_stack; |
|
488
|
0
|
|
|
|
|
|
$text =~ s/^\n+//s; # nix leading strictly-blank lines |
|
489
|
0
|
|
|
|
|
|
$text =~ s/^(\S)/ \n$1/s; # make sure we ARE indented |
|
490
|
|
|
|
|
|
|
# that means we don't have to make sure we don't start with a '=' |
|
491
|
0
|
|
|
|
|
|
$text =~ s/\n+$//s; # nix terminal newlines! |
|
492
|
0
|
|
|
|
|
|
$text =~ s/\n(?=\n)/\n /g; # separate double-newlines |
|
493
|
|
|
|
|
|
|
#$text =~ tr/\0-\xFF//CU if $LATIN_1; # since we can't E<..> things |
|
494
|
0
|
0
|
|
|
|
|
unless(length $text) { |
|
495
|
0
|
|
|
|
|
|
DEBUG and print "Odd, null pre-paragraph\n"; |
|
496
|
0
|
|
|
|
|
|
return; |
|
497
|
|
|
|
|
|
|
} |
|
498
|
0
|
|
|
|
|
|
$text .= "\n\n"; |
|
499
|
0
|
|
|
|
|
|
$out .= $text; |
|
500
|
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
} elsif($gi eq 'for') { |
|
502
|
0
|
|
|
|
|
|
my $kind = pop @for_stack; |
|
503
|
0
|
|
|
|
|
|
$text = "\n\n=begin $kind\n\n" . pop @paragraph_stack; |
|
504
|
0
|
|
|
|
|
|
$text =~ s/\n+$//s; # nix terminal newlines! |
|
505
|
0
|
|
|
|
|
|
$text =~ s/\n(?=\n)/\n /g; # separate double-newlines |
|
506
|
0
|
|
|
|
|
|
$text .= "\n\n=end $kind\n\n"; |
|
507
|
0
|
|
|
|
|
|
$out .= $text; |
|
508
|
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
} elsif($gi eq 'list') { |
|
510
|
0
|
|
|
|
|
|
$out .= "=back\n\n"; |
|
511
|
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
} elsif($gi eq 'item') { |
|
513
|
0
|
|
|
|
|
|
$text = pop @paragraph_stack; |
|
514
|
0
|
|
|
|
|
|
$text =~ s/^\s*//s; # kill leading space |
|
515
|
0
|
|
|
|
|
|
$text =~ s/\n+$//s; # nix terminal newlines! |
|
516
|
0
|
|
|
|
|
|
$text =~ s/\n(?=\n)/\n /g; # separate double-newlines |
|
517
|
0
|
|
|
|
|
|
$text .= "\n\n"; |
|
518
|
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
# These don't beautify /everything/ beautifiable, but they try. |
|
520
|
0
|
|
|
|
|
|
while($text =~ s/([^a-zA-Z<])E/$1
|
|
|
0
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
# Turn E's that obviously don't need escaping, back into <'s |
|
522
|
0
|
|
|
|
|
|
while($text =~ s/^([^<]*)E/$1>/) {1} |
|
|
0
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
# Turn obviously harmless E's back into ">"'s. |
|
524
|
|
|
|
|
|
|
|
|
525
|
0
|
|
|
|
|
|
$out .= $text; |
|
526
|
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
} elsif($gi =~ m/^head[1234]$/s) { |
|
528
|
0
|
|
|
|
|
|
$text = pop @paragraph_stack; |
|
529
|
0
|
|
|
|
|
|
$text =~ s/^(\s)/Z<>$1/s; # make sure we're NOT (visibly) indented |
|
530
|
0
|
|
|
|
|
|
$text =~ s/\n+$//s; # nix terminal newlines! |
|
531
|
0
|
|
|
|
|
|
$text =~ s/\n(?=\n)/\n /g; # nix any double-newlines |
|
532
|
0
|
|
|
|
|
|
$text .= "\n\n"; |
|
533
|
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
# These don't beautify /everything/ beautifiable, but they try. |
|
535
|
0
|
|
|
|
|
|
while($text =~ s/([^a-zA-Z<])E/$1
|
|
|
0
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
# Turn E's that obviously don't need escaping, back into <'s |
|
537
|
0
|
|
|
|
|
|
while($text =~ s/^([^<]*)E/$1>/) {1} |
|
|
0
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
# Turn obviously harmless E's back into ">"'s. |
|
539
|
|
|
|
|
|
|
|
|
540
|
0
|
|
|
|
|
|
$out .= $text; |
|
541
|
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
} elsif($gi eq 'link') { # a hack |
|
543
|
0
|
|
|
|
|
|
$text = pop @paragraph_stack; |
|
544
|
|
|
|
|
|
|
# "Text cannot contain the characters '/' and '|'" |
|
545
|
0
|
|
|
|
|
|
$text =~ s/\|/E<124>/g; # AKA verbar |
|
546
|
0
|
|
|
|
|
|
$text =~ s{/}{E<47>}g; # AKA sol |
|
547
|
0
|
|
|
|
|
|
$text =~ s/\n(?=\n)/\n /g; |
|
548
|
|
|
|
|
|
|
# nix any double-newlines, just for good measure |
|
549
|
0
|
0
|
|
|
|
|
$text .= '|' if length $text; |
|
550
|
|
|
|
|
|
|
|
|
551
|
0
|
|
|
|
|
|
my($xref, $section) = @{pop @link_stack}; |
|
|
0
|
|
|
|
|
|
|
|
552
|
0
|
0
|
|
|
|
|
$xref = '' unless defined $xref; # "" means 'in this document' |
|
553
|
0
|
0
|
|
|
|
|
$section = '' unless defined $section; |
|
554
|
|
|
|
|
|
|
|
|
555
|
0
|
|
|
|
|
|
$xref = pod_escape($xref); |
|
556
|
0
|
|
|
|
|
|
$xref =~ s{/}{E<47>}g; |
|
557
|
0
|
0
|
|
|
|
|
$section = pod_escape("/\"$section\"") if length $section; |
|
558
|
|
|
|
|
|
|
|
|
559
|
0
|
0
|
0
|
|
|
|
$section = '/"???"' unless length $xref or length $section; |
|
560
|
|
|
|
|
|
|
# signals aberrant input! |
|
561
|
|
|
|
|
|
|
|
|
562
|
0
|
|
|
|
|
|
$paragraph_stack[-1] .= "L<$text$xref$section>"; |
|
563
|
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
} elsif($gi eq 'pod') { |
|
565
|
|
|
|
|
|
|
# no-op |
|
566
|
|
|
|
|
|
|
} else { |
|
567
|
0
|
|
|
|
|
|
DEBUG and print "Closing unknown element \"$gi\"\n"; |
|
568
|
|
|
|
|
|
|
} |
|
569
|
0
|
|
|
|
|
|
return; |
|
570
|
|
|
|
|
|
|
}, |
|
571
|
|
|
|
|
|
|
|
|
572
|
|
|
|
|
|
|
## |
|
573
|
|
|
|
|
|
|
## |
|
574
|
|
|
|
|
|
|
## Character data! MATANGA!!! |
|
575
|
|
|
|
|
|
|
'Char' => sub { |
|
576
|
0
|
|
|
0
|
|
|
shift; |
|
577
|
0
|
0
|
0
|
|
|
|
return unless defined $_[0] and length $_[0]; # sanity |
|
578
|
|
|
|
|
|
|
|
|
579
|
0
|
0
|
|
|
|
|
if(!@stack) { |
|
580
|
0
|
0
|
|
|
|
|
die "Non-WS text on empty stack: \"$_[0]\"" |
|
581
|
|
|
|
|
|
|
unless $_[0] =~ m/^\s+$/s; |
|
582
|
|
|
|
|
|
|
} else { |
|
583
|
0
|
0
|
0
|
|
|
|
if(($Acceptable_children{$stack[-1]} |
|
584
|
|
|
|
|
|
|
|| die "Putting text under unknown element \"$stack[-1]\"" |
|
585
|
|
|
|
|
|
|
)->{'#PCDATA'}) { |
|
586
|
|
|
|
|
|
|
# This is the only case where we can add: |
|
587
|
0
|
0
|
|
|
|
|
die "\@paragraph_stack is empty? (stack: @stack)" |
|
588
|
|
|
|
|
|
|
unless @paragraph_stack; |
|
589
|
0
|
0
|
|
|
|
|
if($stack[-1] eq 'pre') { |
|
590
|
0
|
|
|
|
|
|
$paragraph_stack[-1] .= $_[0]; |
|
591
|
|
|
|
|
|
|
} else { |
|
592
|
0
|
|
|
|
|
|
$paragraph_stack[-1] .= pod_escape($_[0]); |
|
593
|
|
|
|
|
|
|
} |
|
594
|
|
|
|
|
|
|
} else { |
|
595
|
|
|
|
|
|
|
# doesn't allow PCDATA |
|
596
|
0
|
0
|
|
|
|
|
die "Can't have non-WS text in a \"$stack[-1]\"" |
|
597
|
|
|
|
|
|
|
unless $_[0] =~ m/^\s+$/s; |
|
598
|
|
|
|
|
|
|
# Else it's just ignorable whitespace. |
|
599
|
|
|
|
|
|
|
} |
|
600
|
|
|
|
|
|
|
} |
|
601
|
|
|
|
|
|
|
|
|
602
|
0
|
|
|
|
|
|
return; |
|
603
|
|
|
|
|
|
|
}, |
|
604
|
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
# 'Comment' => sub { }, |
|
606
|
|
|
|
|
|
|
# 'Proc' => sub { }, |
|
607
|
|
|
|
|
|
|
# 'Attlist' => sub { }, |
|
608
|
|
|
|
|
|
|
# 'Element' => sub { }, |
|
609
|
|
|
|
|
|
|
# 'Doctype' => sub { }, |
|
610
|
0
|
|
|
|
|
|
}); |
|
611
|
|
|
|
|
|
|
|
|
612
|
|
|
|
|
|
|
# Now actually process... |
|
613
|
0
|
|
|
|
|
|
$out = ""; |
|
614
|
0
|
0
|
|
|
|
|
if(ref($content) eq 'SCALAR') { |
|
615
|
0
|
|
|
|
|
|
$xml->parse($$content); |
|
616
|
|
|
|
|
|
|
} else { |
|
617
|
0
|
|
|
|
|
|
$xml->parsefile($content); |
|
618
|
|
|
|
|
|
|
} |
|
619
|
|
|
|
|
|
|
|
|
620
|
0
|
|
|
|
|
|
$out =~ s/^([^=])/=pod\n\n$1/; |
|
621
|
|
|
|
|
|
|
# make sure that we start with a =-thingie, one way or another. |
|
622
|
|
|
|
|
|
|
|
|
623
|
0
|
|
|
|
|
|
$out .= "=cut\n\n"; |
|
624
|
|
|
|
|
|
|
|
|
625
|
0
|
|
|
|
|
|
sanitize_newlines($out); |
|
626
|
0
|
|
|
|
|
|
return $out; |
|
627
|
|
|
|
|
|
|
} |
|
628
|
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
630
|
|
|
|
|
|
|
|
|
631
|
|
|
|
|
|
|
{ |
|
632
|
|
|
|
|
|
|
my %e = ('<' => 'E', '>' => 'E' ); |
|
633
|
|
|
|
|
|
|
sub pod_escape { |
|
634
|
|
|
|
|
|
|
#print STDERR "IN: <$_[0]>\n"; |
|
635
|
0
|
|
|
0
|
0
|
|
my $it = $_[0]; |
|
636
|
0
|
0
|
|
|
|
|
$it =~ s/([^\cm\cj\f\t !-;=?-~])/$Char2podent{ord $1} or "E<".ord($1).">"/eg; |
|
|
0
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
# Encode control chars, high bit chars and '<' and '>' |
|
638
|
|
|
|
|
|
|
#print STDERR "OUT: <$_[0]>\n\n"; |
|
639
|
0
|
|
|
|
|
|
return $it; |
|
640
|
|
|
|
|
|
|
} |
|
641
|
|
|
|
|
|
|
} |
|
642
|
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
########################################################################### |
|
644
|
|
|
|
|
|
|
########################################################################### |
|
645
|
|
|
|
|
|
|
1; |
|
646
|
|
|
|
|
|
|
|