blib/lib/Pod/HTML2Pod.pm | |||
---|---|---|---|
Criterion | Covered | Total | % |
statement | 307 | 540 | 56.8 |
branch | 77 | 256 | 30.0 |
condition | 14 | 92 | 15.2 |
subroutine | 40 | 42 | 95.2 |
pod | 1 | 30 | 3.3 |
total | 439 | 960 | 45.7 |
line | stmt | bran | cond | sub | pod | time | code | ||
---|---|---|---|---|---|---|---|---|---|
1 | |||||||||
2 | require 5; | ||||||||
3 | # Time-stamp: "2004-12-29 18:41:19 AST" | ||||||||
4 | |||||||||
5 | package Pod::HTML2Pod; | ||||||||
6 | 2 | 2 | 27892 | use strict; | |||||
2 | 4 | ||||||||
2 | 72 | ||||||||
7 | 2 | 2 | 2387 | use integer; # haul aaaaaaaaaass! | |||||
2 | 22 | ||||||||
2 | 12 | ||||||||
8 | 2 | 2 | 2746 | use UNIVERSAL (); | |||||
2 | 31 | ||||||||
2 | 47 | ||||||||
9 | 2 | 2 | 15 | use Carp (); | |||||
2 | 3 | ||||||||
2 | 46 | ||||||||
10 | 2 | 2 | 2510 | use HTML::TreeBuilder 3.01 (); | |||||
2 | 125915 | ||||||||
2 | 62 | ||||||||
11 | 2 | 2 | 25 | use HTML::Element 3.05 (); | |||||
2 | 27 | ||||||||
2 | 33 | ||||||||
12 | 2 | 2 | 9 | use HTML::Tagset (); # presumably used by HTML::TreeBuilder anyhow | |||||
2 | 6 | ||||||||
2 | 28 | ||||||||
13 | 2 | 2 | 11 | use HTML::Entities (); # presumably used by HTML::Parser anyhow | |||||
2 | 5 | ||||||||
2 | 44 | ||||||||
14 | 2 | 39414 | use vars qw($Debug $VERSION %Phrasal %Char2ent | ||||||
15 | 2 | 2 | 9 | $nbsp $E_slash $E_vbar $counter); | |||||
2 | 5 | ||||||||
16 | |||||||||
17 | $VERSION = '4.05'; | ||||||||
18 | $Debug = 0 unless defined $Debug; | ||||||||
19 | |||||||||
20 | =head1 NAME | ||||||||
21 | |||||||||
22 | Pod::HTML2Pod -- translate HTML into POD | ||||||||
23 | |||||||||
24 | =head1 SYNOPSIS | ||||||||
25 | |||||||||
26 | # Use the program 'html2pod' that comes in this dist, or: | ||||||||
27 | use Pod::HTML2Pod; | ||||||||
28 | print Pod::HTML2Pod::convert( | ||||||||
29 | 'file' => 'my_stuff.html', # input file | ||||||||
30 | 'a_href' => 1, # try converting links | ||||||||
31 | ); | ||||||||
32 | |||||||||
33 | =head1 DESCRIPTION | ||||||||
34 | |||||||||
35 | Larry Wall once said (1999-08-27, on the C |
||||||||
36 | do believe): "The whole point of pod is to get people to document stuff | ||||||||
37 | they wouldn't document in any other form." | ||||||||
38 | |||||||||
39 | To that end, I wrote this module so that people who are unpracticed | ||||||||
40 | with POD but in a hurry to simply document their programs or modules, | ||||||||
41 | could write their documentation in simple HTML, and convert that to | ||||||||
42 | POD. That's what this module does. | ||||||||
43 | |||||||||
44 | Specifically, this module bends over backwards to try to turn even | ||||||||
45 | vaguely plausable HTML into POD -- and when in doubt, it simply ignores | ||||||||
46 | things that it doesn't know about, or can't render. | ||||||||
47 | |||||||||
48 | =head1 FUNCTIONS | ||||||||
49 | |||||||||
50 | This module provides one documented function, which it does not export: | ||||||||
51 | |||||||||
52 | =over | ||||||||
53 | |||||||||
54 | =item Pod::HTML2Pod::convert( ...options... ) | ||||||||
55 | |||||||||
56 | =back | ||||||||
57 | |||||||||
58 | This returns a single scalar value containing the converted POD text, | ||||||||
59 | with some comments after the end. | ||||||||
60 | |||||||||
61 | This function takes options: | ||||||||
62 | |||||||||
63 | =over | ||||||||
64 | |||||||||
65 | =item 'file' => FILENAME, | ||||||||
66 | |||||||||
67 | Specifies that the HTML code is to be read from the filename given. | ||||||||
68 | |||||||||
69 | =item 'handle' => *HANDLE, | ||||||||
70 | |||||||||
71 | Specifies that the HTML code is to be read from the open filehandle | ||||||||
72 | given (e.g., C<$fh_obj>, C<*HANDLE>, C<*HANDLE{IO}>, etc.) If you | ||||||||
73 | specify this, but fail to specify an actual handle object, inscrutible | ||||||||
74 | errors may result. | ||||||||
75 | |||||||||
76 | =item 'content' => STRING, | ||||||||
77 | |||||||||
78 | Specifies that the HTML code is in the string given. (Alternately, | ||||||||
79 | pass a reference to the scalar: C<'content' =E |
||||||||
80 | |||||||||
81 | =item 'tree' => OBJ, | ||||||||
82 | |||||||||
83 | Specifies that the HTML document is contained in the given | ||||||||
84 | HTML::TreeBuilder object (or HTML::Element object, at least). | ||||||||
85 | |||||||||
86 | =item 'a_name' => BOOLEAN, | ||||||||
87 | |||||||||
88 | Specifies whether you want to try converting C |
||||||||
89 | elements. By default this is off -- i.e., such elements are ignored. | ||||||||
90 | |||||||||
91 | =item 'a_href' => BOOLEAN, | ||||||||
92 | |||||||||
93 | Specifies whether you want to try converting C |
||||||||
94 | elements. By default this is off -- i.e., such elements are ignored. | ||||||||
95 | If on, bear in mind that relative URLs cannot be properly converted to | ||||||||
96 | POD -- any relative URLs will be complained about in comments after | ||||||||
97 | the end of the document. Normal absolute URLs will be treated as best | ||||||||
98 | they can be. Note that URLs beginning "pod:..." will be turned into | ||||||||
99 | POD links to whatever follows; that is, "pod:Getopt::Std" is turned | ||||||||
100 | into C |
||||||||
101 | |||||||||
102 | =item 'debug' => INTEGER, | ||||||||
103 | |||||||||
104 | Puts Pod::HTML2Pod into verbose debug mode for the duration of | ||||||||
105 | processing this this HTML document. INTEGER can be 0 for no debug | ||||||||
106 | output, 1 for a moderate amount that will cause the HTML syntax tree | ||||||||
107 | to be be dumped at the start of the conversion, and 2 for that plus a | ||||||||
108 | dump of the intermediate POD doctree, plus a few more inscrutible | ||||||||
109 | diagnostic messages. Looking at the trees dumped might be helpful in | ||||||||
110 | making sense of error messages that refer to a particular node in the | ||||||||
111 | parse tree. | ||||||||
112 | |||||||||
113 | =item | ||||||||
114 | |||||||||
115 | =back | ||||||||
116 | |||||||||
117 | =head1 GUIDELINES | ||||||||
118 | |||||||||
119 | Don't write crappy HTML and expect this module to understand it. | ||||||||
120 | |||||||||
121 | Don't take the output of C |
||||||||
122 | you think it'd be neat to try it. You'll just learn really unpleasant | ||||||||
123 | things about C |
||||||||
124 | it to improve C |
||||||||
125 | |||||||||
126 | However, I |
||||||||
127 | bearing in mind these simple truths: | ||||||||
128 | |||||||||
129 | POD can't do tables, images, forms, imagemaps, layers, CSS, embedded | ||||||||
130 | Java applets or any other kind of object, FONT, or BLINK. So don't | ||||||||
131 | try to do any of these things. | ||||||||
132 | |||||||||
133 | Use C |
||||||||
134 | |||||||||
135 | If you want to have a block of literal example code, put it in a | ||||||||
136 | C |
||||||||
137 | |||||||||
138 | Keep things simple. | ||||||||
139 | |||||||||
140 | Remember: Just because it comes I |
||||||||
141 | it's happy normal pod. You can do lots of things in HTML that will | ||||||||
142 | produce POD that is strange but technically legal (like having huge | ||||||||
143 | and complex content in a C |
||||||||
144 | perldoc scream bloody murder about nroff macros stretched past their | ||||||||
145 | limit. | ||||||||
146 | |||||||||
147 | Try to avoid using a WYSIWYG HTML editor, as they often produce scary | ||||||||
148 | source. Ditto for taking selecting "Save as... HTML" in your word | ||||||||
149 | processor. You can always try it, but look at the HTML to survey the | ||||||||
150 | damage before you try converting it to POD. | ||||||||
151 | |||||||||
152 | Always look at the POD that's been output by HTML2Pod -- never just | ||||||||
153 | blindly include it. | ||||||||
154 | |||||||||
155 | Consider starting from this template: | ||||||||
156 | |||||||||
157 | |||||||||
158 | |||||||||
159 | |
||||||||
160 | |||||||||
161 | |||||||||
162 | |||||||||
163 | NAME |
||||||||
164 | |||||||||
165 | Things::Stuff -- does some things with stuff | ||||||||
166 | |||||||||
167 | SYNOPSIS |
||||||||
168 | |||||||||
169 | |
||||||||
170 | use HTML::Stuff; | ||||||||
171 | do some more stuff; | ||||||||
172 | la la la la la; | ||||||||
173 | oogah; | ||||||||
174 | |||||||||
175 | |||||||||
176 | DESCRIPTION |
||||||||
177 | |||||||||
178 | This module does things with stuff. It exports these functions: | ||||||||
179 | |||||||||
180 | |
||||||||
181 | thingify( ... ) |
||||||||
182 | |
||||||||
183 | |||||||||
184 | destuffulate( ... ) |
||||||||
185 | |
||||||||
186 | It will throw a fatal exception if applied to things. |
||||||||
187 | So don't do that. |
||||||||
188 | |||||||||
189 | enthinction( ... ) |
||||||||
190 | |
||||||||
191 | involving "thing" and "stuff". Mostly. | ||||||||
192 | |||||||||
193 | |||||||||
194 | |||||||||
195 | Caveats and WYA's |
||||||||
196 | |||||||||
197 | Things to be wary of: | ||||||||
198 | |||||||||
199 | |
||||||||
200 | |
||||||||
201 | |
||||||||
202 | Don't forget about that stuff. Gotta keep an eye on that. |
||||||||
203 | |||||||||
204 | |||||||||
205 | BUGS |
||||||||
206 | |||||||||
207 | Stuff is hard. | ||||||||
208 | |||||||||
209 | SEE ALSO |
||||||||
210 | |||||||||
211 | Class::Classless, | ||||||||
212 | strict, | ||||||||
213 | |||||||||
214 | >Lingua::EN::Numbers::Ordinate, | ||||||||
215 | perlvar, | ||||||||
216 | |||||||||
217 | |||||||||
219 | |||||||||
220 | COPYRIGHT |
||||||||
221 | |||||||||
222 | Copyright 2000, Joey Jo-Jo Jr. Shabadoo. | ||||||||
223 | |||||||||
224 | |||||||||
225 | This library is free software; you can redistribute it and/or modify |
||||||||
226 | it under the same terms as Perl itself. | ||||||||
227 | |||||||||
228 | AUTHOR |
||||||||
229 | Joey Jo-Jo Jr. Shabadoo, jojojo@shabadoo.int |
||||||||
230 | |||||||||
231 | |||||||||
232 | |||||||||
233 | =head1 BUG REPORTS | ||||||||
234 | |||||||||
235 | If you do find a case where this converter misinterprets what you | ||||||||
236 | consider straightforward HTML (which you should really really have run | ||||||||
237 | thru an HTML syntax checker, by the way!), report it to me as a bug, at | ||||||||
238 | C |
||||||||
239 | |||||||||
240 | Be sure to include the entire document that causes the error -- then | ||||||||
241 | specify exactly what you consider the error to be. | ||||||||
242 | |||||||||
243 | =head1 BUGS AND CAVEATS | ||||||||
244 | |||||||||
245 | * Doesn't try to turn "smart quotes" characters into simple " and '. | ||||||||
246 | Maybe should? | ||||||||
247 | |||||||||
248 | * Fails to turn | ||||||||
249 | |||||||||
250 | foo thing bar baz quux | ||||||||
251 | |||||||||
252 | into | ||||||||
253 | |||||||||
254 | foo S |
||||||||
255 | |||||||||
256 | I.e., currently just turns C< >'s into normal spaces. | ||||||||
257 | |||||||||
258 | * Numeric entities (C |
||||||||
259 | are not understood by some older POD converters. | ||||||||
260 | |||||||||
261 | * No HTML that you provide will turn into C |
||||||||
262 | |||||||||
263 | * Currently maps | ||||||||
264 | |||||||||
265 | bar | ||||||||
266 | |||||||||
267 | to | ||||||||
268 | |||||||||
269 | X |
||||||||
270 | |||||||||
271 | but is this correct? | ||||||||
272 | |||||||||
273 | =head1 SEE ALSO | ||||||||
274 | |||||||||
275 | L |
||||||||
276 | |||||||||
277 | And HTML Tidy, at C |
||||||||
278 | |||||||||
279 | =head1 COPYRIGHT | ||||||||
280 | |||||||||
281 | Copyright (c) 2000 Sean M. Burke. All rights reserved. | ||||||||
282 | |||||||||
283 | This library is free software; you can redistribute it and/or modify | ||||||||
284 | it under the same terms as Perl itself. | ||||||||
285 | |||||||||
286 | =head1 AUTHOR | ||||||||
287 | |||||||||
288 | Sean M. Burke C |
||||||||
289 | |||||||||
290 | =cut | ||||||||
291 | |||||||||
292 | # TODO: test whether anchors and references to them actually work | ||||||||
293 | # in extremis? (see what recent pod2html versions do to them?) | ||||||||
294 | |||||||||
295 | #-------------------------------------------------------------------------- | ||||||||
296 | |||||||||
297 | sub convert { | ||||||||
298 | 1 | 50 | 1 | 1 | 607 | Carp::croak(__PACKAGE__ . '::convert needs parameters!')unless @_; | |||
299 | 1 | 50 | 19 | Carp::croak( | |||||
300 | "odd number of elements in options to " . __PACKAGE__ . "::convert") | ||||||||
301 | if @_ % 2; | ||||||||
302 | |||||||||
303 | 1 | 6 | my %o = @_; | ||||||
304 | 1 | 3 | local($Debug) = $Debug; | ||||||
305 | 1 | 50 | 5 | if(exists $o{'debug'}) { $Debug = $o{'debug'} } | |||||
0 | 0 | ||||||||
306 | |||||||||
307 | 1 | 11 | my $tree = HTML::TreeBuilder->new(); | ||||||
308 | |||||||||
309 | 1 | 318 | $tree->ignore_ignorable_whitespace(1); | ||||||
310 | |||||||||
311 | 1 | 11 | my $comments = [ __PACKAGE__ . ' conversion notes:' ]; | ||||||
312 | |||||||||
313 | 1 | 50 | 4 | if(exists $o{'tree'}) { | |||||
314 | 0 | 0 | $tree->delete; # never mind that one | ||||||
315 | 0 | 0 | $tree = $o{'tree'}; | ||||||
316 | 0 | 0 | 0 | die "but the 'tree' value is undef" unless defined $tree; | |||||
317 | 0 | 0 | 0 | die "but the 'tree' value isn't an object" unless ref $tree; | |||||
318 | 0 | 0 | 0 | die "but the 'tree' value object's class isn't based on HTML::Element" | |||||
319 | unless $tree->isa('HTML::Element'); | ||||||||
320 | 0 | 0 | $tree = $tree->clone; | ||||||
321 | |||||||||
322 | } else { | ||||||||
323 | |||||||||
324 | 1 | 50 | 5 | if(exists $o{'file'}) { | |||||
325 | 0 | 0 | 0 | die "File $o{'file'} doesn't exist" unless -e $o{'file'}; | |||||
326 | 0 | 0 | local(*IN); | ||||||
327 | 0 | 0 | 0 | open(IN, "<$o{'file'}") or die "Can't open $o{'file'}: $!"; | |||||
328 | 0 | 0 | $o{'handle'} = *IN{IO}; | ||||||
329 | 0 | 0 | ++$o{'_close_after'}; | ||||||
330 | 0 | 0 | 0 | print "Input from $o{'file'} ($o{'handle'})\n" if $Debug; | |||||
331 | 0 | 0 | push @$comments, "#From file $o{'file'}"; | ||||||
332 | } | ||||||||
333 | |||||||||
334 | 1 | 50 | 4 | if(exists $o{'handle'}) { | |||||
335 | 0 | 0 | local $/; | ||||||
336 | 0 | 0 | my $fh = $o{'handle'}; | ||||||
337 | 0 | 0 | my $x; | ||||||
338 | 0 | 0 | $x = <$fh>; | ||||||
339 | 0 | 0 | 0 | close($fh) if $o{'_close_after'}; | |||||
340 | 0 | 0 | $o{'content'} = \$x; | ||||||
341 | 0 | 0 | 0 | print "Input from handle ($o{'handle'})\n" if $Debug; | |||||
342 | } | ||||||||
343 | |||||||||
344 | 1 | 50 | 5 | if(exists $o{'content'}) { | |||||
345 | 1 | 3 | my($content_r, $is_copy); | ||||||
346 | 1 | 50 | 8 | if(!defined $o{'content'}) { # undef content? | |||||
50 | |||||||||
347 | 0 | 0 | die "content is undef"; | ||||||
348 | } elsif(ref $o{'content'}) { # scalar ref | ||||||||
349 | 0 | 0 | 0 | die "content only accepts scalars or scalar refs" | |||||
350 | unless ref $o{'content'} eq 'SCALAR'; | ||||||||
351 | 0 | 0 | $content_r = $o{'content'}; | ||||||
352 | 0 | 0 | $is_copy = 0; | ||||||
353 | } else { # simple scalar | ||||||||
354 | 1 | 2 | $content_r = \$o{'content'}; | ||||||
355 | 1 | 3 | $is_copy = 1; | ||||||
356 | } | ||||||||
357 | |||||||||
358 | # Nativize newlines, if possible and if need be. | ||||||||
359 | # Otherwise PREs will be hard to reckon. | ||||||||
360 | 1 | 50 | 15 | if("\n" ne "\cm" and "\n" ne "\cm\cj" and "\n" ne "\cj") { | |||||
361 | print "I don't recognize what \"\\n\" means on this system!" if $Debug; | ||||||||
362 | 0 | 0 | } elsif($$content_r =~ m/(\cm\cj|\cm|\cj)/) { | ||||||
363 | 0 | 0 | my $nl = $1; | ||||||
364 | 0 | 0 | 0 | if($nl eq "\n") { | |||||
365 | # no-op | ||||||||
366 | 0 | 0 | 0 | print "# Already in native newline format\n" if $Debug; | |||||
367 | } else { | ||||||||
368 | 0 | 0 | 0 | unless($is_copy) { | |||||
369 | 0 | 0 | my $x = $$content_r; | ||||||
370 | 0 | 0 | $content_r = \$x; # copy | ||||||
371 | 0 | 0 | $is_copy = 1; | ||||||
372 | } | ||||||||
373 | 0 | 0 | 0 | if($nl eq "\cm") { | |||||
0 | |||||||||
0 | |||||||||
374 | 0 | 0 | $$content_r =~ tr/\cm/\n/; | ||||||
375 | 0 | 0 | 0 | print "# Nativizing newlines from \\cm to \\n\n" if $Debug; | |||||
376 | } elsif($nl eq "\cj") { | ||||||||
377 | 0 | 0 | $$content_r =~ tr/\cj/\n/; | ||||||
378 | 0 | 0 | 0 | print "# Nativizing newlines from \\cj to \\n\n" if $Debug; | |||||
379 | } elsif($nl eq "\cm\cj") { | ||||||||
380 | 0 | 0 | $$content_r =~ tr/\cj//d; | ||||||
381 | 0 | 0 | $$content_r =~ tr/\cm/\n/ unless "\cm" eq "\n"; | ||||||
382 | 0 | 0 | 0 | print "# Nativizing newlines from \\cm\\cj to \\n\n" if $Debug; | |||||
383 | } | ||||||||
384 | } | ||||||||
385 | } | ||||||||
386 | |||||||||
387 | 1 | 5 | push @$comments, | ||||||
388 | '# ' . length($$content_r) . ' bytes of input'; | ||||||||
389 | 1 | 31 | $tree->parse($$content_r); | ||||||
390 | 1 | 3086 | $tree->eof; | ||||||
391 | 1 | 127 | delete $o{'content'}; | ||||||
392 | } else { | ||||||||
393 | 0 | 0 | die "No input source specified?"; | ||||||
394 | } | ||||||||
395 | } | ||||||||
396 | |||||||||
397 | { | ||||||||
398 | # The BODY is all we need. Discard the rest. | ||||||||
399 | 1 | 50 | 3 | my $body = $tree->find_by_tag_name('body') || die "No BODY in tree?"; | |||||
1 | 31 | ||||||||
400 | 1 | 49 | $body->detach; | ||||||
401 | 1 | 19 | $tree->delete; | ||||||
402 | 1 | 38 | $tree = $body; | ||||||
403 | } | ||||||||
404 | |||||||||
405 | 1 | 50 | 116 | push @$comments, scalar(localtime) . ' ' . ($ENV{'USER'} || ''); | |||||
406 | 1 | 7 | $tree->attr('_pod_comments', $comments); | ||||||
407 | |||||||||
408 | # More options: | ||||||||
409 | 1 | 50 | 16 | if($o{'a_name'}) { | |||||
410 | 0 | 0 | $tree->attr('_a_name', 1); | ||||||
411 | 0 | 0 | push @$comments, " Will try to render "; | ||||||
412 | } else { | ||||||||
413 | 1 | 3 | push @$comments, | ||||||
414 | " No a_name switch not specified, so will not try to render "; | ||||||||
415 | } | ||||||||
416 | 1 | 50 | 3 | if($o{'a_href'}) { | |||||
417 | 0 | 0 | $tree->attr('_a_href', 1); | ||||||
418 | 0 | 0 | push @$comments, " Will try to render "; | ||||||
419 | } else { | ||||||||
420 | 1 | 3 | push @$comments, | ||||||
421 | " No a_href switch not specified, so will not try to render "; | ||||||||
422 | } | ||||||||
423 | |||||||||
424 | 1 | 4 | twist_tree($tree); | ||||||
425 | |||||||||
426 | 1 | 4 | my $rendering_r = tree_as_pod($tree); | ||||||
427 | 1 | 5 | $tree->delete; | ||||||
428 | 1 | 81 | return $$rendering_r; | ||||||
429 | } | ||||||||
430 | |||||||||
431 | ########################################################################### | ||||||||
432 | # | ||||||||
433 | # The code below this point is not happy nice readable undocumented code. | ||||||||
434 | # It is angry cryptic code, of the sort that you will find little use in | ||||||||
435 | # reading. | ||||||||
436 | # | ||||||||
437 | # When I first thought of writing this module, several years ago, I had | ||||||||
438 | # noble dreams that I could write some sort of universal markup-language | ||||||||
439 | # mixmaster, which would only need be fed some information about the | ||||||||
440 | # source language and the target language, and a few simple facts about | ||||||||
441 | # what constructs are equivalent (that HTML "h1" is POD "head1", for | ||||||||
442 | # example), and then magic would happen, and documents would be converted. | ||||||||
443 | # | ||||||||
444 | # Well, I've not yet found that mixmaster, so I've had to write some | ||||||||
445 | # very spooky crusty strange code. It seems to work rather well when fed | ||||||||
446 | # simple HTML, and seems to degrade gracefully when fed too-complex HTML. | ||||||||
447 | # | ||||||||
448 | # The code can be used as-is, but it's not conceivably adaptable to other | ||||||||
449 | # tasks, or even easily maintainable, regrettably. However, as HTML or | ||||||||
450 | # POD are not likely to mutate significantly any time soon, I think | ||||||||
451 | # substantial maintenance will not be needed -- just minor tweaking or | ||||||||
452 | # bugfixes on my part. | ||||||||
453 | # | ||||||||
454 | ########################################################################### | ||||||||
455 | # SO STOP READING NOW, IF YOU VALUE YOUR SANITY | ||||||||
456 | ########################################################################### | ||||||||
457 | # | ||||||||
458 | # Stay away! | ||||||||
459 | # STAY AWAY! | ||||||||
460 | # Stay away! | ||||||||
461 | # You might end up like me! | ||||||||
462 | # | ||||||||
463 | # It's the pain | ||||||||
464 | # that keeps us alive, | ||||||||
465 | # but that beauty is all that we need to survive. | ||||||||
466 | # | ||||||||
467 | # That damned beauty is all that we need to survive. | ||||||||
468 | # | ||||||||
469 | # -- David Byrne, "They Are In Love" | ||||||||
470 | # | ||||||||
471 | ########################################################################### | ||||||||
472 | |||||||||
473 | # Initialization code: | ||||||||
474 | |||||||||
475 | # TODO: replace this with a hardwired table? | ||||||||
476 | %Phrasal = %HTML::Tagset::isPhraseMarkup; | ||||||||
477 | delete @Phrasal{'br', 'hr'}; | ||||||||
478 | for (qw(~literal ~texticle)) { $Phrasal{$_} = 1 } | ||||||||
479 | $counter = 0 unless defined $counter; | ||||||||
480 | |||||||||
481 | $Debug = 2 unless defined $Debug; | ||||||||
482 | |||||||||
483 | # Fill out Char2ent: | ||||||||
484 | { | ||||||||
485 | die "\%HTML::Entities::char2entity is empty?" | ||||||||
486 | unless keys %HTML::Entities::char2entity; | ||||||||
487 | |||||||||
488 | my($c,$e); | ||||||||
489 | while(($c,$e) = each(%HTML::Entities::char2entity)) { | ||||||||
490 | if($e =~ m{^(\d+);$}s) { | ||||||||
491 | $Char2ent{$c} = "E<$1>"; | ||||||||
492 | #print "num $e => E<$1>\n"; | ||||||||
493 | # { => E<123> | ||||||||
494 | } elsif($e =~ m{^&([^;]+);$}s) { | ||||||||
495 | $Char2ent{$c} = "E<$1>"; | ||||||||
496 | #print "eng $e => E<$1>\n"; | ||||||||
497 | # é => E |
||||||||
498 | } else { | ||||||||
499 | warn "Unknown thingy in %HTML::Entities::char2entity: $e" | ||||||||
500 | # if $^W; | ||||||||
501 | } | ||||||||
502 | } | ||||||||
503 | |||||||||
504 | # Points of difference between HTML entities and POD entities: | ||||||||
505 | |||||||||
506 | $Char2ent{"\xA0"} = "E<160>"; # there is no E |
||||||||
507 | |||||||||
508 | $Char2ent{"\xAB"} = "E |
||||||||
509 | $Char2ent{"\xBB"} = "E |
||||||||
510 | # Altho new POD processors also know E |
||||||||
511 | |||||||||
512 | # Old POD processors don't know these two -- so leave numeric | ||||||||
513 | # $Char2ent{'/'} = 'E |
||||||||
514 | # $Char2ent{'|'} = 'E |
||||||||
515 | } | ||||||||
516 | |||||||||
517 | # Set up some initial values we'll need later. | ||||||||
518 | unless(defined $nbsp) { | ||||||||
519 | my $nb = ' '; | ||||||||
520 | HTML::Entities::decode_entities($nb); | ||||||||
521 | if(!defined $nb) { | ||||||||
522 | die " decodes to undef?"; | ||||||||
523 | } elsif($nb eq '') { | ||||||||
524 | die " decodes to empty-string?"; | ||||||||
525 | } elsif($nb eq ' ') { | ||||||||
526 | die " doesn't decode?"; | ||||||||
527 | } elsif($nb eq ' ') { | ||||||||
528 | $nbsp = undef; | ||||||||
529 | } else { | ||||||||
530 | $nbsp = $nb; | ||||||||
531 | } | ||||||||
532 | } | ||||||||
533 | |||||||||
534 | unless(defined $E_slash) { | ||||||||
535 | my $x = '/'; | ||||||||
536 | encode_entities_harder($x); | ||||||||
537 | if(!defined $x or !length $x) { | ||||||||
538 | die "'/' encodes to nothing??"; | ||||||||
539 | } elsif($x eq '/') { | ||||||||
540 | # no-op | ||||||||
541 | } elsif($x =~ m{^E<[^>]+>$}s) { | ||||||||
542 | $E_slash = $x; | ||||||||
543 | } else { | ||||||||
544 | die "'/' encodes as $x?!"; | ||||||||
545 | } | ||||||||
546 | } | ||||||||
547 | |||||||||
548 | unless(defined $E_vbar) { | ||||||||
549 | my $x = '|'; | ||||||||
550 | encode_entities_harder($x); | ||||||||
551 | if(!defined $x or !length $x) { | ||||||||
552 | die "'|' encodes to nothing??"; | ||||||||
553 | } elsif($x eq '|') { | ||||||||
554 | # no-op | ||||||||
555 | } elsif($x =~ m{^E<[^>]+>$}s) { | ||||||||
556 | $E_vbar = $x; | ||||||||
557 | } else { | ||||||||
558 | die "'|' encodes as $x?!"; | ||||||||
559 | } | ||||||||
560 | } | ||||||||
561 | |||||||||
562 | # Last chance to save your sanity: stop reading now... | ||||||||
563 | |||||||||
564 | #-------------------------------------------------------------------------- | ||||||||
565 | |||||||||
566 | # TODO: make all P's go byebye once we've texticulated? | ||||||||
567 | |||||||||
568 | sub twist_tree { | ||||||||
569 | 1 | 1 | 0 | 3 | my $tree = $_[0]; | ||||
570 | |||||||||
571 | 1 | 5 | html_node_name($tree); | ||||||
572 | |||||||||
573 | 1 | 5 | delete_unknowns($tree); | ||||||
574 | |||||||||
575 | 1 | 4 | special_splice_div($tree); | ||||||
576 | |||||||||
577 | 1 | 50 | 14 | print("Input tree:\n"), $tree->dump, sleep(0) if $Debug; | |||||
578 | |||||||||
579 | 1 | 9 | prune_by_tag_name( $tree, | ||||||
580 | [qw~ script style ~], | ||||||||
581 | [qw~ map style isindex select textarea del input embed bgsound basefont ~], | ||||||||
582 | ); | ||||||||
583 | |||||||||
584 | 1 | 13 | splice_by_tag_name($tree, | ||||||
585 | [qw~ | ||||||||
586 | big small acronym sub sup multicol | ||||||||
587 | applet param object | ||||||||
588 | table tr caption col thead tbody tfoot colgroup | ||||||||
589 | noscript center font bdo fieldset ins | ||||||||
590 | form label legend button link layer object | ||||||||
591 | span abbr blink strike wbr | ||||||||
592 | frame frameset ilayer layer nolayer | ||||||||
593 | address nobr | ||||||||
594 | ~], | ||||||||
595 | ); | ||||||||
596 | |||||||||
597 | 1 | 17 | remap_tags($tree, {qw~ | ||||||
598 | td p | ||||||||
599 | th p | ||||||||
600 | em i | ||||||||
601 | strong b | ||||||||
602 | cite i | ||||||||
603 | code code | ||||||||
604 | tt code | ||||||||
605 | kbd code | ||||||||
606 | samp code | ||||||||
607 | var i | ||||||||
608 | dfn b | ||||||||
609 | listing pre | ||||||||
610 | plaintext pre | ||||||||
611 | xmp pre | ||||||||
612 | dd p | ||||||||
613 | ~}); | ||||||||
614 | # CODE for C<> | ||||||||
615 | # I for I<> | ||||||||
616 | # B for B<> | ||||||||
617 | |||||||||
618 | # TODO: Warn of cases where heading has too-complex text in it? | ||||||||
619 | |||||||||
620 | 1 | 6 | p_unnest($tree); | ||||||
621 | |||||||||
622 | 1 | 32 | pre_render($tree); | ||||||
623 | 1 | 4 | q_render($tree); | ||||||
624 | |||||||||
625 | 1 | 3 | images_render($tree); | ||||||
626 | 1 | 4 | hr_render($tree); | ||||||
627 | 1 | 3 | br_render($tree); | ||||||
628 | 1 | 4 | lists_render($tree); | ||||||
629 | #wrangle_body_children($tree); | ||||||||
630 | |||||||||
631 | 1 | 4 | literalize_text_under($tree); | ||||||
632 | |||||||||
633 | 1 | 4 | winge_about_phrasal_paradoxes($tree); | ||||||
634 | |||||||||
635 | 1 | 4 | texticulate($tree); | ||||||
636 | 1 | 3 | promote_some_secondary_children($tree); | ||||||
637 | 1 | 5 | goodify_p_elements($tree); | ||||||
638 | |||||||||
639 | 1 | 3 | render_headings($tree); # busts up the headings | ||||||
640 | |||||||||
641 | 1 | 4 | a_tweak($tree); | ||||||
642 | #bust_up($tree, qw~h1 h2 h3 h4 h5 h6 p~); | ||||||||
643 | |||||||||
644 | 1 | 4 | pod_node_name($tree); | ||||||
645 | 1 | 50 | 8 | $tree->dump, sleep(0) if $Debug > 1; | |||||
646 | 1 | 2 | return; | ||||||
647 | } | ||||||||
648 | |||||||||
649 | #========================================================================== | ||||||||
650 | # Subs below here are in no particular order. Ahwell. | ||||||||
651 | |||||||||
652 | sub a_tweak { | ||||||||
653 | |||||||||
654 | #Scratch: | ||||||||
655 | 1 | 1 | 0 | 2 | my($a_name, $parent, $grandparent, $gptag, @cl, $text); | ||||
656 | |||||||||
657 | 1 | 4 | foreach my $a ($_[0]->find_by_tag_name('a')) { | ||||||
658 | # The configuration we're after looks like this: | ||||||||
659 | # @0.0 |
||||||||
660 | # <~texticle -pod-id="~texticle_1" id="``G55"> @0.0.0 | ||||||||
661 | # @0.0.0.0 | ||||||||
662 | # NAME @0.0.0.0.0 | ||||||||
663 | 0 | 0 | $a_name = $a->attr('name'); | ||||||
664 | 0 | 0 | 0 | next unless defined $a_name; | |||||
665 | |||||||||
666 | 0 | 0 | 0 | $parent = $a->parent || next; | |||||
667 | 0 | 0 | 0 | next unless $parent->tag eq '~texticle'; | |||||
668 | 0 | 0 | 0 | $grandparent = $parent->parent || next; | |||||
669 | 0 | 0 | $gptag = $grandparent->tag; | ||||||
670 | 0 | 0 | 0 | 0 | next unless $gptag eq 'h1' or $gptag eq 'h2' or $gptag eq 'item'; | ||||
0 | |||||||||
671 | 0 | 0 | 0 | 0 | next unless $parent->content_list == 1 | ||||
672 | and $grandparent->content_list == 1; # only child of an only child | ||||||||
673 | 0 | 0 | @cl = $a->content_list; # with one child, a texticle | ||||||
674 | 0 | 0 | 0 | 0 | next unless @cl == 1 and ref $cl[0] and $cl[0]->tag eq '~literal'; | ||||
0 | |||||||||
675 | 0 | 0 | $text = $cl[0]->attr('text'); | ||||||
676 | 0 | 0 | 0 | next unless defined $text; | |||||
677 | 0 | 0 | $text =~ s/^\s+//s; | ||||||
678 | 0 | 0 | $text =~ s/\s+$//s; | ||||||
679 | 0 | 0 | 0 | if($a_name eq $text) { | |||||
680 | 0 | 0 | $a->replace_with_content; | ||||||
681 | 0 | 0 | 0 | print "a_tweak applies to ", $a->attr('id'), "\n" if $Debug > 1 | |||||
682 | } else { | ||||||||
683 | 0 | 0 | 0 | print "a_tweak can't apply to ", | |||||
684 | $a->attr('id'), ": [$a_name] ne [$text]\n" | ||||||||
685 | if $Debug > 1; | ||||||||
686 | # hack can't apply | ||||||||
687 | } | ||||||||
688 | } | ||||||||
689 | |||||||||
690 | 1 | 41 | return; | ||||||
691 | } | ||||||||
692 | |||||||||
693 | sub p_unnest { | ||||||||
694 | 1 | 1 | 0 | 2 | my $tree = $_[0]; | ||||
695 | # Now, p's can't nest in HTML, but once we've spliced out and remapped | ||||||||
696 | # things, we can end up with p's containing p's in our parse tree: | ||||||||
697 | #
|
||||||||
698 | # = Foo Bar Baz |
||||||||
699 | 1 | 4 | foreach my $p (reverse $tree->find_by_tag_name('p')) { | ||||||
700 | 0 | 0 | 0 | if($p->parent->tag eq 'p') { | |||||
701 | 0 | 0 | my @c = $p->detach_content; | ||||||
702 | 0 | 0 | $p->replace_with( | ||||||
703 | HTML::Element->new( 'br', | ||||||||
704 | 'id', '``G' . ++$counter), | ||||||||
705 | @c, | ||||||||
706 | HTML::Element->new( 'br', | ||||||||
707 | 'id', '``G' . ++$counter), | ||||||||
708 | ); | ||||||||
709 | } | ||||||||
710 | } | ||||||||
711 | } | ||||||||
712 | |||||||||
713 | #========================================================================== | ||||||||
714 | |||||||||
715 | sub delete_unknowns { | ||||||||
716 | 1 | 1 | 0 | 3 | my $tree = $_[0]; | ||||
717 | 1 | 3 | my $map_r = $tree->tagname_map; | ||||||
718 | 1 | 104 | delete @$map_r{keys %HTML::Tagset::isKnown}; | ||||||
719 | 1 | 8 | my($tag, $elements); | ||||||
720 | 1 | 6 | while(($tag,$elements) = each %$map_r) { | ||||||
721 | 0 | 0 | commentate($tree, join ", ", | ||||||
722 | "# Unknown \"$tag\" elements deleted: ", | ||||||||
723 | map $_->attr('id'), @$elements | ||||||||
724 | ); | ||||||||
725 | 0 | 0 | foreach my $e (@$elements) { $e->replace_with_content } | ||||||
0 | 0 | ||||||||
726 | } | ||||||||
727 | 1 | 3 | return; | ||||||
728 | } | ||||||||
729 | |||||||||
730 | #========================================================================== | ||||||||
731 | sub special_splice_div { | ||||||||
732 | 1 | 1 | 0 | 6 | foreach my $div ($_[0]->find_by_tag_name('div', 'iframe')) { | ||||
733 | 0 | 0 | $div->replace_with( | ||||||
734 | HTML::Element->new( 'br', | ||||||||
735 | 'id', '``G' . ++$counter), | ||||||||
736 | $div->content_list(), | ||||||||
737 | HTML::Element->new( 'br', | ||||||||
738 | 'id', '``G' . ++$counter), | ||||||||
739 | ); | ||||||||
740 | } | ||||||||
741 | 1 | 36 | return; | ||||||
742 | } | ||||||||
743 | |||||||||
744 | #========================================================================== | ||||||||
745 | |||||||||
746 | sub winge_about_phrasal_paradoxes { | ||||||||
747 | 1 | 1 | 0 | 3 | my $tree = $_[0]; | ||||
748 | 1 | 1 | my @non_phrasal_children; | ||||||
749 | 1 | 12 | foreach my $p (reverse $tree->find_by_tag_name(keys %Phrasal)) { | ||||||
750 | 1 | 238 | @non_phrasal_children = (); | ||||||
751 | 1 | 5 | foreach my $c ($p->content_list) { | ||||||
752 | 0 | 0 | 0 | 0 | push @non_phrasal_children, $c | ||||
753 | if ref $c and not $Phrasal{$c->tag}; | ||||||||
754 | } | ||||||||
755 | 1 | 50 | 56 | if(@non_phrasal_children) { | |||||
756 | 0 | 0 | my $tag = $p->tag; | ||||||
757 | 0 | 0 | commentate( $tree, | ||||||
758 | join '', | ||||||||
759 | " Deleting phrasal \"$tag\" element (", | ||||||||
760 | $p->attr('id'), | ||||||||
761 | ") because it has super-phrasal elements (", | ||||||||
762 | join(", ", | ||||||||
763 | map $_->attr('id'), @non_phrasal_children | ||||||||
764 | ), ") as children.", | ||||||||
765 | ) | ||||||||
766 | ; | ||||||||
767 | 0 | 0 | $p->replace_with_content; | ||||||
768 | } | ||||||||
769 | } | ||||||||
770 | 1 | 5 | return; | ||||||
771 | } | ||||||||
772 | |||||||||
773 | #========================================================================== | ||||||||
774 | |||||||||
775 | sub commentate { | ||||||||
776 | 0 | 0 | 0 | 0 | my $tree = shift; | ||||
777 | 0 | 0 | push @{ $tree->attr('_pod_comments') }, @_; | ||||||
0 | 0 | ||||||||
778 | 0 | 0 | return; | ||||||
779 | } | ||||||||
780 | |||||||||
781 | #========================================================================== | ||||||||
782 | |||||||||
783 | sub html_node_name { | ||||||||
784 | 1 | 1 | 0 | 8 | my $map_r = $_[0]->tagname_map; | ||||
785 | |||||||||
786 | 1 | 28 | my($name, $nodes); | ||||||
787 | 1 | 11 | while(($name, $nodes) = each %$map_r) { | ||||||
788 | 2 | 34 | my $counter = 0; | ||||||
789 | 2 | 5 | foreach my $node (@$nodes) { | ||||||
790 | 2 | 3 | ++$counter; | ||||||
791 | 2 | 33 | 14 | $node->attr('id', | |||||
792 | $node->attr('id') || ( '`' . $name . '_' . $counter ) | ||||||||
793 | ) | ||||||||
794 | ; | ||||||||
795 | } | ||||||||
796 | } | ||||||||
797 | |||||||||
798 | 1 | 27 | return; | ||||||
799 | } | ||||||||
800 | |||||||||
801 | sub pod_node_name { | ||||||||
802 | 1 | 1 | 0 | 4 | my $map_r = $_[0]->tagname_map; | ||||
803 | |||||||||
804 | 1 | 34 | my($name, $nodes); | ||||||
805 | 1 | 6 | while(($name, $nodes) = each %$map_r) { | ||||||
806 | 4 | 46 | my $counter = 0; | ||||||
807 | 4 | 7 | foreach my $node (@$nodes) { | ||||||
808 | 4 | 4 | ++$counter; | ||||||
809 | 4 | 21 | $node->attr('-pod-id', | ||||||
810 | $name . '_' . $counter | ||||||||
811 | ) | ||||||||
812 | ; | ||||||||
813 | } | ||||||||
814 | } | ||||||||
815 | |||||||||
816 | 1 | 16 | return; | ||||||
817 | } | ||||||||
818 | |||||||||
819 | #========================================================================== | ||||||||
820 | |||||||||
821 | sub render_headings { | ||||||||
822 | 1 | 1 | 0 | 2 | my $tree = $_[0]; | ||||
823 | 1 | 4 | my $map_r = $tree->tagname_map; | ||||||
824 | 1 | 47 | my @levels = sort grep m/^h[1-9]+$/s, keys %$map_r; | ||||||
825 | 1 | 3 | my @headings; | ||||||
826 | |||||||||
827 | 1 | 50 | 4 | if(@levels == 0) { # no headings!?! | |||||
828 | # TODO: insert something? | ||||||||
829 | } else { | ||||||||
830 | 1 | 50 | 4 | print "# Highest heading level: $levels[0] Making that =head1\n" | |||||
831 | if $Debug; | ||||||||
832 | 1 | 2 | foreach my $h (@{$map_r->{shift @levels}}) { | ||||||
1 | 3 | ||||||||
833 | 1 | 1 | push @headings, $h; | ||||||
834 | 1 | 4 | $h->attr('was-tag', $h->tag); | ||||||
835 | 1 | 26 | $h->attr('_tag', 'h1'); | ||||||
836 | } | ||||||||
837 | # And, for any sub-primary levels... | ||||||||
838 | 1 | 0 | 33 | 15 | print "# Lower levels: @levels. Making those =head2\n" | ||||
839 | if @levels and $Debug; | ||||||||
840 | 1 | 3 | foreach my $h (map @{$map_r->{$_}}, @levels) { | ||||||
0 | 0 | ||||||||
841 | 0 | 0 | push @headings, $h; | ||||||
842 | 0 | 0 | $h->attr('was-tag', $h->tag); | ||||||
843 | 0 | 0 | $h->attr('_tag', 'h2'); | ||||||
844 | } | ||||||||
845 | } | ||||||||
846 | |||||||||
847 | 1 | 3 | foreach my $h (@headings) { | ||||||
848 | 1 | 50 | 9 | if($h->parent->is_inside('h1', 'h2')) { | |||||
849 | # Don't put headings inside other headings. It's just stupid. | ||||||||
850 | 0 | 0 | $h->replace_with_content; | ||||||
851 | 0 | 0 | undef($h); | ||||||
852 | } | ||||||||
853 | } | ||||||||
854 | |||||||||
855 | 1 | 33 | foreach my $h (grep defined($_), @headings) { | ||||||
856 | 1 | 4 | my @c = $h->content_list; | ||||||
857 | 1 | 50 | 10 | if(!@c) { | |||||
50 | |||||||||
858 | 0 | 0 | $h->delete; | ||||||
859 | } elsif($c[0]->tag ne '~texticle') { | ||||||||
860 | 0 | 0 | $h->replace_with_content; | ||||||
861 | # Don't have things other than texticles in headings | ||||||||
862 | } else { | ||||||||
863 | 1 | 50 | 10 | if(@c > 1) { | |||||
864 | # promote all but the first element | ||||||||
865 | 0 | 0 | $h->detach_content; | ||||||
866 | 0 | 0 | $h->push_content(shift @c); | ||||||
867 | 0 | 0 | $h->postinsert(@c); | ||||||
868 | # SHOULD HAVE HAPPENED ANYWAY. | ||||||||
869 | } | ||||||||
870 | # else @c is just one element, a texticle -- which is ideal. | ||||||||
871 | 1 | 50 | 3 | commentate($tree, | |||||
872 | "# Icky: heading " . $h->attr('id') | ||||||||
873 | . " not immediately under body." | ||||||||
874 | ) unless $h->parent eq $tree; | ||||||||
875 | } | ||||||||
876 | } | ||||||||
877 | |||||||||
878 | 1 | 13 | return; | ||||||
879 | } | ||||||||
880 | |||||||||
881 | #-------------------------------------------------------------------------- | ||||||||
882 | |||||||||
883 | sub goodify_p_elements { | ||||||||
884 | 1 | 1 | 0 | 5 | foreach my $x ($_[0], $_[0]->find_by_tag_name('over', 'item')) { | ||||
885 | 1 | 44 | my $dirty; | ||||||
886 | 1 | 4 | my @children = $x->content_list; | ||||||
887 | |||||||||
888 | 1 | 9 | for(my $i = 0; $i < @children; ++$i) { | ||||||
889 | 1 | 50 | 3 | if($children[$i]->tag eq 'p') { | |||||
890 | 0 | 0 | my $p = $children[$i]; | ||||||
891 | 0 | 0 | my @p_content = $p->detach_content; | ||||||
892 | 0 | 0 | $p->delete; | ||||||
893 | 0 | 0 | $dirty = 1; | ||||||
894 | |||||||||
895 | # Replace the p in the list with its content, and update $i: | ||||||||
896 | 0 | 0 | splice @children, $i, 1, @p_content; | ||||||
897 | 0 | 0 | $i += scalar(@p_content) - 1; | ||||||
898 | # Properly, | ||||||||
899 | # Leaves $i alone if @p_content == 1. | ||||||||
900 | # Decrements $i if @p_content == 0. | ||||||||
901 | # Adds to $i appropriately for other sizes of @p_content. | ||||||||
902 | } | ||||||||
903 | } | ||||||||
904 | |||||||||
905 | 1 | 50 | 12 | if($dirty) { | |||||
906 | 0 | 0 | $x->detach_content; | ||||||
907 | 0 | 0 | $x->push_content(@children); | ||||||
908 | } | ||||||||
909 | } | ||||||||
910 | |||||||||
911 | 1 | 2 | my @c; | ||||||
912 | # /Try/ to delete all p's | ||||||||
913 | 1 | 4 | foreach my $p ($_[0]->find_by_tag_name('p')) { | ||||||
914 | 0 | 0 | @c = $p->content_list; | ||||||
915 | 0 | 0 | 0 | if(!@c) { | |||||
0 | |||||||||
916 | 0 | 0 | $p->delete; # always right? | ||||||
917 | |||||||||
918 | 0 | 0 | } elsif(@c == grep {; $_->tag eq '~texticle'} @c) { | ||||||
919 | #all texticles! | ||||||||
920 | 0 | 0 | $p->replace_with_content; | ||||||
921 | } else { | ||||||||
922 | 0 | 0 | |||||||
923 | "# Odd: content of p (", | ||||||||
924 | $p->attr('id'), | ||||||||
925 | ") is not all texticles: [", | ||||||||
926 | join(' ', map $_->tag, @c), "]\n" | ||||||||
927 | ; | ||||||||
928 | # Shouldn't happen, I think. | ||||||||
929 | } | ||||||||
930 | } | ||||||||
931 | |||||||||
932 | 1 | 40 | return; | ||||||
933 | } | ||||||||
934 | |||||||||
935 | #-------------------------------------------------------------------------- | ||||||||
936 | |||||||||
937 | sub promote_some_secondary_children { | ||||||||
938 | 1 | 1 | 0 | 9 | foreach my $x (reverse($_[0]->find_by_tag_name('item', 'h1' .. 'h6'))) { | ||||
939 | 1 | 66 | my @c = $x->content_list; | ||||||
940 | 1 | 50 | 20 | if(@c > 1) { | |||||
941 | # Take all children after the first, and move them up to | ||||||||
942 | # being right sisters of this node. | ||||||||
943 | 0 | 0 | 0 | ||||||
944 | "# Promote_some_secondary_children applies to ", | ||||||||
945 | $x->attr('id'), | ||||||||
946 | ": (", | ||||||||
947 | join(", ", map $_->attr('id'), @c), ")\n" if $Debug; | ||||||||
948 | 0 | 0 | $x->detach_content; | ||||||
949 | 0 | 0 | $x->push_content(shift @c); | ||||||
950 | 0 | 0 | $x->postinsert(@c); | ||||||
951 | #print "Done\n" if $Debug; | ||||||||
952 | } | ||||||||
953 | } | ||||||||
954 | #print "Returning\n" if $Debug; | ||||||||
955 | 1 | 23 | return; | ||||||
956 | } | ||||||||
957 | |||||||||
958 | sub literalize_text_under { | ||||||||
959 | # Traverse tree, turning text segments into ~literal pseudoelements | ||||||||
960 | 2 | 2 | 0 | 4 | my $node = $_[0]; | ||||
961 | 2 | 4 | my(@children, $dirty); | ||||||
962 | 2 | 9 | foreach my $c (@children = $node->content_list) { | ||||||
963 | 2 | 100 | 16 | if(ref $c) { | |||||
964 | 1 | 14 | literalize_text_under($c); | ||||||
965 | } else { | ||||||||
966 | 1 | 2 | $dirty = 1; | ||||||
967 | 1 | 15 | $c = HTML::Element->new('~literal', 'text' => $c, | ||||||
968 | 'id', '``G' . ++$counter); | ||||||||
969 | } | ||||||||
970 | } | ||||||||
971 | 2 | 100 | 49 | if($dirty) { | |||||
972 | 1 | 5 | $node->detach_content; | ||||||
973 | 1 | 13 | $node->push_content(@children); | ||||||
974 | } | ||||||||
975 | 2 | 18 | return; | ||||||
976 | } | ||||||||
977 | |||||||||
978 | #-------------------------------------------------------------------------- | ||||||||
979 | |||||||||
980 | sub texticulate { | ||||||||
981 | # group ~literals and phrasals into texticles | ||||||||
982 | # -- maximally high-and-merged phrasal/text groups | ||||||||
983 | 3 | 3 | 0 | 5 | my $node = $_[0]; | ||||
984 | 3 | 4 | my $dirty; | ||||||
985 | 3 | 9 | my(@children) = $node->content_list; | ||||||
986 | |||||||||
987 | #foreach my $c (@children) { | ||||||||
988 | # texticulate($c); | ||||||||
989 | #} | ||||||||
990 | |||||||||
991 | #print "Applying to $node = ", $node->tag, "\n"; | ||||||||
992 | |||||||||
993 | 3 | 100 | 19 | if(! $Phrasal{$node->tag}) { | |||||
994 | # Only non-phrasals can have texticles as children! | ||||||||
995 | 2 | 14 | my $last_tag; | ||||||
996 | 2 | 8 | for(my $i = 0; $i < @children; $i++) { | ||||||
997 | 2 | 9 | texticulate($children[$i]); # RECURSE! | ||||||
998 | 2 | 100 | 18 | next unless $Phrasal{$children[$i]->tag}; | |||||
999 | |||||||||
1000 | 1 | 50 | 33 | 18 | if($i == 0 | ||||
0 | |||||||||
1001 | or | ||||||||
1002 | !$Phrasal{ | ||||||||
1003 | $last_tag = $children[$i - 1]->tag | ||||||||
1004 | } | ||||||||
1005 | ) { | ||||||||
1006 | # start a new texticle group | ||||||||
1007 | 1 | 2 | $dirty = 1; | ||||||
1008 | 1 | 2 | my $old = $children[$i]; | ||||||
1009 | 1 | 7 | $children[$i] = HTML::Element->new('~texticle', | ||||||
1010 | 'id', '``G' . ++$counter); | ||||||||
1011 | 1 | 59 | $children[$i]->push_content($old); # and demote the phrasal to under it | ||||||
1012 | } elsif($last_tag eq '~texticle') { | ||||||||
1013 | # move this under preceding texticle | ||||||||
1014 | 0 | 0 | $dirty = 1; | ||||||
1015 | 0 | 0 | $children[$i - 1]->push_content( splice @children, $i, 1 ); | ||||||
1016 | 0 | 0 | --$i; | ||||||
1017 | } else { | ||||||||
1018 | 0 | 0 | die "SPORK 1231233312!"; | ||||||
1019 | } | ||||||||
1020 | } | ||||||||
1021 | |||||||||
1022 | #if(0) { | ||||||||
1023 | # foreach my $c (@children) { | ||||||||
1024 | # # Now fold the texticular content up | ||||||||
1025 | # if($c->tag eq '~texticle') { | ||||||||
1026 | # $c->attr('~folded' => [$c->detach_content]); | ||||||||
1027 | # } | ||||||||
1028 | # } | ||||||||
1029 | #} | ||||||||
1030 | } | ||||||||
1031 | |||||||||
1032 | # Now delete all br's! | ||||||||
1033 | # (Would it be better to delete BRs only adjacent to a texticle?) | ||||||||
1034 | 3 | 49 | for(my $i = 0; $i < @children; $i++) { | ||||||
1035 | 2 | 50 | 7 | if($children[$i]->tag eq 'br') { | |||||
1036 | 0 | 0 | splice @children, $i, 1; | ||||||
1037 | 0 | 0 | --$i; | ||||||
1038 | 0 | 0 | $dirty = 1; | ||||||
1039 | } | ||||||||
1040 | } | ||||||||
1041 | # So, the only purpose/effect of BRs is that they serve as barriers | ||||||||
1042 | # to unifying adjacent phrasal elements under a common texticle. | ||||||||
1043 | # Once we've unified things, we just delete them from the tree. | ||||||||
1044 | |||||||||
1045 | 3 | 100 | 23 | if($dirty) { | |||||
1046 | 1 | 3 | $node->detach_content; | ||||||
1047 | 1 | 8 | $node->push_content(@children); | ||||||
1048 | } | ||||||||
1049 | } | ||||||||
1050 | |||||||||
1051 | #========================================================================== | ||||||||
1052 | |||||||||
1053 | sub remap_tags { | ||||||||
1054 | 1 | 1 | 0 | 3 | my($tree, $hr) = @_; | ||||
1055 | 1 | 50 | 33 | 11 | die unless $hr and ref($hr) eq 'HASH'; | ||||
1056 | 1 | 133 | my($recursor, $tag); | ||||||
1057 | $recursor = sub { | ||||||||
1058 | 2 | 2 | 10 | foreach my $c ($_[0]->content_list) { | |||||
1059 | 2 | 100 | 67 | if(ref $c) { | |||||
1060 | 1 | 50 | 33 | 4 | if(($tag = $c->tag) and defined $tag and exists $hr->{$tag}) { | ||||
33 | |||||||||
1061 | 0 | 0 | $c->attr('_tag', $hr->{$tag}); | ||||||
1062 | } | ||||||||
1063 | 1 | 19 | $recursor->($c); # recurse! | ||||||
1064 | } | ||||||||
1065 | } | ||||||||
1066 | 2 | 31 | return; | ||||||
1067 | 1 | 10 | }; | ||||||
1068 | |||||||||
1069 | 1 | 3 | $recursor->($tree); # Run the recursion. | ||||||
1070 | |||||||||
1071 | 1 | 2 | undef $recursor; # So the lambda's refcount can hit 0, and can GC. | ||||||
1072 | 1 | 6 | return; | ||||||
1073 | } | ||||||||
1074 | |||||||||
1075 | #-------------------------------------------------------------------------- | ||||||||
1076 | |||||||||
1077 | sub wrangle_body_children { | ||||||||
1078 | 0 | 0 | 0 | 0 | my $tree = $_[0]; | ||||
1079 | 0 | 0 | my @children = $tree->content_list; | ||||||
1080 | 0 | 0 | my $dirty = 0; | ||||||
1081 | |||||||||
1082 | 0 | 0 | my $c; | ||||||
1083 | 0 | 0 | $tree->normalize_content; # NB: doesn't recurse | ||||||
1084 | |||||||||
1085 | 0 | 0 | for(my $i = 0; $i < @children; ++$i) { | ||||||
1086 | 0 | 0 | my $c = $children[$i]; | ||||||
1087 | 0 | 0 | 0 | if(!ref($c)) { | |||||
1088 | # put under a new p | ||||||||
1089 | 0 | 0 | $dirty = 1; | ||||||
1090 | ( | ||||||||
1091 | 0 | 0 | $children[$i] = HTML::Element->new('p', 'superimplicit' => 1, | ||||||
1092 | 'id', '``G' . ++$counter | ||||||||
1093 | ) | ||||||||
1094 | )->push_content($c); | ||||||||
1095 | #} elsif($c->tag eq 'hr') { | ||||||||
1096 | # # do anything special? | ||||||||
1097 | } | ||||||||
1098 | } | ||||||||
1099 | |||||||||
1100 | 0 | 0 | 0 | if($dirty) { | |||||
1101 | 0 | 0 | $tree->detach_content; | ||||||
1102 | 0 | 0 | $tree->push_content(@children); | ||||||
1103 | } | ||||||||
1104 | |||||||||
1105 | 0 | 0 | return; | ||||||
1106 | } | ||||||||
1107 | |||||||||
1108 | #-------------------------------------------------------------------------- | ||||||||
1109 | |||||||||
1110 | sub lists_render { # Recursive. | ||||||||
1111 | 2 | 2 | 0 | 19 | my $node = $_[0]; | ||||
1112 | 2 | 4 | my $tag; | ||||||
1113 | 2 | 50 | 33 | 6 | if(($tag = $node->tag) eq 'ul' or $tag eq 'menu') { | ||||
50 | |||||||||
50 | |||||||||
50 | |||||||||
1114 | 0 | 0 | $node->attr('was-tag', $tag); | ||||||
1115 | 0 | 0 | $node->attr('_tag', 'over'); | ||||||
1116 | 0 | 0 | foreach my $c ($node->content_list) { | ||||||
1117 | 0 | 0 | 0 | 0 | next unless ref($c) and $c->tag eq 'li'; | ||||
1118 | 0 | 0 | $c->attr('_tag', 'item'); | ||||||
1119 | 0 | 0 | $c->unshift_content('* '); | ||||||
1120 | # TODO: support bullet types other than this? | ||||||||
1121 | } | ||||||||
1122 | |||||||||
1123 | } elsif($tag eq 'ol') { | ||||||||
1124 | 0 | 0 | $node->attr('was-tag', $tag); | ||||||
1125 | 0 | 0 | $node->attr('_tag', 'over'); | ||||||
1126 | 0 | 0 | my $x = 0; | ||||||
1127 | 0 | 0 | foreach my $c ($node->content_list) { | ||||||
1128 | 0 | 0 | 0 | 0 | next unless ref($c) and $c->tag eq 'li'; | ||||
1129 | 0 | 0 | $c->attr('_tag', 'item'); | ||||||
1130 | 0 | 0 | $c->unshift_content(++$x . '. '); | ||||||
1131 | # TODO: support number styles other than this? | ||||||||
1132 | } | ||||||||
1133 | |||||||||
1134 | } elsif($tag eq 'dl') { | ||||||||
1135 | 0 | 0 | $node->attr('was-tag', $tag); | ||||||
1136 | 0 | 0 | $node->attr('_tag', 'over'); | ||||||
1137 | 0 | 0 | my $tag; | ||||||
1138 | 0 | 0 | foreach my $c ($node->content_list) { | ||||||
1139 | 0 | 0 | 0 | next unless ref($c); | |||||
1140 | 0 | 0 | 0 | if(($tag = $c->tag) eq 'dt') { | |||||
0 | |||||||||
1141 | 0 | 0 | $c->attr('was-tag', $tag); | ||||||
1142 | 0 | 0 | $c->attr('_tag', 'item'); | ||||||
1143 | } elsif($tag eq 'dd') { | ||||||||
1144 | 0 | 0 | $c->attr('was-tag', $tag); | ||||||
1145 | 0 | 0 | $c->attr('_tag', 'item'); | ||||||
1146 | # Altho really, earlier on, we will have turned all dd's into p's! | ||||||||
1147 | # This code is here just in case we decide that that wasn't | ||||||||
1148 | # such a hot idea. | ||||||||
1149 | # Instead of turning dd's into items, consider replacing with | ||||||||
1150 | # content, with a br on each side? Or too late for that? | ||||||||
1151 | } | ||||||||
1152 | # else just moooove along | ||||||||
1153 | } | ||||||||
1154 | |||||||||
1155 | } elsif($tag eq 'blockquote') { # not really a list, but hey. | ||||||||
1156 | 0 | 0 | $node->attr('was-tag', $tag); | ||||||
1157 | 0 | 0 | $node->attr('_tag', 'over'); | ||||||
1158 | } | ||||||||
1159 | |||||||||
1160 | # In any case, recurse... | ||||||||
1161 | 2 | 36 | foreach my $c ($node->content_list) { | ||||||
1162 | 2 | 100 | 19 | lists_render($c) if ref $c; | |||||
1163 | } | ||||||||
1164 | } | ||||||||
1165 | |||||||||
1166 | #-------------------------------------------------------------------------- | ||||||||
1167 | |||||||||
1168 | sub br_render { | ||||||||
1169 | # render BRs. | ||||||||
1170 | |||||||||
1171 | # TODO: anything necessary? | ||||||||
1172 | |||||||||
1173 | 1 | 1 | 0 | 2 | return; | ||||
1174 | } | ||||||||
1175 | |||||||||
1176 | |||||||||
1177 | sub hr_render { | ||||||||
1178 | 1 | 1 | 0 | 2 | my $tree = $_[0]; | ||||
1179 | 1 | 2 | my $alt; | ||||||
1180 | 1 | 4 | foreach my $hr ($tree->find_by_tag_name('hr')) { | ||||||
1181 | 0 | 0 | 0 | if($hr->parent->tag eq 'body') { | |||||
1182 | # Special sauce. SPECIAL SAUCE! | ||||||||
1183 | 0 | 0 | $hr->attr('_tag', 'p'); | ||||||
1184 | 0 | 0 | $hr->attr('was-tag', 'hr'); | ||||||
1185 | 0 | 0 | $hr->push_content('----'); | ||||||
1186 | } else { | ||||||||
1187 | 0 | 0 | $hr->replace_with( | ||||||
1188 | $hr->new('br', 'was-tag' => 'hr', 'id' => '``G' . ++$counter), | ||||||||
1189 | '----', | ||||||||
1190 | $hr->new('br', 'was-tag' => 'hr', 'id' => '``G' . ++$counter), | ||||||||
1191 | ); | ||||||||
1192 | } | ||||||||
1193 | } | ||||||||
1194 | 1 | 29 | return; | ||||||
1195 | } | ||||||||
1196 | |||||||||
1197 | |||||||||
1198 | sub pre_render { | ||||||||
1199 | 1 | 1 | 0 | 2 | my $tree = $_[0]; | ||||
1200 | 1 | 4 | foreach my $p ($tree->find_by_tag_name('pre')) { | ||||||
1201 | # Delete left or right ignorable WS nodes... | ||||||||
1202 | { | ||||||||
1203 | 0 | 0 | my $left = $p->left; | ||||||
0 | 0 | ||||||||
1204 | #print "Left of $p is $left\n"; | ||||||||
1205 | 0 | 0 | 0 | 0 | if(defined $left and !ref $left and $left =~ m<^\s*$>s) { | ||||
0 | |||||||||
1206 | # all nil or WS. | ||||||||
1207 | #print "Delendum left at", $p->attr('id') || $p->address, "!\n"; | ||||||||
1208 | 0 | 0 | $p->parent->splice_content($p->pindex - 1, 1); # delete preceding WS. | ||||||
1209 | } | ||||||||
1210 | } | ||||||||
1211 | { | ||||||||
1212 | 0 | 0 | my $right = $p->right; | ||||||
0 | 0 | ||||||||
1213 | 0 | 0 | 0 | 0 | if(defined $right and !ref $right and $right =~ m<^\s*$>s) { | ||||
0 | |||||||||
1214 | # all nil or WS. | ||||||||
1215 | #print "Delendum right at", $p->attr('id') || $p->address, "!\n"; | ||||||||
1216 | 0 | 0 | $p->parent->splice_content($p->pindex + 1, 1); # delete following WS. | ||||||
1217 | } | ||||||||
1218 | } | ||||||||
1219 | |||||||||
1220 | # Now acually render, simply... | ||||||||
1221 | 0 | 0 | my $text_content = $p->as_text; | ||||||
1222 | 0 | 0 | 0 | unless($text_content =~ m/\S+/) { | |||||
1223 | 0 | 0 | $p->delete; | ||||||
1224 | 0 | 0 | next; | ||||||
1225 | } | ||||||||
1226 | |||||||||
1227 | 0 | 0 | $text_content =~ s/^\n+//s; # Kill leading newlines | ||||||
1228 | 0 | 0 | $text_content =~ s/\n+$//s; # Kill trailing newlines | ||||||
1229 | |||||||||
1230 | 0 | 0 | my $left = $p->left; | ||||||
1231 | 0 | 0 | 0 | 0 | if($left and ref($left) and $left->tag eq 'pre') { | ||||
0 | |||||||||
1232 | # prepend to the immediately preceding pre's content | ||||||||
1233 | 0 | 0 | ${ | ||||||
1234 | 0 | 0 | $left->attr('~pre_content_r') | ||||||
1235 | } .= "\n" . $text_content; | ||||||||
1236 | 0 | 0 | $p->delete; | ||||||
1237 | } else { | ||||||||
1238 | 0 | 0 | $p->delete_content; | ||||||
1239 | 0 | 0 | $p->attr('~pre_content_r', \$text_content); | ||||||
1240 | #print "Pre content [[",$text_content,"]]\n"; | ||||||||
1241 | } | ||||||||
1242 | } | ||||||||
1243 | 1 | 26 | return; | ||||||
1244 | } | ||||||||
1245 | |||||||||
1246 | sub q_render { | ||||||||
1247 | 1 | 1 | 0 | 2 | my $tree = $_[0]; | ||||
1248 | 1 | 4 | foreach my $q ($tree->find_by_tag_name('q')) { | ||||||
1249 | 0 | 0 | $q->push_content('"'); | ||||||
1250 | 0 | 0 | $q->unshift_content('"'); | ||||||
1251 | 0 | 0 | $q->replace_with_content; | ||||||
1252 | } | ||||||||
1253 | 1 | 25 | return; | ||||||
1254 | } | ||||||||
1255 | |||||||||
1256 | sub images_render { | ||||||||
1257 | 1 | 1 | 0 | 13 | my $tree = $_[0]; | ||||
1258 | 1 | 15 | foreach my $img ($tree->find_by_tag_name('img')) { | ||||||
1259 | 0 | 0 | my $alt; | ||||||
1260 | 0 | 0 | 0 | if(defined($alt = $img->attr('alt'))) { | |||||
1261 | 0 | 0 | $img->replace_with($alt); | ||||||
1262 | } else { | ||||||||
1263 | 0 | 0 | 0 | $img->replace_with( | |||||
1264 | $Debug ? | ||||||||
1265 | ('[IMAGE' . $img->attr('id') . ']') : | ||||||||
1266 | '[IMAGE]' | ||||||||
1267 | ); | ||||||||
1268 | #?? $img->delete; | ||||||||
1269 | } | ||||||||
1270 | } | ||||||||
1271 | 1 | 27 | return; | ||||||
1272 | } | ||||||||
1273 | |||||||||
1274 | #-------------------------------------------------------------------------- | ||||||||
1275 | |||||||||
1276 | sub prune_by_tag_name { | ||||||||
1277 | 1 | 1 | 0 | 2 | my($tree, @o) = @_; | ||||
1278 | 1 | 3 | foreach my $o (@o) { | ||||||
1279 | 2 | 50 | 82 | foreach my $x ($tree->find_by_tag_name(ref $o ? @$o : $o)) { | |||||
1280 | 0 | 0 | $x->delete; | ||||||
1281 | } | ||||||||
1282 | } | ||||||||
1283 | 1 | 102 | return; | ||||||
1284 | } | ||||||||
1285 | |||||||||
1286 | sub splice_by_tag_name { | ||||||||
1287 | 1 | 1 | 0 | 3 | my($tree, @o) = @_; | ||||
1288 | 1 | 3 | foreach my $o (@o) { | ||||||
1289 | 1 | 50 | 7 | foreach my $x ($tree->find_by_tag_name(ref $o ? @$o : $o)) { | |||||
1290 | 0 | 0 | $x->replace_with_content; | ||||||
1291 | } | ||||||||
1292 | } | ||||||||
1293 | 1 | 144 | return; | ||||||
1294 | } | ||||||||
1295 | |||||||||
1296 | #-------------------------------------------------------------------------- | ||||||||
1297 | sub tree_as_pod { | ||||||||
1298 | 1 | 1 | 0 | 2 | my $tree = $_[0]; | ||||
1299 | |||||||||
1300 | 1 | 2 | my @lines; | ||||||
1301 | 1 | 3 | my $comments = $tree->attr('_pod_comments'); | ||||||
1302 | |||||||||
1303 | 1 | 11 | my $bender; | ||||||
1304 | |||||||||
1305 | $bender = sub { | ||||||||
1306 | 3 | 3 | 6 | my(@post, $node); | |||||
1307 | 3 | 9 | my $tag = ($node = $_[0])->tag; | ||||||
1308 | |||||||||
1309 | 3 | 100 | 35 | if($tag eq 'body') { | |||||
50 | |||||||||
50 | |||||||||
50 | |||||||||
100 | |||||||||
50 | |||||||||
50 | |||||||||
1310 | # no-op | ||||||||
1311 | } elsif($tag eq 'pre') { | ||||||||
1312 | 0 | 0 | push @lines, ${$node->attr('~pre_content_r')}; | ||||||
0 | 0 | ||||||||
1313 | 0 | 0 | 0 | $lines[-1] =~ s/^/ /gm if $lines[-1] =~ m/^\S/m; | |||||
1314 | # bump everything over if there's any lines that start with | ||||||||
1315 | # anything non-spaceys | ||||||||
1316 | 0 | 0 | while($lines[-1] =~ s/\n\n/\n \n/) { } | ||||||
1317 | # have there be no zero-length lines. | ||||||||
1318 | } elsif($tag eq 'over') { | ||||||||
1319 | 0 | 0 | push @lines, "=over"; | ||||||
1320 | 0 | 0 | push @post, "=back"; | ||||||
1321 | } elsif($tag eq 'item') { | ||||||||
1322 | 0 | 0 | push @lines, "=item"; | ||||||
1323 | } elsif($tag eq 'h1') { | ||||||||
1324 | 1 | 13 | push @lines, "=head1"; | ||||||
1325 | } elsif($tag eq 'h2') { | ||||||||
1326 | 0 | 0 | push @lines, "=head2"; | ||||||
1327 | } elsif($tag eq '~texticle') { | ||||||||
1328 | 1 | 4 | my $text = render_texticle($tree,$node); | ||||||
1329 | 1 | 4 | $text =~ s/^\s+//s; | ||||||
1330 | 1 | 3 | $text =~ s/\s+$//s; | ||||||
1331 | 1 | 2 | $text =~ s/^=/E<61>/s; | ||||||
1332 | # So that this can't be mistaken for a directive -- on the | ||||||||
1333 | # off chance that text content starts with a '=' | ||||||||
1334 | |||||||||
1335 | #$text = "{$text}"; | ||||||||
1336 | |||||||||
1337 | 1 | 50 | 33 | 25 | if( | ||||
33 | |||||||||
33 | |||||||||
1338 | @lines and | ||||||||
1339 | $lines[-1] =~ m/^=(\w{1,10})$/s and | ||||||||
1340 | ( $1 eq 'item' or $1 eq 'head1' or $1 eq 'head2' ) | ||||||||
1341 | ) { | ||||||||
1342 | # Merge this text with the directive: | ||||||||
1343 | 1 | 3 | $text = pop(@lines) . ' ' . $text; | ||||||
1344 | } | ||||||||
1345 | |||||||||
1346 | 1 | 5 | push @lines, wrap72_dammit($text); | ||||||
1347 | 1 | 4 | $lines[-1] =~ s/\s+$//s; # Make REALLY sure there's no tailing WS | ||||||
1348 | 1 | 50 | 4 | pop @lines unless length $lines[-1]; # Sanity check. | |||||
1349 | |||||||||
1350 | 1 | 4 | return; | ||||||
1351 | # Don't recurse under texticles (because nothing should be there!) | ||||||||
1352 | } else { | ||||||||
1353 | 0 | 0 | 0 | print "unrenderable element \"$tag\" in phrasal-pass\n" if $Debug; | |||||
1354 | } | ||||||||
1355 | |||||||||
1356 | 2 | 6 | foreach my $c ($node->content_list) { | ||||||
1357 | 2 | 23 | $bender->($c); | ||||||
1358 | } | ||||||||
1359 | |||||||||
1360 | 2 | 4 | push @lines, @post; | ||||||
1361 | 2 | 5 | return; | ||||||
1362 | 1 | 7 | }; | ||||||
1363 | 1 | 3 | $bender->($tree); | ||||||
1364 | 1 | 1 | undef $bender; | ||||||
1365 | |||||||||
1366 | 1 | 50 | 33 | 24 | unshift @lines, "=pod" unless @lines and $lines[0] =~ m<^=>s; | ||||
1367 | |||||||||
1368 | 1 | 3 | push @lines, "=cut\n\n"; # get extra double-newline at end | ||||||
1369 | |||||||||
1370 | 1 | 4 | my $pod = join "\n\n", @lines; | ||||||
1371 | |||||||||
1372 | 1 | 50 | 33 | 11 | if($comments and @$comments) { | ||||
1373 | 1 | 2 | foreach my $c (@$comments) { | ||||||
1374 | 5 | 11 | $c =~ tr<\cm\cj>< >s; | ||||||
1375 | 5 | 100 | 22 | $c = "#" . $c unless $c =~ m<^\s*#>s; | |||||
1376 | } | ||||||||
1377 | 1 | 5 | $pod .= join "\n", @$comments, ''; | ||||||
1378 | } | ||||||||
1379 | |||||||||
1380 | 1 | 50 | 20 | sleep(0), print("#Start pod\n\n$pod\n"), sleep(0) if $Debug > 1; | |||||
1381 | 1 | 5 | return \$pod; | ||||||
1382 | } | ||||||||
1383 | |||||||||
1384 | #-------------------------------------------------------------------------- | ||||||||
1385 | sub render_texticle { | ||||||||
1386 | 1 | 1 | 0 | 3 | my($tree, $t) = @_; | ||||
1387 | 1 | 2 | my $text = ''; | ||||||
1388 | 1 | 2 | my $bender; | ||||||
1389 | |||||||||
1390 | 1 | 3 | my $a_name = $tree->attr('_a_name'); | ||||||
1391 | 1 | 17 | my $a_href = $tree->attr('_a_href'); | ||||||
1392 | |||||||||
1393 | 1 | 9 | my $under_l_count = 0; | ||||||
1394 | $bender = sub { | ||||||||
1395 | 2 | 2 | 7 | my $tag = (my $node = $_[0])->tag; | |||||
1396 | 2 | 11 | my $post = '>'; | ||||||
1397 | 2 | 3 | my $decr_under_l_count_post = 0; | ||||||
1398 | 2 | 100 | 7 | if($tag eq '~texticle') { | |||||
50 | |||||||||
0 | |||||||||
0 | |||||||||
0 | |||||||||
0 | |||||||||
1399 | # no-op -- just a container | ||||||||
1400 | 1 | 2 | $post = ''; | ||||||
1401 | } elsif($tag eq '~literal') { | ||||||||
1402 | 1 | 4 | my $content = $node->attr('text'); | ||||||
1403 | #print "Text from ~literal : ", $node->attr('text'), "\n"; | ||||||||
1404 | 1 | 50 | 45 | $content =~ s/\Q$nbsp/ /og if defined $nbsp; | |||||
1405 | # Kill nbsps. Why? | ||||||||
1406 | # First off, most of them are lame editor artifacts. | ||||||||
1407 | # Second off, actually treating them correctly (with S<...>) | ||||||||
1408 | # would be a real pain. | ||||||||
1409 | |||||||||
1410 | 1 | 50 | 4 | if($under_l_count) { | |||||
1411 | 0 | 0 | encode_entities_harder($content); | ||||||
1412 | } else { | ||||||||
1413 | 1 | 3 | encode_entities($content); | ||||||
1414 | } | ||||||||
1415 | #if(defined $E_slash) { | ||||||||
1416 | # # Delete at least most of the optional E |
||||||||
1417 | # while( $content =~ s{^([^<>]*)\Q$E_slash\E}{$1/}so ) {} | ||||||||
1418 | # while( $content =~ s{\Q$E_slash\E([^<>]*)$}{/$1}so ) {} | ||||||||
1419 | #} | ||||||||
1420 | #if(defined $E_vbar) { | ||||||||
1421 | # # Delete at least most of the optional E |
||||||||
1422 | # while( $content =~ s{^([^<>]*)\Q$E_vbar\E}{$1|}so ) {} | ||||||||
1423 | # while( $content =~ s{\Q$E_vbar\E([^<>]*)$}{|$1}so ) {} | ||||||||
1424 | #} | ||||||||
1425 | 1 | 50 | 4 | print "\$text is undef?" unless defined $content; | |||||
1426 | 1 | 2 | $text .= $content; | ||||||
1427 | 1 | 2 | $post = ''; | ||||||
1428 | } elsif($tag eq 'code') { | ||||||||
1429 | 0 | 0 | $text .= 'C<'; | ||||||
1430 | } elsif($tag eq 'i') { | ||||||||
1431 | 0 | 0 | $text .= 'I<'; | ||||||
1432 | } elsif($tag eq 'b') { | ||||||||
1433 | 0 | 0 | $text .= 'B<'; | ||||||
1434 | } elsif($tag eq 'a') { | ||||||||
1435 | 0 | 0 | my($name, $href); | ||||||
1436 | 0 | 0 | 0 | $name = $a_name ? $node->attr('name') : undef; | |||||
1437 | 0 | 0 | 0 | $href = $a_href ? $node->attr('href') : undef; | |||||
1438 | 0 | 0 | $post = ''; | ||||||
1439 | |||||||||
1440 | 0 | 0 | 0 | 0 | if(defined $name and length $name) { | ||||
1441 | 0 | 0 | $text .= 'X<' . $name . '>'; | ||||||
1442 | } | ||||||||
1443 | |||||||||
1444 | 0 | 0 | 0 | 0 | if(defined $href and length $href) { | ||||
1445 | 0 | 0 | encode_entities($href); | ||||||
1446 | #print "{Link text:{$href}}\n"; | ||||||||
1447 | 0 | 0 | 0 | if($href =~ s/^#//s) { | |||||
0 | |||||||||
0 | |||||||||
1448 | # internal relative href | ||||||||
1449 | 0 | 0 | $text .= 'L<'; | ||||||
1450 | 0 | 0 | $post .= "|/$href>"; | ||||||
1451 | 0 | 0 | ++$under_l_count; | ||||||
1452 | 0 | 0 | $decr_under_l_count_post = 1; | ||||||
1453 | } elsif($href =~ s/^pod://s) { | ||||||||
1454 | # Pass that thru. | ||||||||
1455 | # A back door for making straightforward pod links. | ||||||||
1456 | 0 | 0 | $text .= 'L<'; | ||||||
1457 | 0 | 0 | $post .= "|$href>"; | ||||||
1458 | 0 | 0 | ++$under_l_count; | ||||||
1459 | 0 | 0 | $decr_under_l_count_post = 1; | ||||||
1460 | } elsif($href =~ m<^[-+.a-z0-9A-Z]+\:[^:]>s) { | ||||||||
1461 | # It matches RFC 1738's idea of an absolute URL. | ||||||||
1462 | # Pass it thru: the podulator should detect that it's a URL | ||||||||
1463 | # and handle appropriately. | ||||||||
1464 | 0 | 0 | $post .= " ($href)"; | ||||||
1465 | } else { | ||||||||
1466 | # a relative link?? | ||||||||
1467 | 0 | 0 | $href = $href; | ||||||
1468 | 0 | 0 | commentate($t->root, "# Untranslatable link: \"$href\""); | ||||||
1469 | } | ||||||||
1470 | } | ||||||||
1471 | } else { | ||||||||
1472 | 0 | 0 | print "Unrenderable sub-phrasal element $tag: ignoring\n"; | ||||||
1473 | 0 | 0 | $post = ''; | ||||||
1474 | } | ||||||||
1475 | |||||||||
1476 | # Recurse! | ||||||||
1477 | 2 | 8 | foreach my $c ($node->content_list) { | ||||||
1478 | 1 | 24 | $bender->($c); | ||||||
1479 | } | ||||||||
1480 | |||||||||
1481 | # Now, post-order things: | ||||||||
1482 | |||||||||
1483 | 2 | 9 | $text .= $post; | ||||||
1484 | 2 | 50 | 6 | $under_l_count-- if $decr_under_l_count_post; | |||||
1485 | 2 | 5 | return; | ||||||
1486 | 1 | 8 | }; | ||||||
1487 | 1 | 3 | $bender->($t); | ||||||
1488 | 1 | 2 | undef $bender; | ||||||
1489 | |||||||||
1490 | 1 | 21 | $text =~ s/\s+/ /g; | ||||||
1491 | |||||||||
1492 | # A weensy bit of cleanup: | ||||||||
1493 | 1 | 3 | $text =~ s/ ?> ?$/>/s; | ||||||
1494 | 1 | 3 | $text =~ s/^((?:\w<)+) ([^>])/$1$2/; | ||||||
1495 | |||||||||
1496 | #print "{$text}\n"; | ||||||||
1497 | |||||||||
1498 | 1 | 3 | return $text; | ||||||
1499 | } | ||||||||
1500 | |||||||||
1501 | #-------------------------------------------------------------------------- | ||||||||
1502 | sub COLMAX () {72} | ||||||||
1503 | |||||||||
1504 | sub wrap72_dammit { | ||||||||
1505 | # All because Text::Wrap::wrap DIES when it hits an unwrappably | ||||||||
1506 | # large text chunk, DAMMIT. | ||||||||
1507 | |||||||||
1508 | # So this is a stupid wrapper: knows nothing about tabs or anything. | ||||||||
1509 | 1 | 1 | 0 | 3 | my $text = ''; | ||||
1510 | 1 | 2 | my $col = 0; | ||||||
1511 | 1 | 12 | foreach my $w (split /\s+/, $_[0]) { | ||||||
1512 | 2 | 50 | 7 | next unless length $w; | |||||
1513 | 2 | 50 | 8 | if(length($w) >= COLMAX) { | |||||
50 | |||||||||
1514 | # Unwrappably large chunk. | ||||||||
1515 | 0 | 0 | 0 | if($col) { | |||||
1516 | 0 | 0 | $text .= "\n$w\n"; | ||||||
1517 | } else { | ||||||||
1518 | 0 | 0 | $text .= "$w\n"; | ||||||
1519 | } | ||||||||
1520 | 0 | 0 | $col = 0; | ||||||
1521 | } elsif ((1 + $col + length $w) < COLMAX) { | ||||||||
1522 | # The word will fit on /this/ line | ||||||||
1523 | 2 | 100 | 5 | if($col) { | |||||
1524 | 1 | 4 | $text .= " $w"; | ||||||
1525 | 1 | 3 | $col += 1 + length $w; | ||||||
1526 | } else { | ||||||||
1527 | 1 | 3 | $text .= $w ; | ||||||
1528 | 1 | 2 | $col += length $w; | ||||||
1529 | } | ||||||||
1530 | } else { | ||||||||
1531 | # Start a new line | ||||||||
1532 | 0 | 0 | 0 | if($col) { | |||||
1533 | 0 | 0 | $text .= "\n$w"; | ||||||
1534 | } else { | ||||||||
1535 | 0 | 0 | $text .= $w; # never applies? | ||||||
1536 | } | ||||||||
1537 | 0 | 0 | $col = length $w; | ||||||
1538 | } | ||||||||
1539 | } | ||||||||
1540 | 1 | 4 | $text =~ s/\n+$//s; # nix and trailing newlines | ||||||
1541 | |||||||||
1542 | 1 | 3 | return $text; | ||||||
1543 | } | ||||||||
1544 | |||||||||
1545 | |||||||||
1546 | #========================================================================== | ||||||||
1547 | # Adapted from Gisle Aas's HTML::Entities::encode_entities: | ||||||||
1548 | |||||||||
1549 | sub encode_entities { | ||||||||
1550 | 1 | 1 | 0 | 4 | $_[0] =~ s/([^\n\t !-;=?-~])/$Char2ent{$1}/g; | ||||
1551 | # Encode control chars, high bit chars and '<' and '>' | ||||||||
1552 | 1 | 3 | return; | ||||||
1553 | } | ||||||||
1554 | |||||||||
1555 | sub encode_entities_harder { | ||||||||
1556 | 4 | 4 | 0 | 31 | $_[0] =~ s/([^\n\t !\#\$%\'-.0-=?-{}~])/$Char2ent{$1}/g; | ||||
1557 | # Encode control chars, high bit chars and '<', '&', '>', '"', | ||||||||
1558 | # '|', '/' | ||||||||
1559 | 4 | 8 | return; | ||||||
1560 | } | ||||||||
1561 | |||||||||
1562 | #-------------------------------------------------------------------------- | ||||||||
1563 | |||||||||
1564 | __END__ |