File Coverage

blib/lib/Pod/Simple/HTML.pm
Criterion Covered Total %
statement 288 359 80.2
branch 123 180 68.3
condition 66 121 54.5
subroutine 36 39 92.3
pod 1 31 3.2
total 514 730 70.4


line stmt bran cond sub pod time code
1             package Pod::Simple::HTML;
2 7     7   260134 use strict;
  7         16  
  7         275  
3 7     7   44 use warnings;
  7         11  
  7         322  
4 7     7   3738 use Pod::Simple::PullParser ();
  7         19  
  7         684  
5             our @ISA = ('Pod::Simple::PullParser');
6             our $VERSION = '3.48';
7             BEGIN {
8 7 50   7   38 if(defined &DEBUG) { } # no-op
    50          
9 7         40806 elsif( defined &Pod::Simple::DEBUG ) { *DEBUG = \&Pod::Simple::DEBUG }
10 0         0 else { *DEBUG = sub () {0}; }
11             }
12              
13             our $Doctype_decl ||= ''; # No. Just No. Don't even ask me for it.
14             # qq{
15             # "http://www.w3.org/TR/html4/loose.dtd">\n};
16              
17             our $Content_decl ||=
18             q{};
19              
20             our $HTML_EXTENSION;
21             $HTML_EXTENSION = '.html' unless defined $HTML_EXTENSION;
22             our $Computerese;
23             $Computerese = "" unless defined $Computerese;
24             our $LamePad;
25             $LamePad = '' unless defined $LamePad;
26              
27             our $Linearization_Limit;
28             $Linearization_Limit = 120 unless defined $Linearization_Limit;
29             # headings/items longer than that won't get an
30             our $Perldoc_URL_Prefix;
31             $Perldoc_URL_Prefix = 'https://metacpan.org/pod/'
32             unless defined $Perldoc_URL_Prefix;
33             our $Perldoc_URL_Postfix;
34             $Perldoc_URL_Postfix = ''
35             unless defined $Perldoc_URL_Postfix;
36              
37              
38             our $Man_URL_Prefix = 'https://man7.org/linux/man-pages/man';
39             our $Man_URL_Postfix = '.html';
40              
41             our $Title_Prefix;
42             $Title_Prefix = '' unless defined $Title_Prefix;
43             our $Title_Postfix;
44             $Title_Postfix = '' unless defined $Title_Postfix;
45             our %ToIndex = map {; $_ => 1 } qw(head1 head2 head3 head4 ); # item-text
46             # 'item-text' stuff in the index doesn't quite work, and may
47             # not be a good idea anyhow.
48              
49              
50             __PACKAGE__->_accessorize(
51             'perldoc_url_prefix',
52             # In turning L into http://whatever/Foo%3a%3aBar, what
53             # to put before the "Foo%3a%3aBar".
54             # (for singleton mode only?)
55             'perldoc_url_postfix',
56             # what to put after "Foo%3a%3aBar" in the URL. Normally "".
57              
58             'man_url_prefix',
59             # In turning L into http://whatever/man/1/crontab, what
60             # to put before the "1/crontab".
61             'man_url_postfix',
62             # what to put after the "1/crontab" in the URL. Normally ".html".
63              
64             'batch_mode', # whether we're in batch mode
65             'batch_mode_current_level',
66             # When in batch mode, how deep the current module is: 1 for "LWP",
67             # 2 for "LWP::Procotol", 3 for "LWP::Protocol::GHTTP", etc
68              
69             'title_prefix', 'title_postfix',
70             # What to put before and after the title in the head.
71             # Should already be &-escaped
72              
73             'html_h_level',
74              
75             'html_header_before_title',
76             'html_header_after_title',
77             'html_footer',
78             'top_anchor',
79              
80             'index', # whether to add an index at the top of each page
81             # (actually it's a table-of-contents, but we'll call it an index,
82             # out of apparently longstanding habit)
83              
84             'html_css', # URL of CSS file to point to
85             'html_javascript', # URL of Javascript file to point to
86              
87             'force_title', # should already be &-escaped
88             'default_title', # should already be &-escaped
89             );
90              
91             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
92             my @_to_accept;
93              
94             our %Tagmap = (
95             'Verbatim' => "\n",
96             '/Verbatim' => "\n",
97             'VerbatimFormatted' => "\n",
98             '/VerbatimFormatted' => "\n",
99             'VerbatimB' => "",
100             '/VerbatimB' => "",
101             'VerbatimI' => "",
102             '/VerbatimI' => "",
103             'VerbatimBI' => "",
104             '/VerbatimBI' => "",
105              
106              
107             'Data' => "\n",
108             '/Data' => "\n",
109              
110             'head1' => "\n

", # And also stick in an

111             'head2' => "\n

", # ''

112             'head3' => "\n

", # ''

113             'head4' => "\n

", # ''

114             'head5' => "\n
", # ''
115             'head6' => "\n
", # ''
116             '/head1' => "\n",
117             '/head2' => "\n",
118             '/head3' => "\n",
119             '/head4' => "\n",
120             '/head5' => "\n",
121             '/head6' => "\n",
122              
123             'X' => "",
125              
126             changes(qw(
127             Para=p
128             B=b I=i U=u
129             over-bullet=ul
130             over-number=ol
131             over-text=dl
132             over-block=blockquote
133             item-bullet=li
134             item-number=li
135             item-text=dt
136             )),
137             changes2(
138             map {; m/^([-a-z]+)/s && push @_to_accept, $1; $_ }
139             qw[
140             sample=samp
141             definition=dfn
142             keyboard=kbd
143             variable=var
144             citation=cite
145             abbreviation=abbr
146             acronym=acronym
147             subscript=sub
148             superscript=sup
149             big=big
150             small=small
151             underline=u
152             strikethrough=s
153             preformat=pre
154             teletype=tt
155             ] # no point in providing a way to get ..., I think
156             ),
157              
158             '/item-bullet' => "$LamePad\n",
159             '/item-number' => "$LamePad\n",
160             '/item-text' => "$LamePad\n",
161             'item-body' => "\n
",
162             '/item-body' => "\n",
163              
164              
165             'B' => "", '/B' => "",
166             'I' => "", '/I' => "",
167             'U' => "", '/U' => "",
168             'F' => "", '/F' => "",
169             'C' => "", '/C' => "",
170             'L' => "", # ideally never used!
171             '/L' => "",
172             );
173              
174             sub changes {
175 7 50   7 0 13 return map {; m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s
  77         428  
176             ? ( $1, => "\n<$2>", "/$1", => "\n" ) : die "Funky $_"
177             } @_;
178             }
179             sub changes2 {
180 7 50   7 0 12 return map {; m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s
  105         781  
181             ? ( $1, => "<$2>", "/$1", => "" ) : die "Funky $_"
182             } @_;
183             }
184              
185             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
186 0     0 0 0 sub go { Pod::Simple::HTML->parse_from_file(@ARGV); exit 0 }
  0         0  
187             # Just so we can run from the command line. No options.
188             # For that, use perldoc!
189             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
190              
191             sub new {
192 60     60 1 520 my $new = shift->SUPER::new(@_);
193             #$new->nix_X_codes(1);
194 60         153 $new->nbsp_for_S(1);
195 60         140 $new->accept_targets( 'html', 'HTML' );
196 60         138 $new->accept_codes('VerbatimFormatted');
197 60         149 $new->accept_codes(@_to_accept);
198 60         72 DEBUG > 2 and print STDERR "To accept: ", join(' ',@_to_accept), "\n";
199              
200 60         225 $new->perldoc_url_prefix( $Perldoc_URL_Prefix );
201 60         124 $new->perldoc_url_postfix( $Perldoc_URL_Postfix );
202 60         110 $new->man_url_prefix( $Man_URL_Prefix );
203 60         127 $new->man_url_postfix( $Man_URL_Postfix );
204 60         146 $new->title_prefix( $Title_Prefix );
205 60         123 $new->title_postfix( $Title_Postfix );
206              
207 60         167 $new->html_header_before_title(
208             qq[$Doctype_decl] </td> </tr> <tr> <td class="h" > <a name="209">209</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> ); </td> </tr> <tr> <td class="h" > <a name="210">210</a> </td> <td class="c3" > 60 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 162 </td> <td class="s"> $new->html_header_after_title( join "\n" => </td> </tr> <tr> <td class="h" > <a name="211">211</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> "",
212             $Content_decl,
213             "\n",
214             $new->version_tag_comment,
215             "\n",
216             );
217 60         155 $new->html_footer( qq[\n\n\n\n] );
218 60         116 $new->top_anchor( "\n" );
219              
220 60         2210 $new->{'Tagmap'} = {%Tagmap};
221              
222 60         300 return $new;
223             }
224              
225             sub __adjust_html_h_levels {
226 59     59   87 my ($self) = @_;
227 59         84 my $Tagmap = $self->{'Tagmap'};
228              
229 59         96 my $add = $self->html_h_level;
230 59 100       128 return unless defined $add;
231 1 50 50     5 return if ($self->{'Adjusted_html_h_levels'}||0) == $add;
232              
233 1         1 $add -= 1;
234 1         3 for (1 .. 6) {
235 6         41 $Tagmap->{"head$_"} =~ s/$_/$_ + $add/e;
  6         9  
236 6         35 $Tagmap->{"/head$_"} =~ s/$_/$_ + $add/e;
  6         10  
237             }
238             }
239              
240             sub batch_mode_page_object_init {
241 10     10 0 20 my($self, $batchconvobj, $module, $infile, $outfile, $depth) = @_;
242 10         9 DEBUG and print STDERR "Initting $self\n for $module\n",
243             " in $infile\n out $outfile\n depth $depth\n";
244 10         19 $self->batch_mode(1);
245 10         18 $self->batch_mode_current_level($depth);
246 10         15 return $self;
247             }
248              
249             sub run {
250 60     60 0 68 my $self = $_[0];
251 60 100       135 return $self->do_middle if $self->bare_output;
252             return
253 18   100     35 $self->do_beginning && $self->do_middle && $self->do_end;
254             }
255              
256             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
257              
258             sub do_beginning {
259 18     18 0 21 my $self = $_[0];
260              
261 18         20 my $title;
262              
263 18 50       35 if(defined $self->force_title) {
264 0         0 $title = $self->force_title;
265 0         0 DEBUG and print STDERR "Forcing title to be $title\n";
266             } else {
267             # Actually try looking for the title in the document:
268 18         60 $title = $self->get_short_title();
269 18 100       55 unless($self->content_seen) {
270 1         2 DEBUG and print STDERR "No content seen in search for title.\n";
271 1         5 return;
272             }
273 17         37 $self->{'Title'} = $title;
274              
275 17 100 66     57 if(defined $title and $title =~ m/\S/) {
276 14         35 $title = $self->title_prefix . esc($title) . $self->title_postfix;
277             } else {
278 3         9 $title = $self->default_title;
279 3 50       8 $title = '' unless defined $title;
280 3         5 DEBUG and print STDERR "Title defaults to $title\n";
281             }
282             }
283              
284              
285 17   50     44 my $after = $self->html_header_after_title || '';
286 17 100       50 if($self->html_css) {
287 10 50       12 my $link =
288             $self->html_css =~ m/
289             ? $self->html_css # It's a big blob of markup, let's drop it in
290             : sprintf( # It's just a URL, so let's wrap it up
291             qq[\n],
292             $self->html_css,
293             );
294 10         90 $after =~ s{()}{$link\n$1}i; # otherwise nevermind
295             }
296 17         52 $self->_add_top_anchor(\$after);
297              
298 17 100       34 if($self->html_javascript) {
299 10 50       15 my $link =
300             $self->html_javascript =~ m/
301             ? $self->html_javascript # It's a big blob of markup, let's drop it in
302             : sprintf( # It's just a URL, so let's wrap it up
303             qq[\n],
304             $self->html_javascript,
305             );
306 10         71 $after =~ s{()}{$link\n$1}i; # otherwise nevermind
307             }
308              
309 17   50     21 print {$self->{'output_fh'}}
  17         38  
310             $self->html_header_before_title || '',
311             $title, # already escaped
312             $after,
313             ;
314              
315 17         25 DEBUG and print STDERR "Returning from do_beginning...\n";
316 17         55 return 1;
317             }
318              
319             sub _add_top_anchor {
320 17     17   26 my($self, $text_r) = @_;
321 17 100 66     100 unless($$text_r and $$text_r =~ m/name=['"]___top['"]/) { # a hack
322 7   50     14 $$text_r .= $self->top_anchor || '';
323             }
324 17         24 return;
325             }
326              
327             sub version_tag_comment {
328 60     60 0 79 my $self = shift;
329             return sprintf
330             "\n",
331             esc(
332             ref($self), $self->VERSION(), $ISA[0], $ISA[0]->VERSION(),
333 60   33     1227 $], scalar(gmtime($ENV{SOURCE_DATE_EPOCH} || time)),
334             ), $self->_modnote(),
335             ;
336             }
337              
338             sub _modnote {
339 60   33 60   128 my $class = ref($_[0]) || $_[0];
340 60         1035 return join "\n " => grep m/\S/, split "\n",
341              
342             qq{
343             If you want to change this HTML document, you probably shouldn't do that
344             by changing it directly. Instead, see about changing the calling options
345             to $class, and/or subclassing $class,
346             then reconverting this document from the Pod source.
347             When in doubt, email the author of $class for advice.
348             See 'perldoc $class' for more info.
349             };
350              
351             }
352              
353             sub do_end {
354 17     17 0 20 my $self = $_[0];
355 17   50     19 print {$self->{'output_fh'}} $self->html_footer || '';
  17         41  
356 17         197 return 1;
357             }
358              
359             # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
360             # Normally this would just be a call to _do_middle_main_loop -- but we
361             # have to do some elaborate things to emit all the content and then
362             # summarize it and output it /before/ the content that it's a summary of.
363              
364             sub do_middle {
365 59     59 0 73 my $self = $_[0];
366 59 100       126 return $self->_do_middle_main_loop unless $self->index;
367              
368 10 50       27 if( $self->output_string ) {
369             # An efficiency hack
370 0         0 my $out = $self->output_string; #it's a reference to it
371 0         0 my $sneakytag = "\f\f\e\e\b\bIndex Here\e\e\b\b\f\f\n";
372 0         0 $$out .= $sneakytag;
373 0         0 $self->_do_middle_main_loop;
374 0         0 $sneakytag = quotemeta($sneakytag);
375 0         0 my $index = $self->index_as_html();
376 0 0       0 if( $$out =~ s/$sneakytag/$index/s ) {
377             # Expected case
378 0         0 DEBUG and print STDERR "Inserted ", length($index), " bytes of index HTML into $out.\n";
379             } else {
380 0         0 DEBUG and print STDERR "Odd, couldn't find where to insert the index in the output!\n";
381             # I don't think this should ever happen.
382             }
383 0         0 return 1;
384             }
385              
386 10 50       20 unless( $self->output_fh ) {
387 0         0 require Carp;
388 0         0 Carp::confess("Parser object \$p doesn't seem to have any output object! I don't know how to deal with that.");
389             }
390              
391             # If we get here, we're outputting to a FH. So we need to do some magic.
392             # Namely, divert all content to a string, which we output after the index.
393 10         14 my $fh = $self->output_fh;
394 10         15 my $content = '';
395             {
396             # Our horrible bait and switch:
397 10         10 $self->output_string( \$content );
  10         25  
398 10         23 $self->_do_middle_main_loop;
399 10         24 $self->abandon_output_string();
400 10         98 $self->output_fh($fh);
401             }
402 10         15 print $fh $self->index_as_html();
403 10         14 print $fh $content;
404              
405 10         35 return 1;
406             }
407              
408             ###########################################################################
409              
410             sub index_as_html {
411 10     10 0 11 my $self = $_[0];
412             # This is meant to be called AFTER the input document has been parsed!
413              
414 10   50     32 my $points = $self->{'PSHTML_index_points'} || [];
415              
416 10 100       19 @$points > 1 or return qq[
\n];
417             # There's no point in having a 0-item or 1-item index, I dare say.
418              
419 8         18 my(@out) = qq{\n
};
420 8         8 my $level = 0;
421              
422 8         11 my( $target_level, $previous_tagname, $tagname, $text, $anchorname, $indent);
423 8         20 foreach my $p (@$points, ['head0', '(end)']) {
424 26         38 ($tagname, $text) = @$p;
425 26         35 $anchorname = $self->section_escape($text);
426 26 50       70 if( $tagname =~ m{^head(\d+)$} ) {
427 26         38 $target_level = 0 + $1;
428             } else { # must be some kinda list item
429 0 0       0 if($previous_tagname =~ m{^head\d+$} ) {
430 0         0 $target_level = $level + 1;
431             } else {
432 0         0 $target_level = $level; # no change needed
433             }
434             }
435              
436             # Get to target_level by opening or closing ULs
437 26         36 while($level > $target_level)
438 9         11 { --$level; push @out, (" " x $level) . ""; }
  9         19  
439 26         33 while($level < $target_level)
440 9         9 { ++$level; push @out, (" " x ($level-1))
  9         29  
441             . "
    "; }
442              
443 26         26 $previous_tagname = $tagname;
444 26 100       38 next unless $level;
445              
446 18         26 $indent = ' ' x $level;
447 18         25 push @out, sprintf
448             "%s
  • %s",
  • 449             $indent, $level, esc($anchorname), esc($text)
    450             ;
    451             }
    452 8         11 push @out, "\n";
    453 8         44 return join "\n", @out;
    454             }
    455              
    456             ###########################################################################
    457              
    458             sub _do_middle_main_loop {
    459 59     59   62 my $self = $_[0];
    460 59         73 my $fh = $self->{'output_fh'};
    461 59         72 my $tagmap = $self->{'Tagmap'};
    462              
    463 59         112 $self->__adjust_html_h_levels;
    464              
    465 59         101 my($token, $type, $tagname, $linkto, $linktype);
    466 59         0 my @stack;
    467 59         63 my $dont_wrap = 0;
    468              
    469 59         150 while($token = $self->get_token) {
    470              
    471             # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    472 522 100       864 if( ($type = $token->type) eq 'start' ) {
        100          
        50          
    473 203 100 100     376 if(($tagname = $token->tagname) eq 'L') {
        100          
        100          
    474 22   50     43 $linktype = $token->attr('type') || 'insane';
    475              
    476 22         43 $linkto = $self->do_link($token);
    477              
    478 22 50 33     51 if(defined $linkto and length $linkto) {
    479 22         43 esc($linkto);
    480             # (Yes, SGML-escaping applies on top of %-escaping!
    481             # But it's rarely noticeable in practice.)
    482 22         83 print $fh qq{};
    483             } else {
    484 0         0 print $fh ""; # Yes, an 'a' element with no attributes!
    485             }
    486              
    487             } elsif ($tagname eq 'item-text' or $tagname =~ m/^head\d$/s) {
    488 37   50     136 print $fh $tagmap->{$tagname} || next;
    489              
    490 37         39 my @to_unget;
    491 37         33 while(1) {
    492 85         113 push @to_unget, $self->get_token;
    493 85 100 100     151 last if $to_unget[-1]->is_end
    494             and $to_unget[-1]->tagname eq $tagname;
    495              
    496             # TODO: support for X<...>'s found in here? (maybe hack into linearize_tokens)
    497             }
    498              
    499 37         81 my $name = $self->linearize_tokens(@to_unget);
    500 37 50       80 $name = $self->do_section($name, $token) if defined $name;
    501              
    502 37         70 print $fh "
    503 37 100       94 if ($tagname =~ m/^head\d$/s) {
    504 33 100       63 print $fh "class='u'", $self->index
    505             ? " href='#___top' title='click to go to top of document'\n"
    506             : "\n";
    507             }
    508              
    509 37 50       47 if(defined $name) {
    510 37         77 my $esc = esc( $self->section_name_tidy( $name ) );
    511 37         94 print $fh qq[name="$esc"];
    512 37         36 DEBUG and print STDERR "Linearized ", scalar(@to_unget),
    513             " tokens as \"$name\".\n";
    514 31         82 push @{ $self->{'PSHTML_index_points'} }, [$tagname, $name]
    515 37 100       95 if $ToIndex{ $tagname };
    516             # Obviously, this discards all formatting codes (saving
    517             # just their content), but ahwell.
    518              
    519             } else { # ludicrously long, so nevermind
    520 0         0 DEBUG and print STDERR "Linearized ", scalar(@to_unget),
    521             " tokens, but it was too long, so nevermind.\n";
    522             }
    523 37         77 print $fh "\n>";
    524 37         75 $self->unget_token(@to_unget);
    525              
    526             } elsif ($tagname eq 'Data') {
    527 4         7 my $next = $self->get_token;
    528 4 50       15 next unless defined $next;
    529 4 50       5 unless( $next->type eq 'text' ) {
    530 0         0 $self->unget_token($next);
    531 0         0 next;
    532             }
    533 4         5 DEBUG and print STDERR " raw text ", $next->text, "\n";
    534             # The parser sometimes preserves newlines and sometimes doesn't!
    535 4         6 (my $text = $next->text) =~ s/\n\z//;
    536 4         12 print $fh $text, "\n";
    537 4         11 next;
    538              
    539             } else {
    540 140 100 33     341 if( $tagname =~ m/^over-/s ) {
        50 33        
    541 3         5 push @stack, '';
    542             } elsif( $tagname =~ m/^item-/s and @stack and $stack[-1] ) {
    543 0         0 print $fh $stack[-1];
    544 0         0 $stack[-1] = '';
    545             }
    546 140   100     483 print $fh $tagmap->{$tagname} || next;
    547 80 100 66     357 ++$dont_wrap if $tagname eq 'Verbatim' or $tagname eq "VerbatimFormatted"
          100        
    548             or $tagname eq 'X';
    549             }
    550              
    551             # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    552             } elsif( $type eq 'end' ) {
    553 203 100 66     319 if( ($tagname = $token->tagname) =~ m/^over-/s ) {
        100          
    554 3 50       8 if( my $end = pop @stack ) {
    555 3         5 print $fh $end;
    556             }
    557             } elsif( $tagname =~ m/^item-/s and @stack) {
    558 4         25 $stack[-1] = $tagmap->{"/$tagname"};
    559 4 50 33     13 if( $tagname eq 'item-text' and defined(my $next = $self->get_token) ) {
    560 4         25 $self->unget_token($next);
    561 4 100       7 if( $next->type eq 'start' ) {
    562 3         8 print $fh $tagmap->{"/item-text"},$tagmap->{"item-body"};
    563 3         7 $stack[-1] = $tagmap->{"/item-body"};
    564             }
    565             }
    566 4         9 next;
    567             }
    568 199   100     490 print $fh $tagmap->{"/$tagname"} || next;
    569 139 100 66     409 --$dont_wrap if $tagname eq 'Verbatim' or $tagname eq 'X';
    570              
    571             # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    572             } elsif( $type eq 'text' ) {
    573 116         170 esc($type = $token->text); # reuse $type, why not
    574 116 100       285 $type =~ s/([\?\!\"\'\.\,]) /$1\n/g unless $dont_wrap;
    575 116         195 print $fh $type;
    576             }
    577              
    578             }
    579 59         183 return 1;
    580             }
    581              
    582             ###########################################################################
    583             #
    584              
    585             sub do_section {
    586 36     36 0 57 my($self, $name, $token) = @_;
    587 36         46 return $name;
    588             }
    589              
    590             sub do_link {
    591 22     22 0 34 my($self, $token) = @_;
    592 22         27 my $type = $token->attr('type');
    593 22 50       52 if(!defined $type) {
        100          
        100          
        50          
    594 0         0 $self->whine("Typeless L!?", $token->attr('start_line'));
    595 10         20 } elsif( $type eq 'pod') { return $self->do_pod_link($token);
    596 9         13 } elsif( $type eq 'url') { return $self->do_url_link($token);
    597 3         8 } elsif( $type eq 'man') { return $self->do_man_link($token);
    598             } else {
    599 0         0 $self->whine("L of unknown type $type!?", $token->attr('start_line'));
    600             }
    601 0         0 return 'FNORG'; # should never get called
    602             }
    603              
    604             # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    605              
    606 9     9 0 17 sub do_url_link { return $_[1]->attr('to') }
    607              
    608             sub do_man_link {
    609 3     3 0 6 my ($self, $link) = @_;
    610 3         4 my $to = $link->attr('to');
    611 3         4 my $frag = $link->attr('section');
    612              
    613 3 50 33     10 return undef unless defined $to and length $to; # should never happen
    614              
    615 3 100 66     8 $frag = $self->section_escape($frag)
    616             if defined $frag and length($frag .= ''); # (stringify)
    617              
    618 3         4 DEBUG and print STDERR "Resolving \"$to/$frag\"\n\n";
    619              
    620 3         7 return $self->resolve_man_page_link($to, $frag);
    621             }
    622              
    623              
    624             sub do_pod_link {
    625             # And now things get really messy...
    626 10     10 0 17 my($self, $link) = @_;
    627 10         22 my $to = $link->attr('to');
    628 10         15 my $section = $link->attr('section');
    629             return undef unless( # should never happen
    630 10 50 66     67 (defined $to and length $to) or
          33        
          66        
    631             (defined $section and length $section)
    632             );
    633              
    634 10 100 66     47 $section = $self->section_escape($section)
    635             if defined $section and length($section .= ''); # (stringify)
    636              
    637 10         14 DEBUG and printf STDERR "Resolving \"%s\" \"%s\"...\n",
    638             $to || "(nil)", $section || "(nil)";
    639              
    640             {
    641             # An early hack:
    642 10         10 my $complete_url = $self->resolve_pod_link_by_table($to, $section);
      10         20  
    643 10 50       16 if( $complete_url ) {
    644 0         0 DEBUG > 1 and print STDERR "resolve_pod_link_by_table(T,S) gives ",
    645             $complete_url, "\n (Returning that.)\n";
    646 0         0 return $complete_url;
    647             } else {
    648 10         13 DEBUG > 4 and print STDERR " resolve_pod_link_by_table(T,S)",
    649             " didn't return anything interesting.\n";
    650             }
    651             }
    652              
    653 10 100 66     21 if(defined $to and length $to) {
    654             # Give this routine first hack again
    655 7         30 my $there = $self->resolve_pod_link_by_table($to);
    656 7 50 33     15 if(defined $there and length $there) {
    657 0         0 DEBUG > 1
    658             and print STDERR "resolve_pod_link_by_table(T) gives $there\n";
    659             } else {
    660 7         17 $there =
    661             $self->resolve_pod_page_link($to, $section);
    662             # (I pass it the section value, but I don't see a
    663             # particular reason it'd use it.)
    664 7         8 DEBUG > 1 and print STDERR "resolve_pod_page_link gives ", $there || "(nil)", "\n";
    665 7 50 33     23 unless( defined $there and length $there ) {
    666 0         0 DEBUG and print STDERR "Can't resolve $to\n";
    667 0         0 return undef;
    668             }
    669             # resolve_pod_page_link returning undef is how it
    670             # can signal that it gives up on making a link
    671             }
    672 7         12 $to = $there;
    673             }
    674              
    675             #DEBUG and print STDERR "So far [", $to||'nil', "] [", $section||'nil', "]\n";
    676              
    677 10 100 66     25 my $out = (defined $to and length $to) ? $to : '';
    678 10 100 66     27 $out .= "#" . $section if defined $section and length $section;
    679              
    680 10 50       15 unless(length $out) { # sanity check
    681 0         0 DEBUG and printf STDERR "Oddly, couldn't resolve \"%s\" \"%s\"...\n",
    682             $to || "(nil)", $section || "(nil)";
    683 0         0 return undef;
    684             }
    685              
    686 10         9 DEBUG and print STDERR "Resolved to $out\n";
    687 10         20 return $out;
    688             }
    689              
    690              
    691             # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
    692              
    693             sub section_escape {
    694 32     32 0 40 my($self, $section) = @_;
    695 32         57 return $self->section_url_escape(
    696             $self->section_name_tidy($section)
    697             );
    698             }
    699              
    700             sub section_name_tidy {
    701 69     69 0 83 my($self, $section) = @_;
    702 69         131 $section =~ s/^\s+//;
    703 69         115 $section =~ s/\s+$//;
    704 69         96 $section =~ tr/ /_/;
    705 69 50       221 if ("$]" >= 5.006) {
    706 69         94 $section =~ s/[[:cntrl:][:^ascii:]]//g; # drop crazy characters
    707             } elsif ('A' eq chr(65)) { # But not on early EBCDIC
    708 0         0 $section =~ tr/\x00-\x1F\x80-\x9F//d;
    709             }
    710 69         112 $section = $self->unicode_escape_url($section);
    711 69 50       111 $section = '_' unless length $section;
    712 69         132 return $section;
    713             }
    714              
    715 32     32 0 57 sub section_url_escape { shift->general_url_escape(@_) }
    716 7     7 0 12 sub pagepath_url_escape { shift->general_url_escape(@_) }
    717 3     3 0 6 sub manpage_url_escape { shift->general_url_escape(@_) }
    718              
    719             sub general_url_escape {
    720 42     42 0 49 my($self, $string) = @_;
    721              
    722 42         51 $string =~ s/([^\x00-\xFF])/join '', map sprintf('%%%02X',$_), unpack 'C*', $1/eg;
      0         0  
    723             # express Unicode things as urlencode(utf(orig)).
    724              
    725             # A pretty conservative escaping, behoovey even for query components
    726             # of a URL (see RFC 2396)
    727              
    728 42 50       91 if ("$]" >= 5.007_003) {
    729 42         60 $string =~ s/([^-_\.!~*()abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/sprintf('%%%02X',utf8::native_to_unicode(ord($1)))/eg;
      8         31  
    730             } else { # Is broken for non-ASCII platforms on early perls
    731 0         0 $string =~ s/([^-_\.!~*()abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/sprintf('%%%02X',ord($1))/eg;
      0         0  
    732             }
    733             # Yes, stipulate the list without a range, so that this can work right on
    734             # all charsets that this module happens to run under.
    735              
    736 42         72 return $string;
    737             }
    738              
    739             #--------------------------------------------------------------------------
    740             #
    741             # Oh look, a yawning portal to Hell! Let's play touch football right by it!
    742             #
    743              
    744             sub resolve_pod_page_link {
    745             # resolve_pod_page_link must return a properly escaped URL
    746 7     7 0 9 my $self = shift;
    747 7 50       22 return $self->batch_mode()
    748             ? $self->resolve_pod_page_link_batch_mode(@_)
    749             : $self->resolve_pod_page_link_singleton_mode(@_)
    750             ;
    751             }
    752              
    753             sub resolve_pod_page_link_singleton_mode {
    754 7     7 0 10 my($self, $it) = @_;
    755 7 50 33     16 return undef unless defined $it and length $it;
    756 7         15 my $url = $self->pagepath_url_escape($it);
    757              
    758 7         10 $url =~ s{::$}{}s; # probably never comes up anyway
    759 7 50       14 $url =~ s{::}{/}g unless $self->perldoc_url_prefix =~ m/\?/s; # sane DWIM?
    760              
    761 7 50       14 return undef unless length $url;
    762 7         17 return $self->perldoc_url_prefix . $url . $self->perldoc_url_postfix;
    763             }
    764              
    765             sub resolve_pod_page_link_batch_mode {
    766 0     0 0 0 my($self, $to) = @_;
    767 0         0 DEBUG > 1 and print STDERR " During batch mode, resolving $to ...\n";
    768 0         0 my @path = grep length($_), split m/::/s, $to, -1;
    769 0 0       0 unless( @path ) { # sanity
    770 0         0 DEBUG and print STDERR "Very odd! Splitting $to gives (nil)!\n";
    771 0         0 return undef;
    772             }
    773 0         0 $self->batch_mode_rectify_path(\@path);
    774 0         0 my $out = join('/', map $self->pagepath_url_escape($_), @path)
    775             . $HTML_EXTENSION;
    776 0         0 DEBUG > 1 and print STDERR " => $out\n";
    777 0         0 return $out;
    778             }
    779              
    780             sub batch_mode_rectify_path {
    781 0     0 0 0 my($self, $pathbits) = @_;
    782 0         0 my $level = $self->batch_mode_current_level;
    783 0         0 $level--; # how many levels up to go to get to the root
    784 0 0       0 if($level < 1) {
    785 0         0 unshift @$pathbits, '.'; # just to be pretty
    786             } else {
    787 0         0 unshift @$pathbits, ('..') x $level;
    788             }
    789 0         0 return;
    790             }
    791              
    792             sub resolve_man_page_link {
    793 3     3 0 7 my ($self, $to, $frag) = @_;
    794 3         4 my ($page, $section) = $to =~ /^([^(]+)(?:[(](\d+)[)])?$/;
    795              
    796 3 50 33     11 return undef unless defined $page and length $page;
    797 3   50     6 $section ||= 1;
    798              
    799 3 100       7 return $self->man_url_prefix . "$section/"
    800             . $self->manpage_url_escape($page) . ".$section"
    801             . $self->man_url_postfix
    802             . (defined $frag ? '#' . $frag : '');
    803             }
    804              
    805             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    806              
    807             sub resolve_pod_link_by_table {
    808             # A crazy hack to allow specifying custom L => URL mappings
    809              
    810 17 50   17 0 33 return unless $_[0]->{'podhtml_LOT'}; # An optimizy shortcut
    811              
    812 0         0 my($self, $to, $section) = @_;
    813              
    814             # TODO: add a method that actually populates podhtml_LOT from a file?
    815              
    816 0 0       0 if(defined $section) {
    817 0 0 0     0 $to = '' unless defined $to and length $to;
    818 0         0 return $self->{'podhtml_LOT'}{"$to#$section"}; # quite possibly undef!
    819             } else {
    820 0         0 return $self->{'podhtml_LOT'}{$to}; # quite possibly undef!
    821             }
    822 0         0 return;
    823             }
    824              
    825             ###########################################################################
    826              
    827             sub linearize_tokens { # self, tokens
    828 37     37 0 37 my $self = shift;
    829 37         43 my $out = '';
    830              
    831 37         33 my $t;
    832 37         79 while($t = shift @_) {
    833 79 50 33     281 if(!ref $t or !UNIVERSAL::can($t, 'is_text')) {
        100 66        
        100          
    834 0         0 $out .= $t; # a string, or some insane thing
    835             } elsif($t->is_text) {
    836 39         61 $out .= $t->text;
    837             } elsif($t->is_start and $t->tag eq 'X') {
    838             # Ignore until the end of this X<...> sequence:
    839 3         4 my $x_open = 1;
    840 3         4 while($x_open) {
    841 6 100       11 next if( ($t = shift @_)->is_text );
    842 3 50 33     4 if( $t->is_start and $t->tag eq 'X') { ++$x_open }
      0 50 33     0  
    843 3         7 elsif($t->is_end and $t->tag eq 'X') { --$x_open }
    844             }
    845             }
    846             }
    847 37 50       89 return undef if length $out > $Linearization_Limit;
    848 37         59 return $out;
    849             }
    850              
    851             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    852              
    853             sub unicode_escape_url {
    854 69     69 0 90 my($self, $string) = @_;
    855 69         83 $string =~ s/([^\x00-\xFF])/'('.ord($1).')'/eg;
      0         0  
    856             # Turn char 1234 into "(1234)"
    857 69         95 return $string;
    858             }
    859              
    860             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    861             sub esc { # a function.
    862 464 100   464 0 686 if(defined wantarray) {
    863 326 100       345 if(wantarray) {
    864 275         547 @_ = splice @_; # break aliasing
    865             } else {
    866 51         71 my $x = shift;
    867 51 50       142 if ("$]" >= 5.007_003) {
    868 51         73 $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(utf8::native_to_unicode(ord($1))).';'/eg;
      0         0  
    869             } else { # Is broken for non-ASCII platforms on early perls
    870 0         0 $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg;
      0         0  
    871             }
    872 51         114 return $x;
    873             }
    874             }
    875 413         506 foreach my $x (@_) {
    876             # Escape things very cautiously:
    877 1101 50       1254 if (defined $x) {
    878 1101 50       1815 if ("$]" >= 5.007_003) {
    879 1101         1505 $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(utf8::native_to_unicode(ord($1))).';'/eg
      52         133  
    880             } else { # Is broken for non-ASCII platforms on early perls
    881 0         0 $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg
      0         0  
    882             }
    883             }
    884             # Leave out "- so that "--" won't make it thru in X-generated comments
    885             # with text in them.
    886              
    887             # Yes, stipulate the list without a range, so that this can work right on
    888             # all charsets that this module happens to run under.
    889             }
    890 413         894 return @_;
    891             }
    892              
    893             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    894              
    895             1;
    896             __END__