File Coverage

blib/lib/Pod/Simple/HTML.pm
Criterion Covered Total %
statement 288 359 80.2
branch 121 178 67.9
condition 66 121 54.5
subroutine 36 39 92.3
pod 1 31 3.2
total 512 728 70.3


line stmt bran cond sub pod time code
1             package Pod::Simple::HTML;
2 7     7   306646 use strict;
  7         15  
  7         300  
3 7     7   47 use warnings;
  7         12  
  7         404  
4 7     7   4367 use Pod::Simple::PullParser ();
  7         22  
  7         650  
5             our @ISA = ('Pod::Simple::PullParser');
6             our $VERSION = '3.47';
7             BEGIN {
8 7 50   7   39 if(defined &DEBUG) { } # no-op
    50          
9 7         46320 elsif( defined &Pod::Simple::DEBUG ) { *DEBUG = \&Pod::Simple::DEBUG }
10 0         0 else { *DEBUG = sub () {0}; }
11             }
12              
13             our $Doctype_decl ||= ''; # No. Just No. Don't even ask me for it.
14             # qq{
15             # "http://www.w3.org/TR/html4/loose.dtd">\n};
16              
17             our $Content_decl ||=
18             q{};
19              
20             our $HTML_EXTENSION;
21             $HTML_EXTENSION = '.html' unless defined $HTML_EXTENSION;
22             our $Computerese;
23             $Computerese = "" unless defined $Computerese;
24             our $LamePad;
25             $LamePad = '' unless defined $LamePad;
26              
27             our $Linearization_Limit;
28             $Linearization_Limit = 120 unless defined $Linearization_Limit;
29             # headings/items longer than that won't get an
30             our $Perldoc_URL_Prefix;
31             $Perldoc_URL_Prefix = 'https://metacpan.org/pod/'
32             unless defined $Perldoc_URL_Prefix;
33             our $Perldoc_URL_Postfix;
34             $Perldoc_URL_Postfix = ''
35             unless defined $Perldoc_URL_Postfix;
36              
37              
38             our $Man_URL_Prefix = 'https://man7.org/linux/man-pages/man';
39             our $Man_URL_Postfix = '.html';
40              
41             our $Title_Prefix;
42             $Title_Prefix = '' unless defined $Title_Prefix;
43             our $Title_Postfix;
44             $Title_Postfix = '' unless defined $Title_Postfix;
45             our %ToIndex = map {; $_ => 1 } qw(head1 head2 head3 head4 ); # item-text
46             # 'item-text' stuff in the index doesn't quite work, and may
47             # not be a good idea anyhow.
48              
49              
50             __PACKAGE__->_accessorize(
51             'perldoc_url_prefix',
52             # In turning L into http://whatever/Foo%3a%3aBar, what
53             # to put before the "Foo%3a%3aBar".
54             # (for singleton mode only?)
55             'perldoc_url_postfix',
56             # what to put after "Foo%3a%3aBar" in the URL. Normally "".
57              
58             'man_url_prefix',
59             # In turning L into http://whatever/man/1/crontab, what
60             # to put before the "1/crontab".
61             'man_url_postfix',
62             # what to put after the "1/crontab" in the URL. Normally ".html".
63              
64             'batch_mode', # whether we're in batch mode
65             'batch_mode_current_level',
66             # When in batch mode, how deep the current module is: 1 for "LWP",
67             # 2 for "LWP::Procotol", 3 for "LWP::Protocol::GHTTP", etc
68              
69             'title_prefix', 'title_postfix',
70             # What to put before and after the title in the head.
71             # Should already be &-escaped
72              
73             'html_h_level',
74              
75             'html_header_before_title',
76             'html_header_after_title',
77             'html_footer',
78             'top_anchor',
79              
80             'index', # whether to add an index at the top of each page
81             # (actually it's a table-of-contents, but we'll call it an index,
82             # out of apparently longstanding habit)
83              
84             'html_css', # URL of CSS file to point to
85             'html_javascript', # URL of Javascript file to point to
86              
87             'force_title', # should already be &-escaped
88             'default_title', # should already be &-escaped
89             );
90              
91             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
92             my @_to_accept;
93              
94             our %Tagmap = (
95             'Verbatim' => "\n",
96             '/Verbatim' => "\n",
97             'VerbatimFormatted' => "\n",
98             '/VerbatimFormatted' => "\n",
99             'VerbatimB' => "",
100             '/VerbatimB' => "",
101             'VerbatimI' => "",
102             '/VerbatimI' => "",
103             'VerbatimBI' => "",
104             '/VerbatimBI' => "",
105              
106              
107             'Data' => "\n",
108             '/Data' => "\n",
109              
110             'head1' => "\n

", # And also stick in an

111             'head2' => "\n

", # ''

112             'head3' => "\n

", # ''

113             'head4' => "\n

", # ''

114             'head5' => "\n
", # ''
115             'head6' => "\n
", # ''
116             '/head1' => "\n",
117             '/head2' => "\n",
118             '/head3' => "\n",
119             '/head4' => "\n",
120             '/head5' => "\n",
121             '/head6' => "\n",
122              
123             'X' => "",
125              
126             changes(qw(
127             Para=p
128             B=b I=i U=u
129             over-bullet=ul
130             over-number=ol
131             over-text=dl
132             over-block=blockquote
133             item-bullet=li
134             item-number=li
135             item-text=dt
136             )),
137             changes2(
138             map {; m/^([-a-z]+)/s && push @_to_accept, $1; $_ }
139             qw[
140             sample=samp
141             definition=dfn
142             keyboard=kbd
143             variable=var
144             citation=cite
145             abbreviation=abbr
146             acronym=acronym
147             subscript=sub
148             superscript=sup
149             big=big
150             small=small
151             underline=u
152             strikethrough=s
153             preformat=pre
154             teletype=tt
155             ] # no point in providing a way to get ..., I think
156             ),
157              
158             '/item-bullet' => "$LamePad\n",
159             '/item-number' => "$LamePad\n",
160             '/item-text' => "$LamePad\n",
161             'item-body' => "\n
",
162             '/item-body' => "\n",
163              
164              
165             'B' => "", '/B' => "",
166             'I' => "", '/I' => "",
167             'U' => "", '/U' => "",
168             'F' => "", '/F' => "",
169             'C' => "", '/C' => "",
170             'L' => "", # ideally never used!
171             '/L' => "",
172             );
173              
174             sub changes {
175 7 50   7 0 20 return map {; m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s
  77         466  
176             ? ( $1, => "\n<$2>", "/$1", => "\n" ) : die "Funky $_"
177             } @_;
178             }
179             sub changes2 {
180 7 50   7 0 16 return map {; m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s
  105         1113  
181             ? ( $1, => "<$2>", "/$1", => "" ) : die "Funky $_"
182             } @_;
183             }
184              
185             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
186 0     0 0 0 sub go { Pod::Simple::HTML->parse_from_file(@ARGV); exit 0 }
  0         0  
187             # Just so we can run from the command line. No options.
188             # For that, use perldoc!
189             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
190              
191             sub new {
192 60     60 1 881 my $new = shift->SUPER::new(@_);
193             #$new->nix_X_codes(1);
194 60         222 $new->nbsp_for_S(1);
195 60         191 $new->accept_targets( 'html', 'HTML' );
196 60         194 $new->accept_codes('VerbatimFormatted');
197 60         185 $new->accept_codes(@_to_accept);
198 60         74 DEBUG > 2 and print STDERR "To accept: ", join(' ',@_to_accept), "\n";
199              
200 60         235 $new->perldoc_url_prefix( $Perldoc_URL_Prefix );
201 60         160 $new->perldoc_url_postfix( $Perldoc_URL_Postfix );
202 60         197 $new->man_url_prefix( $Man_URL_Prefix );
203 60         157 $new->man_url_postfix( $Man_URL_Postfix );
204 60         146 $new->title_prefix( $Title_Prefix );
205 60         163 $new->title_postfix( $Title_Postfix );
206              
207 60         223 $new->html_header_before_title(
208             qq[$Doctype_decl] </td> </tr> <tr> <td class="h" > <a name="209">209</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> ); </td> </tr> <tr> <td class="h" > <a name="210">210</a> </td> <td class="c3" > 60 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 171 </td> <td class="s"> $new->html_header_after_title( join "\n" => </td> </tr> <tr> <td class="h" > <a name="211">211</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> "",
212             $Content_decl,
213             "\n",
214             $new->version_tag_comment,
215             "\n",
216             );
217 60         181 $new->html_footer( qq[\n\n\n\n] );
218 60         141 $new->top_anchor( "\n" );
219              
220 60         2913 $new->{'Tagmap'} = {%Tagmap};
221              
222 60         441 return $new;
223             }
224              
225             sub __adjust_html_h_levels {
226 59     59   133 my ($self) = @_;
227 59         76 my $Tagmap = $self->{'Tagmap'};
228              
229 59         133 my $add = $self->html_h_level;
230 59 100       146 return unless defined $add;
231 1 50 50     7 return if ($self->{'Adjusted_html_h_levels'}||0) == $add;
232              
233 1         2 $add -= 1;
234 1         4 for (1 .. 6) {
235 6         49 $Tagmap->{"head$_"} =~ s/$_/$_ + $add/e;
  6         10  
236 6         40 $Tagmap->{"/head$_"} =~ s/$_/$_ + $add/e;
  6         11  
237             }
238             }
239              
240             sub batch_mode_page_object_init {
241 10     10 0 40 my($self, $batchconvobj, $module, $infile, $outfile, $depth) = @_;
242 10         13 DEBUG and print STDERR "Initting $self\n for $module\n",
243             " in $infile\n out $outfile\n depth $depth\n";
244 10         33 $self->batch_mode(1);
245 10         30 $self->batch_mode_current_level($depth);
246 10         27 return $self;
247             }
248              
249             sub run {
250 60     60 0 119 my $self = $_[0];
251 60 100       160 return $self->do_middle if $self->bare_output;
252             return
253 18   100     52 $self->do_beginning && $self->do_middle && $self->do_end;
254             }
255              
256             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
257              
258             sub do_beginning {
259 18     18 0 30 my $self = $_[0];
260              
261 18         27 my $title;
262              
263 18 50       62 if(defined $self->force_title) {
264 0         0 $title = $self->force_title;
265 0         0 DEBUG and print STDERR "Forcing title to be $title\n";
266             } else {
267             # Actually try looking for the title in the document:
268 18         86 $title = $self->get_short_title();
269 18 100       120 unless($self->content_seen) {
270 1         2 DEBUG and print STDERR "No content seen in search for title.\n";
271 1         8 return;
272             }
273 17         44 $self->{'Title'} = $title;
274              
275 17 100 66     97 if(defined $title and $title =~ m/\S/) {
276 14         44 $title = $self->title_prefix . esc($title) . $self->title_postfix;
277             } else {
278 3         10 $title = $self->default_title;
279 3 50       5 $title = '' unless defined $title;
280 3         3 DEBUG and print STDERR "Title defaults to $title\n";
281             }
282             }
283              
284              
285 17   50     51 my $after = $self->html_header_after_title || '';
286 17 100       55 if($self->html_css) {
287 10 50       27 my $link =
288             $self->html_css =~ m/
289             ? $self->html_css # It's a big blob of markup, let's drop it in
290             : sprintf( # It's just a URL, so let's wrap it up
291             qq[\n],
292             $self->html_css,
293             );
294 10         164 $after =~ s{()}{$link\n$1}i; # otherwise nevermind
295             }
296 17         71 $self->_add_top_anchor(\$after);
297              
298 17 100       50 if($self->html_javascript) {
299 10 50       33 my $link =
300             $self->html_javascript =~ m/
301             ? $self->html_javascript # It's a big blob of markup, let's drop it in
302             : sprintf( # It's just a URL, so let's wrap it up
303             qq[\n],
304             $self->html_javascript,
305             );
306 10         111 $after =~ s{()}{$link\n$1}i; # otherwise nevermind
307             }
308              
309 17   50     49 print {$self->{'output_fh'}}
  17         59  
310             $self->html_header_before_title || '',
311             $title, # already escaped
312             $after,
313             ;
314              
315 17         34 DEBUG and print STDERR "Returning from do_beginning...\n";
316 17         109 return 1;
317             }
318              
319             sub _add_top_anchor {
320 17     17   39 my($self, $text_r) = @_;
321 17 100 66     135 unless($$text_r and $$text_r =~ m/name=['"]___top['"]/) { # a hack
322 7   50     18 $$text_r .= $self->top_anchor || '';
323             }
324 17         34 return;
325             }
326              
327             sub version_tag_comment {
328 60     60 0 104 my $self = shift;
329             return sprintf
330             "\n",
331             esc(
332             ref($self), $self->VERSION(), $ISA[0], $ISA[0]->VERSION(),
333 60   33     1433 $], scalar(gmtime($ENV{SOURCE_DATE_EPOCH} || time)),
334             ), $self->_modnote(),
335             ;
336             }
337              
338             sub _modnote {
339 60   33 60   170 my $class = ref($_[0]) || $_[0];
340 60         1229 return join "\n " => grep m/\S/, split "\n",
341              
342             qq{
343             If you want to change this HTML document, you probably shouldn't do that
344             by changing it directly. Instead, see about changing the calling options
345             to $class, and/or subclassing $class,
346             then reconverting this document from the Pod source.
347             When in doubt, email the author of $class for advice.
348             See 'perldoc $class' for more info.
349             };
350              
351             }
352              
353             sub do_end {
354 17     17 0 29 my $self = $_[0];
355 17   50     24 print {$self->{'output_fh'}} $self->html_footer || '';
  17         74  
356 17         254 return 1;
357             }
358              
359             # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
360             # Normally this would just be a call to _do_middle_main_loop -- but we
361             # have to do some elaborate things to emit all the content and then
362             # summarize it and output it /before/ the content that it's a summary of.
363              
364             sub do_middle {
365 59     59 0 78 my $self = $_[0];
366 59 100       128 return $self->_do_middle_main_loop unless $self->index;
367              
368 10 50       39 if( $self->output_string ) {
369             # An efficiency hack
370 0         0 my $out = $self->output_string; #it's a reference to it
371 0         0 my $sneakytag = "\f\f\e\e\b\bIndex Here\e\e\b\b\f\f\n";
372 0         0 $$out .= $sneakytag;
373 0         0 $self->_do_middle_main_loop;
374 0         0 $sneakytag = quotemeta($sneakytag);
375 0         0 my $index = $self->index_as_html();
376 0 0       0 if( $$out =~ s/$sneakytag/$index/s ) {
377             # Expected case
378 0         0 DEBUG and print STDERR "Inserted ", length($index), " bytes of index HTML into $out.\n";
379             } else {
380 0         0 DEBUG and print STDERR "Odd, couldn't find where to insert the index in the output!\n";
381             # I don't think this should ever happen.
382             }
383 0         0 return 1;
384             }
385              
386 10 50       34 unless( $self->output_fh ) {
387 0         0 require Carp;
388 0         0 Carp::confess("Parser object \$p doesn't seem to have any output object! I don't know how to deal with that.");
389             }
390              
391             # If we get here, we're outputting to a FH. So we need to do some magic.
392             # Namely, divert all content to a string, which we output after the index.
393 10         25 my $fh = $self->output_fh;
394 10         22 my $content = '';
395             {
396             # Our horrible bait and switch:
397 10         18 $self->output_string( \$content );
  10         41  
398 10         70 $self->_do_middle_main_loop;
399 10         61 $self->abandon_output_string();
400 10         25 $self->output_fh($fh);
401             }
402 10         39 print $fh $self->index_as_html();
403 10         26 print $fh $content;
404              
405 10         62 return 1;
406             }
407              
408             ###########################################################################
409              
410             sub index_as_html {
411 10     10 0 18 my $self = $_[0];
412             # This is meant to be called AFTER the input document has been parsed!
413              
414 10   50     200 my $points = $self->{'PSHTML_index_points'} || [];
415              
416 10 100       57 @$points > 1 or return qq[
\n];
417             # There's no point in having a 0-item or 1-item index, I dare say.
418              
419 8         31 my(@out) = qq{\n
};
420 8         16 my $level = 0;
421              
422 8         49 my( $target_level, $previous_tagname, $tagname, $text, $anchorname, $indent);
423 8         35 foreach my $p (@$points, ['head0', '(end)']) {
424 26         102 ($tagname, $text) = @$p;
425 26         71 $anchorname = $self->section_escape($text);
426 26 50       140 if( $tagname =~ m{^head(\d+)$} ) {
427 26         83 $target_level = 0 + $1;
428             } else { # must be some kinda list item
429 0 0       0 if($previous_tagname =~ m{^head\d+$} ) {
430 0         0 $target_level = $level + 1;
431             } else {
432 0         0 $target_level = $level; # no change needed
433             }
434             }
435              
436             # Get to target_level by opening or closing ULs
437 26         63 while($level > $target_level)
438 9         19 { --$level; push @out, (" " x $level) . ""; }
  9         36  
439 26         65 while($level < $target_level)
440 9         13 { ++$level; push @out, (" " x ($level-1))
  9         53  
441             . "
    "; }
442              
443 26         45 $previous_tagname = $tagname;
444 26 100       58 next unless $level;
445              
446 18         38 $indent = ' ' x $level;
447 18         118 push @out, sprintf
448             "%s
  • %s",
  • 449             $indent, $level, esc($anchorname), esc($text)
    450             ;
    451             }
    452 8         23 push @out, "\n";
    453 8         57 return join "\n", @out;
    454             }
    455              
    456             ###########################################################################
    457              
    458             sub _do_middle_main_loop {
    459 59     59   134 my $self = $_[0];
    460 59         82 my $fh = $self->{'output_fh'};
    461 59         81 my $tagmap = $self->{'Tagmap'};
    462              
    463 59         133 $self->__adjust_html_h_levels;
    464              
    465 59         125 my($token, $type, $tagname, $linkto, $linktype);
    466 59         0 my @stack;
    467 59         74 my $dont_wrap = 0;
    468              
    469 59         169 while($token = $self->get_token) {
    470              
    471             # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    472 522 100       1072 if( ($type = $token->type) eq 'start' ) {
        100          
        50          
    473 203 100 100     488 if(($tagname = $token->tagname) eq 'L') {
        100          
        100          
    474 22   50     48 $linktype = $token->attr('type') || 'insane';
    475              
    476 22         45 $linkto = $self->do_link($token);
    477              
    478 22 50 33     55 if(defined $linkto and length $linkto) {
    479 22         44 esc($linkto);
    480             # (Yes, SGML-escaping applies on top of %-escaping!
    481             # But it's rarely noticeable in practice.)
    482 22         94 print $fh qq{};
    483             } else {
    484 0         0 print $fh ""; # Yes, an 'a' element with no attributes!
    485             }
    486              
    487             } elsif ($tagname eq 'item-text' or $tagname =~ m/^head\d$/s) {
    488 37   50     229 print $fh $tagmap->{$tagname} || next;
    489              
    490 37         61 my @to_unget;
    491 37         49 while(1) {
    492 85         177 push @to_unget, $self->get_token;
    493 85 100 100     256 last if $to_unget[-1]->is_end
    494             and $to_unget[-1]->tagname eq $tagname;
    495              
    496             # TODO: support for X<...>'s found in here? (maybe hack into linearize_tokens)
    497             }
    498              
    499 37         141 my $name = $self->linearize_tokens(@to_unget);
    500 37 50       134 $name = $self->do_section($name, $token) if defined $name;
    501              
    502 37         123 print $fh "
    503 37 100       177 if ($tagname =~ m/^head\d$/s) {
    504 33 100       117 print $fh "class='u'", $self->index
    505             ? " href='#___top' title='click to go to top of document'\n"
    506             : "\n";
    507             }
    508              
    509 37 50       82 if(defined $name) {
    510 37         115 my $esc = esc( $self->section_name_tidy( $name ) );
    511 37         159 print $fh qq[name="$esc"];
    512 37         63 DEBUG and print STDERR "Linearized ", scalar(@to_unget),
    513             " tokens as \"$name\".\n";
    514 31         142 push @{ $self->{'PSHTML_index_points'} }, [$tagname, $name]
    515 37 100       133 if $ToIndex{ $tagname };
    516             # Obviously, this discards all formatting codes (saving
    517             # just their content), but ahwell.
    518              
    519             } else { # ludicrously long, so nevermind
    520 0         0 DEBUG and print STDERR "Linearized ", scalar(@to_unget),
    521             " tokens, but it was too long, so nevermind.\n";
    522             }
    523 37         94 print $fh "\n>";
    524 37         164 $self->unget_token(@to_unget);
    525              
    526             } elsif ($tagname eq 'Data') {
    527 4         9 my $next = $self->get_token;
    528 4 50       10 next unless defined $next;
    529 4 50       17 unless( $next->type eq 'text' ) {
    530 0         0 $self->unget_token($next);
    531 0         0 next;
    532             }
    533 4         5 DEBUG and print STDERR " raw text ", $next->text, "\n";
    534             # The parser sometimes preserves newlines and sometimes doesn't!
    535 4         7 (my $text = $next->text) =~ s/\n\z//;
    536 4         12 print $fh $text, "\n";
    537 4         9 next;
    538              
    539             } else {
    540 140 100 33     459 if( $tagname =~ m/^over-/s ) {
        50 33        
    541 3         9 push @stack, '';
    542             } elsif( $tagname =~ m/^item-/s and @stack and $stack[-1] ) {
    543 0         0 print $fh $stack[-1];
    544 0         0 $stack[-1] = '';
    545             }
    546 140   100     556 print $fh $tagmap->{$tagname} || next;
    547 80 100 66     524 ++$dont_wrap if $tagname eq 'Verbatim' or $tagname eq "VerbatimFormatted"
          100        
    548             or $tagname eq 'X';
    549             }
    550              
    551             # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    552             } elsif( $type eq 'end' ) {
    553 203 100 66     420 if( ($tagname = $token->tagname) =~ m/^over-/s ) {
        100          
    554 3 50       11 if( my $end = pop @stack ) {
    555 3         9 print $fh $end;
    556             }
    557             } elsif( $tagname =~ m/^item-/s and @stack) {
    558 4         13 $stack[-1] = $tagmap->{"/$tagname"};
    559 4 50 33     19 if( $tagname eq 'item-text' and defined(my $next = $self->get_token) ) {
    560 4         14 $self->unget_token($next);
    561 4 100       10 if( $next->type eq 'start' ) {
    562 3         12 print $fh $tagmap->{"/item-text"},$tagmap->{"item-body"};
    563 3         32 $stack[-1] = $tagmap->{"/item-body"};
    564             }
    565             }
    566 4         15 next;
    567             }
    568 199   100     696 print $fh $tagmap->{"/$tagname"} || next;
    569 139 100 66     545 --$dont_wrap if $tagname eq 'Verbatim' or $tagname eq 'X';
    570              
    571             # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    572             } elsif( $type eq 'text' ) {
    573 116         274 esc($type = $token->text); # reuse $type, why not
    574 116 100       480 $type =~ s/([\?\!\"\'\.\,]) /$1\n/g unless $dont_wrap;
    575 116         339 print $fh $type;
    576             }
    577              
    578             }
    579 59         197 return 1;
    580             }
    581              
    582             ###########################################################################
    583             #
    584              
    585             sub do_section {
    586 36     36 0 88 my($self, $name, $token) = @_;
    587 36         82 return $name;
    588             }
    589              
    590             sub do_link {
    591 22     22 0 33 my($self, $token) = @_;
    592 22         29 my $type = $token->attr('type');
    593 22 50       55 if(!defined $type) {
        100          
        100          
        50          
    594 0         0 $self->whine("Typeless L!?", $token->attr('start_line'));
    595 10         22 } elsif( $type eq 'pod') { return $self->do_pod_link($token);
    596 9         13 } elsif( $type eq 'url') { return $self->do_url_link($token);
    597 3         9 } elsif( $type eq 'man') { return $self->do_man_link($token);
    598             } else {
    599 0         0 $self->whine("L of unknown type $type!?", $token->attr('start_line'));
    600             }
    601 0         0 return 'FNORG'; # should never get called
    602             }
    603              
    604             # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    605              
    606 9     9 0 21 sub do_url_link { return $_[1]->attr('to') }
    607              
    608             sub do_man_link {
    609 3     3 0 6 my ($self, $link) = @_;
    610 3         4 my $to = $link->attr('to');
    611 3         6 my $frag = $link->attr('section');
    612              
    613 3 50 33     11 return undef unless defined $to and length $to; # should never happen
    614              
    615 3 100 66     9 $frag = $self->section_escape($frag)
    616             if defined $frag and length($frag .= ''); # (stringify)
    617              
    618 3         4 DEBUG and print STDERR "Resolving \"$to/$frag\"\n\n";
    619              
    620 3         7 return $self->resolve_man_page_link($to, $frag);
    621             }
    622              
    623              
    624             sub do_pod_link {
    625             # And now things get really messy...
    626 10     10 0 13 my($self, $link) = @_;
    627 10         15 my $to = $link->attr('to');
    628 10         33 my $section = $link->attr('section');
    629             return undef unless( # should never happen
    630 10 50 66     40 (defined $to and length $to) or
          33        
          66        
    631             (defined $section and length $section)
    632             );
    633              
    634 10 100 66     53 $section = $self->section_escape($section)
    635             if defined $section and length($section .= ''); # (stringify)
    636              
    637 10         12 DEBUG and printf STDERR "Resolving \"%s\" \"%s\"...\n",
    638             $to || "(nil)", $section || "(nil)";
    639              
    640             {
    641             # An early hack:
    642 10         10 my $complete_url = $self->resolve_pod_link_by_table($to, $section);
      10         23  
    643 10 50       14 if( $complete_url ) {
    644 0         0 DEBUG > 1 and print STDERR "resolve_pod_link_by_table(T,S) gives ",
    645             $complete_url, "\n (Returning that.)\n";
    646 0         0 return $complete_url;
    647             } else {
    648 10         12 DEBUG > 4 and print STDERR " resolve_pod_link_by_table(T,S)",
    649             " didn't return anything interesting.\n";
    650             }
    651             }
    652              
    653 10 100 66     22 if(defined $to and length $to) {
    654             # Give this routine first hack again
    655 7         11 my $there = $self->resolve_pod_link_by_table($to);
    656 7 50 33     16 if(defined $there and length $there) {
    657 0         0 DEBUG > 1
    658             and print STDERR "resolve_pod_link_by_table(T) gives $there\n";
    659             } else {
    660 7         18 $there =
    661             $self->resolve_pod_page_link($to, $section);
    662             # (I pass it the section value, but I don't see a
    663             # particular reason it'd use it.)
    664 7         8 DEBUG > 1 and print STDERR "resolve_pod_page_link gives ", $there || "(nil)", "\n";
    665 7 50 33     20 unless( defined $there and length $there ) {
    666 0         0 DEBUG and print STDERR "Can't resolve $to\n";
    667 0         0 return undef;
    668             }
    669             # resolve_pod_page_link returning undef is how it
    670             # can signal that it gives up on making a link
    671             }
    672 7         9 $to = $there;
    673             }
    674              
    675             #DEBUG and print STDERR "So far [", $to||'nil', "] [", $section||'nil', "]\n";
    676              
    677 10 100 66     28 my $out = (defined $to and length $to) ? $to : '';
    678 10 100 66     30 $out .= "#" . $section if defined $section and length $section;
    679              
    680 10 50       15 unless(length $out) { # sanity check
    681 0         0 DEBUG and printf STDERR "Oddly, couldn't resolve \"%s\" \"%s\"...\n",
    682             $to || "(nil)", $section || "(nil)";
    683 0         0 return undef;
    684             }
    685              
    686 10         11 DEBUG and print STDERR "Resolved to $out\n";
    687 10         20 return $out;
    688             }
    689              
    690              
    691             # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
    692              
    693             sub section_escape {
    694 32     32 0 65 my($self, $section) = @_;
    695 32         73 return $self->section_url_escape(
    696             $self->section_name_tidy($section)
    697             );
    698             }
    699              
    700             sub section_name_tidy {
    701 69     69 0 143 my($self, $section) = @_;
    702 69         223 $section =~ s/^\s+//;
    703 69         190 $section =~ s/\s+$//;
    704 69         142 $section =~ tr/ /_/;
    705 69 50       366 if ("$]" >= 5.006) {
    706 69         164 $section =~ s/[[:cntrl:][:^ascii:]]//g; # drop crazy characters
    707             } elsif ('A' eq chr(65)) { # But not on early EBCDIC
    708 0         0 $section =~ tr/\x00-\x1F\x80-\x9F//d;
    709             }
    710 69         181 $section = $self->unicode_escape_url($section);
    711 69 50       217 $section = '_' unless length $section;
    712 69         217 return $section;
    713             }
    714              
    715 32     32 0 86 sub section_url_escape { shift->general_url_escape(@_) }
    716 7     7 0 22 sub pagepath_url_escape { shift->general_url_escape(@_) }
    717 3     3 0 6 sub manpage_url_escape { shift->general_url_escape(@_) }
    718              
    719             sub general_url_escape {
    720 42     42 0 77 my($self, $string) = @_;
    721              
    722 42         75 $string =~ s/([^\x00-\xFF])/join '', map sprintf('%%%02X',$_), unpack 'C*', $1/eg;
      0         0  
    723             # express Unicode things as urlencode(utf(orig)).
    724              
    725             # A pretty conservative escaping, behoovey even for query components
    726             # of a URL (see RFC 2396)
    727              
    728 42 50       139 if ("$]" >= 5.007_003) {
    729 42         80 $string =~ s/([^-_\.!~*()abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/sprintf('%%%02X',utf8::native_to_unicode(ord($1)))/eg;
      8         30  
    730             } else { # Is broken for non-ASCII platforms on early perls
    731 0         0 $string =~ s/([^-_\.!~*()abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/sprintf('%%%02X',ord($1))/eg;
      0         0  
    732             }
    733             # Yes, stipulate the list without a range, so that this can work right on
    734             # all charsets that this module happens to run under.
    735              
    736 42         102 return $string;
    737             }
    738              
    739             #--------------------------------------------------------------------------
    740             #
    741             # Oh look, a yawning portal to Hell! Let's play touch football right by it!
    742             #
    743              
    744             sub resolve_pod_page_link {
    745             # resolve_pod_page_link must return a properly escaped URL
    746 7     7 0 29 my $self = shift;
    747 7 50       19 return $self->batch_mode()
    748             ? $self->resolve_pod_page_link_batch_mode(@_)
    749             : $self->resolve_pod_page_link_singleton_mode(@_)
    750             ;
    751             }
    752              
    753             sub resolve_pod_page_link_singleton_mode {
    754 7     7 0 16 my($self, $it) = @_;
    755 7 50 33     18 return undef unless defined $it and length $it;
    756 7         22 my $url = $self->pagepath_url_escape($it);
    757              
    758 7         15 $url =~ s{::$}{}s; # probably never comes up anyway
    759 7 50       15 $url =~ s{::}{/}g unless $self->perldoc_url_prefix =~ m/\?/s; # sane DWIM?
    760              
    761 7 50       14 return undef unless length $url;
    762 7         13 return $self->perldoc_url_prefix . $url . $self->perldoc_url_postfix;
    763             }
    764              
    765             sub resolve_pod_page_link_batch_mode {
    766 0     0 0 0 my($self, $to) = @_;
    767 0         0 DEBUG > 1 and print STDERR " During batch mode, resolving $to ...\n";
    768 0         0 my @path = grep length($_), split m/::/s, $to, -1;
    769 0 0       0 unless( @path ) { # sanity
    770 0         0 DEBUG and print STDERR "Very odd! Splitting $to gives (nil)!\n";
    771 0         0 return undef;
    772             }
    773 0         0 $self->batch_mode_rectify_path(\@path);
    774 0         0 my $out = join('/', map $self->pagepath_url_escape($_), @path)
    775             . $HTML_EXTENSION;
    776 0         0 DEBUG > 1 and print STDERR " => $out\n";
    777 0         0 return $out;
    778             }
    779              
    780             sub batch_mode_rectify_path {
    781 0     0 0 0 my($self, $pathbits) = @_;
    782 0         0 my $level = $self->batch_mode_current_level;
    783 0         0 $level--; # how many levels up to go to get to the root
    784 0 0       0 if($level < 1) {
    785 0         0 unshift @$pathbits, '.'; # just to be pretty
    786             } else {
    787 0         0 unshift @$pathbits, ('..') x $level;
    788             }
    789 0         0 return;
    790             }
    791              
    792             sub resolve_man_page_link {
    793 3     3 0 15 my ($self, $to, $frag) = @_;
    794 3         6 my ($page, $section) = $to =~ /^([^(]+)(?:[(](\d+)[)])?$/;
    795              
    796 3 50 33     22 return undef unless defined $page and length $page;
    797 3   50     6 $section ||= 1;
    798              
    799 3         8 return $self->man_url_prefix . "$section/"
    800             . $self->manpage_url_escape($page) . ".$section"
    801             . $self->man_url_postfix;
    802             }
    803              
    804             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    805              
    806             sub resolve_pod_link_by_table {
    807             # A crazy hack to allow specifying custom L => URL mappings
    808              
    809 17 50   17 0 39 return unless $_[0]->{'podhtml_LOT'}; # An optimizy shortcut
    810              
    811 0         0 my($self, $to, $section) = @_;
    812              
    813             # TODO: add a method that actually populates podhtml_LOT from a file?
    814              
    815 0 0       0 if(defined $section) {
    816 0 0 0     0 $to = '' unless defined $to and length $to;
    817 0         0 return $self->{'podhtml_LOT'}{"$to#$section"}; # quite possibly undef!
    818             } else {
    819 0         0 return $self->{'podhtml_LOT'}{$to}; # quite possibly undef!
    820             }
    821 0         0 return;
    822             }
    823              
    824             ###########################################################################
    825              
    826             sub linearize_tokens { # self, tokens
    827 37     37 0 62 my $self = shift;
    828 37         64 my $out = '';
    829              
    830 37         57 my $t;
    831 37         91 while($t = shift @_) {
    832 79 50 33     493 if(!ref $t or !UNIVERSAL::can($t, 'is_text')) {
        100 66        
        100          
    833 0         0 $out .= $t; # a string, or some insane thing
    834             } elsif($t->is_text) {
    835 39         104 $out .= $t->text;
    836             } elsif($t->is_start and $t->tag eq 'X') {
    837             # Ignore until the end of this X<...> sequence:
    838 3         4 my $x_open = 1;
    839 3         4 while($x_open) {
    840 6 100       10 next if( ($t = shift @_)->is_text );
    841 3 50 33     4 if( $t->is_start and $t->tag eq 'X') { ++$x_open }
      0 50 33     0  
    842 3         7 elsif($t->is_end and $t->tag eq 'X') { --$x_open }
    843             }
    844             }
    845             }
    846 37 50       164 return undef if length $out > $Linearization_Limit;
    847 37         89 return $out;
    848             }
    849              
    850             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    851              
    852             sub unicode_escape_url {
    853 69     69 0 138 my($self, $string) = @_;
    854 69         157 $string =~ s/([^\x00-\xFF])/'('.ord($1).')'/eg;
      0         0  
    855             # Turn char 1234 into "(1234)"
    856 69         174 return $string;
    857             }
    858              
    859             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    860             sub esc { # a function.
    861 464 100   464 0 1073 if(defined wantarray) {
    862 326 100       612 if(wantarray) {
    863 275         935 @_ = splice @_; # break aliasing
    864             } else {
    865 51         122 my $x = shift;
    866 51 50       208 if ("$]" >= 5.007_003) {
    867 51         124 $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(utf8::native_to_unicode(ord($1))).';'/eg;
      0         0  
    868             } else { # Is broken for non-ASCII platforms on early perls
    869 0         0 $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg;
      0         0  
    870             }
    871 51         179 return $x;
    872             }
    873             }
    874 413         751 foreach my $x (@_) {
    875             # Escape things very cautiously:
    876 1101 50       1942 if (defined $x) {
    877 1101 50       3170 if ("$]" >= 5.007_003) {
    878 1101         2319 $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(utf8::native_to_unicode(ord($1))).';'/eg
      52         205  
    879             } else { # Is broken for non-ASCII platforms on early perls
    880 0         0 $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg
      0         0  
    881             }
    882             }
    883             # Leave out "- so that "--" won't make it thru in X-generated comments
    884             # with text in them.
    885              
    886             # Yes, stipulate the list without a range, so that this can work right on
    887             # all charsets that this module happens to run under.
    888             }
    889 413         1646 return @_;
    890             }
    891              
    892             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    893              
    894             1;
    895             __END__