File Coverage

blib/lib/Pod/Simple/HTML.pm

Criterion	Covered	Total	%
statement	288	359	80.2
branch	121	178	67.9
condition	66	121	54.5
subroutine	36	39	92.3
pod	1	31	3.2
total	512	728	70.3

line	stmt	bran	cond	sub	pod	time	code
1							package Pod::Simple::HTML;
2	7			7		306646	use strict;
	7					15
	7					300
3	7			7		47	use warnings;
	7					12
	7					404
4	7			7		4367	use Pod::Simple::PullParser ();
	7					22
	7					650
5							our @ISA = ('Pod::Simple::PullParser');
6							our $VERSION = '3.47';
7							BEGIN {
8	7	50		7		39	if(defined &DEBUG) { } # no-op
		50
9	7					46320	elsif( defined &Pod::Simple::DEBUG ) { *DEBUG = \&Pod::Simple::DEBUG }
10	0					0	else { *DEBUG = sub () {0}; }
11							}
12
13							our $Doctype_decl \|\|= ''; # No. Just No. Don't even ask me for it.
14							# qq{
15							# "http://www.w3.org/TR/html4/loose.dtd">\n};
16
17							our $Content_decl \|\|=
18							q{};
19
20							our $HTML_EXTENSION;
21							$HTML_EXTENSION = '.html' unless defined $HTML_EXTENSION;
22							our $Computerese;
23							$Computerese = "" unless defined $Computerese;
24							our $LamePad;
25							$LamePad = '' unless defined $LamePad;
26
27							our $Linearization_Limit;
28							$Linearization_Limit = 120 unless defined $Linearization_Limit;
29							# headings/items longer than that won't get an
30							our $Perldoc_URL_Prefix;
31							$Perldoc_URL_Prefix = 'https://metacpan.org/pod/'
32							unless defined $Perldoc_URL_Prefix;
33							our $Perldoc_URL_Postfix;
34							$Perldoc_URL_Postfix = ''
35							unless defined $Perldoc_URL_Postfix;
36
37
38							our $Man_URL_Prefix = 'https://man7.org/linux/man-pages/man';
39							our $Man_URL_Postfix = '.html';
40
41							our $Title_Prefix;
42							$Title_Prefix = '' unless defined $Title_Prefix;
43							our $Title_Postfix;
44							$Title_Postfix = '' unless defined $Title_Postfix;
45							our %ToIndex = map {; $_ => 1 } qw(head1 head2 head3 head4 ); # item-text
46							# 'item-text' stuff in the index doesn't quite work, and may
47							# not be a good idea anyhow.
48
49
50							__PACKAGE__->_accessorize(
51							'perldoc_url_prefix',
52							# In turning L into http://whatever/Foo%3a%3aBar, what
53							# to put before the "Foo%3a%3aBar".
54							# (for singleton mode only?)
55							'perldoc_url_postfix',
56							# what to put after "Foo%3a%3aBar" in the URL. Normally "".
57
58							'man_url_prefix',
59							# In turning L into http://whatever/man/1/crontab, what
60							# to put before the "1/crontab".
61							'man_url_postfix',
62							# what to put after the "1/crontab" in the URL. Normally ".html".
63
64							'batch_mode', # whether we're in batch mode
65							'batch_mode_current_level',
66							# When in batch mode, how deep the current module is: 1 for "LWP",
67							# 2 for "LWP::Procotol", 3 for "LWP::Protocol::GHTTP", etc
68
69							'title_prefix', 'title_postfix',
70							# What to put before and after the title in the head.
71							# Should already be &-escaped
72
73							'html_h_level',
74
75							'html_header_before_title',
76							'html_header_after_title',
77							'html_footer',
78							'top_anchor',
79
80							'index', # whether to add an index at the top of each page
81							# (actually it's a table-of-contents, but we'll call it an index,
82							# out of apparently longstanding habit)
83
84							'html_css', # URL of CSS file to point to
85							'html_javascript', # URL of Javascript file to point to
86
87							'force_title', # should already be &-escaped
88							'default_title', # should already be &-escaped
89							);
90
91							#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
92							my @_to_accept;
93
94							our %Tagmap = (
95							'Verbatim' => "\n",
96							'/Verbatim' => "\n",
97							'VerbatimFormatted' => "\n",
98							'/VerbatimFormatted' => "\n",
99							'VerbatimB' => "",
100							'/VerbatimB' => "",
101							'VerbatimI' => "",
102							'/VerbatimI' => "",
103							'VerbatimBI' => "",
104							'/VerbatimBI' => "",
105
106
107							'Data' => "\n",
108							'/Data' => "\n",
109
110							'head1' => "\n ", # And also stick in an
111							'head2' => "\n ", # ''
112							'head3' => "\n ", # ''
113							'head4' => "\n ", # ''
114							'head5' => "\n ", # ''
115							'head6' => "\n ", # ''
116							'/head1' => "\n",
117							'/head2' => "\n",
118							'/head3' => "\n",
119							'/head4' => "\n",
120							'/head5' => "\n",
121							'/head6' => "\n",
122
123							'X' => "",
125
126							changes(qw(
127							Para=p
128							B=b I=i U=u
129							over-bullet=ul
130							over-number=ol
131							over-text=dl
132							over-block=blockquote
133							item-bullet=li
134							item-number=li
135							item-text=dt
136							)),
137							changes2(
138							map {; m/^([-a-z]+)/s && push @_to_accept, $1; $_ }
139							qw[
140							sample=samp
141							definition=dfn
142							keyboard=kbd
143							variable=var
144							citation=cite
145							abbreviation=abbr
146							acronym=acronym
147							subscript=sub
148							superscript=sup
149							big=big
150							small=small
151							underline=u
152							strikethrough=s
153							preformat=pre
154							teletype=tt
155							] # no point in providing a way to get ..., I think
156							),
157
158							'/item-bullet' => "$LamePad\n",
159							'/item-number' => "$LamePad\n",
160							'/item-text' => "$LamePad\n",
161							'item-body' => "\n ",
162							'/item-body' => "\n",
163
164
165							'B' => "", '/B' => "",
166							'I' => "", '/I' => "",
167							'U' => "", '/U' => "",
168							'F' => "", '/F' => "",
169							'C' => "", '/C' => "",
170							'L' => "", # ideally never used!
171							'/L' => "",
172							);
173
174							sub changes {
175	7	50		7	0	20	return map {; m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s
	77					466
176							? ( $1, => "\n<$2>", "/$1", => "\n" ) : die "Funky $_"
177							} @_;
178							}
179							sub changes2 {
180	7	50		7	0	16	return map {; m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s
	105					1113
181							? ( $1, => "<$2>", "/$1", => "" ) : die "Funky $_"
182							} @_;
183							}
184
185							#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
186	0			0	0	0	sub go { Pod::Simple::HTML->parse_from_file(@ARGV); exit 0 }
	0					0
187							# Just so we can run from the command line. No options.
188							# For that, use perldoc!
189							#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
190
191							sub new {
192	60			60	1	881	my $new = shift->SUPER::new(@_);
193							#$new->nix_X_codes(1);
194	60					222	$new->nbsp_for_S(1);
195	60					191	$new->accept_targets( 'html', 'HTML' );
196	60					194	$new->accept_codes('VerbatimFormatted');
197	60					185	$new->accept_codes(@_to_accept);
198	60					74	DEBUG > 2 and print STDERR "To accept: ", join(' ',@_to_accept), "\n";
199
200	60					235	$new->perldoc_url_prefix( $Perldoc_URL_Prefix );
201	60					160	$new->perldoc_url_postfix( $Perldoc_URL_Postfix );
202	60					197	$new->man_url_prefix( $Man_URL_Prefix );
203	60					157	$new->man_url_postfix( $Man_URL_Postfix );
204	60					146	$new->title_prefix( $Title_Prefix );
205	60					163	$new->title_postfix( $Title_Postfix );
206
207	60					223	$new->html_header_before_title(
208							qq[$Doctype_decl] </td> </tr> <tr> <td class="h" > <a name="209">209</a> </td> <td > </td> <td > </td> <td > </td> <td > </td> <td > </td> <td > </td> <td class="s"> ); </td> </tr> <tr> <td class="h" > <a name="210">210</a> </td> <td class="c3" > 60 </td> <td > </td> <td > </td> <td > </td> <td > </td> <td > 171 </td> <td class="s"> $new->html_header_after_title( join "\n" => </td> </tr> <tr> <td class="h" > <a name="211">211</a> </td> <td > </td> <td > </td> <td > </td> <td > </td> <td > </td> <td > </td> <td class="s"> "",
212							$Content_decl,
213							"\n",
214							$new->version_tag_comment,
215							"\n",
216							);
217	60					181	$new->html_footer( qq[\n\n\n\n] );
218	60					141	$new->top_anchor( "\n" );
219
220	60					2913	$new->{'Tagmap'} = {%Tagmap};
221
222	60					441	return $new;
223							}
224
225							sub __adjust_html_h_levels {
226	59			59		133	my ($self) = @_;
227	59					76	my $Tagmap = $self->{'Tagmap'};
228
229	59					133	my $add = $self->html_h_level;
230	59	100				146	return unless defined $add;
231	1	50	50			7	return if ($self->{'Adjusted_html_h_levels'}\|\|0) == $add;
232
233	1					2	$add -= 1;
234	1					4	for (1 .. 6) {
235	6					49	$Tagmap->{"head$_"} =~ s/$_/$_ + $add/e;
	6					10
236	6					40	$Tagmap->{"/head$_"} =~ s/$_/$_ + $add/e;
	6					11
237							}
238							}
239
240							sub batch_mode_page_object_init {
241	10			10	0	40	my($self, $batchconvobj, $module, $infile, $outfile, $depth) = @_;
242	10					13	DEBUG and print STDERR "Initting $self\n for $module\n",
243							" in $infile\n out $outfile\n depth $depth\n";
244	10					33	$self->batch_mode(1);
245	10					30	$self->batch_mode_current_level($depth);
246	10					27	return $self;
247							}
248
249							sub run {
250	60			60	0	119	my $self = $_[0];
251	60	100				160	return $self->do_middle if $self->bare_output;
252							return
253	18		100			52	$self->do_beginning && $self->do_middle && $self->do_end;
254							}
255
256							#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
257
258							sub do_beginning {
259	18			18	0	30	my $self = $_[0];
260
261	18					27	my $title;
262
263	18	50				62	if(defined $self->force_title) {
264	0					0	$title = $self->force_title;
265	0					0	DEBUG and print STDERR "Forcing title to be $title\n";
266							} else {
267							# Actually try looking for the title in the document:
268	18					86	$title = $self->get_short_title();
269	18	100				120	unless($self->content_seen) {
270	1					2	DEBUG and print STDERR "No content seen in search for title.\n";
271	1					8	return;
272							}
273	17					44	$self->{'Title'} = $title;
274
275	17	100	66			97	if(defined $title and $title =~ m/\S/) {
276	14					44	$title = $self->title_prefix . esc($title) . $self->title_postfix;
277							} else {
278	3					10	$title = $self->default_title;
279	3	50				5	$title = '' unless defined $title;
280	3					3	DEBUG and print STDERR "Title defaults to $title\n";
281							}
282							}
283
284
285	17		50			51	my $after = $self->html_header_after_title \|\| '';
286	17	100				55	if($self->html_css) {
287	10	50				27	my $link =
288							$self->html_css =~ m/
289							? $self->html_css # It's a big blob of markup, let's drop it in
290							: sprintf( # It's just a URL, so let's wrap it up
291							qq[\n],
292							$self->html_css,
293							);
294	10					164	$after =~ s{()}{$link\n$1}i; # otherwise nevermind
295							}
296	17					71	$self->_add_top_anchor(\$after);
297
298	17	100				50	if($self->html_javascript) {
299	10	50				33	my $link =
300							$self->html_javascript =~ m/
301							? $self->html_javascript # It's a big blob of markup, let's drop it in
302							: sprintf( # It's just a URL, so let's wrap it up
303							qq[\n],
304							$self->html_javascript,
305							);
306	10					111	$after =~ s{()}{$link\n$1}i; # otherwise nevermind
307							}
308
309	17		50			49	print {$self->{'output_fh'}}
	17					59
310							$self->html_header_before_title \|\| '',
311							$title, # already escaped
312							$after,
313							;
314
315	17					34	DEBUG and print STDERR "Returning from do_beginning...\n";
316	17					109	return 1;
317							}
318
319							sub _add_top_anchor {
320	17			17		39	my($self, $text_r) = @_;
321	17	100	66			135	unless($$text_r and $$text_r =~ m/name=['"]___top['"]/) { # a hack
322	7		50			18	$$text_r .= $self->top_anchor \|\| '';
323							}
324	17					34	return;
325							}
326
327							sub version_tag_comment {
328	60			60	0	104	my $self = shift;
329							return sprintf
330							"\n",
331							esc(
332							ref($self), $self->VERSION(), $ISA[0], $ISA[0]->VERSION(),
333	60		33			1433	$], scalar(gmtime($ENV{SOURCE_DATE_EPOCH} \|\| time)),
334							), $self->_modnote(),
335							;
336							}
337
338							sub _modnote {
339	60		33	60		170	my $class = ref($_[0]) \|\| $_[0];
340	60					1229	return join "\n " => grep m/\S/, split "\n",
341
342							qq{
343							If you want to change this HTML document, you probably shouldn't do that
344							by changing it directly. Instead, see about changing the calling options
345							to $class, and/or subclassing $class,
346							then reconverting this document from the Pod source.
347							When in doubt, email the author of $class for advice.
348							See 'perldoc $class' for more info.
349							};
350
351							}
352
353							sub do_end {
354	17			17	0	29	my $self = $_[0];
355	17		50			24	print {$self->{'output_fh'}} $self->html_footer \|\| '';
	17					74
356	17					254	return 1;
357							}
358
359							# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
360							# Normally this would just be a call to _do_middle_main_loop -- but we
361							# have to do some elaborate things to emit all the content and then
362							# summarize it and output it /before/ the content that it's a summary of.
363
364							sub do_middle {
365	59			59	0	78	my $self = $_[0];
366	59	100				128	return $self->_do_middle_main_loop unless $self->index;
367
368	10	50				39	if( $self->output_string ) {
369							# An efficiency hack
370	0					0	my $out = $self->output_string; #it's a reference to it
371	0					0	my $sneakytag = "\f\f\e\e\b\bIndex Here\e\e\b\b\f\f\n";
372	0					0	$$out .= $sneakytag;
373	0					0	$self->_do_middle_main_loop;
374	0					0	$sneakytag = quotemeta($sneakytag);
375	0					0	my $index = $self->index_as_html();
376	0	0				0	if( $$out =~ s/$sneakytag/$index/s ) {
377							# Expected case
378	0					0	DEBUG and print STDERR "Inserted ", length($index), " bytes of index HTML into $out.\n";
379							} else {
380	0					0	DEBUG and print STDERR "Odd, couldn't find where to insert the index in the output!\n";
381							# I don't think this should ever happen.
382							}
383	0					0	return 1;
384							}
385
386	10	50				34	unless( $self->output_fh ) {
387	0					0	require Carp;
388	0					0	Carp::confess("Parser object \$p doesn't seem to have any output object! I don't know how to deal with that.");
389							}
390
391							# If we get here, we're outputting to a FH. So we need to do some magic.
392							# Namely, divert all content to a string, which we output after the index.
393	10					25	my $fh = $self->output_fh;
394	10					22	my $content = '';
395							{
396							# Our horrible bait and switch:
397	10					18	$self->output_string( \$content );
	10					41
398	10					70	$self->_do_middle_main_loop;
399	10					61	$self->abandon_output_string();
400	10					25	$self->output_fh($fh);
401							}
402	10					39	print $fh $self->index_as_html();
403	10					26	print $fh $content;
404
405	10					62	return 1;
406							}
407
408							###########################################################################
409
410							sub index_as_html {
411	10			10	0	18	my $self = $_[0];
412							# This is meant to be called AFTER the input document has been parsed!
413
414	10		50			200	my $points = $self->{'PSHTML_index_points'} \|\| [];
415
416	10	100				57	@$points > 1 or return qq[ \n];
417							# There's no point in having a 0-item or 1-item index, I dare say.
418
419	8					31	my(@out) = qq{\n };
420	8					16	my $level = 0;
421
422	8					49	my( $target_level, $previous_tagname, $tagname, $text, $anchorname, $indent);
423	8					35	foreach my $p (@$points, ['head0', '(end)']) {
424	26					102	($tagname, $text) = @$p;
425	26					71	$anchorname = $self->section_escape($text);
426	26	50				140	if( $tagname =~ m{^head(\d+)$} ) {
427	26					83	$target_level = 0 + $1;
428							} else { # must be some kinda list item
429	0	0				0	if($previous_tagname =~ m{^head\d+$} ) {
430	0					0	$target_level = $level + 1;
431							} else {
432	0					0	$target_level = $level; # no change needed
433							}
434							}
435
436							# Get to target_level by opening or closing ULs
437	26					63	while($level > $target_level)
438	9					19	{ --$level; push @out, (" " x $level) . ""; }
	9					36
439	26					65	while($level < $target_level)
440	9					13	{ ++$level; push @out, (" " x ($level-1))
	9					53
441							. " "; }
442
443	26					45	$previous_tagname = $tagname;
444	26	100				58	next unless $level;
445
446	18					38	$indent = ' ' x $level;
447	18					118	push @out, sprintf
448							"%s %s",
449							$indent, $level, esc($anchorname), esc($text)
450							;
451							}
452	8					23	push @out, "\n";
453	8					57	return join "\n", @out;
454							}
455
456							###########################################################################
457
458							sub _do_middle_main_loop {
459	59			59		134	my $self = $_[0];
460	59					82	my $fh = $self->{'output_fh'};
461	59					81	my $tagmap = $self->{'Tagmap'};
462
463	59					133	$self->__adjust_html_h_levels;
464
465	59					125	my($token, $type, $tagname, $linkto, $linktype);
466	59					0	my @stack;
467	59					74	my $dont_wrap = 0;
468
469	59					169	while($token = $self->get_token) {
470
471							# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
472	522	100				1072	if( ($type = $token->type) eq 'start' ) {
		100
		50
473	203	100	100			488	if(($tagname = $token->tagname) eq 'L') {
		100
		100
474	22		50			48	$linktype = $token->attr('type') \|\| 'insane';
475
476	22					45	$linkto = $self->do_link($token);
477
478	22	50	33			55	if(defined $linkto and length $linkto) {
479	22					44	esc($linkto);
480							# (Yes, SGML-escaping applies on top of %-escaping!
481							# But it's rarely noticeable in practice.)
482	22					94	print $fh qq{};
483							} else {
484	0					0	print $fh ""; # Yes, an 'a' element with no attributes!
485							}
486
487							} elsif ($tagname eq 'item-text' or $tagname =~ m/^head\d$/s) {
488	37		50			229	print $fh $tagmap->{$tagname} \|\| next;
489
490	37					61	my @to_unget;
491	37					49	while(1) {
492	85					177	push @to_unget, $self->get_token;
493	85	100	100			256	last if $to_unget[-1]->is_end
494							and $to_unget[-1]->tagname eq $tagname;
495
496							# TODO: support for X<...>'s found in here? (maybe hack into linearize_tokens)
497							}
498
499	37					141	my $name = $self->linearize_tokens(@to_unget);
500	37	50				134	$name = $self->do_section($name, $token) if defined $name;
501
502	37					123	print $fh "
503	37	100				177	if ($tagname =~ m/^head\d$/s) {
504	33	100				117	print $fh "class='u'", $self->index
505							? " href='#___top' title='click to go to top of document'\n"
506							: "\n";
507							}
508
509	37	50				82	if(defined $name) {
510	37					115	my $esc = esc( $self->section_name_tidy( $name ) );
511	37					159	print $fh qq[name="$esc"];
512	37					63	DEBUG and print STDERR "Linearized ", scalar(@to_unget),
513							" tokens as \"$name\".\n";
514	31					142	push @{ $self->{'PSHTML_index_points'} }, [$tagname, $name]
515	37	100				133	if $ToIndex{ $tagname };
516							# Obviously, this discards all formatting codes (saving
517							# just their content), but ahwell.
518
519							} else { # ludicrously long, so nevermind
520	0					0	DEBUG and print STDERR "Linearized ", scalar(@to_unget),
521							" tokens, but it was too long, so nevermind.\n";
522							}
523	37					94	print $fh "\n>";
524	37					164	$self->unget_token(@to_unget);
525
526							} elsif ($tagname eq 'Data') {
527	4					9	my $next = $self->get_token;
528	4	50				10	next unless defined $next;
529	4	50				17	unless( $next->type eq 'text' ) {
530	0					0	$self->unget_token($next);
531	0					0	next;
532							}
533	4					5	DEBUG and print STDERR " raw text ", $next->text, "\n";
534							# The parser sometimes preserves newlines and sometimes doesn't!
535	4					7	(my $text = $next->text) =~ s/\n\z//;
536	4					12	print $fh $text, "\n";
537	4					9	next;
538
539							} else {
540	140	100	33			459	if( $tagname =~ m/^over-/s ) {
		50	33
541	3					9	push @stack, '';
542							} elsif( $tagname =~ m/^item-/s and @stack and $stack[-1] ) {
543	0					0	print $fh $stack[-1];
544	0					0	$stack[-1] = '';
545							}
546	140		100			556	print $fh $tagmap->{$tagname} \|\| next;
547	80	100	66			524	++$dont_wrap if $tagname eq 'Verbatim' or $tagname eq "VerbatimFormatted"
			100
548							or $tagname eq 'X';
549							}
550
551							# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
552							} elsif( $type eq 'end' ) {
553	203	100	66			420	if( ($tagname = $token->tagname) =~ m/^over-/s ) {
		100
554	3	50				11	if( my $end = pop @stack ) {
555	3					9	print $fh $end;
556							}
557							} elsif( $tagname =~ m/^item-/s and @stack) {
558	4					13	$stack[-1] = $tagmap->{"/$tagname"};
559	4	50	33			19	if( $tagname eq 'item-text' and defined(my $next = $self->get_token) ) {
560	4					14	$self->unget_token($next);
561	4	100				10	if( $next->type eq 'start' ) {
562	3					12	print $fh $tagmap->{"/item-text"},$tagmap->{"item-body"};
563	3					32	$stack[-1] = $tagmap->{"/item-body"};
564							}
565							}
566	4					15	next;
567							}
568	199		100			696	print $fh $tagmap->{"/$tagname"} \|\| next;
569	139	100	66			545	--$dont_wrap if $tagname eq 'Verbatim' or $tagname eq 'X';
570
571							# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
572							} elsif( $type eq 'text' ) {
573	116					274	esc($type = $token->text); # reuse $type, why not
574	116	100				480	$type =~ s/([\?\!\"\'\.\,]) /$1\n/g unless $dont_wrap;
575	116					339	print $fh $type;
576							}
577
578							}
579	59					197	return 1;
580							}
581
582							###########################################################################
583							#
584
585							sub do_section {
586	36			36	0	88	my($self, $name, $token) = @_;
587	36					82	return $name;
588							}
589
590							sub do_link {
591	22			22	0	33	my($self, $token) = @_;
592	22					29	my $type = $token->attr('type');
593	22	50				55	if(!defined $type) {
		100
		100
		50
594	0					0	$self->whine("Typeless L!?", $token->attr('start_line'));
595	10					22	} elsif( $type eq 'pod') { return $self->do_pod_link($token);
596	9					13	} elsif( $type eq 'url') { return $self->do_url_link($token);
597	3					9	} elsif( $type eq 'man') { return $self->do_man_link($token);
598							} else {
599	0					0	$self->whine("L of unknown type $type!?", $token->attr('start_line'));
600							}
601	0					0	return 'FNORG'; # should never get called
602							}
603
604							# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
605
606	9			9	0	21	sub do_url_link { return $_[1]->attr('to') }
607
608							sub do_man_link {
609	3			3	0	6	my ($self, $link) = @_;
610	3					4	my $to = $link->attr('to');
611	3					6	my $frag = $link->attr('section');
612
613	3	50	33			11	return undef unless defined $to and length $to; # should never happen
614
615	3	100	66			9	$frag = $self->section_escape($frag)
616							if defined $frag and length($frag .= ''); # (stringify)
617
618	3					4	DEBUG and print STDERR "Resolving \"$to/$frag\"\n\n";
619
620	3					7	return $self->resolve_man_page_link($to, $frag);
621							}
622
623
624							sub do_pod_link {
625							# And now things get really messy...
626	10			10	0	13	my($self, $link) = @_;
627	10					15	my $to = $link->attr('to');
628	10					33	my $section = $link->attr('section');
629							return undef unless( # should never happen
630	10	50	66			40	(defined $to and length $to) or
			33
			66
631							(defined $section and length $section)
632							);
633
634	10	100	66			53	$section = $self->section_escape($section)
635							if defined $section and length($section .= ''); # (stringify)
636
637	10					12	DEBUG and printf STDERR "Resolving \"%s\" \"%s\"...\n",
638							$to \|\| "(nil)", $section \|\| "(nil)";
639
640							{
641							# An early hack:
642	10					10	my $complete_url = $self->resolve_pod_link_by_table($to, $section);
	10					23
643	10	50				14	if( $complete_url ) {
644	0					0	DEBUG > 1 and print STDERR "resolve_pod_link_by_table(T,S) gives ",
645							$complete_url, "\n (Returning that.)\n";
646	0					0	return $complete_url;
647							} else {
648	10					12	DEBUG > 4 and print STDERR " resolve_pod_link_by_table(T,S)",
649							" didn't return anything interesting.\n";
650							}
651							}
652
653	10	100	66			22	if(defined $to and length $to) {
654							# Give this routine first hack again
655	7					11	my $there = $self->resolve_pod_link_by_table($to);
656	7	50	33			16	if(defined $there and length $there) {
657	0					0	DEBUG > 1
658							and print STDERR "resolve_pod_link_by_table(T) gives $there\n";
659							} else {
660	7					18	$there =
661							$self->resolve_pod_page_link($to, $section);
662							# (I pass it the section value, but I don't see a
663							# particular reason it'd use it.)
664	7					8	DEBUG > 1 and print STDERR "resolve_pod_page_link gives ", $there \|\| "(nil)", "\n";
665	7	50	33			20	unless( defined $there and length $there ) {
666	0					0	DEBUG and print STDERR "Can't resolve $to\n";
667	0					0	return undef;
668							}
669							# resolve_pod_page_link returning undef is how it
670							# can signal that it gives up on making a link
671							}
672	7					9	$to = $there;
673							}
674
675							#DEBUG and print STDERR "So far [", $to\|\|'nil', "] [", $section\|\|'nil', "]\n";
676
677	10	100	66			28	my $out = (defined $to and length $to) ? $to : '';
678	10	100	66			30	$out .= "#" . $section if defined $section and length $section;
679
680	10	50				15	unless(length $out) { # sanity check
681	0					0	DEBUG and printf STDERR "Oddly, couldn't resolve \"%s\" \"%s\"...\n",
682							$to \|\| "(nil)", $section \|\| "(nil)";
683	0					0	return undef;
684							}
685
686	10					11	DEBUG and print STDERR "Resolved to $out\n";
687	10					20	return $out;
688							}
689
690
691							# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
692
693							sub section_escape {
694	32			32	0	65	my($self, $section) = @_;
695	32					73	return $self->section_url_escape(
696							$self->section_name_tidy($section)
697							);
698							}
699
700							sub section_name_tidy {
701	69			69	0	143	my($self, $section) = @_;
702	69					223	$section =~ s/^\s+//;
703	69					190	$section =~ s/\s+$//;
704	69					142	$section =~ tr/ /_/;
705	69	50				366	if ("$]" >= 5.006) {
706	69					164	$section =~ s/[[:cntrl:][:^ascii:]]//g; # drop crazy characters
707							} elsif ('A' eq chr(65)) { # But not on early EBCDIC
708	0					0	$section =~ tr/\x00-\x1F\x80-\x9F//d;
709							}
710	69					181	$section = $self->unicode_escape_url($section);
711	69	50				217	$section = '_' unless length $section;
712	69					217	return $section;
713							}
714
715	32			32	0	86	sub section_url_escape { shift->general_url_escape(@_) }
716	7			7	0	22	sub pagepath_url_escape { shift->general_url_escape(@_) }
717	3			3	0	6	sub manpage_url_escape { shift->general_url_escape(@_) }
718
719							sub general_url_escape {
720	42			42	0	77	my($self, $string) = @_;
721
722	42					75	$string =~ s/([^\x00-\xFF])/join '', map sprintf('%%%02X',$_), unpack 'C*', $1/eg;
	0					0
723							# express Unicode things as urlencode(utf(orig)).
724
725							# A pretty conservative escaping, behoovey even for query components
726							# of a URL (see RFC 2396)
727
728	42	50				139	if ("$]" >= 5.007_003) {
729	42					80	$string =~ s/([^-_\.!~*()abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/sprintf('%%%02X',utf8::native_to_unicode(ord($1)))/eg;
	8					30
730							} else { # Is broken for non-ASCII platforms on early perls
731	0					0	$string =~ s/([^-_\.!~*()abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/sprintf('%%%02X',ord($1))/eg;
	0					0
732							}
733							# Yes, stipulate the list without a range, so that this can work right on
734							# all charsets that this module happens to run under.
735
736	42					102	return $string;
737							}
738
739							#--------------------------------------------------------------------------
740							#
741							# Oh look, a yawning portal to Hell! Let's play touch football right by it!
742							#
743
744							sub resolve_pod_page_link {
745							# resolve_pod_page_link must return a properly escaped URL
746	7			7	0	29	my $self = shift;
747	7	50				19	return $self->batch_mode()
748							? $self->resolve_pod_page_link_batch_mode(@_)
749							: $self->resolve_pod_page_link_singleton_mode(@_)
750							;
751							}
752
753							sub resolve_pod_page_link_singleton_mode {
754	7			7	0	16	my($self, $it) = @_;
755	7	50	33			18	return undef unless defined $it and length $it;
756	7					22	my $url = $self->pagepath_url_escape($it);
757
758	7					15	$url =~ s{::$}{}s; # probably never comes up anyway
759	7	50				15	$url =~ s{::}{/}g unless $self->perldoc_url_prefix =~ m/\?/s; # sane DWIM?
760
761	7	50				14	return undef unless length $url;
762	7					13	return $self->perldoc_url_prefix . $url . $self->perldoc_url_postfix;
763							}
764
765							sub resolve_pod_page_link_batch_mode {
766	0			0	0	0	my($self, $to) = @_;
767	0					0	DEBUG > 1 and print STDERR " During batch mode, resolving $to ...\n";
768	0					0	my @path = grep length($_), split m/::/s, $to, -1;
769	0	0				0	unless( @path ) { # sanity
770	0					0	DEBUG and print STDERR "Very odd! Splitting $to gives (nil)!\n";
771	0					0	return undef;
772							}
773	0					0	$self->batch_mode_rectify_path(\@path);
774	0					0	my $out = join('/', map $self->pagepath_url_escape($_), @path)
775							. $HTML_EXTENSION;
776	0					0	DEBUG > 1 and print STDERR " => $out\n";
777	0					0	return $out;
778							}
779
780							sub batch_mode_rectify_path {
781	0			0	0	0	my($self, $pathbits) = @_;
782	0					0	my $level = $self->batch_mode_current_level;
783	0					0	$level--; # how many levels up to go to get to the root
784	0	0				0	if($level < 1) {
785	0					0	unshift @$pathbits, '.'; # just to be pretty
786							} else {
787	0					0	unshift @$pathbits, ('..') x $level;
788							}
789	0					0	return;
790							}
791
792							sub resolve_man_page_link {
793	3			3	0	15	my ($self, $to, $frag) = @_;
794	3					6	my ($page, $section) = $to =~ /^([^(]+)(?:[(](\d+)[)])?$/;
795
796	3	50	33			22	return undef unless defined $page and length $page;
797	3		50			6	$section \|\|= 1;
798
799	3					8	return $self->man_url_prefix . "$section/"
800							. $self->manpage_url_escape($page) . ".$section"
801							. $self->man_url_postfix;
802							}
803
804							#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
805
806							sub resolve_pod_link_by_table {
807							# A crazy hack to allow specifying custom L => URL mappings
808
809	17	50		17	0	39	return unless $_[0]->{'podhtml_LOT'}; # An optimizy shortcut
810
811	0					0	my($self, $to, $section) = @_;
812
813							# TODO: add a method that actually populates podhtml_LOT from a file?
814
815	0	0				0	if(defined $section) {
816	0	0	0			0	$to = '' unless defined $to and length $to;
817	0					0	return $self->{'podhtml_LOT'}{"$to#$section"}; # quite possibly undef!
818							} else {
819	0					0	return $self->{'podhtml_LOT'}{$to}; # quite possibly undef!
820							}
821	0					0	return;
822							}
823
824							###########################################################################
825
826							sub linearize_tokens { # self, tokens
827	37			37	0	62	my $self = shift;
828	37					64	my $out = '';
829
830	37					57	my $t;
831	37					91	while($t = shift @_) {
832	79	50	33			493	if(!ref $t or !UNIVERSAL::can($t, 'is_text')) {
		100	66
		100
833	0					0	$out .= $t; # a string, or some insane thing
834							} elsif($t->is_text) {
835	39					104	$out .= $t->text;
836							} elsif($t->is_start and $t->tag eq 'X') {
837							# Ignore until the end of this X<...> sequence:
838	3					4	my $x_open = 1;
839	3					4	while($x_open) {
840	6	100				10	next if( ($t = shift @_)->is_text );
841	3	50	33			4	if( $t->is_start and $t->tag eq 'X') { ++$x_open }
	0	50	33			0
842	3					7	elsif($t->is_end and $t->tag eq 'X') { --$x_open }
843							}
844							}
845							}
846	37	50				164	return undef if length $out > $Linearization_Limit;
847	37					89	return $out;
848							}
849
850							#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
851
852							sub unicode_escape_url {
853	69			69	0	138	my($self, $string) = @_;
854	69					157	$string =~ s/([^\x00-\xFF])/'('.ord($1).')'/eg;
	0					0
855							# Turn char 1234 into "(1234)"
856	69					174	return $string;
857							}
858
859							#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
860							sub esc { # a function.
861	464	100		464	0	1073	if(defined wantarray) {
862	326	100				612	if(wantarray) {
863	275					935	@_ = splice @_; # break aliasing
864							} else {
865	51					122	my $x = shift;
866	51	50				208	if ("$]" >= 5.007_003) {
867	51					124	$x =~ s/([^-\n\t !\#\$\%\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(utf8::native_to_unicode(ord($1))).';'/eg;
	0					0
868							} else { # Is broken for non-ASCII platforms on early perls
869	0					0	$x =~ s/([^-\n\t !\#\$\%\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg;
	0					0
870							}
871	51					179	return $x;
872							}
873							}
874	413					751	foreach my $x (@_) {
875							# Escape things very cautiously:
876	1101	50				1942	if (defined $x) {
877	1101	50				3170	if ("$]" >= 5.007_003) {
878	1101					2319	$x =~ s/([^-\n\t !\#\$\%\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(utf8::native_to_unicode(ord($1))).';'/eg
	52					205
879							} else { # Is broken for non-ASCII platforms on early perls
880	0					0	$x =~ s/([^-\n\t !\#\$\%\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg
	0					0
881							}
882							}
883							# Leave out "- so that "--" won't make it thru in X-generated comments
884							# with text in them.
885
886							# Yes, stipulate the list without a range, so that this can work right on
887							# all charsets that this module happens to run under.
888							}
889	413					1646	return @_;
890							}
891
892							#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
893
894							1;
895							__END__

File Coverage

", # And also stick in an

", # ''

", # ''

", # ''

", # ''

", # ''