File Coverage

blib/lib/Pod/Simple/HTML.pm

Criterion	Covered	Total	%
statement	288	359	80.2
branch	123	180	68.3
condition	66	121	54.5
subroutine	36	39	92.3
pod	1	31	3.2
total	514	730	70.4

line	stmt	bran	cond	sub	pod	time	code
1							package Pod::Simple::HTML;
2	7			7		260134	use strict;
	7					16
	7					275
3	7			7		44	use warnings;
	7					11
	7					322
4	7			7		3738	use Pod::Simple::PullParser ();
	7					19
	7					684
5							our @ISA = ('Pod::Simple::PullParser');
6							our $VERSION = '3.48';
7							BEGIN {
8	7	50		7		38	if(defined &DEBUG) { } # no-op
		50
9	7					40806	elsif( defined &Pod::Simple::DEBUG ) { *DEBUG = \&Pod::Simple::DEBUG }
10	0					0	else { *DEBUG = sub () {0}; }
11							}
12
13							our $Doctype_decl \|\|= ''; # No. Just No. Don't even ask me for it.
14							# qq{
15							# "http://www.w3.org/TR/html4/loose.dtd">\n};
16
17							our $Content_decl \|\|=
18							q{};
19
20							our $HTML_EXTENSION;
21							$HTML_EXTENSION = '.html' unless defined $HTML_EXTENSION;
22							our $Computerese;
23							$Computerese = "" unless defined $Computerese;
24							our $LamePad;
25							$LamePad = '' unless defined $LamePad;
26
27							our $Linearization_Limit;
28							$Linearization_Limit = 120 unless defined $Linearization_Limit;
29							# headings/items longer than that won't get an
30							our $Perldoc_URL_Prefix;
31							$Perldoc_URL_Prefix = 'https://metacpan.org/pod/'
32							unless defined $Perldoc_URL_Prefix;
33							our $Perldoc_URL_Postfix;
34							$Perldoc_URL_Postfix = ''
35							unless defined $Perldoc_URL_Postfix;
36
37
38							our $Man_URL_Prefix = 'https://man7.org/linux/man-pages/man';
39							our $Man_URL_Postfix = '.html';
40
41							our $Title_Prefix;
42							$Title_Prefix = '' unless defined $Title_Prefix;
43							our $Title_Postfix;
44							$Title_Postfix = '' unless defined $Title_Postfix;
45							our %ToIndex = map {; $_ => 1 } qw(head1 head2 head3 head4 ); # item-text
46							# 'item-text' stuff in the index doesn't quite work, and may
47							# not be a good idea anyhow.
48
49
50							__PACKAGE__->_accessorize(
51							'perldoc_url_prefix',
52							# In turning L into http://whatever/Foo%3a%3aBar, what
53							# to put before the "Foo%3a%3aBar".
54							# (for singleton mode only?)
55							'perldoc_url_postfix',
56							# what to put after "Foo%3a%3aBar" in the URL. Normally "".
57
58							'man_url_prefix',
59							# In turning L into http://whatever/man/1/crontab, what
60							# to put before the "1/crontab".
61							'man_url_postfix',
62							# what to put after the "1/crontab" in the URL. Normally ".html".
63
64							'batch_mode', # whether we're in batch mode
65							'batch_mode_current_level',
66							# When in batch mode, how deep the current module is: 1 for "LWP",
67							# 2 for "LWP::Procotol", 3 for "LWP::Protocol::GHTTP", etc
68
69							'title_prefix', 'title_postfix',
70							# What to put before and after the title in the head.
71							# Should already be &-escaped
72
73							'html_h_level',
74
75							'html_header_before_title',
76							'html_header_after_title',
77							'html_footer',
78							'top_anchor',
79
80							'index', # whether to add an index at the top of each page
81							# (actually it's a table-of-contents, but we'll call it an index,
82							# out of apparently longstanding habit)
83
84							'html_css', # URL of CSS file to point to
85							'html_javascript', # URL of Javascript file to point to
86
87							'force_title', # should already be &-escaped
88							'default_title', # should already be &-escaped
89							);
90
91							#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
92							my @_to_accept;
93
94							our %Tagmap = (
95							'Verbatim' => "\n",
96							'/Verbatim' => "\n",
97							'VerbatimFormatted' => "\n",
98							'/VerbatimFormatted' => "\n",
99							'VerbatimB' => "",
100							'/VerbatimB' => "",
101							'VerbatimI' => "",
102							'/VerbatimI' => "",
103							'VerbatimBI' => "",
104							'/VerbatimBI' => "",
105
106
107							'Data' => "\n",
108							'/Data' => "\n",
109
110							'head1' => "\n ", # And also stick in an
111							'head2' => "\n ", # ''
112							'head3' => "\n ", # ''
113							'head4' => "\n ", # ''
114							'head5' => "\n ", # ''
115							'head6' => "\n ", # ''
116							'/head1' => "\n",
117							'/head2' => "\n",
118							'/head3' => "\n",
119							'/head4' => "\n",
120							'/head5' => "\n",
121							'/head6' => "\n",
122
123							'X' => "",
125
126							changes(qw(
127							Para=p
128							B=b I=i U=u
129							over-bullet=ul
130							over-number=ol
131							over-text=dl
132							over-block=blockquote
133							item-bullet=li
134							item-number=li
135							item-text=dt
136							)),
137							changes2(
138							map {; m/^([-a-z]+)/s && push @_to_accept, $1; $_ }
139							qw[
140							sample=samp
141							definition=dfn
142							keyboard=kbd
143							variable=var
144							citation=cite
145							abbreviation=abbr
146							acronym=acronym
147							subscript=sub
148							superscript=sup
149							big=big
150							small=small
151							underline=u
152							strikethrough=s
153							preformat=pre
154							teletype=tt
155							] # no point in providing a way to get ..., I think
156							),
157
158							'/item-bullet' => "$LamePad\n",
159							'/item-number' => "$LamePad\n",
160							'/item-text' => "$LamePad\n",
161							'item-body' => "\n ",
162							'/item-body' => "\n",
163
164
165							'B' => "", '/B' => "",
166							'I' => "", '/I' => "",
167							'U' => "", '/U' => "",
168							'F' => "", '/F' => "",
169							'C' => "", '/C' => "",
170							'L' => "", # ideally never used!
171							'/L' => "",
172							);
173
174							sub changes {
175	7	50		7	0	13	return map {; m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s
	77					428
176							? ( $1, => "\n<$2>", "/$1", => "\n" ) : die "Funky $_"
177							} @_;
178							}
179							sub changes2 {
180	7	50		7	0	12	return map {; m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s
	105					781
181							? ( $1, => "<$2>", "/$1", => "" ) : die "Funky $_"
182							} @_;
183							}
184
185							#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
186	0			0	0	0	sub go { Pod::Simple::HTML->parse_from_file(@ARGV); exit 0 }
	0					0
187							# Just so we can run from the command line. No options.
188							# For that, use perldoc!
189							#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
190
191							sub new {
192	60			60	1	520	my $new = shift->SUPER::new(@_);
193							#$new->nix_X_codes(1);
194	60					153	$new->nbsp_for_S(1);
195	60					140	$new->accept_targets( 'html', 'HTML' );
196	60					138	$new->accept_codes('VerbatimFormatted');
197	60					149	$new->accept_codes(@_to_accept);
198	60					72	DEBUG > 2 and print STDERR "To accept: ", join(' ',@_to_accept), "\n";
199
200	60					225	$new->perldoc_url_prefix( $Perldoc_URL_Prefix );
201	60					124	$new->perldoc_url_postfix( $Perldoc_URL_Postfix );
202	60					110	$new->man_url_prefix( $Man_URL_Prefix );
203	60					127	$new->man_url_postfix( $Man_URL_Postfix );
204	60					146	$new->title_prefix( $Title_Prefix );
205	60					123	$new->title_postfix( $Title_Postfix );
206
207	60					167	$new->html_header_before_title(
208							qq[$Doctype_decl] </td> </tr> <tr> <td class="h" > <a name="209">209</a> </td> <td > </td> <td > </td> <td > </td> <td > </td> <td > </td> <td > </td> <td class="s"> ); </td> </tr> <tr> <td class="h" > <a name="210">210</a> </td> <td class="c3" > 60 </td> <td > </td> <td > </td> <td > </td> <td > </td> <td > 162 </td> <td class="s"> $new->html_header_after_title( join "\n" => </td> </tr> <tr> <td class="h" > <a name="211">211</a> </td> <td > </td> <td > </td> <td > </td> <td > </td> <td > </td> <td > </td> <td class="s"> "",
212							$Content_decl,
213							"\n",
214							$new->version_tag_comment,
215							"\n",
216							);
217	60					155	$new->html_footer( qq[\n\n\n\n] );
218	60					116	$new->top_anchor( "\n" );
219
220	60					2210	$new->{'Tagmap'} = {%Tagmap};
221
222	60					300	return $new;
223							}
224
225							sub __adjust_html_h_levels {
226	59			59		87	my ($self) = @_;
227	59					84	my $Tagmap = $self->{'Tagmap'};
228
229	59					96	my $add = $self->html_h_level;
230	59	100				128	return unless defined $add;
231	1	50	50			5	return if ($self->{'Adjusted_html_h_levels'}\|\|0) == $add;
232
233	1					1	$add -= 1;
234	1					3	for (1 .. 6) {
235	6					41	$Tagmap->{"head$_"} =~ s/$_/$_ + $add/e;
	6					9
236	6					35	$Tagmap->{"/head$_"} =~ s/$_/$_ + $add/e;
	6					10
237							}
238							}
239
240							sub batch_mode_page_object_init {
241	10			10	0	20	my($self, $batchconvobj, $module, $infile, $outfile, $depth) = @_;
242	10					9	DEBUG and print STDERR "Initting $self\n for $module\n",
243							" in $infile\n out $outfile\n depth $depth\n";
244	10					19	$self->batch_mode(1);
245	10					18	$self->batch_mode_current_level($depth);
246	10					15	return $self;
247							}
248
249							sub run {
250	60			60	0	68	my $self = $_[0];
251	60	100				135	return $self->do_middle if $self->bare_output;
252							return
253	18		100			35	$self->do_beginning && $self->do_middle && $self->do_end;
254							}
255
256							#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
257
258							sub do_beginning {
259	18			18	0	21	my $self = $_[0];
260
261	18					20	my $title;
262
263	18	50				35	if(defined $self->force_title) {
264	0					0	$title = $self->force_title;
265	0					0	DEBUG and print STDERR "Forcing title to be $title\n";
266							} else {
267							# Actually try looking for the title in the document:
268	18					60	$title = $self->get_short_title();
269	18	100				55	unless($self->content_seen) {
270	1					2	DEBUG and print STDERR "No content seen in search for title.\n";
271	1					5	return;
272							}
273	17					37	$self->{'Title'} = $title;
274
275	17	100	66			57	if(defined $title and $title =~ m/\S/) {
276	14					35	$title = $self->title_prefix . esc($title) . $self->title_postfix;
277							} else {
278	3					9	$title = $self->default_title;
279	3	50				8	$title = '' unless defined $title;
280	3					5	DEBUG and print STDERR "Title defaults to $title\n";
281							}
282							}
283
284
285	17		50			44	my $after = $self->html_header_after_title \|\| '';
286	17	100				50	if($self->html_css) {
287	10	50				12	my $link =
288							$self->html_css =~ m/
289							? $self->html_css # It's a big blob of markup, let's drop it in
290							: sprintf( # It's just a URL, so let's wrap it up
291							qq[\n],
292							$self->html_css,
293							);
294	10					90	$after =~ s{()}{$link\n$1}i; # otherwise nevermind
295							}
296	17					52	$self->_add_top_anchor(\$after);
297
298	17	100				34	if($self->html_javascript) {
299	10	50				15	my $link =
300							$self->html_javascript =~ m/
301							? $self->html_javascript # It's a big blob of markup, let's drop it in
302							: sprintf( # It's just a URL, so let's wrap it up
303							qq[\n],
304							$self->html_javascript,
305							);
306	10					71	$after =~ s{()}{$link\n$1}i; # otherwise nevermind
307							}
308
309	17		50			21	print {$self->{'output_fh'}}
	17					38
310							$self->html_header_before_title \|\| '',
311							$title, # already escaped
312							$after,
313							;
314
315	17					25	DEBUG and print STDERR "Returning from do_beginning...\n";
316	17					55	return 1;
317							}
318
319							sub _add_top_anchor {
320	17			17		26	my($self, $text_r) = @_;
321	17	100	66			100	unless($$text_r and $$text_r =~ m/name=['"]___top['"]/) { # a hack
322	7		50			14	$$text_r .= $self->top_anchor \|\| '';
323							}
324	17					24	return;
325							}
326
327							sub version_tag_comment {
328	60			60	0	79	my $self = shift;
329							return sprintf
330							"\n",
331							esc(
332							ref($self), $self->VERSION(), $ISA[0], $ISA[0]->VERSION(),
333	60		33			1227	$], scalar(gmtime($ENV{SOURCE_DATE_EPOCH} \|\| time)),
334							), $self->_modnote(),
335							;
336							}
337
338							sub _modnote {
339	60		33	60		128	my $class = ref($_[0]) \|\| $_[0];
340	60					1035	return join "\n " => grep m/\S/, split "\n",
341
342							qq{
343							If you want to change this HTML document, you probably shouldn't do that
344							by changing it directly. Instead, see about changing the calling options
345							to $class, and/or subclassing $class,
346							then reconverting this document from the Pod source.
347							When in doubt, email the author of $class for advice.
348							See 'perldoc $class' for more info.
349							};
350
351							}
352
353							sub do_end {
354	17			17	0	20	my $self = $_[0];
355	17		50			19	print {$self->{'output_fh'}} $self->html_footer \|\| '';
	17					41
356	17					197	return 1;
357							}
358
359							# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
360							# Normally this would just be a call to _do_middle_main_loop -- but we
361							# have to do some elaborate things to emit all the content and then
362							# summarize it and output it /before/ the content that it's a summary of.
363
364							sub do_middle {
365	59			59	0	73	my $self = $_[0];
366	59	100				126	return $self->_do_middle_main_loop unless $self->index;
367
368	10	50				27	if( $self->output_string ) {
369							# An efficiency hack
370	0					0	my $out = $self->output_string; #it's a reference to it
371	0					0	my $sneakytag = "\f\f\e\e\b\bIndex Here\e\e\b\b\f\f\n";
372	0					0	$$out .= $sneakytag;
373	0					0	$self->_do_middle_main_loop;
374	0					0	$sneakytag = quotemeta($sneakytag);
375	0					0	my $index = $self->index_as_html();
376	0	0				0	if( $$out =~ s/$sneakytag/$index/s ) {
377							# Expected case
378	0					0	DEBUG and print STDERR "Inserted ", length($index), " bytes of index HTML into $out.\n";
379							} else {
380	0					0	DEBUG and print STDERR "Odd, couldn't find where to insert the index in the output!\n";
381							# I don't think this should ever happen.
382							}
383	0					0	return 1;
384							}
385
386	10	50				20	unless( $self->output_fh ) {
387	0					0	require Carp;
388	0					0	Carp::confess("Parser object \$p doesn't seem to have any output object! I don't know how to deal with that.");
389							}
390
391							# If we get here, we're outputting to a FH. So we need to do some magic.
392							# Namely, divert all content to a string, which we output after the index.
393	10					14	my $fh = $self->output_fh;
394	10					15	my $content = '';
395							{
396							# Our horrible bait and switch:
397	10					10	$self->output_string( \$content );
	10					25
398	10					23	$self->_do_middle_main_loop;
399	10					24	$self->abandon_output_string();
400	10					98	$self->output_fh($fh);
401							}
402	10					15	print $fh $self->index_as_html();
403	10					14	print $fh $content;
404
405	10					35	return 1;
406							}
407
408							###########################################################################
409
410							sub index_as_html {
411	10			10	0	11	my $self = $_[0];
412							# This is meant to be called AFTER the input document has been parsed!
413
414	10		50			32	my $points = $self->{'PSHTML_index_points'} \|\| [];
415
416	10	100				19	@$points > 1 or return qq[ \n];
417							# There's no point in having a 0-item or 1-item index, I dare say.
418
419	8					18	my(@out) = qq{\n };
420	8					8	my $level = 0;
421
422	8					11	my( $target_level, $previous_tagname, $tagname, $text, $anchorname, $indent);
423	8					20	foreach my $p (@$points, ['head0', '(end)']) {
424	26					38	($tagname, $text) = @$p;
425	26					35	$anchorname = $self->section_escape($text);
426	26	50				70	if( $tagname =~ m{^head(\d+)$} ) {
427	26					38	$target_level = 0 + $1;
428							} else { # must be some kinda list item
429	0	0				0	if($previous_tagname =~ m{^head\d+$} ) {
430	0					0	$target_level = $level + 1;
431							} else {
432	0					0	$target_level = $level; # no change needed
433							}
434							}
435
436							# Get to target_level by opening or closing ULs
437	26					36	while($level > $target_level)
438	9					11	{ --$level; push @out, (" " x $level) . ""; }
	9					19
439	26					33	while($level < $target_level)
440	9					9	{ ++$level; push @out, (" " x ($level-1))
	9					29
441							. " "; }
442
443	26					26	$previous_tagname = $tagname;
444	26	100				38	next unless $level;
445
446	18					26	$indent = ' ' x $level;
447	18					25	push @out, sprintf
448							"%s %s",
449							$indent, $level, esc($anchorname), esc($text)
450							;
451							}
452	8					11	push @out, "\n";
453	8					44	return join "\n", @out;
454							}
455
456							###########################################################################
457
458							sub _do_middle_main_loop {
459	59			59		62	my $self = $_[0];
460	59					73	my $fh = $self->{'output_fh'};
461	59					72	my $tagmap = $self->{'Tagmap'};
462
463	59					112	$self->__adjust_html_h_levels;
464
465	59					101	my($token, $type, $tagname, $linkto, $linktype);
466	59					0	my @stack;
467	59					63	my $dont_wrap = 0;
468
469	59					150	while($token = $self->get_token) {
470
471							# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
472	522	100				864	if( ($type = $token->type) eq 'start' ) {
		100
		50
473	203	100	100			376	if(($tagname = $token->tagname) eq 'L') {
		100
		100
474	22		50			43	$linktype = $token->attr('type') \|\| 'insane';
475
476	22					43	$linkto = $self->do_link($token);
477
478	22	50	33			51	if(defined $linkto and length $linkto) {
479	22					43	esc($linkto);
480							# (Yes, SGML-escaping applies on top of %-escaping!
481							# But it's rarely noticeable in practice.)
482	22					83	print $fh qq{};
483							} else {
484	0					0	print $fh ""; # Yes, an 'a' element with no attributes!
485							}
486
487							} elsif ($tagname eq 'item-text' or $tagname =~ m/^head\d$/s) {
488	37		50			136	print $fh $tagmap->{$tagname} \|\| next;
489
490	37					39	my @to_unget;
491	37					33	while(1) {
492	85					113	push @to_unget, $self->get_token;
493	85	100	100			151	last if $to_unget[-1]->is_end
494							and $to_unget[-1]->tagname eq $tagname;
495
496							# TODO: support for X<...>'s found in here? (maybe hack into linearize_tokens)
497							}
498
499	37					81	my $name = $self->linearize_tokens(@to_unget);
500	37	50				80	$name = $self->do_section($name, $token) if defined $name;
501
502	37					70	print $fh "
503	37	100				94	if ($tagname =~ m/^head\d$/s) {
504	33	100				63	print $fh "class='u'", $self->index
505							? " href='#___top' title='click to go to top of document'\n"
506							: "\n";
507							}
508
509	37	50				47	if(defined $name) {
510	37					77	my $esc = esc( $self->section_name_tidy( $name ) );
511	37					94	print $fh qq[name="$esc"];
512	37					36	DEBUG and print STDERR "Linearized ", scalar(@to_unget),
513							" tokens as \"$name\".\n";
514	31					82	push @{ $self->{'PSHTML_index_points'} }, [$tagname, $name]
515	37	100				95	if $ToIndex{ $tagname };
516							# Obviously, this discards all formatting codes (saving
517							# just their content), but ahwell.
518
519							} else { # ludicrously long, so nevermind
520	0					0	DEBUG and print STDERR "Linearized ", scalar(@to_unget),
521							" tokens, but it was too long, so nevermind.\n";
522							}
523	37					77	print $fh "\n>";
524	37					75	$self->unget_token(@to_unget);
525
526							} elsif ($tagname eq 'Data') {
527	4					7	my $next = $self->get_token;
528	4	50				15	next unless defined $next;
529	4	50				5	unless( $next->type eq 'text' ) {
530	0					0	$self->unget_token($next);
531	0					0	next;
532							}
533	4					5	DEBUG and print STDERR " raw text ", $next->text, "\n";
534							# The parser sometimes preserves newlines and sometimes doesn't!
535	4					6	(my $text = $next->text) =~ s/\n\z//;
536	4					12	print $fh $text, "\n";
537	4					11	next;
538
539							} else {
540	140	100	33			341	if( $tagname =~ m/^over-/s ) {
		50	33
541	3					5	push @stack, '';
542							} elsif( $tagname =~ m/^item-/s and @stack and $stack[-1] ) {
543	0					0	print $fh $stack[-1];
544	0					0	$stack[-1] = '';
545							}
546	140		100			483	print $fh $tagmap->{$tagname} \|\| next;
547	80	100	66			357	++$dont_wrap if $tagname eq 'Verbatim' or $tagname eq "VerbatimFormatted"
			100
548							or $tagname eq 'X';
549							}
550
551							# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
552							} elsif( $type eq 'end' ) {
553	203	100	66			319	if( ($tagname = $token->tagname) =~ m/^over-/s ) {
		100
554	3	50				8	if( my $end = pop @stack ) {
555	3					5	print $fh $end;
556							}
557							} elsif( $tagname =~ m/^item-/s and @stack) {
558	4					25	$stack[-1] = $tagmap->{"/$tagname"};
559	4	50	33			13	if( $tagname eq 'item-text' and defined(my $next = $self->get_token) ) {
560	4					25	$self->unget_token($next);
561	4	100				7	if( $next->type eq 'start' ) {
562	3					8	print $fh $tagmap->{"/item-text"},$tagmap->{"item-body"};
563	3					7	$stack[-1] = $tagmap->{"/item-body"};
564							}
565							}
566	4					9	next;
567							}
568	199		100			490	print $fh $tagmap->{"/$tagname"} \|\| next;
569	139	100	66			409	--$dont_wrap if $tagname eq 'Verbatim' or $tagname eq 'X';
570
571							# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
572							} elsif( $type eq 'text' ) {
573	116					170	esc($type = $token->text); # reuse $type, why not
574	116	100				285	$type =~ s/([\?\!\"\'\.\,]) /$1\n/g unless $dont_wrap;
575	116					195	print $fh $type;
576							}
577
578							}
579	59					183	return 1;
580							}
581
582							###########################################################################
583							#
584
585							sub do_section {
586	36			36	0	57	my($self, $name, $token) = @_;
587	36					46	return $name;
588							}
589
590							sub do_link {
591	22			22	0	34	my($self, $token) = @_;
592	22					27	my $type = $token->attr('type');
593	22	50				52	if(!defined $type) {
		100
		100
		50
594	0					0	$self->whine("Typeless L!?", $token->attr('start_line'));
595	10					20	} elsif( $type eq 'pod') { return $self->do_pod_link($token);
596	9					13	} elsif( $type eq 'url') { return $self->do_url_link($token);
597	3					8	} elsif( $type eq 'man') { return $self->do_man_link($token);
598							} else {
599	0					0	$self->whine("L of unknown type $type!?", $token->attr('start_line'));
600							}
601	0					0	return 'FNORG'; # should never get called
602							}
603
604							# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
605
606	9			9	0	17	sub do_url_link { return $_[1]->attr('to') }
607
608							sub do_man_link {
609	3			3	0	6	my ($self, $link) = @_;
610	3					4	my $to = $link->attr('to');
611	3					4	my $frag = $link->attr('section');
612
613	3	50	33			10	return undef unless defined $to and length $to; # should never happen
614
615	3	100	66			8	$frag = $self->section_escape($frag)
616							if defined $frag and length($frag .= ''); # (stringify)
617
618	3					4	DEBUG and print STDERR "Resolving \"$to/$frag\"\n\n";
619
620	3					7	return $self->resolve_man_page_link($to, $frag);
621							}
622
623
624							sub do_pod_link {
625							# And now things get really messy...
626	10			10	0	17	my($self, $link) = @_;
627	10					22	my $to = $link->attr('to');
628	10					15	my $section = $link->attr('section');
629							return undef unless( # should never happen
630	10	50	66			67	(defined $to and length $to) or
			33
			66
631							(defined $section and length $section)
632							);
633
634	10	100	66			47	$section = $self->section_escape($section)
635							if defined $section and length($section .= ''); # (stringify)
636
637	10					14	DEBUG and printf STDERR "Resolving \"%s\" \"%s\"...\n",
638							$to \|\| "(nil)", $section \|\| "(nil)";
639
640							{
641							# An early hack:
642	10					10	my $complete_url = $self->resolve_pod_link_by_table($to, $section);
	10					20
643	10	50				16	if( $complete_url ) {
644	0					0	DEBUG > 1 and print STDERR "resolve_pod_link_by_table(T,S) gives ",
645							$complete_url, "\n (Returning that.)\n";
646	0					0	return $complete_url;
647							} else {
648	10					13	DEBUG > 4 and print STDERR " resolve_pod_link_by_table(T,S)",
649							" didn't return anything interesting.\n";
650							}
651							}
652
653	10	100	66			21	if(defined $to and length $to) {
654							# Give this routine first hack again
655	7					30	my $there = $self->resolve_pod_link_by_table($to);
656	7	50	33			15	if(defined $there and length $there) {
657	0					0	DEBUG > 1
658							and print STDERR "resolve_pod_link_by_table(T) gives $there\n";
659							} else {
660	7					17	$there =
661							$self->resolve_pod_page_link($to, $section);
662							# (I pass it the section value, but I don't see a
663							# particular reason it'd use it.)
664	7					8	DEBUG > 1 and print STDERR "resolve_pod_page_link gives ", $there \|\| "(nil)", "\n";
665	7	50	33			23	unless( defined $there and length $there ) {
666	0					0	DEBUG and print STDERR "Can't resolve $to\n";
667	0					0	return undef;
668							}
669							# resolve_pod_page_link returning undef is how it
670							# can signal that it gives up on making a link
671							}
672	7					12	$to = $there;
673							}
674
675							#DEBUG and print STDERR "So far [", $to\|\|'nil', "] [", $section\|\|'nil', "]\n";
676
677	10	100	66			25	my $out = (defined $to and length $to) ? $to : '';
678	10	100	66			27	$out .= "#" . $section if defined $section and length $section;
679
680	10	50				15	unless(length $out) { # sanity check
681	0					0	DEBUG and printf STDERR "Oddly, couldn't resolve \"%s\" \"%s\"...\n",
682							$to \|\| "(nil)", $section \|\| "(nil)";
683	0					0	return undef;
684							}
685
686	10					9	DEBUG and print STDERR "Resolved to $out\n";
687	10					20	return $out;
688							}
689
690
691							# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
692
693							sub section_escape {
694	32			32	0	40	my($self, $section) = @_;
695	32					57	return $self->section_url_escape(
696							$self->section_name_tidy($section)
697							);
698							}
699
700							sub section_name_tidy {
701	69			69	0	83	my($self, $section) = @_;
702	69					131	$section =~ s/^\s+//;
703	69					115	$section =~ s/\s+$//;
704	69					96	$section =~ tr/ /_/;
705	69	50				221	if ("$]" >= 5.006) {
706	69					94	$section =~ s/[[:cntrl:][:^ascii:]]//g; # drop crazy characters
707							} elsif ('A' eq chr(65)) { # But not on early EBCDIC
708	0					0	$section =~ tr/\x00-\x1F\x80-\x9F//d;
709							}
710	69					112	$section = $self->unicode_escape_url($section);
711	69	50				111	$section = '_' unless length $section;
712	69					132	return $section;
713							}
714
715	32			32	0	57	sub section_url_escape { shift->general_url_escape(@_) }
716	7			7	0	12	sub pagepath_url_escape { shift->general_url_escape(@_) }
717	3			3	0	6	sub manpage_url_escape { shift->general_url_escape(@_) }
718
719							sub general_url_escape {
720	42			42	0	49	my($self, $string) = @_;
721
722	42					51	$string =~ s/([^\x00-\xFF])/join '', map sprintf('%%%02X',$_), unpack 'C*', $1/eg;
	0					0
723							# express Unicode things as urlencode(utf(orig)).
724
725							# A pretty conservative escaping, behoovey even for query components
726							# of a URL (see RFC 2396)
727
728	42	50				91	if ("$]" >= 5.007_003) {
729	42					60	$string =~ s/([^-_\.!~*()abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/sprintf('%%%02X',utf8::native_to_unicode(ord($1)))/eg;
	8					31
730							} else { # Is broken for non-ASCII platforms on early perls
731	0					0	$string =~ s/([^-_\.!~*()abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/sprintf('%%%02X',ord($1))/eg;
	0					0
732							}
733							# Yes, stipulate the list without a range, so that this can work right on
734							# all charsets that this module happens to run under.
735
736	42					72	return $string;
737							}
738
739							#--------------------------------------------------------------------------
740							#
741							# Oh look, a yawning portal to Hell! Let's play touch football right by it!
742							#
743
744							sub resolve_pod_page_link {
745							# resolve_pod_page_link must return a properly escaped URL
746	7			7	0	9	my $self = shift;
747	7	50				22	return $self->batch_mode()
748							? $self->resolve_pod_page_link_batch_mode(@_)
749							: $self->resolve_pod_page_link_singleton_mode(@_)
750							;
751							}
752
753							sub resolve_pod_page_link_singleton_mode {
754	7			7	0	10	my($self, $it) = @_;
755	7	50	33			16	return undef unless defined $it and length $it;
756	7					15	my $url = $self->pagepath_url_escape($it);
757
758	7					10	$url =~ s{::$}{}s; # probably never comes up anyway
759	7	50				14	$url =~ s{::}{/}g unless $self->perldoc_url_prefix =~ m/\?/s; # sane DWIM?
760
761	7	50				14	return undef unless length $url;
762	7					17	return $self->perldoc_url_prefix . $url . $self->perldoc_url_postfix;
763							}
764
765							sub resolve_pod_page_link_batch_mode {
766	0			0	0	0	my($self, $to) = @_;
767	0					0	DEBUG > 1 and print STDERR " During batch mode, resolving $to ...\n";
768	0					0	my @path = grep length($_), split m/::/s, $to, -1;
769	0	0				0	unless( @path ) { # sanity
770	0					0	DEBUG and print STDERR "Very odd! Splitting $to gives (nil)!\n";
771	0					0	return undef;
772							}
773	0					0	$self->batch_mode_rectify_path(\@path);
774	0					0	my $out = join('/', map $self->pagepath_url_escape($_), @path)
775							. $HTML_EXTENSION;
776	0					0	DEBUG > 1 and print STDERR " => $out\n";
777	0					0	return $out;
778							}
779
780							sub batch_mode_rectify_path {
781	0			0	0	0	my($self, $pathbits) = @_;
782	0					0	my $level = $self->batch_mode_current_level;
783	0					0	$level--; # how many levels up to go to get to the root
784	0	0				0	if($level < 1) {
785	0					0	unshift @$pathbits, '.'; # just to be pretty
786							} else {
787	0					0	unshift @$pathbits, ('..') x $level;
788							}
789	0					0	return;
790							}
791
792							sub resolve_man_page_link {
793	3			3	0	7	my ($self, $to, $frag) = @_;
794	3					4	my ($page, $section) = $to =~ /^([^(]+)(?:[(](\d+)[)])?$/;
795
796	3	50	33			11	return undef unless defined $page and length $page;
797	3		50			6	$section \|\|= 1;
798
799	3	100				7	return $self->man_url_prefix . "$section/"
800							. $self->manpage_url_escape($page) . ".$section"
801							. $self->man_url_postfix
802							. (defined $frag ? '#' . $frag : '');
803							}
804
805							#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
806
807							sub resolve_pod_link_by_table {
808							# A crazy hack to allow specifying custom L => URL mappings
809
810	17	50		17	0	33	return unless $_[0]->{'podhtml_LOT'}; # An optimizy shortcut
811
812	0					0	my($self, $to, $section) = @_;
813
814							# TODO: add a method that actually populates podhtml_LOT from a file?
815
816	0	0				0	if(defined $section) {
817	0	0	0			0	$to = '' unless defined $to and length $to;
818	0					0	return $self->{'podhtml_LOT'}{"$to#$section"}; # quite possibly undef!
819							} else {
820	0					0	return $self->{'podhtml_LOT'}{$to}; # quite possibly undef!
821							}
822	0					0	return;
823							}
824
825							###########################################################################
826
827							sub linearize_tokens { # self, tokens
828	37			37	0	37	my $self = shift;
829	37					43	my $out = '';
830
831	37					33	my $t;
832	37					79	while($t = shift @_) {
833	79	50	33			281	if(!ref $t or !UNIVERSAL::can($t, 'is_text')) {
		100	66
		100
834	0					0	$out .= $t; # a string, or some insane thing
835							} elsif($t->is_text) {
836	39					61	$out .= $t->text;
837							} elsif($t->is_start and $t->tag eq 'X') {
838							# Ignore until the end of this X<...> sequence:
839	3					4	my $x_open = 1;
840	3					4	while($x_open) {
841	6	100				11	next if( ($t = shift @_)->is_text );
842	3	50	33			4	if( $t->is_start and $t->tag eq 'X') { ++$x_open }
	0	50	33			0
843	3					7	elsif($t->is_end and $t->tag eq 'X') { --$x_open }
844							}
845							}
846							}
847	37	50				89	return undef if length $out > $Linearization_Limit;
848	37					59	return $out;
849							}
850
851							#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
852
853							sub unicode_escape_url {
854	69			69	0	90	my($self, $string) = @_;
855	69					83	$string =~ s/([^\x00-\xFF])/'('.ord($1).')'/eg;
	0					0
856							# Turn char 1234 into "(1234)"
857	69					95	return $string;
858							}
859
860							#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
861							sub esc { # a function.
862	464	100		464	0	686	if(defined wantarray) {
863	326	100				345	if(wantarray) {
864	275					547	@_ = splice @_; # break aliasing
865							} else {
866	51					71	my $x = shift;
867	51	50				142	if ("$]" >= 5.007_003) {
868	51					73	$x =~ s/([^-\n\t !\#\$\%\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(utf8::native_to_unicode(ord($1))).';'/eg;
	0					0
869							} else { # Is broken for non-ASCII platforms on early perls
870	0					0	$x =~ s/([^-\n\t !\#\$\%\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg;
	0					0
871							}
872	51					114	return $x;
873							}
874							}
875	413					506	foreach my $x (@_) {
876							# Escape things very cautiously:
877	1101	50				1254	if (defined $x) {
878	1101	50				1815	if ("$]" >= 5.007_003) {
879	1101					1505	$x =~ s/([^-\n\t !\#\$\%\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(utf8::native_to_unicode(ord($1))).';'/eg
	52					133
880							} else { # Is broken for non-ASCII platforms on early perls
881	0					0	$x =~ s/([^-\n\t !\#\$\%\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg
	0					0
882							}
883							}
884							# Leave out "- so that "--" won't make it thru in X-generated comments
885							# with text in them.
886
887							# Yes, stipulate the list without a range, so that this can work right on
888							# all charsets that this module happens to run under.
889							}
890	413					894	return @_;
891							}
892
893							#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
894
895							1;
896							__END__

File Coverage

", # And also stick in an

", # ''

", # ''

", # ''

", # ''

", # ''