File Coverage

blib/lib/Pod/Simple/XHTML.pm
Criterion Covered Total %
statement 322 326 98.7
branch 100 122 81.9
condition 37 52 71.1
subroutine 70 71 98.5
pod 9 65 13.8
total 538 636 84.5


line stmt bran cond sub pod time code
1             =pod
2              
3             =head1 NAME
4              
5             Pod::Simple::XHTML -- format Pod as validating XHTML
6              
7             =head1 SYNOPSIS
8              
9             use Pod::Simple::XHTML;
10              
11             my $parser = Pod::Simple::XHTML->new();
12              
13             ...
14              
15             $parser->parse_file('path/to/file.pod');
16              
17             =head1 DESCRIPTION
18              
19             This class is a formatter that takes Pod and renders it as XHTML
20             validating HTML.
21              
22             This is a subclass of L and inherits all its
23             methods. The implementation is entirely different than
24             L, but it largely preserves the same interface.
25              
26             =head2 Minimal code
27              
28             use Pod::Simple::XHTML;
29             my $psx = Pod::Simple::XHTML->new;
30             $psx->output_string(\my $html);
31             $psx->parse_file('path/to/Module/Name.pm');
32             open my $out, '>', 'out.html' or die "Cannot open 'out.html': $!\n";
33             print $out $html;
34              
35             You can also control the character encoding and entities. For example, if
36             you're sure that the POD is properly encoded (using the C<=encoding> command),
37             you can prevent high-bit characters from being encoded as HTML entities and
38             declare the output character set as UTF-8 before parsing, like so:
39              
40             $psx->html_charset('UTF-8');
41             use warnings;
42             $psx->html_encode_chars(q{&<>'"});
43              
44             =cut
45              
46             package Pod::Simple::XHTML;
47 13     13   2119231 use strict;
  13         21  
  13         573  
48             our $VERSION = '3.48';
49 13     13   4578 use Pod::Simple::Methody ();
  13         38  
  13         589  
50             our @ISA = ('Pod::Simple::Methody');
51              
52             our $HAS_HTML_ENTITIES;
53             BEGIN {
54 13     13   692 $HAS_HTML_ENTITIES = eval "require HTML::Entities; 1";
55             }
56              
57             my %entities = (
58             q{>} => 'gt',
59             q{<} => 'lt',
60             q{'} => '#39',
61             q{"} => 'quot',
62             q{&} => 'amp',
63             );
64              
65             sub encode_entities {
66 581     581 0 754 my $self = shift;
67 581         1074 my $ents = $self->html_encode_chars;
68 581 100       1294 return HTML::Entities::encode_entities( $_[0], $ents ) if $HAS_HTML_ENTITIES;
69 73 100       93 if (defined $ents) {
70 1         3 $ents =~ s,(?
71 1         2 $ents =~ s,(?
72             } else {
73             # the same set of characters as in HTML::Entities
74 72         84 $ents = '^\n\r\t !\#\$%\(-;=?-~';
75             }
76 73         79 my $str = $_[0];
77 73   66     423 $str =~ s/([$ents])/'&' . ($entities{$1} || sprintf '#x%X', unpack('U', $1)) . ';'/ge;
  60         274  
78 73         149 return $str;
79             }
80              
81             my %entity_to_char = reverse %entities;
82             my ($entity_re) = map qr{$_}, join '|', map quotemeta, sort keys %entity_to_char;
83              
84             sub decode_entities {
85 5     5 0 8 my ($self, $string) = @_;
86 5 50       47 return HTML::Entities::decode_entities( $string ) if $HAS_HTML_ENTITIES;
87              
88 0         0 $string =~ s{&(?:($entity_re)|#x([0123456789abcdefABCDEF]+)|#([0123456789]+));}{
89 0 0       0 defined $1 ? $entity_to_char{$1}
    0          
    0          
90             : defined $2 ? chr(hex($2))
91             : defined $3 ? chr($3)
92             : die;
93             }ge;
94              
95 0         0 return $string;
96             }
97              
98             sub HAVE_UTF8_ENCODE ();
99             BEGIN {
100 13 50   13   51922 *HAVE_UTF8_ENCODE = defined &utf8::encode ? sub () { 1 } : sub () { 0 };
101             }
102             my %percent_enc = map +( chr($_) => sprintf('%%%02X', $_) ), 0 .. 255;
103              
104             sub encode_url {
105 96     96 0 3473 my ($self, $string) = @_;
106              
107 96         187 utf8::encode($string) if HAVE_UTF8_ENCODE;
108 96         249 $string =~ s{([^-_.!~*()a-zA-Z0-9])}{$percent_enc{$1}}g;
109              
110 96         184 return $string;
111             }
112              
113             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
114              
115             =head1 METHODS
116              
117             Pod::Simple::XHTML offers a number of methods that modify the format of
118             the HTML output. Call these after creating the parser object, but before
119             the call to C:
120              
121             my $parser = Pod::PseudoPod::HTML->new();
122             $parser->set_optional_param("value");
123             $parser->parse_file($file);
124              
125             =head2 perldoc_url_prefix
126              
127             In turning LEFoo::BarE into http://whatever/Foo%3a%3aBar, what
128             to put before the "Foo%3a%3aBar". The default value is
129             "https://metacpan.org/pod/".
130              
131             =head2 perldoc_url_postfix
132              
133             What to put after "Foo%3a%3aBar" in the URL. This option is not set by
134             default.
135              
136             =head2 man_url_prefix
137              
138             In turning C<< LEcrontab(5)E >> into http://whatever/man/1/crontab.1, what
139             to put before the "1/crontab.1". The default value is
140             "https://man7.org/linux/man-pages/man".
141              
142             =head2 man_url_postfix
143              
144             What to put after "1/crontab.1" in the URL.
145             This option is set to ".html" by default.
146              
147             =head2 title_prefix, title_postfix
148              
149             What to put before and after the title in the head. The values should
150             already be &-escaped.
151              
152             =head2 html_css
153              
154             $parser->html_css('path/to/style.css');
155              
156             The URL or relative path of a CSS file to include. This option is not
157             set by default.
158              
159             =head2 html_javascript
160              
161             The URL or relative path of a JavaScript file to pull in. This option is
162             not set by default.
163              
164             =head2 html_doctype
165              
166             A document type tag for the file. This option is not set by default.
167              
168             =head2 html_charset
169              
170             The character set to declare in the Content-Type meta tag created by default
171             for C. Note that this option will be ignored if the value of
172             C is changed. Defaults to "ISO-8859-1".
173              
174             =head2 html_header_tags
175              
176             Additional arbitrary HTML tags for the header of the document. The
177             default value is just a content type header tag:
178              
179            
180              
181             Add additional meta tags here, or blocks of inline CSS or JavaScript
182             (wrapped in the appropriate tags).
183              
184             =head3 html_encode_chars
185              
186             A string containing all characters that should be encoded as HTML entities,
187             specified using the regular expression character class syntax (what you find
188             within brackets in regular expressions). This value will be passed as the
189             second argument to the C function of L. If
190             L is not installed, then any characters other than C<&<>"'>
191             will be encoded numerically.
192              
193             =head2 html_h_level
194              
195             This is the level of HTML "Hn" element to which a Pod "head1" corresponds. For
196             example, if C is set to 2, a head1 will produce an H2, a head2
197             will produce an H3, and so on.
198              
199             =head2 default_title
200              
201             Set a default title for the page if no title can be determined from the
202             content. The value of this string should already be &-escaped.
203              
204             =head2 force_title
205              
206             Force a title for the page (don't try to determine it from the content).
207             The value of this string should already be &-escaped.
208              
209             =head2 html_header, html_footer
210              
211             Set the HTML output at the beginning and end of each file. The default
212             header includes a title, a doctype tag (if C is set), a
213             content tag (customized by C), a tag for a CSS file
214             (if C is set), and a tag for a Javascript file (if
215             C is set). The default footer simply closes the C
216             and C tags.
217              
218             The options listed above customize parts of the default header, but
219             setting C or C completely overrides the
220             built-in header or footer. These may be useful if you want to use
221             template tags instead of literal HTML headers and footers or are
222             integrating converted POD pages in a larger website.
223              
224             If you want no headers or footers output in the HTML, set these options
225             to the empty string.
226              
227             =head2 index
228              
229             Whether to add a table-of-contents at the top of each page (called an
230             index for the sake of tradition).
231              
232             =head2 anchor_items
233              
234             Whether to anchor every definition C<=item> directive. This needs to be
235             enabled if you want to be able to link to specific C<=item> directives, which
236             are output as C<<
>> elements. Disabled by default.
237              
238             =head2 backlink
239              
240             Whether to turn every =head1 directive into a link pointing to the top
241             of the page (specifically, the opening body tag).
242              
243             =cut
244              
245             __PACKAGE__->_accessorize(
246             'perldoc_url_prefix',
247             'perldoc_url_postfix',
248             'man_url_prefix',
249             'man_url_postfix',
250             'title_prefix', 'title_postfix',
251             'html_css',
252             'html_javascript',
253             'html_doctype',
254             'html_charset',
255             'html_encode_chars',
256             'html_h_level',
257             'title', # Used internally for the title extracted from the content
258             'default_title',
259             'force_title',
260             'html_header',
261             'html_footer',
262             'index',
263             'anchor_items',
264             'backlink',
265             'batch_mode', # whether we're in batch mode
266             'batch_mode_current_level',
267             # When in batch mode, how deep the current module is: 1 for "LWP",
268             # 2 for "LWP::Procotol", 3 for "LWP::Protocol::GHTTP", etc
269             );
270              
271             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
272              
273             =head1 SUBCLASSING
274              
275             If the standard options aren't enough, you may want to subclass
276             Pod::Simple::XHMTL. These are the most likely candidates for methods
277             you'll want to override when subclassing.
278              
279             =cut
280              
281             sub new {
282 147     147 1 600544 my $self = shift;
283 147         596 my $new = $self->SUPER::new(@_);
284 147   50     782 $new->{'output_fh'} ||= *STDOUT{IO};
285 147         403 $new->perldoc_url_prefix('https://metacpan.org/pod/');
286 147         410 $new->man_url_prefix('https://man7.org/linux/man-pages/man');
287 147         332 $new->man_url_postfix('.html');
288 147         363 $new->html_charset('ISO-8859-1');
289 147         550 $new->nix_X_codes(1);
290 147         311 $new->{'scratch'} = '';
291 147         268 $new->{'to_index'} = [];
292 147         290 $new->{'output'} = [];
293 147         244 $new->{'saved'} = [];
294 147         322 $new->{'ids'} = { '_podtop_' => 1 }; # used in
295 147         195 $new->{'in_li'} = [];
296              
297 147         241 $new->{'__region_targets'} = [];
298 147         243 $new->{'__literal_targets'} = {};
299 147         342 $new->accept_targets_as_html( 'html', 'HTML' );
300              
301 147         1344 return $new;
302             }
303              
304             sub html_header_tags {
305 20     20 1 24 my $self = shift;
306 20 50       60 return $self->{html_header_tags} = shift if @_;
307             return $self->{html_header_tags}
308 20   33     92 ||= ' 309             . $self->html_charset . '" />';
310             }
311              
312             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
313              
314             =head2 handle_text
315              
316             This method handles the body of text within any element: it's the body
317             of a paragraph, or everything between a "=begin" tag and the
318             corresponding "=end" tag, or the text within an L entity, etc. You would
319             want to override this if you are adding a custom element type that does
320             more than just display formatted text. Perhaps adding a way to generate
321             HTML tables from an extended version of POD.
322              
323             So, let's say you want to add a custom element called 'foo'. In your
324             subclass's C method, after calling C you'd call:
325              
326             $new->accept_targets_as_text( 'foo' );
327              
328             Then override the C method in the subclass to check for when
329             "$flags->{'target'}" is equal to 'foo' and set a flag that marks that
330             you're in a foo block (maybe "$self->{'in_foo'} = 1"). Then override the
331             C method to check for the flag, and pass $text to your
332             custom subroutine to construct the HTML output for 'foo' elements,
333             something like:
334              
335             sub handle_text {
336             my ($self, $text) = @_;
337             if ($self->{'in_foo'}) {
338             $self->{'scratch'} .= build_foo_html($text);
339             return;
340             }
341             $self->SUPER::handle_text($text);
342             }
343              
344             =head2 handle_code
345              
346             This method handles the body of text that is marked up to be code.
347             You might for instance override this to plug in a syntax highlighter.
348             The base implementation just escapes the text.
349              
350             The callback methods C and C emits the C tags
351             before and after C is invoked, so you might want to override these
352             together with C if this wrapping isn't suitable.
353              
354             Note that the code might be broken into multiple segments if there are
355             nested formatting codes inside a C<< CE...> >> sequence. In between the
356             calls to C other markup tags might have been emitted in that
357             case. The same is true for verbatim sections if the C
358             option is turned on.
359              
360             =head2 accept_targets_as_html
361              
362             This method behaves like C, but also marks the region
363             as one whose content should be emitted literally, without HTML entity escaping
364             or wrapping in a C
element.
365              
366             =cut
367              
368             sub __in_literal_xhtml_region {
369 333 100   333   357 return unless @{ $_[0]{__region_targets} };
  333         794  
370 30         38 my $target = $_[0]{__region_targets}[-1];
371 30         57 return $_[0]{__literal_targets}{ $target };
372             }
373              
374             sub accept_targets_as_html {
375 147     147 1 314 my ($self, @targets) = @_;
376 147         659 $self->accept_targets(@targets);
377 147         523 $self->{__literal_targets}{$_} = 1 for @targets;
378             }
379              
380             sub handle_text {
381             # escape special characters in HTML (<, >, &, etc)
382 312     312 1 378 my $text = $_[1];
383 312         313 my $html;
384 312 100       509 if ($_[0]->__in_literal_xhtml_region) {
385 5         6 $html = $text;
386 5         22 $text =~ s{<[^>]+?>}{}g;
387 5         11 $text = $_[0]->decode_entities($text);
388             }
389             else {
390 307         562 $html = $_[0]->encode_entities($text);
391             }
392              
393 312 100 100     5189 if ($_[0]{'in_code'} && @{$_[0]{'in_code'}}) {
  55         111  
394             # Intentionally use the raw text in $_[1], even if we're not in a
395             # literal xhtml region, since handle_code calls encode_entities.
396 43         92 $_[0]->handle_code( $_[1], $_[0]{'in_code'}[-1] );
397             } else {
398 269 100       391 if ($_[0]->{in_for}) {
399 9 100       22 my $newlines = $_[0]->__in_literal_xhtml_region ? "\n\n" : '';
400 9 100       16 if ($_[0]->{started_for}) {
401 6 50       42 if ($html =~ /\S/) {
402 6         11 delete $_[0]->{started_for};
403 6         17 $_[0]{'scratch'} .= $html . $newlines;
404             }
405             # Otherwise, append nothing until we have something to append.
406             } else {
407             # The parser sometimes preserves newlines and sometimes doesn't!
408 3         6 $html =~ s/\n\z//;
409 3         6 $_[0]{'scratch'} .= $html . $newlines;
410             }
411             } else {
412             # Just plain text.
413 260         451 $_[0]{'scratch'} .= $html;
414             }
415             }
416              
417 312 100       1120 $_[0]{hhtml} .= $html if $_[0]{'in_head'};
418 312 100       499 $_[0]{htext} .= $text if $_[0]{'in_head'};
419 312 100       908 $_[0]{itext} .= $text if $_[0]{'in_item_text'};
420             }
421              
422             sub start_code {
423 32     32 0 65 $_[0]{'scratch'} .= '';
424             }
425              
426             sub end_code {
427 32     32 0 55 $_[0]{'scratch'} .= '';
428             }
429              
430             sub handle_code {
431 43     43 1 97 $_[0]{'scratch'} .= $_[0]->encode_entities( $_[1] );
432             }
433              
434             sub start_Para {
435 93     93 0 204 $_[0]{'scratch'} .= '

';

436             }
437              
438             sub start_Verbatim {
439 26     26 0 44 $_[0]{'scratch'} = '
'; 
440 26         33 push(@{$_[0]{'in_code'}}, 'Verbatim');
  26         74  
441 26         74 $_[0]->start_code($_[0]{'in_code'}[-1]);
442             }
443              
444 66     66 0 117 sub start_head1 { $_[0]{'in_head'} = 1; $_[0]{htext} = $_[0]{hhtml} = ''; }
  66         179  
445 15     15 0 26 sub start_head2 { $_[0]{'in_head'} = 2; $_[0]{htext} = $_[0]{hhtml} = ''; }
  15         36  
446 8     8 0 23 sub start_head3 { $_[0]{'in_head'} = 3; $_[0]{htext} = $_[0]{hhtml} = ''; }
  8         20  
447 9     9 0 14 sub start_head4 { $_[0]{'in_head'} = 4; $_[0]{htext} = $_[0]{hhtml} = ''; }
  9         19  
448 2     2 0 4 sub start_head5 { $_[0]{'in_head'} = 5; $_[0]{htext} = $_[0]{hhtml} = ''; }
  2         6  
449 2     2 0 4 sub start_head6 { $_[0]{'in_head'} = 6; $_[0]{htext} = $_[0]{hhtml} = ''; }
  2         6  
450              
451             sub start_item_number {
452 8 100 66 8 0 20 $_[0]{'scratch'} = "\n" if ($_[0]{'in_li'}->[-1] && pop @{$_[0]{'in_li'}});
  3         10  
453 8         10 $_[0]{'scratch'} .= '
  • ';

  • 454 8         10 push @{$_[0]{'in_li'}}, 1;
      8         17  
    455             }
    456              
    457             sub start_item_bullet {
    458 12 100 66 12 0 30 $_[0]{'scratch'} = "\n" if ($_[0]{'in_li'}->[-1] && pop @{$_[0]{'in_li'}});
      5         18  
    459 12         18 $_[0]{'scratch'} .= '
  • ';

  • 460 12         13 push @{$_[0]{'in_li'}}, 1;
      12         24  
    461             }
    462              
    463             sub start_item_text {
    464 20     20 0 34 $_[0]{'in_item_text'} = 1; $_[0]{itext} = '';
      20         33  
    465             # see end_item_text
    466             }
    467              
    468 7     7 0 14 sub start_over_bullet { $_[0]{'scratch'} = '
      '; push @{$_[0]{'in_li'}}, 0; $_[0]->emit }
      7         8  
      7         15  
      7         27  
    469 1     1 0 3 sub start_over_block { $_[0]{'scratch'} = '
    '; $_[0]->emit }
      1         4  
    470 5     5 0 9 sub start_over_number { $_[0]{'scratch'} = '
      '; push @{$_[0]{'in_li'}}, 0; $_[0]->emit }
      5         6  
      5         12  
      5         9  
    471             sub start_over_text {
    472 14     14 0 24 $_[0]{'scratch'} = '
    ';
    473 14         22 $_[0]{'dl_level'}++;
    474 14   100     52 $_[0]{'in_dd'} ||= [];
    475 14         27 $_[0]->emit
    476             }
    477              
    478 1     1 0 2 sub end_over_block { $_[0]{'scratch'} .= ''; $_[0]->emit }
      1         3  
    479              
    480             sub end_over_number {
    481 5 50   5 0 7 $_[0]{'scratch'} = "\n" if ( pop @{$_[0]{'in_li'}} );
      5         16  
    482 5         8 $_[0]{'scratch'} .= '';
    483 5         6 pop @{$_[0]{'in_li'}};
      5         7  
    484 5         10 $_[0]->emit;
    485             }
    486              
    487             sub end_over_bullet {
    488 7 50   7 0 9 $_[0]{'scratch'} = "\n" if ( pop @{$_[0]{'in_li'}} );
      7         27  
    489 7         12 $_[0]{'scratch'} .= '';
    490 7         9 pop @{$_[0]{'in_li'}};
      7         10  
    491 7         10 $_[0]->emit;
    492             }
    493              
    494             sub end_over_text {
    495 14 50   14 0 62 if ($_[0]{'in_dd'}[ $_[0]{'dl_level'} ]) {
    496 14         22 $_[0]{'scratch'} = "\n";
    497 14         23 $_[0]{'in_dd'}[ $_[0]{'dl_level'} ] = 0;
    498             }
    499 14         32 $_[0]{'scratch'} .= '';
    500 14         20 $_[0]{'dl_level'}--;
    501 14         26 $_[0]->emit;
    502             }
    503              
    504             # . . . . . Now the actual formatters:
    505              
    506 93     93 0 154 sub end_Para { $_[0]{'scratch'} .= '

    '; $_[0]->emit }
      93         163  
    507             sub end_Verbatim {
    508 26     26 0 33 $_[0]->end_code(pop(@{$_[0]->{'in_code'}}));
      26         82  
    509 26         66 $_[0]{'scratch'} .= '';
    510 26         59 $_[0]->emit;
    511             }
    512              
    513             sub _end_head {
    514 102     102   176 my $h = delete $_[0]{in_head};
    515              
    516 102         213 my $add = $_[0]->html_h_level;
    517 102 100       169 $add = 1 unless defined $add;
    518 102         160 $h += $add - 1;
    519              
    520 102         232 my $id = $_[0]->idify(delete $_[0]{htext});
    521 102         270 my $text = $_[0]{scratch};
    522 102         232 my $head = qq{$text};
    523 102 100 100     1520 $_[0]{'scratch'} = $_[0]->backlink && ($h - $add == 0)
    524             # backlinks enabled && =head1
    525             ? qq{$head}
    526             : $head;
    527 102         224 $_[0]->emit;
    528 102         93 push @{ $_[0]{'to_index'} }, [$h, $id, delete $_[0]{'hhtml'}];
      102         514  
    529             }
    530              
    531 66     66 0 153 sub end_head1 { shift->_end_head(@_); }
    532 15     15 0 54 sub end_head2 { shift->_end_head(@_); }
    533 8     8 0 17 sub end_head3 { shift->_end_head(@_); }
    534 9     9 0 18 sub end_head4 { shift->_end_head(@_); }
    535 2     2 0 5 sub end_head5 { shift->_end_head(@_); }
    536 2     2 0 8 sub end_head6 { shift->_end_head(@_); }
    537              
    538 12     12 0 19 sub end_item_bullet { $_[0]{'scratch'} .= '

    '; $_[0]->emit }
      12         21  
    539 8     8 0 15 sub end_item_number { $_[0]{'scratch'} .= '

    '; $_[0]->emit }
      8         11  
    540              
    541             sub end_item_text {
    542             # idify and anchor =item content if wanted
    543             my $dt_id = $_[0]{'anchor_items'}
    544 20 100   20 0 52 ? ' id="'. $_[0]->encode_entities($_[0]->idify($_[0]{'itext'})) .'"'
    545             : '';
    546              
    547             # reset scratch
    548 20         53 my $text = $_[0]{scratch};
    549 20         28 $_[0]{'scratch'} = '';
    550              
    551 20 100       47 if ($_[0]{'in_dd'}[ $_[0]{'dl_level'} ]) {
    552 6         10 $_[0]{'scratch'} = "\n";
    553 6         11 $_[0]{'in_dd'}[ $_[0]{'dl_level'} ] = 0;
    554             }
    555              
    556 20         34 $_[0]{'scratch'} .= qq{$text\n
    };
    557 20         29 $_[0]{'in_dd'}[ $_[0]{'dl_level'} ] = 1;
    558 20         34 $_[0]->emit;
    559             }
    560              
    561             # This handles =begin and =for blocks of all kinds.
    562             sub start_for {
    563 6     6 0 11 my ($self, $flags) = @_;
    564              
    565 6         9 push @{ $self->{__region_targets} }, $flags->{target_matching};
      6         15  
    566 6         9 $self->{started_for} = 1;
    567 6         9 $self->{in_for} = 1;
    568              
    569 6 100       14 unless ($self->__in_literal_xhtml_region) {
    570 4         11 $self->{scratch} .= '
    571 4 50       9 $self->{scratch} .= qq( class="$flags->{target}") if $flags->{target};
    572 4         9 $self->{scratch} .= ">\n\n";
    573             }
    574             }
    575              
    576             sub end_for {
    577 6     6 0 11 my ($self) = @_;
    578 6         10 delete $self->{started_for};
    579 6         9 delete $self->{in_for};
    580              
    581 6 100       13 if ($self->__in_literal_xhtml_region) {
    582             # Remove trailine newlines.
    583 2         14 $self->{'scratch'} =~ s/\s+\z//s;
    584             } else {
    585 4         20 $self->{'scratch'} .= '';
    586             }
    587              
    588 6         6 pop @{ $self->{__region_targets} };
      6         11  
    589 6         13 $self->emit;
    590             }
    591              
    592             sub start_Document {
    593 142     142 0 246 my ($self) = @_;
    594 142 100       313 if (defined $self->html_header) {
    595 122         242 $self->{'scratch'} .= $self->html_header;
    596 122 100       231 $self->emit unless $self->html_header eq "";
    597             } else {
    598 20         30 my ($doctype, $title, $metatags, $bodyid);
    599 20   50     45 $doctype = $self->html_doctype || '';
    600 20   50     45 $title = $self->force_title || $self->title || $self->default_title || '';
    601 20   50     61 $metatags = $self->html_header_tags || '';
    602 20 100       47 if (my $css = $self->html_css) {
    603 3 100       9 if ($css !~ /
    604             # this is required to be compatible with Pod::Simple::BatchHTML
    605 2         5 $metatags .= ' 606             . $self->encode_entities($css) . '" type="text/css" />';
    607             } else {
    608 1         2 $metatags .= $css;
    609             }
    610             }
    611 20 50       74 if ($self->html_javascript) {
    612 0         0 $metatags .= qq{\n';
    614             }
    615 20 100       41 $bodyid = $self->backlink ? ' id="_podtop_"' : '';
    616 20         63 $self->{'scratch'} .= <<"HTML";
    617             $doctype
    618            
    619            
    620             $title
    621             $metatags
    622            
    623            
    624             HTML
    625 20         36 $self->emit;
    626             }
    627             }
    628              
    629             sub build_index {
    630 33     33 0 42 my ($self, $to_index) = @_;
    631              
    632 33         29 my @out;
    633 33         32 my $level = 0;
    634 33         32 my $indent = -1;
    635 33         33 my $space = '';
    636 33         34 my $id = ' id="index"';
    637              
    638 33         28 for my $h (@{ $to_index }, [0]) {
      33         63  
    639 102         123 my $target_level = $h->[0];
    640             # Get to target_level by opening or closing ULs
    641 102 100       160 if ($level == $target_level) {
        100          
    642 9         14 $out[-1] .= '';
    643             } elsif ($level > $target_level) {
    644 39 50       169 $out[-1] .= '' if $out[-1] =~ /^\s+
  • /;
  • 645 39         62 while ($level > $target_level) {
    646 68         86 --$level;
    647 68 100 66     193 push @out, (' ' x --$indent) . '' if @out && $out[-1] =~ m{^\s+<\/ul};
    648 68         137 push @out, (' ' x --$indent) . '';
    649             }
    650 39 100       62 push @out, (' ' x --$indent) . '' if $level;
    651             } else {
    652 54         86 while ($level < $target_level) {
    653 68         60 ++$level;
    654 68 100 100     180 push @out, (' ' x ++$indent) . '
  • ' if @out && $out[-1]=~ /^\s*
  • 655 68         132 push @out, (' ' x ++$indent) . "";
    656 68         99 $id = '';
    657             }
    658 54         50 ++$indent;
    659             }
    660              
    661 102 100       152 next unless $level;
    662 69         86 $space = ' ' x $indent;
    663 69         117 my $fragment = $self->encode_entities($self->encode_url($h->[1]));
    664 69         900 push @out, sprintf '%s
  • %s',
  • 665             $space, $fragment, $h->[2];
    666             }
    667              
    668 33         150 return join "\n", @out;
    669             }
    670              
    671             sub end_Document {
    672 142     142 0 202 my ($self) = @_;
    673 142         186 my $to_index = $self->{'to_index'};
    674 142 100 100     237 if ($self->index && @{ $to_index } ) {
      40         71  
    675 33         60 my $index = $self->build_index($to_index);
    676              
    677             # Splice the index in between the HTML headers and the first element.
    678 33 50       66 my $offset = defined $self->html_header ? $self->html_header eq '' ? 0 : 1 : 1;
        100          
    679 33         40 splice @{ $self->{'output'} }, $offset, 0, $index;
      33         139  
    680             }
    681              
    682 142 100       256 if (defined $self->html_footer) {
    683 122         224 $self->{'scratch'} .= $self->html_footer;
    684 122 100       206 $self->emit unless $self->html_footer eq "";
    685             } else {
    686 20         47 $self->{'scratch'} .= "\n";
    687 20         42 $self->emit;
    688             }
    689              
    690 142 100       284 if ($self->index) {
    691 40         39 print {$self->{'output_fh'}} join ("\n\n", @{ $self->{'output'} }), "\n\n";
      40         65  
      40         186  
    692 40         70 @{$self->{'output'}} = ();
      40         108  
    693             }
    694              
    695             }
    696              
    697             # Handling code tags
    698 7     7 0 19 sub start_B { $_[0]{'scratch'} .= '' }
    699 7     7 0 17 sub end_B { $_[0]{'scratch'} .= '' }
    700              
    701 10     10 0 14 sub start_C { push(@{$_[0]{'in_code'}}, 'C'); $_[0]->start_code($_[0]{'in_code'}[-1]); }
      10         24  
      10         27  
    702 10     10 0 18 sub end_C { $_[0]->end_code(pop(@{$_[0]{'in_code'}})); }
      10         31  
    703              
    704 1     1 0 4 sub start_F { $_[0]{'scratch'} .= '' }
    705 1     1 0 3 sub end_F { $_[0]{'scratch'} .= '' }
    706              
    707 1     1 0 3 sub start_I { $_[0]{'scratch'} .= '' }
    708 1     1 0 2 sub end_I { $_[0]{'scratch'} .= '' }
    709              
    710 1     1 0 2 sub start_U { $_[0]{'scratch'} .= '' }
    711 1     1 0 2 sub end_U { $_[0]{'scratch'} .= '' }
    712              
    713             sub start_L {
    714 36     36 0 53 my ($self, $flags) = @_;
    715 36         45 my ($type, $to, $section) = @{$flags}{'type', 'to', 'section'};
      36         82  
    716 36 50       119 my $url = $self->encode_entities(
        100          
        100          
    717             $type eq 'url' ? $to
    718             : $type eq 'pod' ? $self->resolve_pod_page_link($to, $section)
    719             : $type eq 'man' ? $self->resolve_man_page_link($to, $section)
    720             : undef
    721             );
    722              
    723             # If it's an unknown type, use an attribute-less like HTML.pm.
    724 36 50       399 $self->{'scratch'} .= '' : '>');
    725             }
    726              
    727 36     36 0 71 sub end_L { $_[0]{'scratch'} .= '' }
    728              
    729 1     1 0 3 sub start_S { $_[0]{'scratch'} .= '' }
    730 1     1 0 2 sub end_S { $_[0]{'scratch'} .= '' }
    731              
    732             sub emit {
    733 363     363 0 498 my($self) = @_;
    734 363 100       668 if ($self->index) {
    735 135         142 push @{ $self->{'output'} }, $self->{'scratch'};
      135         274  
    736             } else {
    737 228         267 print {$self->{'output_fh'}} $self->{'scratch'}, "\n\n";
      228         767  
    738             }
    739 363         511 $self->{'scratch'} = '';
    740 363         690 return;
    741             }
    742              
    743             =head2 resolve_pod_page_link
    744              
    745             my $url = $pod->resolve_pod_page_link('Net::Ping', 'INSTALL');
    746             my $url = $pod->resolve_pod_page_link('perlpodspec');
    747             my $url = $pod->resolve_pod_page_link(undef, 'SYNOPSIS');
    748              
    749             Resolves a POD link target (typically a module or POD file name) and section
    750             name to a URL. The resulting link will be returned for the above examples as:
    751              
    752             https://metacpan.org/pod/Net::Ping#INSTALL
    753             https://metacpan.org/pod/perlpodspec
    754             #SYNOPSIS
    755              
    756             Note that when there is only a section argument the URL will simply be a link
    757             to a section in the current document.
    758              
    759             =cut
    760              
    761             sub resolve_pod_page_link {
    762 26     26 1 63 my ($self, $to, $section) = @_;
    763 26 50 66     73 return undef unless defined $to || defined $section;
    764 26 100       50 if (defined $section) {
    765 18         46 my $id = $self->idify($section, 1);
    766 18         62 $section = '#' . $self->encode_url($id);
    767 18 100       53 return $section unless defined $to;
    768             } else {
    769 8         10 $section = ''
    770             }
    771              
    772 13   50     43 return ($self->perldoc_url_prefix || '')
          50        
    773             . $to . $section
    774             . ($self->perldoc_url_postfix || '');
    775             }
    776              
    777             =head2 resolve_man_page_link
    778              
    779             my $url = $pod->resolve_man_page_link('crontab(5)', 'EXAMPLE CRON FILE');
    780             my $url = $pod->resolve_man_page_link('crontab');
    781              
    782             Resolves a man page link target and numeric section to a URL. The resulting
    783             link will be returned for the above examples as:
    784              
    785             https://man7.org/linux/man-pages/man5/crontab.5.html
    786             https://man7.org/linux/man-pages/man1/crontab.1.html
    787              
    788             Note that the first argument is required. The section number will be parsed
    789             from it, and if it's missing will default to 1. The second argument is
    790             currently ignored. Subclass to link to a different man page HTTP server.
    791              
    792             =cut
    793              
    794             sub resolve_man_page_link {
    795 6     6 1 15 my ($self, $to, $section) = @_;
    796 6 50       12 return undef unless defined $to;
    797              
    798 6         26 my ($page, $part) = $to =~ /^([^(]+)(?:[(](\d+)[)])?$/;
    799 6 50       13 return undef unless $page;
    800              
    801 6 100       14 if (defined $section) {
    802 2         7 $section =~ s/\s+/_/g;
    803 2         8 my $id = $self->idify($section, 1);
    804 2         6 $section = '#' . $self->encode_url($id);
    805             } else {
    806 4         5 $section = ''
    807             }
    808              
    809 6   50     15 return ($self->man_url_prefix || '')
          100        
          100        
          50        
    810             . ($part || 1) . "/" . $self->encode_entities($page)
    811             . "." . ($part || 1) . ($self->man_url_postfix || '')
    812             . $section;
    813             }
    814              
    815             =head2 idify
    816              
    817             my $id = $pod->idify($text);
    818             my $hash = $pod->idify($text, 1);
    819              
    820             This method turns an arbitrary string into a valid XHTML ID attribute value.
    821             The rules enforced, following
    822             L, are:
    823              
    824             =over
    825              
    826             =item *
    827              
    828             The id must start with a letter (a-z or A-Z)
    829              
    830             =item *
    831              
    832             All subsequent characters can be letters, numbers (0-9), hyphens (-),
    833             underscores (_), colons (:), and periods (.).
    834              
    835             =item *
    836              
    837             The final character can't be a hyphen, colon, or period. URLs ending with these
    838             characters, while allowed by XHTML, can be awkward to extract from plain text.
    839              
    840             =item *
    841              
    842             Each id must be unique within the document.
    843              
    844             =back
    845              
    846             In addition, the returned value will be unique within the context of the
    847             Pod::Simple::XHTML object unless a second argument is passed a true value. ID
    848             attributes should always be unique within a single XHTML document, but pass
    849             the true value if you are creating not an ID but a URL hash to point to
    850             an ID (i.e., if you need to put the "#foo" in C<< foo >>.
    851              
    852             =cut
    853              
    854             sub idify {
    855 122     122 1 3009 my ($self, $t, $not_unique) = @_;
    856 122         179 for ($t) {
    857 122         219 s/[<>&'"]//g; # Strip HTML special characters
    858 122         243 s/^\s+//; s/\s+$//; # Strip white space.
      122         251  
    859 122         245 s/^([^a-zA-Z]+)$/pod$1/; # Prepend "pod" if no valid chars.
    860 122         154 s/^[^a-zA-Z]+//; # First char must be a letter.
    861 122         230 s/[^-a-zA-Z0-9_:.]+/-/g; # All other chars must be valid.
    862 122         227 s/[-:.]+$//; # Strip trailing punctuation.
    863             }
    864 122 100       230 return $t if $not_unique;
    865 103         122 my $i = '';
    866 103         375 $i++ while $self->{ids}{"$t$i"}++;
    867 103         225 return "$t$i";
    868             }
    869              
    870             =head2 batch_mode_page_object_init
    871              
    872             $pod->batch_mode_page_object_init($batchconvobj, $module, $infile, $outfile, $depth);
    873              
    874             Called by L so that the class has a chance to
    875             initialize the converter. Internally it sets the C property to
    876             true and sets C, but Pod::Simple::XHTML does not
    877             currently use those features. Subclasses might, though.
    878              
    879             =cut
    880              
    881             sub batch_mode_page_object_init {
    882 1     1 1 5 my ($self, $batchconvobj, $module, $infile, $outfile, $depth) = @_;
    883 1         4 $self->batch_mode(1);
    884 1         3 $self->batch_mode_current_level($depth);
    885 1         3 return $self;
    886             }
    887              
    888       0 0   sub html_header_after_title {
    889             }
    890              
    891              
    892             1;
    893              
    894             __END__