File Coverage

blib/lib/Pod/Markdown.pm
Criterion Covered Total %
statement 395 403 98.0
branch 124 134 92.5
condition 33 43 76.7
subroutine 99 101 98.0
pod 11 59 18.6
total 662 740 89.4


line stmt bran cond sub pod time code
1             # vim: set ts=2 sts=2 sw=2 expandtab smarttab:
2             #
3             # This file is part of Pod-Markdown
4             #
5             # This software is copyright (c) 2011 by Randy Stauner.
6             #
7             # This is free software; you can redistribute it and/or modify it under
8             # the same terms as the Perl 5 programming language system itself.
9             #
10 29     29   2759549 use 5.008;
  29         114  
11 29     29   154 use strict;
  29         83  
  29         987  
12 29     29   145 use warnings;
  29         63  
  29         3481  
13              
14             package Pod::Markdown;
15             # git description: v3.300-3-gb01c18d
16              
17             our $AUTHORITY = 'cpan:RWSTAUNER';
18             # ABSTRACT: Convert POD to Markdown
19             $Pod::Markdown::VERSION = '3.400';
20 29     29   19160 use Pod::Simple 3.27 (); # detected_encoding and keep_encoding bug fix
  29         1473089  
  29         1291  
21 29     29   15271 use parent qw(Pod::Simple::Methody);
  29         9380  
  29         199  
22 29     29   45411 use Encode ();
  29         524309  
  29         1234  
23 29     29   15265 use URI::Escape ();
  29         59880  
  29         7997  
24              
25             our %URL_PREFIXES = (
26             sco => 'http://search.cpan.org/perldoc?',
27             metacpan => 'https://metacpan.org/pod/',
28             man => 'http://man.he.net/man',
29             );
30             $URL_PREFIXES{perldoc} = $URL_PREFIXES{metacpan};
31              
32             our $LOCAL_MODULE_RE = qr/^(Local::|\w*?_\w*)/;
33              
34             ## no critic
35             #{
36             our $HAS_HTML_ENTITIES;
37              
38             # Stolen from Pod::Simple::XHTML 3.28. {{{
39              
40             BEGIN {
41 29     29   2369 $HAS_HTML_ENTITIES = eval "require HTML::Entities; 1";
42             }
43              
44             my %entities = (
45             q{>} => 'gt',
46             q{<} => 'lt',
47             q{'} => '#39',
48             q{"} => 'quot',
49             q{&} => 'amp',
50             );
51              
52             sub encode_entities {
53 238     238 0 608 my $self = shift;
54 238         527 my $ents = $self->html_encode_chars;
55 238 100       898 return HTML::Entities::encode_entities( $_[0], $ents ) if $HAS_HTML_ENTITIES;
56 49 100       108 if (defined $ents) {
57 48         127 $ents =~ s,(?
58 48         241 $ents =~ s,(?
59             } else {
60 1         5 $ents = join '', keys %entities;
61             }
62 49         81 my $str = $_[0];
63 49   66     465 $str =~ s/([$ents])/'&' . ($entities{$1} || sprintf '#x%X', ord $1) . ';'/ge;
  23         174  
64 49         189 return $str;
65             }
66              
67             # }}}
68              
69             # Add a few very common ones for consistency and readability
70             # (in case HTML::Entities isn't available).
71             %entities = (
72             # Pod::Markdown has always required 5.8 so unicode_to_native will be available.
73             chr(utf8::unicode_to_native(0xA0)) => 'nbsp',
74             chr(utf8::unicode_to_native(0xA9)) => 'copy',
75             %entities
76             );
77              
78             sub __entity_encode_ord_he {
79 12     12   188 my $chr = chr $_[0];
80             # Skip the encode_entities() logic and go straight for the substitution
81             # since we already have the char we know we want replaced.
82             # Both the hash and the function are documented as exportable (so should be reliable).
83 12   33     81 return $HTML::Entities::char2entity{ $chr } || HTML::Entities::num_entity( $chr );
84             }
85             sub __entity_encode_ord_basic {
86 7   33 7   215 return '&' . ($entities{chr $_[0]} || sprintf '#x%X', $_[0]) . ';';
87             }
88              
89             # From HTML::Entities 3.69
90             my $DEFAULT_ENTITY_CHARS = '^\n\r\t !\#\$%\(-;=?-~';
91              
92             #}
93             ## use critic
94              
95             # Use hash for simple "exists" check in `new` (much more accurate than `->can`).
96             my %attributes = map { ($_ => 1) }
97             qw(
98             html_encode_chars
99             match_encoding
100             output_encoding
101             local_module_re
102             local_module_url_prefix
103             man_url_prefix
104             perldoc_url_prefix
105             perldoc_fragment_format
106             markdown_fragment_format
107             include_meta_tags
108             escape_url
109             );
110              
111              
112             sub new {
113 317     317 1 6174593 my $class = shift;
114 317         1468 my %args = @_;
115              
116 317         1727 my $self = $class->SUPER::new();
117 317         11946 $self->preserve_whitespace(1);
118 317         2386 $self->nbsp_for_S(1);
119 317         2395 $self->accept_targets(qw( markdown html ));
120 317         7490 $self->escape_url(1);
121              
122             # Default to the global, but allow it to be overwritten in args.
123 317         2509 $self->local_module_re($LOCAL_MODULE_RE);
124              
125 317         1954 for my $type ( qw( perldoc man ) ){
126 634         1145 my $attr = $type . '_url_prefix';
127             # Initialize to the alias.
128 634         1992 $self->$attr($type);
129             }
130              
131 317         1314 while( my ($attr, $val) = each %args ){
132             # NOTE: Checking exists on a private var means we don't allow Pod::Simple
133             # attributes to be set this way. It's not very consistent, but I think
134             # I'm ok with that for now since there probably aren't many Pod::Simple attributes
135             # being changed besides `output_*` which feel like API rather than attributes.
136             # We'll see.
137             # This is currently backward-compatible as we previously just put the attribute
138             # into the private stash so anything unknown was silently ignored.
139             # We could open this up to `$self->can($attr)` in the future if that seems better
140             # but it tricked me when I was testing a misspelled attribute name
141             # which also happened to be a Pod::Simple method.
142              
143 924 100       4570 exists $attributes{ $attr } or
144             # Provide a more descriptive message than "Can't locate object method".
145             warn("Unknown argument to ${class}->new(): '$attr'"), next;
146              
147             # Call setter.
148 922         2359 $self->$attr($val);
149             }
150              
151             # TODO: call from the setters.
152 317         2243 $self->_prepare_fragment_formats;
153              
154 317 100 100     777 if(defined $self->local_module_url_prefix && $self->local_module_url_prefix eq '' && !$self->escape_url) {
      100        
155 1         22 warn("turning escape_url with an empty local_module_url_prefix is not recommended as relative URLs could be confused for IPv6 addresses");
156             }
157              
158 317         1337 return $self;
159             }
160              
161             for my $type ( qw( local_module perldoc man ) ){
162             my $attr = $type . '_url_prefix';
163 29     29   290 no strict 'refs'; ## no critic
  29         71  
  29         249403  
164             *$attr = sub {
165 1373     1373   23546 my $self = shift;
166 1373 100       2602 if (@_) {
167 877   100     4480 $self->{$attr} = $URL_PREFIXES{ $_[0] } || $_[0];
168             }
169             else {
170 496         1901 return $self->{$attr};
171             }
172             }
173             }
174              
175             ## Attribute accessors ##
176              
177              
178             sub html_encode_chars {
179 310     310 1 454 my $self = shift;
180 310         681 my $stash = $self->_private;
181              
182             # Setter.
183 310 100       741 if( @_ ){
184             # If false ('', 0, undef), disable.
185 72 50       164 if( !$_[0] ){
186 0         0 delete $stash->{html_encode_chars};
187 0         0 $stash->{encode_amp} = 1;
188 0         0 $stash->{encode_lt} = 1;
189             }
190             else {
191             # Special case boolean '1' to mean "all".
192             # If we have HTML::Entities, undef will use the default.
193             # Without it, we need to specify so that we use the same list (for consistency).
194 72 100       361 $stash->{html_encode_chars} = $_[0] eq '1' ? ($HAS_HTML_ENTITIES ? undef : $DEFAULT_ENTITY_CHARS) : $_[0];
    100          
195              
196             # If [char] doesn't get encoded, we need to do it ourselves.
197 72         218 $stash->{encode_amp} = ($self->encode_entities('&') eq '&');
198 72         2265 $stash->{encode_lt} = ($self->encode_entities('<') eq '<');
199             }
200 72         1613 return;
201             }
202              
203             # Getter.
204 238         529 return $stash->{html_encode_chars};
205             }
206              
207              
208             # I prefer ro-accessors (immutability!) but it can be confusing
209             # to not support the same API as other Pod::Simple classes.
210              
211             # NOTE: Pod::Simple::_accessorize is not a documented public API.
212             # Skip any that have already been defined.
213             __PACKAGE__->_accessorize(grep { !__PACKAGE__->can($_) } keys %attributes);
214              
215             sub _prepare_fragment_formats {
216 317     317   604 my ($self) = @_;
217              
218 317         1273 foreach my $attr ( keys %attributes ){
219 3487 100       10392 next unless $attr =~ /^(\w+)_fragment_format/;
220 634         1515 my $type = $1;
221 634         1765 my $format = $self->$attr;
222              
223             # If one was provided.
224 634 100       3871 if( $format ){
225             # If the attribute is a coderef just use it.
226 510 100       1467 next if ref($format) eq 'CODE';
227             }
228             # Else determine a default.
229             else {
230 124 100       324 if( $type eq 'perldoc' ){
231             # Choose a default that matches the destination url.
232 61         157 my $target = $self->perldoc_url_prefix;
233 61         137 foreach my $alias ( qw( metacpan sco ) ){
234 122 100       482 if( $target eq $URL_PREFIXES{ $alias } ){
235 52         101 $format = $alias;
236             }
237             }
238             # This seems like a reasonable fallback.
239 61   100     220 $format ||= 'pod_simple_xhtml';
240             }
241             else {
242 63         124 $format = $type;
243             }
244             }
245              
246             # The short name should become a method name with the prefix prepended.
247 170         319 my $prefix = 'format_fragment_';
248 170         1061 $format =~ s/^$prefix//;
249 170 50       857 die "Unknown fragment format '$format'"
250             unless $self->can($prefix . $format);
251              
252             # Save it.
253 170         461 $self->$attr($format);
254             }
255              
256 317         974 return;
257             }
258              
259             ## Backward compatible API ##
260              
261             # For backward compatibility (previously based on Pod::Parser):
262             # While Pod::Simple provides a parse_from_file() method
263             # it's primarily for Pod::Parser compatibility.
264             # When called without an output handle it will print to STDOUT
265             # but the old Pod::Markdown never printed to a handle
266             # so we don't want to start now.
267             sub parse_from_file {
268 11     11 1 23 my ($self, $file) = @_;
269              
270             # TODO: Check that all dependent cpan modules use the Pod::Simple API
271             # then add a deprecation warning here to avoid confusion.
272              
273 11         57 $self->output_string(\($self->{_as_markdown_}));
274 11         767 $self->parse_file($file);
275             }
276              
277             # Likewise, though Pod::Simple doesn't define this method at all.
278 10     10 0 593 sub parse_from_filehandle { shift->parse_from_file(@_) }
279              
280              
281             ## Document state ##
282              
283             sub _private {
284 10186     10186   14870 my ($self) = @_;
285             $self->{_Pod_Markdown_} ||= {
286 10186   100     35434 indent => 0,
287             stacks => [],
288             states => [{}],
289             link => [],
290             encode_amp => 1,
291             encode_lt => 1,
292             };
293             }
294              
295             sub _increase_indent {
296 67 50   67   154 ++$_[0]->_private->{indent} >= 1
297             or die 'Invalid state: indent < 0';
298             }
299             sub _decrease_indent {
300 67 50   67   143 --$_[0]->_private->{indent} >= 0
301             or die 'Invalid state: indent < 0';
302             }
303              
304             sub _new_stack {
305 1043     1043   2988 push @{ $_[0]->_private->{stacks} }, [];
  1043         2250  
306 1043         1721 push @{ $_[0]->_private->{states} }, {};
  1043         1980  
307             }
308              
309             sub _last_string {
310 18     18   25 $_[0]->_private->{stacks}->[-1][-1];
311             }
312              
313             sub _pop_stack_text {
314 740     740   1043 $_[0]->_private->{last_state} = pop @{ $_[0]->_private->{states} };
  740         1355  
315 740         1125 join '', @{ pop @{ $_[0]->_private->{stacks} } };
  740         1058  
  740         1314  
316             }
317              
318             sub _stack_state {
319 455     455   802 $_[0]->_private->{states}->[-1];
320             }
321              
322             sub _save {
323 1404     1404   2789 my ($self, $text) = @_;
324 1404         1828 push @{ $self->_private->{stacks}->[-1] }, $text;
  1404         2335  
325             # return $text; # DEBUG
326             }
327              
328             sub _save_line {
329 518     518   1055 my ($self, $text) = @_;
330              
331 518         1189 $text = $self->_process_escapes($text);
332              
333 518         1598 $self->_save($text . $/);
334             }
335              
336             # For paragraphs, etc.
337             sub _save_block {
338 453     453   812 my ($self, $text) = @_;
339              
340 453         898 $self->_stack_state->{blocks}++;
341              
342 453         1117 $self->_save_line($self->_indent($text) . $/);
343             }
344              
345             ## Formatting ##
346              
347             sub _chomp_all {
348 345     345   735 my ($self, $text) = @_;
349 345         1465 1 while chomp $text;
350 345         1169 return $text;
351             }
352              
353             sub _indent {
354 496     496   886 my ($self, $text) = @_;
355 496         809 my $level = $self->_private->{indent};
356              
357 496 100       1069 if( $level ){
358 34         148 my $indent = ' ' x ($level * 4);
359              
360             # Capture text on the line so that we don't indent blank lines (/^\x20{4}$/).
361 34         301 $text =~ s/^(.+)/$indent$1/mg;
362             }
363              
364 496         2088 return $text;
365             }
366              
367             # as_markdown() exists solely for backward compatibility
368             # and requires having called parse_from_file() to be useful.
369              
370              
371             sub as_markdown {
372 11     11 0 415 my ($parser, %args) = @_;
373 11         15 my @header;
374             # Don't add meta tags again if we've already done it.
375 11 100 100     34 if( $args{with_meta} && !$parser->include_meta_tags ){
376 3         19 @header = $parser->_build_markdown_head;
377             }
378 11         58 return join("\n" x 2, @header, $parser->{_as_markdown_});
379             }
380              
381             sub _build_markdown_head {
382 9     9   10 my $parser = shift;
383 9         14 my $data = $parser->_private;
384             return join "\n",
385 12         53 map { qq![[meta \l$_="$data->{$_}"]]! }
386 9         16 grep { defined $data->{$_} }
  18         31  
387             qw( Title Author );
388             }
389              
390             ## Escaping ##
391              
392             # http://daringfireball.net/projects/markdown/syntax#backslash
393             # Markdown provides backslash escapes for the following characters:
394             #
395             # \ backslash
396             # ` backtick
397             # * asterisk
398             # _ underscore
399             # {} curly braces
400             # [] square brackets
401             # () parentheses
402             # # hash mark
403             # + plus sign
404             # - minus sign (hyphen)
405             # . dot
406             # ! exclamation mark
407              
408             # However some of those only need to be escaped in certain places:
409             # * Backslashes *do* need to be escaped or they may be swallowed by markdown.
410             # * Word-surrounding characters (/[`*_]/) *do* need to be escaped mid-word
411             # because the markdown spec explicitly allows mid-word em*pha*sis.
412             # * I don't actually see anything that curly braces are used for.
413             # * Escaping square brackets is enough to avoid accidentally
414             # creating links and images (so we don't need to escape plain parentheses
415             # or exclamation points as that would generate a lot of unnecesary noise).
416             # Parentheses will be escaped in urls (&end_L) to avoid premature termination.
417             # * We don't need a backslash for every hash mark or every hyphen found mid-word,
418             # just the ones that start a line (likewise for plus and dot).
419             # (Those will all be handled by _escape_paragraph_markdown).
420              
421              
422             # Backslash escape markdown characters to avoid having them interpreted.
423             sub _escape_inline_markdown {
424 484     484   894 local $_ = $_[1];
425              
426             # s/([\\`*_{}\[\]()#+-.!])/\\$1/g; # See comments above.
427 484         1535 s/([\\`*_\[\]])/\\$1/g;
428              
429 484         1203 return $_;
430             }
431              
432             # Escape markdown characters that would be interpreted
433             # at the start of a line.
434             sub _escape_paragraph_markdown {
435 355     355   605 local $_ = $_[1];
436              
437             # Escape headings, horizontal rules, (unordered) lists, and blockquotes.
438 355         1054 s/^([-+#>])/\\$1/mg;
439              
440             # Markdown doesn't support backslash escapes for equal signs
441             # even though they can be used to underline a header.
442             # So use html to escape them to avoid having them interpreted.
443 355         681 s/^([=])/sprintf '&#x%x;', ord($1)/mge;
  1         10  
444              
445             # Escape the dots that would wrongfully create numbered lists.
446 355         674 s/^( (?:>\s+)? \d+ ) (\.\x20)/$1\\$2/xgm;
447              
448 355         672 return $_;
449             }
450              
451              
452             # Additionally Markdown allows inline html so we need to escape things that look like it.
453             # While _some_ Markdown processors handle backslash-escaped html,
454             # [Daring Fireball](http://daringfireball.net/projects/markdown/syntax) states distinctly:
455             # > In HTML, there are two characters that demand special treatment: < and &...
456             # > If you want to use them as literal characters, you must escape them as entities, e.g. <, and &.
457              
458             # It goes on to say:
459             # > Markdown allows you to use these characters naturally,
460             # > taking care of all the necessary escaping for you.
461             # > If you use an ampersand as part of an HTML entity,
462             # > it remains unchanged; otherwise it will be translated into &.
463             # > Similarly, because Markdown supports inline HTML,
464             # > if you use angle brackets as delimiters for HTML tags, Markdown will treat them as such.
465              
466             # In order to only encode the occurrences that require it (something that
467             # could be interpreted as an entity) we escape them all so that we can do the
468             # suffix test later after the string is complete (since we don't know what
469             # strings might come after this one).
470              
471             my %_escape =
472             map {
473             my ($k, $v) = split /:/;
474             # Put the "code" marker before the char instead of after so that it doesn't
475             # get confused as the $2 (which is what requires us to entity-encode it).
476             # ( "XsX", "XcsX", "X(c?)sX" )
477             my ($s, $code, $re) = map { "\0$_$v\0" } '', map { ($_, '('.$_.'?)') } 'c';
478              
479             (
480             $k => $s,
481             $k.'_code' => $code,
482             $k.'_re' => qr/$re/,
483             )
484             }
485             qw( amp:& lt:< );
486              
487             # Make the values of this private var available to the tests.
488 1     1   573081 sub __escape_sequences { %_escape }
489              
490              
491             # HTML-entity encode any characters configured by the user.
492             # If that doesn't include [&<] then we escape those chars so we can decide
493             # later if we will entity-encode them or put them back verbatim.
494             sub _encode_or_escape_entities {
495 484     484   701 my $self = $_[0];
496 484         940 my $stash = $self->_private;
497 484         855 local $_ = $_[1];
498              
499 484 100       1110 if( $stash->{encode_amp} ){
    50          
500 462 100       863 if( exists($stash->{html_encode_chars}) ){
501             # Escape all amps for later processing.
502             # Pass intermediate strings to entity encoder so that it doesn't
503             # process any of the characters of our escape sequences.
504             # Use -1 to get "as many fields as possible" so that we keep leading and
505             # trailing (possibly empty) fields.
506 38         183 $_ = join $_escape{amp}, map { $self->encode_entities($_) } split /&/, $_, -1;
  57         533  
507             }
508             else {
509 424         926 s/&/$_escape{amp}/g;
510             }
511             }
512             elsif( exists($stash->{html_encode_chars}) ){
513 22         72 $_ = $self->encode_entities($_);
514             }
515              
516             s/
517 484 100       3054 if $stash->{encode_lt};
518              
519 484         1090 return $_;
520             }
521              
522             # From Markdown.pl version 1.0.1 line 1172 (_DoAutoLinks).
523             my $EMAIL_MARKER = qr{
524             # < # Opening token is in parent regexp.
525             (?:mailto:)?
526             (
527             [-.\w]+
528             \@
529             [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
530             )
531             >
532             }x;
533              
534             # Process any escapes we put in the text earlier,
535             # now that the text is complete (end of a block).
536             sub _process_escapes {
537 518     518   811 my $self = $_[0];
538 518         842 my $stash = $self->_private;
539 518         827 local $_ = $_[1];
540              
541             # The patterns below are taken from Markdown.pl 1.0.1 _EncodeAmpsAndAngles().
542             # In this case we only want to encode the ones that Markdown won't.
543             # This is overkill but produces nicer looking text (less escaped entities).
544             # If it proves insufficent then we'll just encode them all.
545              
546             # $1: If the escape was in a code sequence, simply replace the original.
547             # $2: If the unescaped value would be followed by characters
548             # that could be interpreted as html, entity-encode it.
549             # else: The character is safe to leave bare.
550              
551             # Neither currently allows $2 to contain '0' so bool tests are sufficient.
552              
553 518 100       1138 if( $stash->{encode_amp} ){
554             # Encode & if succeeded by chars that look like an html entity.
555 483         4800 s,$_escape{amp_re}((?:#?[xX]?(?:[0-9a-fA-F]+|\w+);)?),
556 90 100       583 $1 ? '&'.$2 : $2 ? '&'.$2 : '&',egos;
    100          
557             }
558              
559 518 100       1802 if( $stash->{encode_lt} ){
560             # Encode < if succeeded by chars that look like an html tag.
561             # Leave email addresses () for Markdown to process.
562 483         5518 s,$_escape{lt_re}((?=$EMAIL_MARKER)|(?:[a-z/?\$!])?),
563 72 100       414 $1 ? '<'.$2 : $2 ? '<'.$2 : '<',egos;
    100          
564             }
565              
566 518         1270 return $_;
567             }
568              
569              
570             ## Parsing ##
571              
572             sub handle_text {
573 567     567 0 7568 my $self = $_[0];
574 567         1069 my $stash = $self->_private;
575 567         1045 local $_ = $_[1];
576              
577             # Unless we're in a code span, verbatim block, or formatted region.
578 567 100       1321 unless( $stash->{no_escape} ){
579              
580             # We could, in theory, alter what gets escaped according to context
581             # (for example, escape square brackets (but not parens) inside link text).
582             # The markdown produced might look slightly nicer but either way you're
583             # at the whim of the markdown processor to interpret things correctly.
584             # For now just escape everything.
585              
586             # Don't let literal characters be interpreted as markdown.
587 484         1083 $_ = $self->_escape_inline_markdown($_);
588              
589             # Entity-encode (or escape for later processing) necessary/desired chars.
590 484         1155 $_ = $self->_encode_or_escape_entities($_);
591              
592             }
593             # If this _is_ a code section, do limited/specific handling.
594             else {
595             # Always escaping these chars ensures that we won't mangle the text
596             # in the unlikely event that a sequence matching our escape occurred in the
597             # input stream (since we're going to escape it and then unescape it).
598 83 100       459 s/&/$_escape{amp_code}/gos if $stash->{encode_amp};
599 83 100       339 s/{encode_lt};
600             }
601              
602 567         1231 $self->_save($_);
603             }
604              
605             sub start_Document {
606 303     303 0 138868 my ($self) = @_;
607 303         925 $self->_new_stack;
608             }
609              
610             sub end_Document {
611 303     303 0 34290 my ($self) = @_;
612 303         1013 $self->_check_search_header;
613 303         458 my $end = pop @{ $self->_private->{stacks} };
  303         512  
614              
615 303 50       487 @{ $self->_private->{stacks} } == 0
  303         497  
616             or die 'Document ended with stacks remaining';
617              
618 303         1579 my @doc = $self->_chomp_all(join('', @$end)) . $/;
619              
620 303 100       1003 if( $self->include_meta_tags ){
621 6         39 unshift @doc, $self->_build_markdown_head, ($/ x 2);
622             }
623              
624 303 100       2275 if( my $encoding = $self->_get_output_encoding ){
625             # Do the check outside the loop(s) for efficiency.
626 66 100       622 my $ents = $HAS_HTML_ENTITIES ? \&__entity_encode_ord_he : \&__entity_encode_ord_basic;
627             # Iterate indices to avoid copying large strings.
628 66         196 for my $i ( 0 .. $#doc ){
629 66         90 print { $self->{output_fh} } Encode::encode($encoding, $doc[$i], $ents);
  66         482  
630             }
631             }
632             else {
633 237         1350 print { $self->{output_fh} } @doc;
  237         1330  
634             }
635             }
636              
637             sub _get_output_encoding {
638 303     303   544 my ($self) = @_;
639              
640             # If 'match_encoding' is set we need to return an encoding.
641             # If pod has no =encoding, Pod::Simple will guess if it sees a high-bit char.
642             # If there are no high-bit chars, encoding is undef.
643             # Use detected_encoding() rather than encoding() because if Pod::Simple
644             # can't use whatever encoding was specified, we probably can't either.
645             # Fallback to 'o_e' if no match is found. This gives the user the choice,
646             # since otherwise there would be no reason to specify 'o_e' *and* 'm_e'.
647             # Fallback to UTF-8 since it is a reasonable default these days.
648              
649 303 100 100     711 return $self->detected_encoding || $self->output_encoding || 'UTF-8'
650             if $self->match_encoding;
651              
652             # If output encoding wasn't specified, return false.
653 289         1906 return $self->output_encoding;
654             }
655              
656             ## Blocks ##
657              
658             sub start_Verbatim {
659 17     17 0 6396 my ($self) = @_;
660 17         60 $self->_new_stack;
661 17         64 $self->_private->{no_escape} = 1;
662             }
663              
664             sub end_Verbatim {
665 17     17 0 175 my ($self) = @_;
666              
667 17         106 my $text = $self->_pop_stack_text;
668              
669 17         46 $text = $self->_indent_verbatim($text);
670              
671 17         54 $self->_private->{no_escape} = 0;
672              
673             # Verbatim blocks do not generate a separate "Para" event.
674 17         39 $self->_save_block($text);
675             }
676              
677             sub _indent_verbatim {
678 17     17   33 my ($self, $paragraph) = @_;
679              
680             # NOTE: Pod::Simple expands the tabs for us (as suggested by perlpodspec).
681             # Pod::Simple also has a 'strip_verbatim_indent' attribute
682             # but it doesn't sound like it gains us anything over this method.
683              
684             # POD verbatim can start with any number of spaces (or tabs)
685             # markdown should be 4 spaces (or a tab)
686             # so indent any paragraphs so that all lines start with at least 4 spaces
687 17         62 my @lines = split /\n/, $paragraph;
688 17         31 my $indent = ' ' x 4;
689 17         39 foreach my $line ( @lines ){
690 32 100       123 next unless $line =~ m/^( +)/;
691             # find the smallest indentation
692 31 100       111 $indent = $1 if length($1) < length($indent);
693             }
694 17 100       55 if( (my $smallest = length($indent)) < 4 ){
695             # invert to get what needs to be prepended
696 11         31 $indent = ' ' x (4 - $smallest);
697              
698             # Prepend indent to each line.
699             # We could check /\S/ to only indent non-blank lines,
700             # but it's backward compatible to respect the whitespace.
701             # Additionally, both pod and markdown say they ignore blank lines
702             # so it shouldn't hurt to leave them in.
703 11 100       27 $paragraph = join "\n", map { length($_) ? $indent . $_ : '' } @lines;
  23         90  
704             }
705              
706 17         47 return $paragraph;
707             }
708              
709             sub start_Para {
710 355     355 0 187791 $_[0]->_new_stack;
711             }
712              
713             sub end_Para {
714 355     355 0 4781 my ($self) = @_;
715 355         664 my $text = $self->_pop_stack_text;
716              
717 355         920 $text = $self->_escape_paragraph_markdown($text);
718              
719 355         799 $self->_save_block($text);
720             }
721              
722              
723             ## Headings ##
724              
725 51     51 0 8454 sub start_head1 { $_[0]->_start_head(1) }
726 51     51 0 602 sub end_head1 { $_[0]->_end_head(1) }
727 11     11 0 4820 sub start_head2 { $_[0]->_start_head(2) }
728 11     11 0 150 sub end_head2 { $_[0]->_end_head(2) }
729 1     1 0 508 sub start_head3 { $_[0]->_start_head(3) }
730 1     1 0 7 sub end_head3 { $_[0]->_end_head(3) }
731 0     0 0 0 sub start_head4 { $_[0]->_start_head(4) }
732 0     0 0 0 sub end_head4 { $_[0]->_end_head(4) }
733              
734             sub _check_search_header {
735 366     366   574 my ($self) = @_;
736             # Save the text since the last heading if we want it for metadata.
737 366 100       725 if( my $last = $self->_private->{search_header} ){
738 18         25 for( $self->_private->{$last} = $self->_last_string ){
739 18         31 s/\A\s+//;
740 18         78 s/\s+\z//;
741             }
742             }
743             }
744             sub _start_head {
745 63     63   209 my ($self) = @_;
746 63         196 $self->_check_search_header;
747 63         125 $self->_new_stack;
748             }
749              
750             sub _end_head {
751 63     63   122 my ($self, $num) = @_;
752 63         129 my $h = '#' x $num;
753              
754 63         142 my $text = $self->_pop_stack_text;
755             $self->_private->{search_header} =
756 63 100       339 $text =~ /NAME/ ? 'Title'
    100          
757             : $text =~ /AUTHOR/ ? 'Author'
758             : undef;
759              
760             # TODO: option for $h suffix
761             # TODO: put a name="" if $self->{embed_anchor_tags}; ?
762             # https://rt.cpan.org/Ticket/Display.html?id=57776
763 63         249 $self->_save_block(join(' ', $h, $text));
764             }
765              
766             ## Lists ##
767              
768             # With Pod::Simple->parse_empty_lists(1) there could be an over_empty event,
769             # but what would you do with that?
770              
771             sub _start_list {
772 22     22   57 my ($self) = @_;
773 22         78 $self->_new_stack;
774              
775             # Nest again b/c start_item will pop this to look for preceding content.
776 22         72 $self->_increase_indent;
777 22         54 $self->_new_stack;
778             }
779              
780             sub _end_list {
781 22     22   51 my ($self) = @_;
782 22         67 $self->_handle_between_item_content;
783              
784             # Finish the list.
785              
786             # All the child elements should be blocks,
787             # but don't end with a double newline.
788 22         100 my $text = $self->_chomp_all($self->_pop_stack_text);
789              
790 22         98 $_[0]->_save_line($text . $/);
791             }
792              
793             sub _handle_between_item_content {
794 65     65   133 my ($self) = @_;
795              
796             # This might be empty (if the list item had no additional content).
797 65 100       170 if( my $text = $self->_pop_stack_text ){
798             # Else it's a sub-document.
799             # If there are blocks we need to separate with blank lines.
800 21 100       51 if( $self->_private->{last_state}->{blocks} ){
801 16         61 $text = $/ . $text;
802             }
803             # If not, we can condense the text.
804             # In this module's history there was a patch contributed to specifically
805             # produce "huddled" lists so we'll try to maintain that functionality.
806             else {
807 5         18 $text = $self->_chomp_all($text) . $/;
808             }
809 21         112 $self->_save($text)
810             }
811              
812 65         162 $self->_decrease_indent;
813             }
814              
815             sub _start_item {
816 43     43   98 my ($self) = @_;
817 43         127 $self->_handle_between_item_content;
818 43         99 $self->_new_stack;
819             }
820              
821             sub _end_item {
822 43     43   99 my ($self, $marker) = @_;
823 43         127 my $text = $self->_pop_stack_text;
824 43 100 66     440 $self->_save_line($self->_indent($marker .
825             # Add a space only if there is text after the marker.
826             (defined($text) && length($text) ? ' ' . $text : '')
827             ));
828              
829             # Store any possible contents in a new stack (like a sub-document).
830 43         144 $self->_increase_indent;
831 43         99 $self->_new_stack;
832             }
833              
834 7     7 0 2572 sub start_over_bullet { $_[0]->_start_list }
835 7     7 0 1050 sub end_over_bullet { $_[0]->_end_list }
836              
837 18     18 0 8647 sub start_item_bullet { $_[0]->_start_item }
838 18     18 0 258 sub end_item_bullet { $_[0]->_end_item('-') }
839              
840 9     9 0 4151 sub start_over_number { $_[0]->_start_list }
841 9     9 0 2494 sub end_over_number { $_[0]->_end_list }
842              
843             sub start_item_number {
844 18     18 0 9587 $_[0]->_start_item;
845             # It seems like this should be a stack,
846             # but from testing it appears that the corresponding 'end' event
847             # comes right after the text (it doesn't surround any embedded content).
848             # See t/nested.t which shows start-item, text, end-item, para, start-item....
849 18         52 $_[0]->_private->{item_number} = $_[1]->{number};
850             }
851              
852             sub end_item_number {
853 18     18 0 249 my ($self) = @_;
854 18         42 $self->_end_item($self->_private->{item_number} . '.');
855             }
856              
857             # Markdown doesn't support definition lists
858             # so do regular (unordered) lists with indented paragraphs.
859 6     6 0 2791 sub start_over_text { $_[0]->_start_list }
860 6     6 0 1273 sub end_over_text { $_[0]->_end_list }
861              
862 7     7 0 3225 sub start_item_text { $_[0]->_start_item }
863 7     7 0 151 sub end_item_text { $_[0]->_end_item('-')}
864              
865              
866             # perlpodspec equates an over/back region with no items to a blockquote.
867             sub start_over_block {
868             # NOTE: We don't actually need to indent for a blockquote.
869 3     3 0 1743 $_[0]->_new_stack;
870             }
871              
872             sub end_over_block {
873 3     3 0 844 my ($self) = @_;
874              
875             # Chomp first to avoid prefixing a blank line with a `>`.
876 3         10 my $text = $self->_chomp_all($self->_pop_stack_text);
877              
878             # NOTE: Paragraphs will already be escaped.
879              
880             # I don't really like either of these implementations
881             # but the join/map/split seems a little better and benches a little faster.
882             # You would lose the last newline but we've already chomped.
883             #$text =~ s{^(.)?}{'>' . (defined($1) && length($1) ? (' ' . $1) : '')}mge;
884 3 100       52 $text = join $/, map { length($_) ? '> ' . $_ : '>' } split qr-$/-, $text;
  22         68  
885              
886 3         16 $self->_save_block($text);
887             }
888              
889             ## Custom Formats ##
890              
891             sub start_for {
892 10     10 0 4211 my ($self, $attr) = @_;
893 10         34 $self->_new_stack;
894              
895 10 100       40 if( $attr->{target} eq 'html' ){
896             # Use another stack so we can indent
897             # (not syntactily necessary but seems appropriate).
898 2         8 $self->_new_stack;
899 2         7 $self->_increase_indent;
900 2         5 $self->_private->{no_escape} = 1;
901             # Mark this so we know to undo it.
902 2         5 $self->_stack_state->{for_html} = 1;
903             }
904             }
905              
906             sub end_for {
907 10     10 0 1051 my ($self) = @_;
908             # Data gets saved as a block (which will handle indents),
909             # but if there was html we'll alter this, so chomp and save a block again.
910 10         26 my $text = $self->_chomp_all($self->_pop_stack_text);
911              
912 10 100       19 if( $self->_private->{last_state}->{for_html} ){
913 2         6 $self->_private->{no_escape} = 0;
914             # Save it to the next stack up so we can pop it again (we made two stacks).
915 2         8 $self->_save($text);
916 2         8 $self->_decrease_indent;
917 2         6 $text = join "\n", '
', $self->_chomp_all($self->_pop_stack_text), '
';
918             }
919              
920 10         24 $self->_save_block($text);
921             }
922              
923             # Data events will be emitted for any formatted regions that have been enabled
924             # (by default, `markdown` and `html`).
925              
926             sub start_Data {
927 5     5 0 649 my ($self) = @_;
928             # TODO: limit this to what's in attr?
929 5         12 $self->_private->{no_escape}++;
930 5         11 $self->_new_stack;
931             }
932              
933             sub end_Data {
934 5     5 0 52 my ($self) = @_;
935 5         12 my $text = $self->_pop_stack_text;
936 5         11 $self->_private->{no_escape}--;
937 5         12 $self->_save_block($text);
938             }
939              
940             ## Codes ##
941              
942 96     96 0 1150 sub start_B { $_[0]->_save('**') }
943 48     48 0 726 sub end_B { $_[0]->start_B() }
944              
945 46     46 0 526 sub start_I { $_[0]->_save('_') }
946 23     23 0 276 sub end_I { $_[0]->start_I() }
947              
948             sub start_C {
949 61     61 0 1151 my ($self) = @_;
950 61         174 $self->_new_stack;
951 61         148 $self->_private->{no_escape}++;
952             }
953              
954             sub end_C {
955 61     61 0 677 my ($self) = @_;
956 61         124 $self->_private->{no_escape}--;
957 61         178 $self->_save( $self->_wrap_code_span($self->_pop_stack_text) );
958             }
959              
960             # Use code spans for F<>.
961 4     4 0 67 sub start_F { shift->start_C(@_); }
962 4     4 0 36 sub end_F { shift ->end_C(@_); }
963              
964             sub start_L {
965 93     93 0 1475 my ($self, $flags) = @_;
966 93         233 $self->_new_stack;
967 93         126 push @{ $self->_private->{link} }, $flags;
  93         169  
968             }
969              
970             sub end_L {
971 93     93 0 997 my ($self) = @_;
972 93 50       109 my $flags = pop @{ $self->_private->{link} }
  93         160  
973             or die 'Invalid state: link end with no link start';
974              
975 93         144 my ($type, $to, $section) = @{$flags}{qw( type to section )};
  93         247  
976              
977 93 50       487 my $url = (
    100          
    100          
978             $type eq 'url' ? $to
979             : $type eq 'man' ? $self->format_man_url($to, $section)
980             : $type eq 'pod' ? $self->format_perldoc_url($to, $section)
981             : undef
982             );
983              
984 93         254 my $text = $self->_pop_stack_text;
985              
986             # NOTE: I don't think the perlpodspec says what to do with L<|blah>
987             # but it seems like a blank link text just doesn't make sense
988 93 100       216 if( !length($text) ){
989 4 100       13 $text =
    100          
990             $section ?
991             $to ? sprintf('"%s" in %s', $section, $to)
992             : ('"' . $section . '"')
993             : $to;
994             }
995              
996             # FIXME: What does Pod::Simple::X?HTML do for this?
997             # if we don't know how to handle the url just print the pod back out
998 93 50       309 if (!$url) {
999 0         0 $self->_save(sprintf 'L<%s>', $flags->{raw});
1000 0         0 return;
1001             }
1002              
1003             # In the url we need to escape quotes and parentheses lest markdown
1004             # break the url (cut it short and/or wrongfully interpret a title).
1005              
1006             # Backslash escapes do not work for the space and quotes.
1007             # URL-encoding the space is not sufficient
1008             # (the quotes confuse some parsers and produce invalid html).
1009             # I've arbitratily chosen HTML encoding to hide them from markdown
1010             # while mangling the url as litle as possible.
1011 93         721 $url =~ s/([ '"])/sprintf '&#x%x;', ord($1)/ge;
  11         60  
1012              
1013             # We also need to double any backslashes that may be present
1014             # (lest they be swallowed up) and stop parens from breaking the url.
1015 93         484 $url =~ s/([\\()])/\\$1/g;
1016              
1017             # TODO: put section name in title if not the same as $text
1018 93         467 $self->_save('[' . $text . '](' . $url . ')');
1019             }
1020              
1021             sub start_X {
1022 1     1 0 14 $_[0]->_new_stack;
1023             }
1024              
1025             sub end_X {
1026 1     1 0 8 my ($self) = @_;
1027 1         2 my $text = $self->_pop_stack_text;
1028             # TODO: mangle $text?
1029             # TODO: put if configured
1030             }
1031              
1032             # A code span can be delimited by multiple backticks (and a space)
1033             # similar to pod codes (C<< code >>), so ensure we use a big enough
1034             # delimiter to not have it broken by embedded backticks.
1035             sub _wrap_code_span {
1036 61     61   175 my ($self, $arg) = @_;
1037 61         108 my $longest = 0;
1038 61         261 while( $arg =~ /([`]+)/g ){
1039 5         16 my $len = length($1);
1040 5 100       31 $longest = $len if $longest < $len;
1041             }
1042 61         180 my $delim = '`' x ($longest + 1);
1043 61 100       278 my $pad = $longest > 0 ? ' ' : '';
1044 61         230 return $delim . $pad . $arg . $pad . $delim;
1045             }
1046              
1047             ## Link Formatting (TODO: Move this to another module) ##
1048              
1049              
1050             sub format_man_url {
1051 6     6 1 13 my ($self, $to) = @_;
1052 6         14 my ($page, $part) = ($to =~ /^ ([^(]+) (?: \( (\S+) \) )? /x);
1053 6   50     98 return $self->man_url_prefix . ($part || 1) . '/' . ($page || $to);
      33        
1054             }
1055              
1056              
1057             sub format_perldoc_url {
1058 70     70 1 154 my ($self, $name, $section) = @_;
1059              
1060 70         155 my $url_prefix = $self->perldoc_url_prefix;
1061 70 100 100     283 if (
      100        
1062             defined($name)
1063             && $self->is_local_module($name)
1064             && defined($self->local_module_url_prefix)
1065             ) {
1066 5         10 $url_prefix = $self->local_module_url_prefix;
1067             }
1068              
1069 70         1508 my $url = '';
1070              
1071             # If the link is to another module (external link).
1072 70 100       138 if ($name) {
1073 59 100       1136 $url = $url_prefix . ($self->escape_url ? URI::Escape::uri_escape($name) : $name);
1074             }
1075              
1076             # See https://rt.cpan.org/Ticket/Display.html?id=57776
1077             # for a discussion on the need to mangle the section.
1078 70 100       3495 if ($section){
1079              
1080 42 100       923 my $method = $url
1081             # If we already have a prefix on the url it's external.
1082             ? $self->perldoc_fragment_format
1083             # Else an internal link points to this markdown doc.
1084             : $self->markdown_fragment_format;
1085              
1086 42 100       251 $method = 'format_fragment_' . $method
1087             unless ref($method);
1088              
1089             {
1090             # Set topic to enable code refs to be simple.
1091 42         59 local $_ = $section;
  42         56  
1092 42         138 $section = $self->$method($section);
1093             }
1094              
1095 42         139 $url .= '#' . $section;
1096             }
1097              
1098 70         355 return $url;
1099             }
1100              
1101              
1102             # TODO: simple, pandoc, etc?
1103              
1104             sub format_fragment_markdown {
1105 3     3 1 43 my ($self, $section) = @_;
1106              
1107             # If this is an internal link (to another section in this doc)
1108             # we can't be sure what the heading id's will look like
1109             # (it depends on what is rendering the markdown to html)
1110             # but we can try to follow popular conventions.
1111              
1112             # http://johnmacfarlane.net/pandoc/demo/example9/pandocs-markdown.html#header-identifiers-in-html-latex-and-context
1113             #$section =~ s/(?![-_.])[[:punct:]]//g;
1114             #$section =~ s/\s+/-/g;
1115 3         6 $section =~ s/\W+/-/g;
1116 3         98 $section =~ s/-+$//;
1117 3         5 $section =~ s/^-+//;
1118 3         6 $section = lc $section;
1119             #$section =~ s/^[^a-z]+//;
1120 3   50     10 $section ||= 'section';
1121              
1122 3         6 return $section;
1123             }
1124              
1125              
1126             {
1127             # From Pod::Simple::XHTML 3.28.
1128             # The strings gets passed through encode_entities() before idify().
1129             # If we don't do it here the substitutions below won't operate consistently.
1130              
1131             sub format_fragment_pod_simple_xhtml {
1132 9     9 1 18 my ($self, $t) = @_;
1133              
1134             # encode_entities {
1135             # We need to use the defaults in case html_encode_chars has been customized
1136             # (since the purpose is to match what external sources are doing).
1137              
1138 9         20 local $self->_private->{html_encode_chars};
1139 9         34 $t = $self->encode_entities($t);
1140             # }
1141              
1142             # idify {
1143 9         345 for ($t) {
1144 9         17 s/<[^>]+>//g; # Strip HTML.
1145 9         85 s/&[^;]+;//g; # Strip entities.
1146 9         71 s/^\s+//; s/\s+$//; # Strip white space.
  9         73  
1147 9         77 s/^([^a-zA-Z]+)$/pod$1/; # Prepend "pod" if no valid chars.
1148 9         82 s/^[^a-zA-Z]+//; # First char must be a letter.
1149 9         70 s/[^-a-zA-Z0-9_:.]+/-/g; # All other chars must be valid.
1150 9         114 s/[-:.]+$//; # Strip trailing punctuation.
1151             }
1152             # }
1153              
1154 9         77 return $t;
1155             }
1156             }
1157              
1158              
1159             sub format_fragment_pod_simple_html {
1160 9     9 1 24 my ($self, $section) = @_;
1161              
1162             # From Pod::Simple::HTML 3.28.
1163              
1164             # section_name_tidy {
1165 9         17 $section =~ s/^\s+//;
1166 9         115 $section =~ s/\s+$//;
1167 9         117 $section =~ tr/ /_/;
1168 9         189 $section =~ tr/\x00-\x1F\x80-\x9F//d if 'A' eq chr(65); # drop crazy characters
1169              
1170             #$section = $self->unicode_escape_url($section);
1171             # unicode_escape_url {
1172 9         18 $section =~ s/([^\x00-\xFF])/'('.ord($1).')'/eg;
  0         0  
1173             # Turn char 1234 into "(1234)"
1174             # }
1175              
1176 9 50       24 $section = '_' unless length $section;
1177 9         21 return $section;
1178             # }
1179             }
1180              
1181              
1182 9     9 1 28 sub format_fragment_metacpan { shift->format_fragment_pod_simple_xhtml(@_); }
1183 9     9 1 27 sub format_fragment_sco { shift->format_fragment_pod_simple_html(@_); }
1184              
1185              
1186             sub is_local_module {
1187 59     59 1 118 my ($self, $name) = @_;
1188              
1189 59         197 return ($name =~ $self->local_module_re);
1190             }
1191              
1192             1;
1193              
1194             __END__