| blib/lib/Pod/HtmlEasy/Parser.pm | |||
|---|---|---|---|
| Criterion | Covered | Total | % |
| statement | 263 | 283 | 92.9 |
| branch | 64 | 80 | 80.0 |
| condition | 11 | 12 | 91.6 |
| subroutine | 31 | 33 | 93.9 |
| pod | 0 | 7 | 0.0 |
| total | 369 | 415 | 88.9 |
| line | stmt | bran | cond | sub | pod | time | code |
|---|---|---|---|---|---|---|---|
| 1 | ############################################################################# | ||||||
| 2 | ## Name: Parser.pm | ||||||
| 3 | ## Purpose: Pod::HtmlEasy::Parser | ||||||
| 4 | ## Author: Graciliano M. P. | ||||||
| 5 | ## Modified by: Geoffrey Leach | ||||||
| 6 | ## Created: 11/01/2004 | ||||||
| 7 | ## Updated: 2010-06-13 | ||||||
| 8 | ## Copyright: (c) 2004 Graciliano M. P. (c) 2007 - 2013 Geoffrey Leach | ||||||
| 9 | ## Licence: This program is free software; you can redistribute it and/or | ||||||
| 10 | ## modify it under the same terms as Perl itself | ||||||
| 11 | ############################################################################# | ||||||
| 12 | |||||||
| 13 | package Pod::HtmlEasy::Parser; | ||||||
| 14 | 4 | 4 | 70 | use 5.006003; | |||
| 4 | 12 | ||||||
| 4 | 172 | ||||||
| 15 | |||||||
| 16 | 4 | 4 | 21 | use base qw{ Pod::Parser }; | |||
| 4 | 7 | ||||||
| 4 | 536 | ||||||
| 17 | 4 | 4 | 21 | use Pod::Parser; | |||
| 4 | 5 | ||||||
| 4 | 225 | ||||||
| 18 | 4 | 4 | 4889 | use Pod::ParseLink; | |||
| 4 | 4013 | ||||||
| 4 | 266 | ||||||
| 19 | 4 | 4 | 1101 | use Readonly; | |||
| 4 | 3064 | ||||||
| 4 | 268 | ||||||
| 20 | 4 | 4 | 3112 | use Pod::HtmlEasy::Data qw(EMPTY NUL); | |||
| 4 | 15 | ||||||
| 4 | 373 | ||||||
| 21 | |||||||
| 22 | 4 | 4 | 34 | use Carp; | |||
| 4 | 7 | ||||||
| 4 | 275 | ||||||
| 23 | 4 | 4 | 20 | use English qw{ -no_match_vars }; | |||
| 4 | 8 | ||||||
| 4 | 27 | ||||||
| 24 | 4 | 4 | 16076 | use Regexp::Common qw{ whitespace number URI }; | |||
| 4 | 28016 | ||||||
| 4 | 28 | ||||||
| 25 | 4 | 4 | 198427 | use Regexp::Common::URI::RFC2396 qw { $escaped }; | |||
| 4 | 12 | ||||||
| 4 | 399 | ||||||
| 26 | 4 | 4 | 4842 | use Pod::Escapes qw{ e2char }; | |||
| 4 | 15872 | ||||||
| 4 | 781 | ||||||
| 27 | |||||||
| 28 | our $VERSION = version->declare("v1.1.11"); | ||||||
| 29 | |||||||
| 30 | # Provided for RT 82400. Use native switch if available. | ||||||
| 31 | BEGIN { | ||||||
| 32 | 4 | 50 | 4 | 230 | if ($PERL_VERSION >= 5.012) { | ||
| 33 | 4 | 40 | require feature; | ||||
| 34 | 4 | 522 | "feature"->import(qw(switch)); | ||||
| 35 | } else { | ||||||
| 36 | 0 | 0 | require Switch; | ||||
| 37 | 0 | 0 | "Switch"->import(qw(Perl6)); | ||||
| 38 | } | ||||||
| 39 | } | ||||||
| 40 | |||||||
| 41 | 4 | 4 | 25 | use strict; | |||
| 4 | 10 | ||||||
| 4 | 142 | ||||||
| 42 | 4 | 4 | 20 | use warnings; | |||
| 4 | 7 | ||||||
| 4 | 5310 | ||||||
| 43 | |||||||
| 44 | ######## | ||||||
| 45 | # VARS # | ||||||
| 46 | ######## | ||||||
| 47 | |||||||
| 48 | Readonly::Scalar my $NUL => NUL; | ||||||
| 49 | |||||||
| 50 | # RT 58274 [\w-]+ => [\w\.-] | ||||||
| 51 | # Commented patterns temp test | ||||||
| 52 | Readonly::Scalar my $MAIL_RE => qr{ | ||||||
| 53 | ( # grab all of this | ||||||
| 54 | [\w\.-]+ # some word chars with '-' and '.'included foo | ||||||
| 55 | \0? # possible NUL escape | ||||||
| 56 | \@ # literal '@' @ | ||||||
| 57 | [\w\.-]+ # another word bar | ||||||
| 58 | (?: # non-grabbing pattern | ||||||
| 59 | # \. # literal '.' . | ||||||
| 60 | [\w\.-]+ # that word stuff stuff | ||||||
| 61 | # \. # another literal '.' . | ||||||
| 62 | [\w\.-]+ # another word and | ||||||
| 63 | | # or | ||||||
| 64 | # \. # literal '.' . | ||||||
| 65 | [\w\.-]+ # word nonsense | ||||||
| 66 | | # or empty? | ||||||
| 67 | ) # end of non-grab | ||||||
| 68 | ) # end of grab | ||||||
| 69 | }smx; # [6062] | ||||||
| 70 | |||||||
| 71 | |||||||
| 72 | |||||||
| 73 | # Treatment of embedded HTML-significant characters and embedded URIs. | ||||||
| 74 | |||||||
| 75 | # There are some characters (%HTML_ENTITIES below) which may in some | ||||||
| 76 | # circumstances be interpreted by a browser, and you probably don't want that | ||||||
| 77 | # Consequently, they are replaced by names defined by the W3C UNICODE spec, | ||||||
| 78 | # http://www.w3.org/TR/MathML2/bycodes.html, bracketed by '&' and ';' | ||||||
| 79 | # Thus, '>' becomes '<' This is handled by _encode_entities() | ||||||
| 80 | # There's a "gotchya" in this process. As we are generating HTML, | ||||||
| 81 | # the encoding needs to take place _before_ any HTML is generated. | ||||||
| 82 | |||||||
| 83 | # If the HTML appears garbled, and UNICODE entities appear where they | ||||||
| 84 | # shouldn't, this encoding has happened to late at some point. | ||||||
| 85 | |||||||
| 86 | # This is all further complicated by the fact that the POD formatting | ||||||
| 87 | # codes syntax uses some of the same characters, as in "L<...>", for example, | ||||||
| 88 | # and we can't expand those first, because some of them generate | ||||||
| 89 | # HTML. This is resolved by tagging the characters that we want | ||||||
| 90 | # to distinguish from HTML with ASCII NUL ('\0', $NUL). Thus, '$lt;' becomes | ||||||
| 91 | # '\0&' in _encode_entities(). Generated HTML is also handled | ||||||
| 92 | # this way by _nul_escape(). After all processing of the POD formatting | ||||||
| 93 | # codes are processed, this is reversed by _remove _nul_escapes(). | ||||||
| 94 | |||||||
| 95 | # Then there's the issue of embedded URIs. URIs are also generated | ||||||
| 96 | # by the processing of L<...>, and can show up _inside L<...>, we | ||||||
| 97 | # delay processing of embedded URIs until after all of the POD | ||||||
| 98 | # formatting codes is complete. URIs that result from that processing | ||||||
| 99 | # are tagged (you guessed it!) with a NUL character, but not preceeding | ||||||
| 100 | # the generated URI, but after the first character. These NULs are removed | ||||||
| 101 | # by _remove _nul_escapes() | ||||||
| 102 | |||||||
| 103 | Readonly::Hash my %HTML_ENTITIES => ( | ||||||
| 104 | q{&} => q{amp}, | ||||||
| 105 | q{>} => q{gt}, | ||||||
| 106 | q{<} => q{lt}, | ||||||
| 107 | q{"} => q{quot}, | ||||||
| 108 | ); | ||||||
| 109 | |||||||
| 110 | my $HTML_ENTITIES_RE = join q{|}, keys %HTML_ENTITIES; | ||||||
| 111 | $HTML_ENTITIES_RE = qr{$HTML_ENTITIES_RE}msx; | ||||||
| 112 | |||||||
| 113 | ################# | ||||||
| 114 | # _NUL_ESCAPE # | ||||||
| 115 | ################# | ||||||
| 116 | |||||||
| 117 | # Escape HTML-significant characters with ASCII NUL to differentiate them | ||||||
| 118 | # from the same characters that get converted to entity names | ||||||
| 119 | sub _nul_escape { | ||||||
| 120 | 89 | 89 | 115 | my $txt_ref = shift; | |||
| 121 | |||||||
| 122 | 89 | 98 | ${$txt_ref} =~ s{($HTML_ENTITIES_RE)}{$NUL$1}gsmx; | ||||
| 89 | 1554 | ||||||
| 123 | 89 | 177 | return; | ||||
| 124 | } | ||||||
| 125 | |||||||
| 126 | ####################### | ||||||
| 127 | # _REMOVE_NUL_ESCAPSE # | ||||||
| 128 | ####################### | ||||||
| 129 | |||||||
| 130 | sub _remove_nul_escapes { | ||||||
| 131 | 496 | 496 | 594 | my $txt_ref = shift; | |||
| 132 | |||||||
| 133 | 496 | 481 | ${$txt_ref} =~ s{$NUL}{}gsmx; | ||||
| 496 | 1596 | ||||||
| 134 | 496 | 847 | return; | ||||
| 135 | } | ||||||
| 136 | |||||||
| 137 | #################### | ||||||
| 138 | # _ENCODE_ENTITIES # | ||||||
| 139 | #################### | ||||||
| 140 | |||||||
| 141 | sub _encode_entities { | ||||||
| 142 | 638 | 638 | 821 | my $txt_ref = shift; | |||
| 143 | |||||||
| 144 | 638 | 100 | 66 | 1555 | if ( !( defined $txt_ref && length ${$txt_ref} ) ) { return; } | ||
| 638 | 2611 | ||||||
| 259 | 680 | ||||||
| 145 | |||||||
| 146 | 379 | 2126 | foreach my $chr ( keys %HTML_ENTITIES ) { | ||||
| 147 | |||||||
| 148 | # $chr gets a lookbehind to avoid converting flagged from E<...> | ||||||
| 149 | 1516 | 9391 | my $re = qq{(? | ||||
| 150 | 1516 | 1464 | ${$txt_ref} =~ s{$re}{$NUL&$HTML_ENTITIES{$chr};}gsmx; | ||||
| 1516 | 21312 | ||||||
| 151 | } | ||||||
| 152 | |||||||
| 153 | 379 | 1191 | return; | ||||
| 154 | } | ||||||
| 155 | |||||||
| 156 | ################# | ||||||
| 157 | # _ADD_URI_HREF # | ||||||
| 158 | ################# | ||||||
| 159 | |||||||
| 160 | # process embedded URIs that are not noted in L<...> bracketing | ||||||
| 161 | # Note that the HTML-significant characters are escaped; | ||||||
| 162 | # The escapes are removed by _encode_entities | ||||||
| 163 | # Note that there's no presumption that there's a URI in the | ||||||
| 164 | # text, so not matching is _not_ and error. | ||||||
| 165 | |||||||
| 166 | sub _add_uri_href { | ||||||
| 167 | 348 | 348 | 457 | my ($txt_ref) = @_; | |||
| 168 | |||||||
| 169 | 348 | 100 | 367 | if ( ${$txt_ref} =~ m{https?:}smx ) { | |||
| 348 | 908 | ||||||
| 170 | |||||||
| 171 | # Replace escaped characters in URL with their ASCII equivalents | ||||||
| 172 | # Regexp::Common escapes in path part, but not in host part, which appears correct | ||||||
| 173 | # per the RFC. However, the Spamassassin folks use it in the host. | ||||||
| 174 | # $escaped is defined by Regexp::Common::URI::RFC2396, and matches %xx | ||||||
| 175 | # This is done first because if needed, the host part won't be parsed correctly | ||||||
| 176 | 12 | 17 | while ( ${$txt_ref} =~ m{($escaped)}msx ) { | ||||
| 13 | 130 | ||||||
| 177 | 1 | 4 | my $esc = $1; | ||||
| 178 | 1 | 2 | my $new = $1; | ||||
| 179 | 1 | 4 | $new =~ s{%}{0x}msx; | ||||
| 180 | 1 | 7 | $new = e2char($new); | ||||
| 181 | 1 | 22 | ${$txt_ref} =~ s{$esc}{$new}gmsx; | ||||
| 1 | 70 | ||||||
| 182 | } | ||||||
| 183 | |||||||
| 184 | # target='_blank' causes load to a new window or tab | ||||||
| 185 | # See HTML 4.01 spec, section 6.16 Frame target names | ||||||
| 186 | # Doing this because URI RE grabs non-word trailing characters | ||||||
| 187 | # ${$txt_ref} =~ m{$RE{URI}{HTTP}{-keep}{-scheme=>'https?'}}mx; | ||||||
| 188 | # my $uri = $1; | ||||||
| 189 | # my $host = $3; | ||||||
| 190 | # $uri =~ s{[^/\w]+\z}{}mx; | ||||||
| 191 | # ${$txt_ref} =~ s{$uri}{$host}mx; | ||||||
| 192 | 12 | 17 | ${$txt_ref} | ||||
| 12 | 64 | ||||||
| 193 | =~ s{$RE{URI}{HTTP}{-keep}{-scheme=>'https?'}}{$3}gsmx; | ||||||
| 194 | |||||||
| 195 | 12 | 3419 | return; | ||||
| 196 | } | ||||||
| 197 | |||||||
| 198 | 336 | 100 | 362 | if ( ${$txt_ref} =~ m{ftp:}smx ) { | |||
| 336 | 12682 | ||||||
| 199 | 2 | 4 | ${$txt_ref} =~ s{$RE{URI}{FTP}{-keep}}{$5}gsmx; | ||||
| 2 | 12 | ||||||
| 200 | 2 | 892 | return; | ||||
| 201 | } | ||||||
| 202 | |||||||
| 203 | 334 | 100 | 414 | if ( ${$txt_ref} =~ m{file:}smx ) { | |||
| 334 | 747 | ||||||
| 204 | 2 | 4 | ${$txt_ref} =~ s{$RE{URI}{file}{-keep}}{$3}gsmx; | ||||
| 2 | 12 | ||||||
| 205 | 2 | 506 | return; | ||||
| 206 | } | ||||||
| 207 | |||||||
| 208 | 332 | 100 | 426 | if ( ${$txt_ref} =~ m{$MAIL_RE}smx ) { | |||
| 332 | 1356 | ||||||
| 209 | 4 | 9 | ${$txt_ref} =~ s{mailto://}{}smx; | ||||
| 4 | 13 | ||||||
| 210 | 4 | 5 | ${$txt_ref} =~ s{($MAIL_RE)}{$1}gsmx; | ||||
| 4 | 276 | ||||||
| 211 | 4 | 11 | return; | ||||
| 212 | } | ||||||
| 213 | |||||||
| 214 | 328 | 516 | return; | ||||
| 215 | } | ||||||
| 216 | |||||||
| 217 | ########### | ||||||
| 218 | # COMMAND # | ||||||
| 219 | ########### | ||||||
| 220 | |||||||
| 221 | # Index levels, which translate into indentation in the index | ||||||
| 222 | Readonly::Scalar my $LEVEL1 => 1; | ||||||
| 223 | Readonly::Scalar my $LEVEL2 => 2; | ||||||
| 224 | Readonly::Scalar my $LEVEL3 => 3; | ||||||
| 225 | Readonly::Scalar my $LEVEL4 => 4; | ||||||
| 226 | Readonly::Scalar my $LEVELL => 0; | ||||||
| 227 | |||||||
| 228 | # Overrides command() provided by base class in Pod::Parser | ||||||
| 229 | sub command { | ||||||
| 230 | 129 | 129 | 0 | 258 | my ( $parser, $command, $paragraph, $line_num, $pod ) = @_; | ||
| 231 | |||||||
| 232 | 129 | 100 | 367 | if ( defined $parser->{POD_HTMLEASY}->{VERBATIM_BUFFER} ) { | |||
| 233 | 89 | 199 | _verbatim($parser); | ||||
| 234 | } # [6062] | ||||||
| 235 | |||||||
| 236 | 129 | 9244 | my $expansion = $parser->interpolate( $paragraph, $line_num ); | ||||
| 237 | |||||||
| 238 | 129 | 897 | $expansion =~ s{$RE{ws}{crop}}{}gsmx; # delete surrounding whitespace | ||||
| 239 | |||||||
| 240 | # Encoding puts in a NUL; we're finished with the text, so remove them | ||||||
| 241 | 129 | 25664 | _encode_entities( \$expansion ); | ||||
| 242 | 129 | 284 | _remove_nul_escapes( \$expansion ); | ||||
| 243 | |||||||
| 244 | 129 | 167 | my $html; | ||||
| 245 | 4 | 4 | 31 | no warnings; # 'experimental' | |||
| 4 | 7 | ||||||
| 4 | 3090 | ||||||
| 246 | 129 | 187 | given ($command) { | ||||
| 247 | 129 | 412 | when (q{head1}) { | ||||
| 248 | 19 | 62 | _add_index( $parser, $expansion, $LEVEL1 ); | ||||
| 249 | 19 | 106 | $html = $parser->{POD_HTMLEASY} | ||||
| 250 | ->{ON_HEAD1}( $parser->{POD_HTMLEASY}, $expansion ); | ||||||
| 251 | } | ||||||
| 252 | 110 | 159 | when (q{head2}) { | ||||
| 253 | 11 | 34 | _add_index( $parser, $expansion, $LEVEL2 ); | ||||
| 254 | 11 | 65 | $html = $parser->{POD_HTMLEASY} | ||||
| 255 | ->{ON_HEAD2}( $parser->{POD_HTMLEASY}, $expansion ); | ||||||
| 256 | } | ||||||
| 257 | 99 | 139 | when (q{head3}) { | ||||
| 258 | 3 | 11 | _add_index( $parser, $expansion, $LEVEL3 ); | ||||
| 259 | 3 | 21 | $html = $parser->{POD_HTMLEASY} | ||||
| 260 | ->{ON_HEAD3}( $parser->{POD_HTMLEASY}, $expansion ); | ||||||
| 261 | } | ||||||
| 262 | 96 | 132 | when (q{head4}) { | ||||
| 263 | 3 | 10 | _add_index( $parser, $expansion, $LEVEL4 ); | ||||
| 264 | 3 | 20 | $html = $parser->{POD_HTMLEASY} | ||||
| 265 | ->{ON_HEAD4}( $parser->{POD_HTMLEASY}, $expansion ); | ||||||
| 266 | } | ||||||
| 267 | 93 | 117 | when (q{begin}) { | ||||
| 268 | 2 | 24 | $html = $parser->{POD_HTMLEASY} | ||||
| 269 | ->{ON_BEGIN}( $parser->{POD_HTMLEASY}, $expansion ); | ||||||
| 270 | } | ||||||
| 271 | 91 | 129 | when (q{end}) { | ||||
| 272 | 2 | 26 | $html = $parser->{POD_HTMLEASY} | ||||
| 273 | ->{ON_END}( $parser->{POD_HTMLEASY}, $expansion ); | ||||||
| 274 | } | ||||||
| 275 | 89 | 127 | when (q{over}) { | ||||
| 276 | 5 | 29 | $html = $parser->{POD_HTMLEASY} | ||||
| 277 | ->{ON_OVER}( $parser->{POD_HTMLEASY}, $expansion ); | ||||||
| 278 | } | ||||||
| 279 | 84 | 134 | when (q{item}) { | ||||
| 280 | |||||||
| 281 | # Items that begin with '* ' are ugly. Is it there for pod2man? | ||||||
| 282 | # Which is not the same as _only_ '*' | ||||||
| 283 | 33 | 181 | $expansion =~ s{\A\*\s+}{}msx; | ||||
| 284 | |||||||
| 285 | 33 | 100 | 156 | if ( $parser->{INDEX_ITEM} ) { | |||
| 286 | 16 | 53 | _add_index( $parser, $expansion, $LEVELL ); | ||||
| 287 | } | ||||||
| 288 | |||||||
| 289 | # This is for the folks who use =item to list URLs | ||||||
| 290 | 33 | 50 | 107 | if ( $expansion !~ m{ | |||
| 291 | |||||||
| 292 | # The URI's not already encoded (L<...> is already processed) | ||||||
| 293 | 33 | 80 | _add_uri_href( \$expansion ); | ||||
| 294 | } | ||||||
| 295 | 33 | 163 | $html = $parser->{POD_HTMLEASY} | ||||
| 296 | ->{ON_ITEM}( $parser->{POD_HTMLEASY}, $expansion ); | ||||||
| 297 | } | ||||||
| 298 | 51 | 87 | when (q{back}) { | ||||
| 299 | 5 | 30 | $html = $parser->{POD_HTMLEASY} | ||||
| 300 | ->{ON_BACK}( $parser->{POD_HTMLEASY}, $expansion ); | ||||||
| 301 | } | ||||||
| 302 | 46 | 74 | when (q{for}) { | ||||
| 303 | 1 | 6 | $html = $parser->{POD_HTMLEASY} | ||||
| 304 | ->{ON_FOR}( $parser->{POD_HTMLEASY}, $expansion ); | ||||||
| 305 | } | ||||||
| 306 | 45 | 61 | default { | ||||
| 307 | 45 | 50 | 566 | if ( defined $parser->{POD_HTMLEASY}->{qq{ON_\U$command\E}} ) { | |||
| 50 | |||||||
| 308 | 0 | 0 | $html | ||||
| 309 | = $parser->{POD_HTMLEASY} | ||||||
| 310 | ->{qq{ON_\U$command\E}}( $parser->{POD_HTMLEASY}, | ||||||
| 311 | $expansion ); | ||||||
| 312 | } | ||||||
| 313 | elsif ( $command !~ /^(?:pod|cut)$/imsx ) { | ||||||
| 314 | 0 | 0 | $html = qq{=$command $expansion}; |
||||
| 315 | } | ||||||
| 316 | 45 | 133 | else { $html = EMPTY; } | ||||
| 317 | } | ||||||
| 318 | }; | ||||||
| 319 | 4 | 4 | 35 | use warnings; | |||
| 4 | 8 | ||||||
| 4 | 3255 | ||||||
| 320 | |||||||
| 321 | 129 | 100 | 360 | if ( $html ne EMPTY ) { | |||
| 322 | 73 | 92 | push @{ $parser->{POD_HTMLEASY}->{HTML} }, $html; | ||||
| 73 | 211 | ||||||
| 323 | } | ||||||
| 324 | |||||||
| 325 | 129 | 5567 | return; | ||||
| 326 | } | ||||||
| 327 | |||||||
| 328 | ############ | ||||||
| 329 | # VERBATIM # | ||||||
| 330 | ############ | ||||||
| 331 | |||||||
| 332 | # Overrides verbatim() provided by base class in Pod::Parser | ||||||
| 333 | sub verbatim { | ||||||
| 334 | 12 | 12 | 0 | 21 | my ( $parser, $paragraph, $line_num ) = @_; | ||
| 335 | |||||||
| 336 | 12 | 50 | 36 | if ( exists $parser->{POD_HTMLEASY}->{IN_BEGIN} ) { return; } | |||
| 0 | 0 | ||||||
| 337 | 12 | 23 | $parser->{POD_HTMLEASY}->{VERBATIM_BUFFER} .= $paragraph; | ||||
| 338 | |||||||
| 339 | 12 | 260 | return; | ||||
| 340 | } | ||||||
| 341 | |||||||
| 342 | sub _verbatim { | ||||||
| 343 | 209 | 209 | 294 | my ($parser) = @_; | |||
| 344 | |||||||
| 345 | 209 | 100 | 619 | if ( exists $parser->{POD_HTMLEASY}->{IN_BEGIN} ) { return; } | |||
| 1 | 3 | ||||||
| 346 | 208 | 380 | my $expansion = $parser->{POD_HTMLEASY}->{VERBATIM_BUFFER}; | ||||
| 347 | 208 | 514 | $parser->{POD_HTMLEASY}->{VERBATIM_BUFFER} = EMPTY; | ||||
| 348 | |||||||
| 349 | 208 | 447 | _encode_entities( \$expansion ); | ||||
| 350 | |||||||
| 351 | # If we had "=item *", we should now be looking at the text that will | ||||||
| 352 | # appear as the item. The "*" was passed over initially, so we need | ||||||
| 353 | # the text to index. Save the flag as ON_VERBATIM deletes IN_ITEM | ||||||
| 354 | |||||||
| 355 | 208 | 100 | 723 | my $add_index = $parser->{INDEX_ITEM} && $parser->{POD_HTMLEASY}{IN_ITEM}; | |||
| 356 | |||||||
| 357 | 208 | 840 | my $html = $parser->{POD_HTMLEASY} | ||||
| 358 | ->{ON_VERBATIM}( $parser->{POD_HTMLEASY}, $expansion ); | ||||||
| 359 | |||||||
| 360 | # Now look for any embedded URIs | ||||||
| 361 | 208 | 445 | _add_uri_href( \$html ); | ||||
| 362 | |||||||
| 363 | # And remove any NUL escapes | ||||||
| 364 | 208 | 448 | _remove_nul_escapes( \$html ); | ||||
| 365 | |||||||
| 366 | 208 | 100 | 515 | if ( $html ne EMPTY ) { | |||
| 367 | 4 | 50 | 12 | if ($add_index) { _add_index( $parser, $expansion, $LEVELL ); } | |||
| 0 | 0 | ||||||
| 368 | 4 | 7 | push @{ $parser->{POD_HTMLEASY}->{HTML} }, $html; | ||||
| 4 | 14 | ||||||
| 369 | } # [6062] | ||||||
| 370 | |||||||
| 371 | 208 | 391 | return; | ||||
| 372 | } | ||||||
| 373 | |||||||
| 374 | ############# | ||||||
| 375 | # TEXTBLOCK # | ||||||
| 376 | ############# | ||||||
| 377 | |||||||
| 378 | # Overrides textblock() provided by base class in Pod::Parser | ||||||
| 379 | sub textblock { | ||||||
| 380 | 108 | 108 | 0 | 271 | my ( $parser, $paragraph, $line_num ) = @_; | ||
| 381 | |||||||
| 382 | 108 | 100 | 539 | if ( exists $parser->{POD_HTMLEASY}->{IN_BEGIN} ) { return; } | |||
| 1 | 26 | ||||||
| 383 | 107 | 100 | 294 | if ( defined $parser->{POD_HTMLEASY}->{VERBATIM_BUFFER} ) { | |||
| 384 | 94 | 179 | _verbatim($parser); | ||||
| 385 | } # [6062] | ||||||
| 386 | |||||||
| 387 | 107 | 10538 | my $expansion = $parser->interpolate( $paragraph, $line_num ); | ||||
| 388 | |||||||
| 389 | 107 | 647 | $expansion =~ s{$RE{ws}{crop}}{}gsmx; # delete surrounding whitespace | ||||
| 390 | 107 | 20917 | $expansion =~ s{\s+$}{}gsmx; | ||||
| 391 | |||||||
| 392 | # Encode HTML-specific characters before adding any HTML (eg ) |
||||||
| 393 | 107 | 238 | _encode_entities( \$expansion ); | ||||
| 394 | |||||||
| 395 | # If we had "=item *", we should now be looking at the text that will | ||||||
| 396 | # appear as the item. The "*" was passed over initially, so we need | ||||||
| 397 | # the text to index. Save the flag as ON_TEXTBLOCK deletes IN_ITEM | ||||||
| 398 | |||||||
| 399 | 107 | 100 | 358 | my $add_index = $parser->{INDEX_ITEM} && $parser->{POD_HTMLEASY}{IN_ITEM}; | |||
| 400 | |||||||
| 401 | 107 | 499 | my $html = $parser->{POD_HTMLEASY} | ||||
| 402 | ->{ON_TEXTBLOCK}( $parser->{POD_HTMLEASY}, $expansion ); | ||||||
| 403 | |||||||
| 404 | # Now look for any embedded URIs | ||||||
| 405 | 107 | 254 | _add_uri_href( \$html ); | ||||
| 406 | |||||||
| 407 | # And remove any NUL escapes | ||||||
| 408 | 107 | 781 | _remove_nul_escapes( \$html ); | ||||
| 409 | |||||||
| 410 | 107 | 50 | 347 | if ( $html ne EMPTY ) { | |||
| 411 | 107 | 100 | 210 | if ($add_index) { _add_index( $parser, $expansion, $LEVELL ); } | |||
| 4 | 14 | ||||||
| 412 | 107 | 112 | push @{ $parser->{POD_HTMLEASY}->{HTML} }, $html; | ||||
| 107 | 324 | ||||||
| 413 | } | ||||||
| 414 | |||||||
| 415 | 107 | 3990 | return; | ||||
| 416 | } | ||||||
| 417 | |||||||
| 418 | ##################### | ||||||
| 419 | # INTERIOR_SEQUENCE # | ||||||
| 420 | ##################### | ||||||
| 421 | |||||||
| 422 | # Overrides interior_sequence() provided by base class in Pod::Parser | ||||||
| 423 | sub interior_sequence { | ||||||
| 424 | 89 | 89 | 0 | 255 | my ( $parser, $seq_command, $seq_argument, $pod_seq ) = @_; | ||
| 425 | |||||||
| 426 | 89 | 95 | my $ret; | ||||
| 427 | |||||||
| 428 | # Encode HTML-specific characters before adding any HTML (eg ) |
||||||
| 429 | 89 | 100 | 215 | if ( $seq_command ne q{L} ) { | |||
| 430 | 47 | 235 | _encode_entities( \$seq_argument ); | ||||
| 431 | } | ||||||
| 432 | |||||||
| 433 | 4 | 4 | 27 | no warnings; # 'experimental' | |||
| 4 | 8 | ||||||
| 4 | 2098 | ||||||
| 434 | 89 | 135 | given ($seq_command) { | ||||
| 435 | 89 | 464 | when (q{B}) { | ||||
| 436 | 3 | 24 | $ret = $parser->{POD_HTMLEASY} | ||||
| 437 | ->{ON_B}( $parser->{POD_HTMLEASY}, $seq_argument ); | ||||||
| 438 | } | ||||||
| 439 | 86 | 119 | when (q{C}) { | ||||
| 440 | 18 | 81 | $ret = $parser->{POD_HTMLEASY} | ||||
| 441 | ->{ON_C}( $parser->{POD_HTMLEASY}, $seq_argument ); | ||||||
| 442 | } | ||||||
| 443 | 68 | 92 | when (q{E}) { | ||||
| 444 | 11 | 46 | $ret = $parser->{POD_HTMLEASY} | ||||
| 445 | ->{ON_E}( $parser->{POD_HTMLEASY}, $seq_argument ); | ||||||
| 446 | } | ||||||
| 447 | 57 | 83 | when (q{F}) { | ||||
| 448 | 1 | 6 | $ret = $parser->{POD_HTMLEASY} | ||||
| 449 | ->{ON_F}( $parser->{POD_HTMLEASY}, $seq_argument ); | ||||||
| 450 | } | ||||||
| 451 | 56 | 448 | when (q{I}) { | ||||
| 452 | 5 | 32 | $ret = $parser->{POD_HTMLEASY} | ||||
| 453 | ->{ON_I}( $parser->{POD_HTMLEASY}, $seq_argument ); | ||||||
| 454 | } | ||||||
| 455 | 51 | 91 | when (q{L}) { | ||||
| 456 | |||||||
| 457 | # L<> causes problems, but not with parselink. | ||||||
| 458 | 42 | 50 | 122 | if ( $seq_argument eq EMPTY ) { | |||
| 459 | 0 | 0 | _errors( $parser, q{Empty L<>} ); | ||||
| 460 | 0 | 0 | return EMPTY; | ||||
| 461 | } | ||||||
| 462 | 42 | 136 | my @parsed = Pod::ParseLink::parselink($seq_argument); | ||||
| 463 | 42 | 1375 | foreach (@parsed) { | ||||
| 464 | 210 | 100 | 419 | if ( defined $_ ) { _encode_entities( \$_ ); } | |||
| 147 | 308 | ||||||
| 465 | } | ||||||
| 466 | |||||||
| 467 | # Encoding handled in ON_L() | ||||||
| 468 | 42 | 240 | $ret = $parser->{POD_HTMLEASY} | ||||
| 469 | ->{ON_L}( $parser->{POD_HTMLEASY}, @parsed ); | ||||||
| 470 | } | ||||||
| 471 | 9 | 14 | when (q{S}) { | ||||
| 472 | 5 | 26 | $ret = $parser->{POD_HTMLEASY} | ||||
| 473 | ->{ON_S}( $parser->{POD_HTMLEASY}, $seq_argument ); | ||||||
| 474 | } | ||||||
| 475 | 4 | 7 | when (q{Z}) { | ||||
| 476 | 2 | 12 | $ret = $parser->{POD_HTMLEASY} | ||||
| 477 | ->{ON_Z}( $parser->{POD_HTMLEASY}, $seq_argument ); | ||||||
| 478 | } | ||||||
| 479 | 2 | 6 | default { | ||||
| 480 | 2 | 50 | 16 | if ( defined $parser->{POD_HTMLEASY}->{qq{ON_\U$seq_command\E}} ) | |||
| 481 | { | ||||||
| 482 | 2 | 12 | $ret | ||||
| 483 | = $parser->{POD_HTMLEASY} | ||||||
| 484 | ->{qq{ON_\U$seq_command\E}}( $parser->{POD_HTMLEASY}, | ||||||
| 485 | $seq_argument ); | ||||||
| 486 | } | ||||||
| 487 | else { | ||||||
| 488 | 0 | 0 | $ret = qq{$seq_command<$seq_argument>}; | ||||
| 489 | } | ||||||
| 490 | } | ||||||
| 491 | } | ||||||
| 492 | 4 | 4 | 25 | use warnings; | |||
| 4 | 7 | ||||||
| 4 | 5004 | ||||||
| 493 | |||||||
| 494 | # Escape HTML-significant characters | ||||||
| 495 | 89 | 380 | _nul_escape( \$ret ); | ||||
| 496 | |||||||
| 497 | 89 | 7885 | return $ret; | ||||
| 498 | } | ||||||
| 499 | |||||||
| 500 | ######################## | ||||||
| 501 | # PREPROCESS_PARAGRAPH # | ||||||
| 502 | ######################## | ||||||
| 503 | |||||||
| 504 | Readonly::Scalar my $INFO_DONE => 3; | ||||||
| 505 | |||||||
| 506 | # Overrides preprocess_paragraph() provided by base class in Pod::Parser | ||||||
| 507 | # NB: the text is _not_ altered. | ||||||
| 508 | sub preprocess_paragraph { | ||||||
| 509 | 297 | 297 | 0 | 478 | my ( $parser, $text, $line_num ) = @_; | ||
| 510 | |||||||
| 511 | 297 | 50 | 989 | if ( $parser->{POD_HTMLEASY}{INFO_COUNT} == $INFO_DONE ) { | |||
| 512 | 0 | 0 | return $text; | ||||
| 513 | } | ||||||
| 514 | |||||||
| 515 | 297 | 100 | 776 | if ( not exists $parser->{POD_HTMLEASY}{PACKAGE} ) { | |||
| 516 | 295 | 100 | 1047 | if ( $text =~ m{package}smx ) { | |||
| 517 | 1 | 7 | my ($pack) = $text =~ m{package\s+(\w+(?:::\w+)*)}smx; | ||||
| 518 | 1 | 50 | 4 | if ( defined $pack ) { | |||
| 519 | 1 | 3 | $parser->{POD_HTMLEASY}{PACKAGE} = $pack; | ||||
| 520 | 1 | 3 | $parser->{POD_HTMLEASY}{INFO_COUNT}++; | ||||
| 521 | } | ||||||
| 522 | } | ||||||
| 523 | } | ||||||
| 524 | |||||||
| 525 | 297 | 100 | 718 | if ( not exists $parser->{POD_HTMLEASY}{VERSION} ) { | |||
| 526 | 295 | 100 | 673 | if ( $text =~ m{VERSION}smx ) { | |||
| 527 | 1 | 11 | my ($ver) = $text =~ m{($RE{num}{decimal})}smx; | ||||
| 528 | 1 | 50 | 377 | if ( defined $ver ) { | |||
| 529 | 1 | 4 | $parser->{POD_HTMLEASY}{VERSION} = $ver; | ||||
| 530 | 1 | 4 | $parser->{POD_HTMLEASY}{INFO_COUNT}++; | ||||
| 531 | } | ||||||
| 532 | } | ||||||
| 533 | } | ||||||
| 534 | |||||||
| 535 | # This situation is created by evt_on_head1() | ||||||
| 536 | # _do_title has found nothing following =head1 NAME, so it | ||||||
| 537 | # creates ...{TITLE}, and leaves it undef, so that it will be | ||||||
| 538 | # picked up here when the paragraph following is processed. | ||||||
| 539 | 297 | 100 | 100 | 1094 | if ( ( exists $parser->{POD_HTMLEASY}{TITLE} ) | ||
| 540 | and ( not defined $parser->{POD_HTMLEASY}{TITLE} ) ) | ||||||
| 541 | { | ||||||
| 542 | 4 | 14 | my @lines = split m{\n}smx, $text; | ||||
| 543 | 4 | 9 | my $tmp_text = shift @lines; | ||||
| 544 | 4 | 100 | 13 | if ( not defined $tmp_text ) { return $text; } | |||
| 2 | 45 | ||||||
| 545 | 2 | 13 | $tmp_text =~ s{$RE{ws}{crop}}{}gsmx; # delete surrounding whitespace | ||||
| 546 | 2 | 256 | $parser->{POD_HTMLEASY}{TITLE} = $tmp_text; | ||||
| 547 | 2 | 8 | $parser->{POD_HTMLEASY}{INFO_COUNT}++; | ||||
| 548 | } | ||||||
| 549 | |||||||
| 550 | 295 | 16795 | return $text; | ||||
| 551 | } | ||||||
| 552 | |||||||
| 553 | ############## | ||||||
| 554 | # _ADD_INDEX # | ||||||
| 555 | ############## | ||||||
| 556 | |||||||
| 557 | sub _add_index { | ||||||
| 558 | 56 | 56 | 134 | my ( $parser, $txt, $level ) = @_; | |||
| 559 | |||||||
| 560 | # Don't index star items | ||||||
| 561 | 56 | 100 | 190 | if ( $txt eq q{*} ) { return; } | |||
| 4 | 15 | ||||||
| 562 | |||||||
| 563 | 52 | 100 | 135 | if ( exists $parser->{INDEX_ITEM} ) { | |||
| 564 | 23 | 38 | my $max_len = $parser->{INDEX_LENGTH}; | ||||
| 565 | 23 | 100 | 73 | if ( length $txt > $max_len ) { | |||
| 566 | 1 | 13 | while ( substr( $txt, $max_len, 1 ) ne q{ } ) { | ||||
| 567 | 5 | 7 | $max_len++; | ||||
| 568 | 5 | 50 | 12 | last if $max_len >= length $txt; | |||
| 569 | } | ||||||
| 570 | 1 | 50 | 4 | if ( $max_len < length $txt ) { | |||
| 571 | 1 | 3 | $txt = substr( $txt, 0, $max_len ) . "..."; | ||||
| 572 | } | ||||||
| 573 | } | ||||||
| 574 | } | ||||||
| 575 | |||||||
| 576 | 52 | 118 | _remove_nul_escapes( \$txt ); | ||||
| 577 | 52 | 186 | push @{ $parser->{POD_HTMLEASY}->{INDEX} }, [ $level, $txt ]; | ||||
| 52 | 348 | ||||||
| 578 | |||||||
| 579 | 52 | 103 | return; | ||||
| 580 | |||||||
| 581 | } | ||||||
| 582 | |||||||
| 583 | ############# | ||||||
| 584 | # BEGIN_POD # | ||||||
| 585 | ############# | ||||||
| 586 | |||||||
| 587 | # Overrides begin_pod() provided by base class in Pod::Parser | ||||||
| 588 | sub begin_pod { | ||||||
| 589 | 48 | 48 | 0 | 97 | my ($parser) = @_; | ||
| 590 | |||||||
| 591 | 48 | 274 | delete $parser->{POD_HTMLEASY}->{INDEX}; | ||||
| 592 | 48 | 135 | $parser->{POD_HTMLEASY}->{INDEX} = []; | ||||
| 593 | |||||||
| 594 | 48 | 3163 | return 1; | ||||
| 595 | } | ||||||
| 596 | |||||||
| 597 | ########### | ||||||
| 598 | # END_POD # | ||||||
| 599 | ########### | ||||||
| 600 | |||||||
| 601 | # Overrides end_pod() provided by base class in Pod::Parser | ||||||
| 602 | sub end_pod { | ||||||
| 603 | 48 | 48 | 0 | 95 | my ($parser) = @_; | ||
| 604 | |||||||
| 605 | 48 | 100 | 186 | if ( defined $parser->{POD_HTMLEASY}->{VERBATIM_BUFFER} ) { | |||
| 606 | 26 | 59 | _verbatim($parser); | ||||
| 607 | } | ||||||
| 608 | |||||||
| 609 | 48 | 2193 | return 1; | ||||
| 610 | } | ||||||
| 611 | |||||||
| 612 | ########### | ||||||
| 613 | # _ERRORS # | ||||||
| 614 | ########### | ||||||
| 615 | |||||||
| 616 | sub _errors { | ||||||
| 617 | 0 | 0 | my ( $parser, $error ) = @_; | ||||
| 618 | |||||||
| 619 | 0 | carp "$error"; | |||||
| 620 | 0 | $error =~ s{^\s*\**\s*errors?:?\s*}{}ismx; | |||||
| 621 | 0 | $error =~ s{\s+$}{}smx; | |||||
| 622 | |||||||
| 623 | 0 | my $html = $parser->{POD_HTMLEASY} | |||||
| 624 | ->{ON_ERROR}( $parser->{POD_HTMLEASY}, $error ); | ||||||
| 625 | 0 | 0 | if ( $html ne EMPTY ) { | ||||
| 626 | 0 | push @{ $parser->{POD_HTMLEASY}->{HTML} }, $html; | |||||
| 0 | |||||||
| 627 | } | ||||||
| 628 | |||||||
| 629 | 0 | return 1; | |||||
| 630 | } | ||||||
| 631 | |||||||
| 632 | ########### | ||||||
| 633 | # DESTROY # | ||||||
| 634 | ########### | ||||||
| 635 | |||||||
| 636 | 0 | 0 | sub DESTROY { } | ||||
| 637 | |||||||
| 638 | ####### | ||||||
| 639 | # END # | ||||||
| 640 | ####### | ||||||
| 641 | |||||||
| 642 | 1; | ||||||
| 643 |