line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package MojoMojo::Formatter::TOC; |
2
|
|
|
|
|
|
|
|
3
|
38
|
|
|
38
|
|
52308
|
use parent qw/MojoMojo::Formatter/; |
|
38
|
|
|
|
|
314
|
|
|
38
|
|
|
|
|
209
|
|
4
|
|
|
|
|
|
|
|
5
|
38
|
|
|
38
|
|
2820
|
use HTML::Entities; |
|
38
|
|
|
|
|
9553
|
|
|
38
|
|
|
|
|
1919
|
|
6
|
38
|
|
|
38
|
|
1110
|
use Encode; |
|
38
|
|
|
|
|
15570
|
|
|
38
|
|
|
|
|
15494
|
|
7
|
|
|
|
|
|
|
|
8
|
38
|
|
|
38
|
|
15472
|
eval "use HTML::Toc;use HTML::TocInsertor;"; |
|
38
|
|
|
38
|
|
82853
|
|
|
38
|
|
|
|
|
1026
|
|
|
38
|
|
|
|
|
17558
|
|
|
38
|
|
|
|
|
338230
|
|
|
38
|
|
|
|
|
717
|
|
9
|
|
|
|
|
|
|
my $eval_res = $@; |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
=head2 module_loaded |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
Return true if the module is loaded. |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=cut |
16
|
|
|
|
|
|
|
|
17
|
303
|
50
|
|
303
|
1
|
1491
|
# Reports whether the string-eval that loads HTML::Toc and HTML::TocInsertor
# succeeded: $eval_res holds the $@ captured right after that eval, so a
# true value there means loading failed.
sub module_loaded {
    return 0 if $eval_res;
    return 1;
}
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=head1 NAME |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
MojoMojo::Formatter::TOC - generate table of contents |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
=head1 DESCRIPTION |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
This formatter will replace C<{{toc}}> with a table of contents, using |
26
|
|
|
|
|
|
|
L<HTML::Toc>. If you don't want an element to be included in the TOC, |
27
|
|
|
|
|
|
|
make it have C<class="notoc"> |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
=head1 METHODS |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=head2 format_content_order |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
The TOC formatter expects HTML input so it needs to run after the main |
34
|
|
|
|
|
|
|
formatter. Since comment-type formatters (order 91) could add a heading |
35
|
|
|
|
|
|
|
for the comment section, the TOC formatter will run with a priority of 95. |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
=cut |
38
|
|
|
|
|
|
|
|
39
|
868
|
|
|
868
|
1
|
2653
|
# Priority of this formatter in the formatting chain (see the POD above
# for why it is 95).
sub format_content_order {
    return 95;
}
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
=head2 format_content |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
Calls the formatter. Takes a ref to the content as well as the context object. |
44
|
|
|
|
|
|
|
The syntax for the TOC plugin invocation is: |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
{{toc M- }} # start from Header level M |
47
|
|
|
|
|
|
|
{{toc -N }} # stop at Header level N |
48
|
|
|
|
|
|
|
{{toc M-N }} # process only header levels M..N |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
where M is the minimum heading level to include in the TOC, and N is the |
51
|
|
|
|
|
|
|
maximum level (depth). For example, suppose you only have one H1 on the page |
52
|
|
|
|
|
|
|
so it doesn't make sense to add it to the TOC; also, assume you don't want |
53
|
|
|
|
|
|
|
to include any headers smaller than H3. The {{toc}} markup to achieve that would be: |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
{{toc 2-3}} |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
Defaults to 1-9, which covers all six HTML heading levels. |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=cut |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
sub format_content {
    my ( $class, $content ) = @_;

    # $content is a reference to the HTML string; it is modified in place.
    # Returns 1 when a {{toc}} tag was found and processed, and nothing
    # (undef / empty list) when the TOC modules are unavailable or the
    # content holds no {{toc}} tag.
    return unless $class->module_loaded;

    # Optional level range following "toc": "M-", "-N" or "M-N".
    # Capture group 1 is M, group 2 is N; either may be undefined.
    my $toc_params_RE = qr/\s+ (\d+)? \s* - \s* (\d+)?/x;

    # Replace the {{toc ..}} markup tag and parse potential parameters.
    # Only the first tag is handled: this used to be written as a `while`
    # loop, but the body always returned on its first pass, so it is
    # spelled as the `if` it really is.
    if (
        $$content =~ s[
            \{\{ toc (?:$toc_params_RE)? \s* \/? }}
        ][<div class="toc">\n<!--mojomojoTOCwillgohere-->\n</div>]ix
      )
    {
        # Copy the captures before anything else can clobber $1/$2.
        my $toc_h_min = $1 || 1;
        my $toc_h_max = $2 || 9;    # in practice, there are no more than 6 heading levels

        # prevent TocGenerator error for headings >= 10
        $toc_h_min = 9 if $toc_h_min > 9;

        # {{toc 3-1}} is wrong; make it {{toc 3-9}} instead
        $toc_h_max = 9 if $toc_h_max > 9 or $toc_h_max < $toc_h_min;

        my $toc         = HTML::Toc->new();
        my $tocInsertor = HTML::TocInsertor->new();

        $toc->setOptions({
            header             => '',    # by default, \n<!-- Table of Contents generated by Perl - HTML::Toc -->\n
            footer             => '',
            insertionPoint     => 'replace <!--mojomojoTOCwillgohere-->',
            doLinkToId         => 0,
            levelToToc         => "[$toc_h_min-$toc_h_max]",
            templateAnchorName => \&assembleAnchorName,
        });

        # http://search.cpan.org/dist/HTML-Toc/Toc.pod#HTML::TocInsertor::insert()
        # The current content is the input; `output => $content` hands the
        # same scalar reference back as the destination for the result.
        $tocInsertor->insert($toc, $$content, {output => $content});

        return 1;
    }

    # No {{toc}} tag present. Return explicitly instead of falling off the
    # end of a loop, whose value perlsub documents as unspecified.
    return;
}
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
=head2 SEO-friendly anchors |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
Anchors should be generated with SEO- (and human-) friendly names, i.e. out of the entire |
98
|
|
|
|
|
|
|
token text, instead of being numeric or reduced to the first word(s) of the token. |
99
|
|
|
|
|
|
|
In the spirit of http://seo2.0.onreact.com/top-10-fatal-url-design-mistakes, compare: |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
http://beachfashion.com/photos/Pamela_Anderson#In_red_swimsuit_in_Baywatch |
102
|
|
|
|
|
|
|
vs. |
103
|
|
|
|
|
|
|
http://beachfashion.com/photos/Pamela_Anderson#in |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
"Which one speaks your language more, which one will you rather click?" |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
The anchor names generated are compliant with XHTML1.0 Strict. Also, per the |
108
|
|
|
|
|
|
|
HTML 4.01 spec, anchor names should be restricted to ASCII characters and |
109
|
|
|
|
|
|
|
anchors that differ only in case may not appear in the same document. In |
110
|
|
|
|
|
|
|
particular, an anchor name may be defined only once in a document (logically, |
111
|
|
|
|
|
|
|
because otherwise the user agent wouldn't know which #foo to scroll to). |
112
|
|
|
|
|
|
|
This is currently a problem with L<HTML::Toc> v1.11, which doesn't have |
113
|
|
|
|
|
|
|
support for passing the already existing anchors to the C<templateAnchorName> |
114
|
|
|
|
|
|
|
sub. |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
=head2 assembleAnchorName |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
http://search.cpan.org/dist/HTML-Toc/Toc.pod#templateAnchorName |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
=cut |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
# Callback handed to HTML::Toc as `templateAnchorName`; builds the anchor
# name for one heading from the heading's text.
#
# Arguments arrive positionally as
#   ($aFile, $aGroupId, $aLevel, $aNode, $text, $children)
# and only $text is used here.
#
# Returns a string made only of [A-Za-z0-9_:.-] that starts with a letter,
# or the literal 'id' when $text is undefined, empty or whitespace-only.
sub assembleAnchorName {
    my ($aFile, $aGroupId, $aLevel, $aNode, $text, $children) = @_;

    # Nothing usable to build a name from. Previously a whitespace-only
    # heading was returned verbatim (whitespace is not a valid anchor) and
    # an undefined one produced "uninitialized" warnings.
    return 'id' if !defined $text || $text =~ /^\s*$/;

    # generate a SEO-friendly anchor right from the token content
    # The allowed character set is limited first by the URI specification for fragments, http://tools.ietf.org/html/rfc3986#section-2: characters
    # then by the limitations of the values of 'id' and 'name' attributes: http://www.w3.org/TR/REC-html40/types.html#type-name
    # Eventually, the only punctuation allowed in id values is [_.:-]
    # Unicode characters with code points > 0x7E (e.g. Chinese characters) are allowed (test "<h1 id="????">header</h1>" at http://validator.w3.org/#validate_by_input+with_options), except for smart quotes (!), see http://www.w3.org/Search/Mail/Public/search?type-index=www-validator&index-type=t&keywords=[VE][122]+smart+quotes&search=Search+Mail+Archives
    # However, that contradicts the HTML 4.01 spec: "Anchor names should be restricted to ASCII characters." - http://www.w3.org/TR/REC-html40/struct/links.html#h-12.2.1
    # ...and the [A-Za-z] class of letters mentioned at http://www.w3.org/TR/REC-html40/types.html#type-name
    # Finally, note that pod2html fails miserably to generate XHTML-compliant anchor links. See http://validator.w3.org/check?uri=http%3A%2F%2Fsearch.cpan.org%2Fdist%2FCatalyst-Runtime%2Flib%2FCatalyst%2FRequest.pm&charset=(detect+automatically)&doctype=XHTML+1.0+Transitional&group=0&user-agent=W3C_Validator%2F1.606
    $text =~ s/\s/_/g;

    # we need to replace [#&;] only when they are NOT part of an HTML entity.
    # decode_entities (called in void context, so it edits $text in place)
    # saves us from crafting a nasty regexp
    decode_entities($text);

    # convert to UTF-8 because we need to output the UTF-8 bytes
    $text = encode('utf-8', $text);

    # Escape every remaining byte as .XX
    # MediaWiki also uses the period, see http://en.wikipedia.org/wiki/Hierarchies#Ethics.2C_behavioral_psychology.2C_philosophies_of_identity
    $text =~ s/([^A-Za-z0-9_:.-])/sprintf('.%02X', ord($1))/eg;

    # "ID and NAME tokens must begin with a letter ([A-Za-z])" -- http://www.w3.org/TR/html4/types.html#type-name
    # The old test (/\A\W/) let names starting with a digit or underscore
    # through, because both are word characters; test for a letter directly.
    $text = 'L' . $text if $text !~ /\A[A-Za-z]/;

    # check if the anchor already exists; if so, add a number
    # NOTE: there is no way currently to do this easily in HTML-Toc-1.10.

    #my $anch_num = 1;
    #my $word_name = $name;
    ## Reference: http://www.w3.org/TR/REC-html40/struct/links.html#h-12.2.1
    ## Anchor names must be unique within a document. Anchor names that differ only in case may not appear in the same document.
    #while (grep {lc $_ eq lc $name} keys %{$args{anchors}}) {
    #    # FIXME (in caller sub): to avoid the grep above, the $args{anchors} hash
    #    # should have as key the lowercased anchor name, and as value its actual value (instead of '1')
    #    $name = $word_name . "_$anch_num";
    #    $anch_num++;
    #}

    return $text;
}
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
=head1 SEE ALSO |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
L<MojoMojo> and L<Module::Pluggable::Ordered>. |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
=head1 AUTHORS |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
Dan Dascalescu, L<http://dandascalescu.com> |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
=head1 LICENSE |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
This library is free software. You can redistribute it and/or modify |
171
|
|
|
|
|
|
|
it under the same terms as Perl itself. |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
=cut |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
1; |