| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package HTML::GenToc; |
|
2
|
|
|
|
|
|
|
BEGIN { |
|
3
|
3
|
|
|
3
|
|
68896
|
$HTML::GenToc::VERSION = '3.20'; |
|
4
|
|
|
|
|
|
|
} |
|
5
|
3
|
|
|
3
|
|
29
|
use strict; |
|
|
3
|
|
|
|
|
7
|
|
|
|
3
|
|
|
|
|
156
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 NAME |
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
HTML::GenToc - Generate a Table of Contents for HTML documents. |
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
=head1 VERSION |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
version 3.20 |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
use HTML::GenToc; |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
# create a new object |
|
20
|
|
|
|
|
|
|
my $toc = new HTML::GenToc(); |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
my $toc = new HTML::GenToc(title=>"Table of Contents", |
|
23
|
|
|
|
|
|
|
toc_entry=>{ |
|
24
|
|
|
|
|
|
|
H1=>1, |
|
25
|
|
|
|
|
|
|
H2=>2 |
|
26
|
|
|
|
|
|
|
}, |
|
27
|
|
|
|
|
|
|
toc_end=>{ |
|
28
|
|
|
|
|
|
|
H1=>'/H1', |
|
29
|
|
|
|
|
|
|
H2=>'/H2' |
|
30
|
|
|
|
|
|
|
} |
|
31
|
|
|
|
|
|
|
); |
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
# generate a ToC from a file |
|
34
|
|
|
|
|
|
|
$toc->generate_toc(input=>$html_file, |
|
35
|
|
|
|
|
|
|
footer=>$footer_file, |
|
36
|
|
|
|
|
|
|
header=>$header_file |
|
37
|
|
|
|
|
|
|
); |
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
41
|
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
HTML::GenToc generates anchors and a table of contents for |
|
43
|
|
|
|
|
|
|
HTML documents. Depending on the arguments, it will insert |
|
44
|
|
|
|
|
|
|
the information it generates, or output to a string, a separate file |
|
45
|
|
|
|
|
|
|
or STDOUT. |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
While it defaults to taking H1 and H2 elements as the significant |
|
48
|
|
|
|
|
|
|
elements to put into the table of contents, any tag can be defined |
|
49
|
|
|
|
|
|
|
as a significant element. Also, it doesn't matter if the input |
|
50
|
|
|
|
|
|
|
HTML code is complete, pure HTML, one can input pseudo-html |
|
51
|
|
|
|
|
|
|
or page-fragments, which makes it suitable for using on templates |
|
52
|
|
|
|
|
|
|
and HTML meta-languages such as WML. |
|
53
|
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
Also included in the distrubution is hypertoc, a script which uses the |
|
55
|
|
|
|
|
|
|
module so that one can process files on the command-line in a |
|
56
|
|
|
|
|
|
|
user-friendly manner. |
|
57
|
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
=head1 DETAILS |
|
59
|
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
The ToC generated is a multi-level level list containing links to the |
|
61
|
|
|
|
|
|
|
significant elements. HTML::GenToc inserts the links into the ToC to |
|
62
|
|
|
|
|
|
|
significant elements at a level specified by the user. |
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
B |
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
If H1s are specified as level 1, than they appear in the first |
|
67
|
|
|
|
|
|
|
level list of the ToC. If H2s are specified as a level 2, than |
|
68
|
|
|
|
|
|
|
they appear in a second level list in the ToC. |
|
69
|
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
Information on the significant elements and what level they should occur |
|
71
|
|
|
|
|
|
|
are passed in to the methods used by this object, or one can use the |
|
72
|
|
|
|
|
|
|
defaults. |
|
73
|
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
There are two phases to the ToC generation. The first phase is to |
|
75
|
|
|
|
|
|
|
put suitable anchors into the HTML documents, and the second phase |
|
76
|
|
|
|
|
|
|
is to generate the ToC from HTML documents which have anchors |
|
77
|
|
|
|
|
|
|
in them for the ToC to link to. |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
For more information on controlling the contents of the created ToC, see |
|
80
|
|
|
|
|
|
|
L. |
|
81
|
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
HTML::GenToc also supports the ability to incorporate the ToC into the HTML |
|
83
|
|
|
|
|
|
|
document itself via the B option. See L for more |
|
84
|
|
|
|
|
|
|
information. |
|
85
|
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
In order for HTML::GenToc to support linking to significant elements, |
|
87
|
|
|
|
|
|
|
HTML::GenToc inserts anchors into the significant elements. One can |
|
88
|
|
|
|
|
|
|
use HTML::GenToc as a filter, outputing the result to another file, |
|
89
|
|
|
|
|
|
|
or one can overwrite the original file, with the original backed |
|
90
|
|
|
|
|
|
|
up with a suffix (default: "org") appended to the filename. |
|
91
|
|
|
|
|
|
|
One can also output the result to a string. |
|
92
|
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
=head1 METHODS |
|
94
|
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
Default arguments can be set when the object is created, and overridden |
|
96
|
|
|
|
|
|
|
by setting arguments when the generate_toc method is called. |
|
97
|
|
|
|
|
|
|
Arguments are given as a hash of arguments. |
|
98
|
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
=cut |
|
100
|
|
|
|
|
|
|
|
|
101
|
3
|
|
|
3
|
|
295009
|
use Data::Dumper; |
|
|
3
|
|
|
|
|
52080
|
|
|
|
3
|
|
|
|
|
250
|
|
|
102
|
3
|
|
|
3
|
|
3014
|
use HTML::SimpleParse; |
|
|
3
|
|
|
|
|
9713
|
|
|
|
3
|
|
|
|
|
123
|
|
|
103
|
3
|
|
|
3
|
|
3089
|
use HTML::Entities; |
|
|
3
|
|
|
|
|
31445
|
|
|
|
3
|
|
|
|
|
359
|
|
|
104
|
3
|
|
|
3
|
|
3614
|
use HTML::LinkList; |
|
|
3
|
|
|
|
|
18412
|
|
|
|
3
|
|
|
|
|
17519
|
|
|
105
|
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
################################################################# |
|
107
|
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
#---------------------------------------------------------------# |
|
109
|
|
|
|
|
|
|
# Object interface |
|
110
|
|
|
|
|
|
|
#---------------------------------------------------------------# |
|
111
|
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
=head2 Method -- new |
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
$toc = new HTML::GenToc(); |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
$toc = new HTML::GenToc(toc_entry=>\%my_toc_entry, |
|
117
|
|
|
|
|
|
|
toc_end=>\%my_toc_end, |
|
118
|
|
|
|
|
|
|
bak=>'bak', |
|
119
|
|
|
|
|
|
|
... |
|
120
|
|
|
|
|
|
|
); |
|
121
|
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
Creates a new HTML::GenToc object. |
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
These arguments will be used as defaults in invocations of other methods. |
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
See L for possible arguments. |
|
127
|
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
=cut |
|
129
|
|
|
|
|
|
|
sub new { |
|
130
|
5
|
|
|
5
|
1
|
3380
|
my $invocant = shift; |
|
131
|
|
|
|
|
|
|
|
|
132
|
5
|
|
33
|
|
|
44
|
my $class = ref($invocant) || $invocant; # Object or class name |
|
133
|
5
|
|
|
|
|
155
|
my $self = { |
|
134
|
|
|
|
|
|
|
debug => 0, |
|
135
|
|
|
|
|
|
|
bak => 'org', |
|
136
|
|
|
|
|
|
|
entrysep => ', ', |
|
137
|
|
|
|
|
|
|
footer => '', |
|
138
|
|
|
|
|
|
|
inline => 0, |
|
139
|
|
|
|
|
|
|
header => '', |
|
140
|
|
|
|
|
|
|
input => '', |
|
141
|
|
|
|
|
|
|
notoc_match => 'class="notoc"', |
|
142
|
|
|
|
|
|
|
ol => 0, |
|
143
|
|
|
|
|
|
|
ol_num_levels => 1, |
|
144
|
|
|
|
|
|
|
overwrite => 0, |
|
145
|
|
|
|
|
|
|
outfile => '-', |
|
146
|
|
|
|
|
|
|
quiet => 0, |
|
147
|
|
|
|
|
|
|
textonly => 0, |
|
148
|
|
|
|
|
|
|
title => 'Table of Contents', |
|
149
|
|
|
|
|
|
|
toclabel => 'Table of Contents', |
|
150
|
|
|
|
|
|
|
toc_tag => '^BODY', |
|
151
|
|
|
|
|
|
|
toc_tag_replace => 0, |
|
152
|
|
|
|
|
|
|
toc_only => 0, |
|
153
|
|
|
|
|
|
|
# define TOC entry elements |
|
154
|
|
|
|
|
|
|
toc_entry => { |
|
155
|
|
|
|
|
|
|
'H1'=>1, |
|
156
|
|
|
|
|
|
|
'H2'=>2, |
|
157
|
|
|
|
|
|
|
}, |
|
158
|
|
|
|
|
|
|
# TOC entry element terminators |
|
159
|
|
|
|
|
|
|
toc_end => { |
|
160
|
|
|
|
|
|
|
'H1'=>'/H1', |
|
161
|
|
|
|
|
|
|
'H2'=>'/H2', |
|
162
|
|
|
|
|
|
|
}, |
|
163
|
|
|
|
|
|
|
useorg => 0, |
|
164
|
|
|
|
|
|
|
@_ |
|
165
|
|
|
|
|
|
|
}; |
|
166
|
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
# bless self |
|
168
|
5
|
|
|
|
|
19
|
bless($self, $class); |
|
169
|
|
|
|
|
|
|
|
|
170
|
5
|
50
|
|
|
|
40
|
if ($self->{debug}) |
|
171
|
|
|
|
|
|
|
{ |
|
172
|
0
|
|
|
|
|
0
|
print STDERR Dumper($self); |
|
173
|
|
|
|
|
|
|
} |
|
174
|
|
|
|
|
|
|
|
|
175
|
5
|
|
|
|
|
33
|
return $self; |
|
176
|
|
|
|
|
|
|
} # new |
|
177
|
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
=head2 generate_toc |
|
179
|
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
$toc->generate_toc(outfile=>"index2.html"); |
|
181
|
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
my $result_str = $toc->generate_toc(to_string=>1); |
|
183
|
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
Generates a table of contents for the significant elements in the HTML |
|
185
|
|
|
|
|
|
|
documents, optionally generating anchors for them first. |
|
186
|
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
B |
|
188
|
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
=over |
|
190
|
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
=item bak |
|
192
|
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
bak => I |
|
194
|
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
If the input file/files is/are being overwritten (B is on), copy |
|
196
|
|
|
|
|
|
|
the original file to "I.I". If the value is empty, B |
|
197
|
|
|
|
|
|
|
backup file will be created. |
|
198
|
|
|
|
|
|
|
(default:org) |
|
199
|
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
=item debug |
|
201
|
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
debug => 1 |
|
203
|
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
Enable verbose debugging output. Used for debugging this module; |
|
205
|
|
|
|
|
|
|
in other words, don't bother. |
|
206
|
|
|
|
|
|
|
(default:off) |
|
207
|
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
=item entrysep |
|
209
|
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
entrysep => I |
|
211
|
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
Separator string for non- item entries |
|
213
|
|
|
|
|
|
|
(default: ", ") |
|
214
|
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
=item filenames |
|
216
|
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
filenames => \@filenames |
|
218
|
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
The filenames to use when creating table-of-contents links. |
|
220
|
|
|
|
|
|
|
This overrides the filenames given in the B option, |
|
221
|
|
|
|
|
|
|
and is expected to have exactly the same number of elements. |
|
222
|
|
|
|
|
|
|
This can also be used when passing in string-content to the B |
|
223
|
|
|
|
|
|
|
option, to give a (fake) filename to use for the links relating |
|
224
|
|
|
|
|
|
|
to that content. |
|
225
|
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
=item footer |
|
227
|
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
footer => I |
|
229
|
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
Either the filename of the file containing footer text for ToC; |
|
231
|
|
|
|
|
|
|
or a string containing the footer text. |
|
232
|
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
=item header |
|
234
|
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
header => I |
|
236
|
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
Either the filename of the file containing header text for ToC; |
|
238
|
|
|
|
|
|
|
or a string containing the header text. |
|
239
|
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
=item ignore_only_one |
|
241
|
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
ignore_only_one => 1 |
|
243
|
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
If there would be only one item in the ToC, don't make a ToC. |
|
245
|
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
=item ignore_sole_first |
|
247
|
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
ignore_sole_first => 1 |
|
249
|
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
If the first item in the ToC is of the highest level, |
|
251
|
|
|
|
|
|
|
AND it is the only one of that level, ignore it. |
|
252
|
|
|
|
|
|
|
This is useful in web-pages where there is only one H1 header |
|
253
|
|
|
|
|
|
|
but one doesn't know beforehand whether there will be only one. |
|
254
|
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
=item inline |
|
256
|
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
inline => 1 |
|
258
|
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
Put ToC in document at a given point. |
|
260
|
|
|
|
|
|
|
See L for more information. |
|
261
|
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
=item input |
|
263
|
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
input => \@filenames |
|
265
|
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
input => $content |
|
267
|
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
This is expected to be either a reference to an array of filenames, |
|
269
|
|
|
|
|
|
|
or a string containing content to process. |
|
270
|
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
The three main uses would be: |
|
272
|
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
=over |
|
274
|
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
=item (a) |
|
276
|
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
you have more than one file to process, so pass in multiple filenames |
|
278
|
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
=item (b) |
|
280
|
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
you have one file to process, so pass in its filename as the only array item |
|
282
|
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
=item (c) |
|
284
|
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
you have HTML content to process, so pass in just the content as a string |
|
286
|
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
=back |
|
288
|
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
(default:undefined) |
|
290
|
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
=item notoc_match |
|
292
|
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
notoc_match => I |
|
294
|
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
If there are certain individual tags you don't wish to include in the |
|
296
|
|
|
|
|
|
|
table of contents, even though they match the "significant elements", |
|
297
|
|
|
|
|
|
|
then if this pattern matches contents inside the tag (not the body), |
|
298
|
|
|
|
|
|
|
then that tag will not be included, either in generating anchors nor in |
|
299
|
|
|
|
|
|
|
generating the ToC. (default: C) |
|
300
|
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
=item ol |
|
302
|
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
ol => 1 |
|
304
|
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
Use an ordered list for level 1 ToC entries. |
|
306
|
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
=item ol_num_levels |
|
308
|
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
ol_num_levels => 2 |
|
310
|
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
The number of levels deep the OL listing will go if B is true. |
|
312
|
|
|
|
|
|
|
If set to zero, will use an ordered list for all levels. |
|
313
|
|
|
|
|
|
|
(default:1) |
|
314
|
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
=item overwrite |
|
316
|
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
overwrite => 1 |
|
318
|
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
Overwrite the input file with the output. |
|
320
|
|
|
|
|
|
|
(default:off) |
|
321
|
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
=item outfile |
|
323
|
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
outfile => I |
|
325
|
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
File to write the output to. This is where the modified HTML |
|
327
|
|
|
|
|
|
|
output goes to. Note that it doesn't make sense to use this option if you |
|
328
|
|
|
|
|
|
|
are processing more than one file. If you give '-' as the filename, then |
|
329
|
|
|
|
|
|
|
output will go to STDOUT. |
|
330
|
|
|
|
|
|
|
(default: STDOUT) |
|
331
|
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
=item quiet |
|
333
|
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
quiet => 1 |
|
335
|
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
Suppress informative messages. (default: off) |
|
337
|
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
=item textonly |
|
339
|
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
textonly => 1 |
|
341
|
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
Use only text content in significant elements. |
|
343
|
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
=item title |
|
345
|
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
title => I |
|
347
|
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
Title for ToC page (if not using B |
|
349
|
|
|
|
|
|
|
(default: "Table of Contents") |
|
350
|
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
=item toc_after |
|
352
|
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
toc_after => \%toc_after_data |
|
354
|
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
%toc_after_data = { I => I, |
|
356
|
|
|
|
|
|
|
I => I |
|
357
|
|
|
|
|
|
|
}; |
|
358
|
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
toc_after => { H2=>'' } |
|
360
|
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
For defining layout of significant elements in the ToC. |
|
362
|
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
This expects a reference to a hash of |
|
364
|
|
|
|
|
|
|
tag=>suffix pairs. |
|
365
|
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
The I is the HTML tag which marks the start of the element. The |
|
367
|
|
|
|
|
|
|
I is what is required to be appended to the Table of Contents |
|
368
|
|
|
|
|
|
|
entry generated for that tag. |
|
369
|
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
(default: undefined) |
|
371
|
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
=item toc_before |
|
373
|
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
toc_before => \%toc_before_data |
|
375
|
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
%toc_before_data = { I => I, |
|
377
|
|
|
|
|
|
|
I => I |
|
378
|
|
|
|
|
|
|
}; |
|
379
|
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
toc_before=>{ H2=>'' } |
|
381
|
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
For defining the layout of significant elements in the ToC. The I |
|
383
|
|
|
|
|
|
|
is the HTML tag which marks the start of the element. The I is |
|
384
|
|
|
|
|
|
|
what is required to be prepended to the Table of Contents entry |
|
385
|
|
|
|
|
|
|
generated for that tag. |
|
386
|
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
(default: undefined) |
|
388
|
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
=item toc_end |
|
390
|
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
toc_end => \%toc_end_data |
|
392
|
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
%toc_end_data = { I => I, |
|
394
|
|
|
|
|
|
|
I => I |
|
395
|
|
|
|
|
|
|
}; |
|
396
|
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
toc_end => { H1 => '/H1', H2 => '/H2' } |
|
398
|
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
For defining significant elements. The I is the HTML tag which |
|
400
|
|
|
|
|
|
|
marks the start of the element. The I the HTML tag which marks |
|
401
|
|
|
|
|
|
|
the end of the element. When matching in the input file, case is |
|
402
|
|
|
|
|
|
|
ignored (but make sure that all your I options referring to the |
|
403
|
|
|
|
|
|
|
same tag are exactly the same!). |
|
404
|
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
=item toc_entry |
|
406
|
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
toc_entry => \%toc_entry_data |
|
408
|
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
%toc_entry_data = { I => I, |
|
410
|
|
|
|
|
|
|
I => I |
|
411
|
|
|
|
|
|
|
}; |
|
412
|
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
toc_entry => { H1 => 1, H2 => 2 } |
|
414
|
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
For defining significant elements. The I is the HTML tag which marks |
|
416
|
|
|
|
|
|
|
the start of the element. The I is what level the tag is considered |
|
417
|
|
|
|
|
|
|
to be. The value of I must be numeric, and non-zero. If the value |
|
418
|
|
|
|
|
|
|
is negative, consective entries represented by the significant_element will |
|
419
|
|
|
|
|
|
|
be separated by the value set by B option. |
|
420
|
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
=item toclabel |
|
422
|
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
toclabel => I |
|
424
|
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
HTML text that labels the ToC. Always used. |
|
426
|
|
|
|
|
|
|
(default: "Table of Contents") |
|
427
|
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
=item toc_tag |
|
429
|
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
toc_tag => I |
|
431
|
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
If a ToC is to be included inline, this is the pattern which is used to |
|
433
|
|
|
|
|
|
|
match the tag where the ToC should be put. This can be a start-tag, an |
|
434
|
|
|
|
|
|
|
end-tag or a comment, but the E should be left out; that is, if you |
|
435
|
|
|
|
|
|
|
want the ToC to be placed after the BODY tag, then give "BODY". If you |
|
436
|
|
|
|
|
|
|
want a special comment tag to make where the ToC should go, then include |
|
437
|
|
|
|
|
|
|
the comment marks, for example: "!--toc--" (default:BODY) |
|
438
|
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
=item toc_tag_replace |
|
440
|
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
toc_tag_replace => 1 |
|
442
|
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
In conjunction with B, this is a flag to say whether the given tag |
|
444
|
|
|
|
|
|
|
should be replaced, or if the ToC should be put after the tag. |
|
445
|
|
|
|
|
|
|
This can be useful if your toc_tag is a comment and you don't need it |
|
446
|
|
|
|
|
|
|
after you have the ToC in place. |
|
447
|
|
|
|
|
|
|
(default:false) |
|
448
|
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
=item toc_only |
|
450
|
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
toc_only => 1 |
|
452
|
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
Output only the Table of Contents, that is, the Table of Contents plus |
|
454
|
|
|
|
|
|
|
the toclabel. If there is a B |
|
455
|
|
|
|
|
|
|
output. |
|
456
|
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
If B is false then if there is no B |
|
458
|
|
|
|
|
|
|
not true, then a suitable HTML page header will be output, and if there |
|
459
|
|
|
|
|
|
|
is no B |
|
460
|
|
|
|
|
|
|
be output. |
|
461
|
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
(default:false) |
|
463
|
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
=item to_string |
|
465
|
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
to_string => 1 |
|
467
|
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
Return the modified HTML output as a string. This I override |
|
469
|
|
|
|
|
|
|
other methods of output (unlike version 3.00). If I is false, |
|
470
|
|
|
|
|
|
|
the method will return 1 rather than a string. |
|
471
|
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
=item use_id |
|
473
|
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
use_id => 1 |
|
475
|
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
Use id="I" for anchors rather than anchors. |
|
477
|
|
|
|
|
|
|
However if an anchor already exists for a Significant Element, this |
|
478
|
|
|
|
|
|
|
won't make an id for that particular element. |
|
479
|
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
=item useorg |
|
481
|
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
useorg => 1 |
|
483
|
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
Use pre-existing backup files as the input source; that is, files of the |
|
485
|
|
|
|
|
|
|
form I.I (see B and B). |
|
486
|
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
=back |
|
488
|
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
=cut |
|
490
|
|
|
|
|
|
|
sub generate_toc ($%) { |
|
491
|
27
|
|
|
27
|
1
|
42718
|
my $self = shift; |
|
492
|
27
|
|
|
|
|
564
|
my %args = ( |
|
493
|
|
|
|
|
|
|
make_anchors=>1, |
|
494
|
|
|
|
|
|
|
make_toc=>1, |
|
495
|
|
|
|
|
|
|
input=>undef, |
|
496
|
|
|
|
|
|
|
filenames=>undef, |
|
497
|
|
|
|
|
|
|
bak=>$self->{bak}, |
|
498
|
|
|
|
|
|
|
debug=>$self->{debug}, |
|
499
|
|
|
|
|
|
|
useorg=>$self->{useorg}, |
|
500
|
|
|
|
|
|
|
use_id=>$self->{use_id}, |
|
501
|
|
|
|
|
|
|
notoc_match=>$self->{notoc_match}, |
|
502
|
|
|
|
|
|
|
toc_entry=>$self->{toc_entry}, |
|
503
|
|
|
|
|
|
|
toc_end=>$self->{toc_end}, |
|
504
|
|
|
|
|
|
|
overwrite=>$self->{overwrite}, |
|
505
|
|
|
|
|
|
|
ol=>$self->{ol}, |
|
506
|
|
|
|
|
|
|
ol_num_levels=>$self->{ol_num_levels}, |
|
507
|
|
|
|
|
|
|
entrysep=>$self->{entrysep}, |
|
508
|
|
|
|
|
|
|
ignore_only_one=>$self->{ignore_only_one}, |
|
509
|
|
|
|
|
|
|
@_ |
|
510
|
|
|
|
|
|
|
); |
|
511
|
|
|
|
|
|
|
|
|
512
|
27
|
50
|
|
|
|
125
|
if ($args{debug}) |
|
513
|
|
|
|
|
|
|
{ |
|
514
|
0
|
|
|
|
|
0
|
print STDERR Dumper(\%args); |
|
515
|
|
|
|
|
|
|
} |
|
516
|
27
|
50
|
|
|
|
92
|
if (!$args{input}) |
|
517
|
|
|
|
|
|
|
{ |
|
518
|
0
|
|
|
|
|
0
|
warn "generate_toc: no input given\n"; |
|
519
|
0
|
|
|
|
|
0
|
return ''; |
|
520
|
|
|
|
|
|
|
} |
|
521
|
|
|
|
|
|
|
# |
|
522
|
|
|
|
|
|
|
# get the input |
|
523
|
|
|
|
|
|
|
# |
|
524
|
27
|
|
|
|
|
62
|
my @filenames = (); |
|
525
|
27
|
|
|
|
|
47
|
my @input = (); |
|
526
|
27
|
100
|
|
|
|
89
|
if (ref $args{input} eq "ARRAY") |
|
527
|
|
|
|
|
|
|
{ |
|
528
|
19
|
|
|
|
|
26
|
@filenames = @{$args{input}}; |
|
|
19
|
|
|
|
|
43
|
|
|
529
|
19
|
|
|
|
|
28
|
my $i = 0; |
|
530
|
19
|
|
|
|
|
29
|
my $fh_needs_closing = 0; |
|
531
|
19
|
|
|
|
|
45
|
foreach my $fn (@filenames) |
|
532
|
|
|
|
|
|
|
{ |
|
533
|
19
|
|
|
|
|
27
|
my $infn = $fn; |
|
534
|
19
|
|
|
|
|
57
|
my $bakfile = $fn . "." . $args{bak}; |
|
535
|
19
|
0
|
33
|
|
|
71
|
if ($args{useorg} |
|
|
|
|
33
|
|
|
|
|
|
536
|
|
|
|
|
|
|
&& $args{bak} |
|
537
|
|
|
|
|
|
|
&& -e $bakfile) |
|
538
|
|
|
|
|
|
|
{ |
|
539
|
|
|
|
|
|
|
# use the old backup files as source |
|
540
|
0
|
|
|
|
|
0
|
$infn = $bakfile; |
|
541
|
|
|
|
|
|
|
} |
|
542
|
19
|
|
|
|
|
29
|
my $fh = undef; |
|
543
|
|
|
|
|
|
|
# using '-' means STDIN |
|
544
|
19
|
50
|
|
|
|
41
|
if ($infn eq '-') |
|
545
|
|
|
|
|
|
|
{ |
|
546
|
0
|
|
|
|
|
0
|
$fh = *STDIN; |
|
547
|
0
|
|
|
|
|
0
|
$fh_needs_closing = 0; |
|
548
|
|
|
|
|
|
|
} |
|
549
|
|
|
|
|
|
|
else |
|
550
|
|
|
|
|
|
|
{ |
|
551
|
19
|
50
|
|
|
|
6198
|
open ($fh, $infn) || |
|
552
|
|
|
|
|
|
|
die "Error: unable to open ", $infn, ": $!\n"; |
|
553
|
19
|
|
|
|
|
51
|
$fh_needs_closing = 1; |
|
554
|
|
|
|
|
|
|
} |
|
555
|
|
|
|
|
|
|
|
|
556
|
19
|
|
|
|
|
33
|
my $content = ''; |
|
557
|
|
|
|
|
|
|
{ |
|
558
|
19
|
|
|
|
|
37
|
local $/; # slurp entire file |
|
|
19
|
|
|
|
|
78
|
|
|
559
|
19
|
|
|
|
|
553
|
$content = <$fh>; |
|
560
|
19
|
50
|
|
|
|
245
|
close ($fh) if ($fh_needs_closing); |
|
561
|
|
|
|
|
|
|
} |
|
562
|
19
|
|
|
|
|
42
|
$input[$i] = $content; |
|
563
|
|
|
|
|
|
|
|
|
564
|
19
|
|
|
|
|
94
|
$i++; |
|
565
|
|
|
|
|
|
|
} |
|
566
|
|
|
|
|
|
|
} |
|
567
|
|
|
|
|
|
|
else |
|
568
|
|
|
|
|
|
|
{ |
|
569
|
8
|
|
|
|
|
14
|
$filenames[0] = ''; |
|
570
|
8
|
|
|
|
|
16
|
$input[0] = $args{input}; |
|
571
|
|
|
|
|
|
|
} |
|
572
|
|
|
|
|
|
|
# overwrite the filenames array if a replacement |
|
573
|
|
|
|
|
|
|
# was passed in and has the same length |
|
574
|
27
|
50
|
66
|
|
|
127
|
if (defined $args{filenames} |
|
|
6
|
|
66
|
|
|
27
|
|
|
575
|
6
|
|
|
|
|
41
|
&& @{$args{filenames}} |
|
576
|
|
|
|
|
|
|
&& $#{$args{filenames}} == $#{filenames} |
|
577
|
|
|
|
|
|
|
) |
|
578
|
|
|
|
|
|
|
{ |
|
579
|
6
|
|
|
|
|
8
|
@filenames = @{$args{filenames}}; |
|
|
6
|
|
|
|
|
14
|
|
|
580
|
|
|
|
|
|
|
} |
|
581
|
|
|
|
|
|
|
|
|
582
|
|
|
|
|
|
|
# |
|
583
|
|
|
|
|
|
|
# make the anchors |
|
584
|
|
|
|
|
|
|
# |
|
585
|
27
|
100
|
|
|
|
89
|
if ($args{make_anchors}) |
|
586
|
|
|
|
|
|
|
{ |
|
587
|
12
|
|
|
|
|
22
|
my $i = 0; |
|
588
|
12
|
|
|
|
|
27
|
foreach my $fn (@filenames) |
|
589
|
|
|
|
|
|
|
{ |
|
590
|
12
|
|
|
|
|
20
|
my $html_str = $input[$i]; |
|
591
|
12
|
|
|
|
|
103
|
$input[$i] = $self->make_anchors(%args, |
|
592
|
|
|
|
|
|
|
filename=>$fn, |
|
593
|
|
|
|
|
|
|
input=>$html_str); |
|
594
|
12
|
|
|
|
|
59
|
$i++; |
|
595
|
|
|
|
|
|
|
} |
|
596
|
|
|
|
|
|
|
} |
|
597
|
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
# |
|
599
|
|
|
|
|
|
|
# make the ToC |
|
600
|
|
|
|
|
|
|
# |
|
601
|
27
|
|
|
|
|
53
|
my $toc_str = ''; |
|
602
|
27
|
100
|
|
|
|
86
|
if ($args{make_toc}) |
|
603
|
|
|
|
|
|
|
{ |
|
604
|
17
|
|
|
|
|
38
|
my %labels = (); |
|
605
|
17
|
|
|
|
|
29
|
my @list_of_lists = (); |
|
606
|
17
|
|
|
|
|
28
|
my $i = 0; |
|
607
|
17
|
|
|
|
|
58
|
for (my $i = 0; $i < @filenames; $i++) |
|
608
|
|
|
|
|
|
|
{ |
|
609
|
17
|
|
|
|
|
149
|
my @the_list = $self->make_toc_list(%args, |
|
610
|
|
|
|
|
|
|
first_file=>$filenames[0], |
|
611
|
|
|
|
|
|
|
labels=>\%labels, |
|
612
|
|
|
|
|
|
|
filename=>$filenames[$i], |
|
613
|
|
|
|
|
|
|
input=>$input[$i]); |
|
614
|
17
|
100
|
66
|
|
|
119
|
if (!($args{ignore_only_one} |
|
615
|
|
|
|
|
|
|
and @the_list <= 1)) |
|
616
|
|
|
|
|
|
|
{ |
|
617
|
16
|
|
|
|
|
92
|
push @list_of_lists, @the_list; |
|
618
|
|
|
|
|
|
|
} |
|
619
|
|
|
|
|
|
|
} |
|
620
|
17
|
100
|
|
|
|
55
|
if (@list_of_lists > 0) |
|
621
|
|
|
|
|
|
|
{ |
|
622
|
|
|
|
|
|
|
# |
|
623
|
|
|
|
|
|
|
# create the appropriate format |
|
624
|
|
|
|
|
|
|
# |
|
625
|
16
|
|
|
|
|
84
|
my %formats = (); |
|
626
|
|
|
|
|
|
|
# check for non-list entries, flagged by negative levels |
|
627
|
16
|
|
|
|
|
29
|
while (my ($key, $val) = each %{$args{toc_entry}}) |
|
|
51
|
|
|
|
|
176
|
|
|
628
|
|
|
|
|
|
|
{ |
|
629
|
35
|
100
|
|
|
|
88
|
if ($val < 0) |
|
630
|
|
|
|
|
|
|
{ |
|
631
|
1
|
|
|
|
|
4
|
$formats{abs($val) - 1} = {}; |
|
632
|
1
|
|
|
|
|
4
|
$formats{abs($val) - 1}->{tree_head} = ' |
|
633
|
1
|
|
|
|
|
2
|
$formats{abs($val) - 1}->{tree_foot} = "\n\n"; |
|
634
|
1
|
|
|
|
|
4
|
$formats{abs($val) - 1}->{item_sep} = $args{entrysep}; |
|
635
|
1
|
|
|
|
|
2
|
$formats{abs($val) - 1}->{pre_item} = ''; |
|
636
|
1
|
|
|
|
|
3
|
$formats{abs($val) - 1}->{post_item} = ''; |
|
637
|
|
|
|
|
|
|
} |
|
638
|
|
|
|
|
|
|
} |
|
639
|
|
|
|
|
|
|
# check for OL |
|
640
|
16
|
100
|
|
|
|
51
|
if ($args{ol}) |
|
641
|
|
|
|
|
|
|
{ |
|
642
|
4
|
|
|
|
|
12
|
$formats{0} = {}; |
|
643
|
4
|
|
|
|
|
14
|
$formats{0}->{tree_head} = ''; |
|
644
|
4
|
|
|
|
|
11
|
$formats{0}->{tree_foot} = "\n"; |
|
645
|
4
|
100
|
|
|
|
11
|
if ($args{ol_num_levels} > 0) |
|
646
|
|
|
|
|
|
|
{ |
|
647
|
3
|
|
|
|
|
9
|
$formats{$args{ol_num_levels}} = {}; |
|
648
|
3
|
|
|
|
|
10
|
$formats{$args{ol_num_levels}}->{tree_head} = ' |
|
649
|
3
|
|
|
|
|
9
|
$formats{$args{ol_num_levels}}->{tree_foot} = "\n"; |
|
650
|
|
|
|
|
|
|
} |
|
651
|
|
|
|
|
|
|
} |
|
652
|
16
|
|
|
|
|
160
|
$toc_str = HTML::LinkList::link_tree( |
|
653
|
|
|
|
|
|
|
%args, |
|
654
|
|
|
|
|
|
|
link_tree=>\@list_of_lists, |
|
655
|
|
|
|
|
|
|
labels=>\%labels, |
|
656
|
|
|
|
|
|
|
formats=>\%formats, |
|
657
|
|
|
|
|
|
|
); |
|
658
|
|
|
|
|
|
|
} |
|
659
|
|
|
|
|
|
|
} |
|
660
|
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
# |
|
662
|
|
|
|
|
|
|
# put the output |
|
663
|
|
|
|
|
|
|
# |
|
664
|
27
|
|
|
|
|
18523
|
my $ret = $self->output_toc( |
|
665
|
|
|
|
|
|
|
%args, |
|
666
|
|
|
|
|
|
|
toc=>$toc_str, |
|
667
|
|
|
|
|
|
|
input=>\@input, |
|
668
|
|
|
|
|
|
|
filenames=>\@filenames, |
|
669
|
|
|
|
|
|
|
); |
|
670
|
|
|
|
|
|
|
|
|
671
|
27
|
|
|
|
|
267
|
return $ret; |
|
672
|
|
|
|
|
|
|
|
|
673
|
|
|
|
|
|
|
} # generate_toc |
|
674
|
|
|
|
|
|
|
|
|
675
|
|
|
|
|
|
|
=head1 INTERNAL METHODS |
|
676
|
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
These methods are documented for developer purposes and aren't intended |
|
678
|
|
|
|
|
|
|
to be used externally. |
|
679
|
|
|
|
|
|
|
|
|
680
|
|
|
|
|
|
|
=head2 make_anchor_name |
|
681
|
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
$toc->make_anchor_name(content=>$content, |
|
683
|
|
|
|
|
|
|
anchors=>\%anchors); |
|
684
|
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
Makes the anchor-name for one anchor. |
|
686
|
|
|
|
|
|
|
Bases the anchor on the content of the significant element. |
|
687
|
|
|
|
|
|
|
Ensures that anchors are unique. |
|
688
|
|
|
|
|
|
|
|
|
689
|
|
|
|
|
|
|
=cut |
|
690
|
|
|
|
|
|
|
|
|
691
|
|
|
|
|
|
|
sub make_anchor_name ($%) { |
|
692
|
44
|
|
|
44
|
1
|
57
|
my $self = shift; |
|
693
|
44
|
|
|
|
|
190
|
my %args = ( |
|
694
|
|
|
|
|
|
|
content=>'', # will be overwritten by one of @_ |
|
695
|
|
|
|
|
|
|
anchors=>undef, |
|
696
|
|
|
|
|
|
|
@_ |
|
697
|
|
|
|
|
|
|
); |
|
698
|
44
|
|
|
|
|
78
|
my $name = $args{content}; # the anchor name will most often be very close to the token content |
|
699
|
|
|
|
|
|
|
|
|
700
|
44
|
50
|
|
|
|
174
|
if ($name !~ /^\s*$/) { |
|
701
|
|
|
|
|
|
|
# generate a SEO-friendly anchor right from the token content |
|
702
|
|
|
|
|
|
|
# The allowed character set is limited first by the URI specification |
|
703
|
|
|
|
|
|
|
# for fragments, http://tools.ietf.org/html/rfc3986#section-2: |
|
704
|
|
|
|
|
|
|
# characters then by the limitations of the values of 'id' and 'name' |
|
705
|
|
|
|
|
|
|
# attributes: http://www.w3.org/TR/REC-html40/types.html#type-name |
|
706
|
|
|
|
|
|
|
# Eventually, the only punctuation allowed in id values is [_.:-] |
|
707
|
|
|
|
|
|
|
|
|
708
|
|
|
|
|
|
|
# we need to replace [#&;] only when they are NOT part of an HTML |
|
709
|
|
|
|
|
|
|
# entity. decode_entities saves us from crafting a nasty regexp |
|
710
|
44
|
|
|
|
|
228
|
decode_entities($name); |
|
711
|
|
|
|
|
|
|
# MediaWiki also uses the period, see |
|
712
|
|
|
|
|
|
|
# http://en.wikipedia.org/wiki/Hierarchies#Ethics.2C_behavioral_psychology.2C_philosophies_of_identity |
|
713
|
44
|
|
|
|
|
108
|
$name =~ s/([^\s\w_.:-])/'.'.sprintf('%02X', ord($1))/eg; |
|
|
17
|
|
|
|
|
91
|
|
|
714
|
|
|
|
|
|
|
|
|
715
|
44
|
|
|
|
|
156
|
$name =~ s/\s+/_/g; |
|
716
|
|
|
|
|
|
|
# "ID and NAME tokens must begin with a letter ([A-Za-z])" |
|
717
|
44
|
|
|
|
|
110
|
$name =~ s/^[^a-zA-Z]+//; |
|
718
|
|
|
|
|
|
|
} |
|
719
|
|
|
|
|
|
|
else |
|
720
|
|
|
|
|
|
|
{ |
|
721
|
0
|
|
|
|
|
0
|
$name = 'id'; |
|
722
|
|
|
|
|
|
|
} |
|
723
|
44
|
50
|
|
|
|
96
|
$name = 'id' if $name eq ''; |
|
724
|
|
|
|
|
|
|
|
|
725
|
|
|
|
|
|
|
# check if it already exists; if so, add a number |
|
726
|
44
|
|
|
|
|
55
|
my $anch_num = 1; |
|
727
|
44
|
|
|
|
|
51
|
my $word_name = $name; |
|
728
|
44
|
|
|
|
|
75
|
my $name_key = lc $name; |
|
729
|
|
|
|
|
|
|
# Reference: http://www.w3.org/TR/REC-html40/struct/links.html#h-12.2.1 |
|
730
|
|
|
|
|
|
|
# Anchor names must be unique within a document. Anchor names that differ |
|
731
|
|
|
|
|
|
|
# only in case may not appear in the same document. |
|
732
|
44
|
|
66
|
|
|
168
|
while (defined $args{anchors}->{$name_key} |
|
733
|
|
|
|
|
|
|
&& $args{anchors}->{$name_key}) |
|
734
|
|
|
|
|
|
|
{ |
|
735
|
10
|
|
|
|
|
23
|
$name = $word_name . "_$anch_num"; |
|
736
|
10
|
|
|
|
|
15
|
$name_key = lc $name; |
|
737
|
10
|
|
|
|
|
40
|
$anch_num++; |
|
738
|
|
|
|
|
|
|
} |
|
739
|
|
|
|
|
|
|
|
|
740
|
44
|
|
|
|
|
149
|
return $name; |
|
741
|
|
|
|
|
|
|
} # make_anchor_name |
|
742
|
|
|
|
|
|
|
|
|
743
|
|
|
|
|
|
|
=head2 make_anchors |
|
744
|
|
|
|
|
|
|
|
|
745
|
|
|
|
|
|
|
my $new_html = $toc->make_anchors(input=>$html, |
|
746
|
|
|
|
|
|
|
notoc_match=>$notoc_match, |
|
747
|
|
|
|
|
|
|
use_id=>$use_id, |
|
748
|
|
|
|
|
|
|
toc_entry=>\%toc_entries, |
|
749
|
|
|
|
|
|
|
toc_end=>\%toc_ends, |
|
750
|
|
|
|
|
|
|
); |
|
751
|
|
|
|
|
|
|
|
|
752
|
|
|
|
|
|
|
Makes the anchors the given input string. |
|
753
|
|
|
|
|
|
|
Returns a string. |
|
754
|
|
|
|
|
|
|
|
|
755
|
|
|
|
|
|
|
=cut |
|
756
|
|
|
|
|
|
|
|
|
757
|
|
|
|
|
|
|
sub make_anchors ($%) { |
|
758
|
12
|
|
|
12
|
1
|
19
|
my $self = shift; |
|
759
|
12
|
|
|
|
|
183
|
my %args = ( |
|
760
|
|
|
|
|
|
|
input=>'', |
|
761
|
|
|
|
|
|
|
notoc_match=>$self->{notoc_match}, |
|
762
|
|
|
|
|
|
|
use_id=>$self->{use_id}, |
|
763
|
|
|
|
|
|
|
toc_entry=>$self->{toc_entry}, |
|
764
|
|
|
|
|
|
|
toc_end=>$self->{toc_end}, |
|
765
|
|
|
|
|
|
|
debug=>$self->{debug}, |
|
766
|
|
|
|
|
|
|
quiet=>$self->{quiet}, |
|
767
|
|
|
|
|
|
|
@_ |
|
768
|
|
|
|
|
|
|
); |
|
769
|
12
|
|
|
|
|
26
|
my $html_str = $args{input}; |
|
770
|
|
|
|
|
|
|
|
|
771
|
12
|
50
|
66
|
|
|
67
|
print STDERR "Making anchors for ", $args{filename}, "...\n" |
|
772
|
|
|
|
|
|
|
if (!$args{quiet} && $args{filename}); |
|
773
|
|
|
|
|
|
|
|
|
774
|
12
|
|
|
|
|
27
|
my @newhtml = (); |
|
775
|
12
|
|
|
|
|
17
|
my %anchors = (); |
|
776
|
|
|
|
|
|
|
# Note that the keys to the anchors hash should be lower-cased |
|
777
|
|
|
|
|
|
|
# since anchor names that differ only in case are not allowed. |
|
778
|
|
|
|
|
|
|
|
|
779
|
|
|
|
|
|
|
# parse the HTML |
|
780
|
12
|
|
|
|
|
120
|
my $hp = new HTML::SimpleParse(); |
|
781
|
12
|
|
|
|
|
181
|
$hp->text($html_str); |
|
782
|
12
|
|
|
|
|
113
|
$hp->parse(); |
|
783
|
|
|
|
|
|
|
|
|
784
|
12
|
|
|
|
|
10428
|
my $tag; |
|
785
|
|
|
|
|
|
|
my $endtag; |
|
786
|
12
|
|
|
|
|
21
|
my $level = 0; |
|
787
|
12
|
|
|
|
|
20
|
my $tmp; |
|
788
|
12
|
|
|
|
|
21
|
my $adone = 0; |
|
789
|
12
|
|
|
|
|
22
|
my $name = ''; |
|
790
|
|
|
|
|
|
|
# go through the HTML |
|
791
|
12
|
|
|
|
|
20
|
my $tok; |
|
792
|
|
|
|
|
|
|
my $next_tok; |
|
793
|
0
|
|
|
|
|
0
|
my $i; |
|
794
|
12
|
|
|
|
|
23
|
my $notoc = $args{notoc_match}; |
|
795
|
12
|
|
|
|
|
77
|
my @tree = $hp->tree(); |
|
796
|
12
|
|
|
|
|
321
|
while (@tree) { |
|
797
|
1012
|
|
|
|
|
1276
|
$tok = shift @tree; |
|
798
|
1012
|
|
|
|
|
1187
|
$next_tok = $tree[0]; |
|
799
|
1012
|
100
|
|
|
|
19738
|
if ($tok->{type} ne 'starttag') |
|
800
|
|
|
|
|
|
|
{ |
|
801
|
698
|
|
|
|
|
1657
|
push @newhtml, $hp->execute($tok); |
|
802
|
698
|
|
|
|
|
7160
|
next; |
|
803
|
|
|
|
|
|
|
} |
|
804
|
|
|
|
|
|
|
# assert: we have a start tag |
|
805
|
314
|
|
|
|
|
381
|
$level = 0; |
|
806
|
|
|
|
|
|
|
|
|
807
|
|
|
|
|
|
|
# check if tag included in TOC (significant element) |
|
808
|
314
|
|
|
|
|
324
|
foreach my $key (keys %{$args{toc_entry}}) { |
|
|
314
|
|
|
|
|
833
|
|
|
809
|
613
|
100
|
66
|
|
|
6435
|
if ($tok->{content} =~ /^$key/i |
|
|
|
|
100
|
|
|
|
|
|
810
|
|
|
|
|
|
|
&& (!$notoc |
|
811
|
|
|
|
|
|
|
|| $tok->{content} !~ /$notoc/)) { |
|
812
|
48
|
|
|
|
|
61
|
$tag = $key; |
|
813
|
|
|
|
|
|
|
# level of significant element |
|
814
|
48
|
|
|
|
|
88
|
$level = abs($args{toc_entry}->{$key}); |
|
815
|
|
|
|
|
|
|
# End tag of significant element |
|
816
|
48
|
|
|
|
|
92
|
$endtag = $args{toc_end}->{$key}; |
|
817
|
48
|
|
|
|
|
82
|
last; |
|
818
|
|
|
|
|
|
|
} |
|
819
|
|
|
|
|
|
|
} |
|
820
|
|
|
|
|
|
|
# if $level is not set, we didn't find a Significant tag |
|
821
|
314
|
100
|
|
|
|
8978
|
if (!$level) { |
|
822
|
266
|
|
|
|
|
739
|
push @newhtml, $hp->execute($tok); |
|
823
|
266
|
|
|
|
|
2858
|
next; |
|
824
|
|
|
|
|
|
|
} |
|
825
|
|
|
|
|
|
|
# assert: current tag is a Significant tag |
|
826
|
|
|
|
|
|
|
|
|
827
|
|
|
|
|
|
|
# |
|
828
|
|
|
|
|
|
|
# Add A element or ID to document |
|
829
|
|
|
|
|
|
|
# |
|
830
|
48
|
|
|
|
|
59
|
my $name_in_anchor = 0; |
|
831
|
48
|
|
|
|
|
54
|
$adone = 0; |
|
832
|
48
|
|
|
|
|
59
|
$name = ''; |
|
833
|
48
|
|
|
|
|
60
|
my $sig_tok = $tok; |
|
834
|
48
|
50
|
|
|
|
123
|
if ($tag =~ /title/i) { # TITLE tag is a special case |
|
835
|
0
|
|
|
|
|
0
|
$adone = 1; |
|
836
|
|
|
|
|
|
|
} |
|
837
|
48
|
100
|
|
|
|
88
|
if ($args{use_id}) |
|
838
|
|
|
|
|
|
|
{ |
|
839
|
|
|
|
|
|
|
# is there an existing ID? |
|
840
|
9
|
100
|
|
|
|
49
|
if ($sig_tok->{content} =~ /ID\s*=\s*(['"])/i) { |
|
841
|
1
|
|
|
|
|
7
|
my $q = $1; |
|
842
|
1
|
|
|
|
|
33
|
($name) = $sig_tok->{content} =~ m/ID\s*=\s*$q([^$q]*)$q/i; |
|
843
|
1
|
50
|
|
|
|
7
|
if ($name) |
|
844
|
|
|
|
|
|
|
{ |
|
845
|
1
|
|
|
|
|
5
|
$anchors{lc $name} = $name; |
|
846
|
1
|
|
|
|
|
6
|
push @newhtml, $hp->execute($sig_tok); |
|
847
|
1
|
|
|
|
|
13
|
$adone = 1; |
|
848
|
|
|
|
|
|
|
} |
|
849
|
|
|
|
|
|
|
else # if the ID has no name, remove it! |
|
850
|
|
|
|
|
|
|
{ |
|
851
|
0
|
|
|
|
|
0
|
$sig_tok->{content} =~ s/ID\s*=\s*$q$q//i; |
|
852
|
|
|
|
|
|
|
} |
|
853
|
|
|
|
|
|
|
} |
|
854
|
|
|
|
|
|
|
} |
|
855
|
|
|
|
|
|
|
else # not adding ID, move right along |
|
856
|
|
|
|
|
|
|
{ |
|
857
|
39
|
|
|
|
|
131
|
push @newhtml, $hp->execute($tok); |
|
858
|
|
|
|
|
|
|
} |
|
859
|
|
|
|
|
|
|
# Find the "name" of the significant element |
|
860
|
|
|
|
|
|
|
# Don't consume the tree, because ID behaves differently from A |
|
861
|
48
|
|
|
|
|
412
|
my $i = 0; |
|
862
|
48
|
|
66
|
|
|
527
|
while (!$name && $i < @tree) |
|
863
|
|
|
|
|
|
|
{ |
|
864
|
47
|
|
|
|
|
76
|
$tok = $tree[$i]; |
|
865
|
47
|
|
|
|
|
109
|
$next_tok = $tree[$i + 1]; |
|
866
|
47
|
100
|
33
|
|
|
226
|
if ($tok->{type} eq 'text') { |
|
|
|
50
|
33
|
|
|
|
|
|
|
|
0
|
0
|
|
|
|
|
|
867
|
44
|
|
|
|
|
153
|
$name = $self->make_anchor_name(content=>$tok->{content}, |
|
868
|
|
|
|
|
|
|
anchors=>\%anchors); |
|
869
|
|
|
|
|
|
|
# Anchor |
|
870
|
|
|
|
|
|
|
} elsif (!$adone && $tok->{type} eq 'starttag' |
|
871
|
|
|
|
|
|
|
&& $tok->{content} =~ /^A/i) |
|
872
|
|
|
|
|
|
|
{ |
|
873
|
3
|
50
|
|
|
|
24
|
if ($tok->{content} =~ /NAME\s*=\s*(['"])/i) { |
|
|
|
0
|
|
|
|
|
|
|
874
|
3
|
|
|
|
|
8
|
my $q = $1; |
|
875
|
3
|
|
|
|
|
43
|
($name) = $tok->{content} =~ m/NAME\s*=\s*$q([^$q]*)$q/i; |
|
876
|
3
|
|
|
|
|
6
|
$name_in_anchor = 1; |
|
877
|
|
|
|
|
|
|
} elsif ($next_tok->{type} eq 'text') { |
|
878
|
0
|
|
|
|
|
0
|
$name = $self->make_anchor_name(content=>$next_tok->{content}, |
|
879
|
|
|
|
|
|
|
anchors=>\%anchors); |
|
880
|
|
|
|
|
|
|
} |
|
881
|
|
|
|
|
|
|
} elsif ($tok->{type} eq 'starttag' |
|
882
|
|
|
|
|
|
|
|| $tok->{type} eq 'endtag') |
|
883
|
|
|
|
|
|
|
{ # Tag |
|
884
|
0
|
0
|
|
|
|
0
|
last if $tok->{content} =~ m|$endtag|i; |
|
885
|
|
|
|
|
|
|
} |
|
886
|
47
|
|
|
|
|
128
|
$i++; |
|
887
|
|
|
|
|
|
|
} |
|
888
|
|
|
|
|
|
|
# assert: there is a name, or there is no name to be found |
|
889
|
48
|
50
|
|
|
|
95
|
if (!$name) |
|
890
|
|
|
|
|
|
|
{ |
|
891
|
|
|
|
|
|
|
# make up a name |
|
892
|
0
|
|
|
|
|
0
|
$name = $self->make_anchor_name(content=>"TOC", |
|
893
|
|
|
|
|
|
|
anchors=>\%anchors); |
|
894
|
|
|
|
|
|
|
} |
|
895
|
48
|
100
|
100
|
|
|
221
|
if (!$adone && $args{use_id}) |
|
896
|
|
|
|
|
|
|
{ |
|
897
|
8
|
50
|
|
|
|
17
|
if (!$name_in_anchor) |
|
898
|
|
|
|
|
|
|
{ |
|
899
|
8
|
|
|
|
|
23
|
$anchors{lc $name} = $name; |
|
900
|
|
|
|
|
|
|
# add the ID |
|
901
|
8
|
|
|
|
|
22
|
$sig_tok->{content} .= " id='$name'"; |
|
902
|
8
|
|
|
|
|
26
|
push @newhtml, $hp->execute($sig_tok); |
|
903
|
8
|
|
|
|
|
79
|
$adone = 1; |
|
904
|
|
|
|
|
|
|
} |
|
905
|
|
|
|
|
|
|
else |
|
906
|
|
|
|
|
|
|
{ |
|
907
|
|
|
|
|
|
|
# we have an already-named anchor, so don't add an ID |
|
908
|
0
|
|
|
|
|
0
|
push @newhtml, $hp->execute($sig_tok); |
|
909
|
|
|
|
|
|
|
} |
|
910
|
|
|
|
|
|
|
} |
|
911
|
|
|
|
|
|
|
|
|
912
|
48
|
|
|
|
|
106
|
while (@tree) { |
|
913
|
102
|
|
|
|
|
231
|
$tok = shift @tree; |
|
914
|
102
|
|
|
|
|
130
|
$next_tok = $tree[0]; |
|
915
|
|
|
|
|
|
|
# Text |
|
916
|
102
|
100
|
66
|
|
|
519
|
if ($tok->{type} eq 'text') { |
|
|
|
100
|
66
|
|
|
|
|
|
|
|
50
|
33
|
|
|
|
|
|
917
|
48
|
100
|
66
|
|
|
240
|
if (!$adone && $tok->{content} !~ /^\s*$/) { |
|
918
|
36
|
|
|
|
|
94
|
$anchors{lc $name} = $name; |
|
919
|
|
|
|
|
|
|
# replace the text with an anchor containing the text |
|
920
|
36
|
|
|
|
|
96
|
push(@newhtml, qq|$tok->{content}|); |
|
921
|
36
|
|
|
|
|
83
|
$adone = 1; |
|
922
|
|
|
|
|
|
|
} else { |
|
923
|
12
|
|
|
|
|
35
|
push @newhtml, $hp->execute($tok); |
|
924
|
|
|
|
|
|
|
} |
|
925
|
|
|
|
|
|
|
# Anchor |
|
926
|
|
|
|
|
|
|
} elsif (!$adone && $tok->{type} eq 'starttag' |
|
927
|
|
|
|
|
|
|
&& $tok->{content} =~ /^A/i) |
|
928
|
|
|
|
|
|
|
{ |
|
929
|
|
|
|
|
|
|
# is there an existing NAME anchor? |
|
930
|
3
|
50
|
|
|
|
6
|
if ($name_in_anchor) { |
|
931
|
3
|
|
|
|
|
14
|
$anchors{lc $name} = $name; |
|
932
|
3
|
|
|
|
|
9
|
push @newhtml, $hp->execute($tok); |
|
933
|
|
|
|
|
|
|
} else { |
|
934
|
|
|
|
|
|
|
# add the current name anchor |
|
935
|
0
|
|
|
|
|
0
|
$tmp = $hp->execute($tok); |
|
936
|
0
|
|
|
|
|
0
|
$tmp =~ s/^(
|
|
937
|
0
|
|
|
|
|
0
|
push @newhtml, $tmp; |
|
938
|
0
|
|
|
|
|
0
|
$anchors{lc $name} = $name; |
|
939
|
|
|
|
|
|
|
} |
|
940
|
3
|
|
|
|
|
32
|
$adone = 1; |
|
941
|
|
|
|
|
|
|
} elsif ($tok->{type} eq 'starttag' |
|
942
|
|
|
|
|
|
|
|| $tok->{type} eq 'endtag') |
|
943
|
|
|
|
|
|
|
{ # Tag |
|
944
|
51
|
|
|
|
|
150
|
push @newhtml, $hp->execute($tok); |
|
945
|
51
|
100
|
|
|
|
873
|
last if $tok->{content} =~ m|$endtag|i; |
|
946
|
|
|
|
|
|
|
} |
|
947
|
|
|
|
|
|
|
else { |
|
948
|
0
|
|
|
|
|
0
|
push @newhtml, $hp->execute($tok); |
|
949
|
|
|
|
|
|
|
} |
|
950
|
|
|
|
|
|
|
} |
|
951
|
|
|
|
|
|
|
} |
|
952
|
12
|
|
|
|
|
179
|
my $out = join('', @newhtml); |
|
953
|
|
|
|
|
|
|
|
|
954
|
12
|
|
|
|
|
720
|
return $out; |
|
955
|
|
|
|
|
|
|
} # make_anchors |
|
956
|
|
|
|
|
|
|
|
|
957
|
|
|
|
|
|
|
=head2 make_toc_list |
|
958
|
|
|
|
|
|
|
|
|
959
|
|
|
|
|
|
|
my @toc_list = $toc->make_toc_list(input=>$html, |
|
960
|
|
|
|
|
|
|
labels=>\%labels, |
|
961
|
|
|
|
|
|
|
notoc_match=>$notoc_match, |
|
962
|
|
|
|
|
|
|
toc_entry=>\%toc_entry, |
|
963
|
|
|
|
|
|
|
toc_end=>\%toc_end, |
|
964
|
|
|
|
|
|
|
filename=>$filename); |
|
965
|
|
|
|
|
|
|
|
|
966
|
|
|
|
|
|
|
Makes a list of lists which represents the structure and content |
|
967
|
|
|
|
|
|
|
of (a portion of) the ToC from one file. |
|
968
|
|
|
|
|
|
|
Also updates a list of labels for the ToC entries. |
|
969
|
|
|
|
|
|
|
|
|
970
|
|
|
|
|
|
|
=cut |
|
971
|
|
|
|
|
|
|
|
|
972
|
|
|
|
|
|
|
sub make_toc_list ($%) { |
|
973
|
17
|
|
|
17
|
1
|
32
|
my $self = shift; |
|
974
|
17
|
|
|
|
|
322
|
my %args = ( |
|
975
|
|
|
|
|
|
|
input=>'', |
|
976
|
|
|
|
|
|
|
filename=>'', |
|
977
|
|
|
|
|
|
|
labels=>undef, |
|
978
|
|
|
|
|
|
|
notoc_match=>$self->{notoc_match}, |
|
979
|
|
|
|
|
|
|
toc_entry=>$self->{toc_entry}, |
|
980
|
|
|
|
|
|
|
toc_end=>$self->{toc_end}, |
|
981
|
|
|
|
|
|
|
inline=>$self->{inline}, |
|
982
|
|
|
|
|
|
|
debug=>$self->{debug}, |
|
983
|
|
|
|
|
|
|
toc_before=>$self->{toc_before}, |
|
984
|
|
|
|
|
|
|
toc_after=>$self->{toc_after}, |
|
985
|
|
|
|
|
|
|
textonly=>$self->{textonly}, |
|
986
|
|
|
|
|
|
|
ignore_sole_first=>$self->{ignore_sole_first}, |
|
987
|
|
|
|
|
|
|
ignore_only_one=>$self->{ignore_only_one}, |
|
988
|
|
|
|
|
|
|
@_ |
|
989
|
|
|
|
|
|
|
); |
|
990
|
17
|
|
|
|
|
35
|
my $html_str = $args{input}; |
|
991
|
17
|
|
|
|
|
28
|
my $infile = $args{filename}; |
|
992
|
17
|
|
|
|
|
24
|
my $labels = $args{labels}; |
|
993
|
|
|
|
|
|
|
|
|
994
|
17
|
|
|
|
|
29
|
my $toc_str = ""; |
|
995
|
17
|
|
|
|
|
33
|
my @toc = (); |
|
996
|
17
|
|
|
|
|
28
|
my @list_of_paths = (); |
|
997
|
17
|
|
|
|
|
28
|
my %level_count = (); |
|
998
|
|
|
|
|
|
|
|
|
999
|
|
|
|
|
|
|
# parse the HTML |
|
1000
|
17
|
|
|
|
|
133
|
my $hp = new HTML::SimpleParse(); |
|
1001
|
17
|
|
|
|
|
224
|
$hp->text($html_str); |
|
1002
|
17
|
|
|
|
|
132
|
$hp->parse(); |
|
1003
|
|
|
|
|
|
|
|
|
1004
|
17
|
|
|
|
|
21029
|
my $noli; |
|
1005
|
|
|
|
|
|
|
my $prevnoli; |
|
1006
|
17
|
|
|
|
|
39
|
my $before = ""; |
|
1007
|
17
|
|
|
|
|
29
|
my $after = ""; |
|
1008
|
17
|
|
|
|
|
35
|
my $tag; |
|
1009
|
|
|
|
|
|
|
my $endtag; |
|
1010
|
17
|
|
|
|
|
129
|
my $level = 0; |
|
1011
|
17
|
|
|
|
|
27
|
my $levelopen; |
|
1012
|
|
|
|
|
|
|
my $tmp; |
|
1013
|
0
|
|
|
|
|
0
|
my $content; |
|
1014
|
17
|
|
|
|
|
39
|
my $adone = 0; |
|
1015
|
17
|
|
|
|
|
34
|
my $name = "NOTOC"; # if no anchor is found... |
|
1016
|
17
|
|
|
|
|
22
|
my $is_title; |
|
1017
|
17
|
|
|
|
|
23
|
my $found_title = 0; |
|
1018
|
17
|
|
|
|
|
33
|
my $notoc = $args{notoc_match}; |
|
1019
|
|
|
|
|
|
|
# go through the HTML |
|
1020
|
17
|
|
|
|
|
19
|
my $tok; |
|
1021
|
17
|
|
|
|
|
73
|
my @tree = $hp->tree(); |
|
1022
|
17
|
|
|
|
|
573
|
while (@tree) { |
|
1023
|
1600
|
|
|
|
|
2087
|
$tok = shift @tree; |
|
1024
|
1600
|
|
|
|
|
1816
|
$level = 0; |
|
1025
|
1600
|
|
|
|
|
1584
|
$is_title = 0; |
|
1026
|
1600
|
|
|
|
|
1799
|
$tag = ''; |
|
1027
|
1600
|
100
|
|
|
|
3479
|
if ($tok->{type} eq 'starttag') |
|
1028
|
|
|
|
|
|
|
{ |
|
1029
|
|
|
|
|
|
|
# check if tag included in TOC |
|
1030
|
548
|
|
|
|
|
572
|
foreach my $key (keys %{$args{toc_entry}}) { |
|
|
548
|
|
|
|
|
1396
|
|
|
1031
|
1185
|
100
|
66
|
|
|
11489
|
if ($tok->{content} =~ /^$key/i |
|
|
|
|
100
|
|
|
|
|
|
1032
|
|
|
|
|
|
|
&& (!$notoc |
|
1033
|
|
|
|
|
|
|
|| $tok->{content} !~ /$notoc/)) { |
|
1034
|
69
|
|
|
|
|
98
|
$tag = $key; |
|
1035
|
69
|
50
|
|
|
|
250
|
if ($args{debug}) { |
|
1036
|
0
|
|
|
|
|
0
|
print STDERR "============\n"; |
|
1037
|
0
|
|
|
|
|
0
|
print STDERR "key = $key "; |
|
1038
|
0
|
|
|
|
|
0
|
print STDERR "tok->content = '", $tok->{content}, "' "; |
|
1039
|
0
|
|
|
|
|
0
|
print STDERR "tag = $tag"; |
|
1040
|
0
|
|
|
|
|
0
|
print STDERR "\n============\n"; |
|
1041
|
|
|
|
|
|
|
} |
|
1042
|
|
|
|
|
|
|
# level of significant element |
|
1043
|
69
|
|
|
|
|
115
|
$level = abs($args{toc_entry}->{$key}); |
|
1044
|
|
|
|
|
|
|
# no used in ToC listing |
|
1045
|
69
|
|
|
|
|
117
|
$noli = $args{toc_entry}->{$key} < 0; |
|
1046
|
|
|
|
|
|
|
# End tag of significant element |
|
1047
|
69
|
|
|
|
|
147
|
$endtag = $args{toc_end}->{$key}; |
|
1048
|
69
|
50
|
|
|
|
252
|
if (defined $args{toc_before}->{$key}) { |
|
1049
|
0
|
|
|
|
|
0
|
$before = $args{toc_before}->{$key}; |
|
1050
|
|
|
|
|
|
|
} else { |
|
1051
|
69
|
|
|
|
|
99
|
$before = ""; |
|
1052
|
|
|
|
|
|
|
} |
|
1053
|
69
|
50
|
|
|
|
153
|
if (defined $args{toc_after}->{$key}) { |
|
1054
|
0
|
|
|
|
|
0
|
$after = $args{toc_after}->{$key}; |
|
1055
|
|
|
|
|
|
|
} else { |
|
1056
|
69
|
|
|
|
|
200
|
$after = ""; |
|
1057
|
|
|
|
|
|
|
} |
|
1058
|
|
|
|
|
|
|
} |
|
1059
|
|
|
|
|
|
|
} |
|
1060
|
|
|
|
|
|
|
} |
|
1061
|
1600
|
100
|
|
|
|
3080
|
if (!$level) { |
|
1062
|
1531
|
|
|
|
|
2960
|
next; |
|
1063
|
|
|
|
|
|
|
} |
|
1064
|
69
|
50
|
|
|
|
149
|
if ($args{debug}) { |
|
1065
|
0
|
|
|
|
|
0
|
print STDERR "Chosen tag:$tag\n"; |
|
1066
|
|
|
|
|
|
|
} |
|
1067
|
|
|
|
|
|
|
# assert: we are at a Significant tag |
|
1068
|
|
|
|
|
|
|
|
|
1069
|
|
|
|
|
|
|
# get A element or ID from document |
|
1070
|
|
|
|
|
|
|
# This assumes that there is one there |
|
1071
|
69
|
|
|
|
|
89
|
$content = ''; |
|
1072
|
69
|
|
|
|
|
75
|
$adone = 0; |
|
1073
|
69
|
50
|
|
|
|
135
|
if ($tag =~ /title/i) { # TITLE tag is a special case |
|
1074
|
0
|
0
|
|
|
|
0
|
if ($found_title) { |
|
1075
|
|
|
|
|
|
|
# don't need to find a title again, we found it |
|
1076
|
0
|
|
|
|
|
0
|
next; |
|
1077
|
|
|
|
|
|
|
} else { |
|
1078
|
0
|
|
|
|
|
0
|
$is_title = 1; $adone = 1; |
|
|
0
|
|
|
|
|
0
|
|
|
1079
|
0
|
|
|
|
|
0
|
$found_title = 1; |
|
1080
|
|
|
|
|
|
|
} |
|
1081
|
|
|
|
|
|
|
} |
|
1082
|
69
|
50
|
|
|
|
160
|
if ($args{debug}) { |
|
1083
|
0
|
|
|
|
|
0
|
print STDERR "is_title:$is_title\n"; |
|
1084
|
|
|
|
|
|
|
} |
|
1085
|
|
|
|
|
|
|
# check for an ID before we skip this tag |
|
1086
|
69
|
100
|
|
|
|
187
|
if ($tok->{content} =~ /ID\s*=\s*(['"])/i) { |
|
1087
|
8
|
|
|
|
|
18
|
my $q = $1; |
|
1088
|
8
|
|
|
|
|
95
|
($name) = $tok->{content} =~ m/ID\s*=\s*$q([^$q]*)$q/i; |
|
1089
|
8
|
|
|
|
|
18
|
$adone = 1; |
|
1090
|
|
|
|
|
|
|
} |
|
1091
|
69
|
|
|
|
|
152
|
while (@tree) { |
|
1092
|
310
|
|
|
|
|
421
|
$tok = shift @tree; |
|
1093
|
|
|
|
|
|
|
# Text |
|
1094
|
310
|
100
|
66
|
|
|
1914
|
if ($tok->{type} eq 'text') { |
|
|
|
100
|
66
|
|
|
|
|
|
|
|
50
|
66
|
|
|
|
|
|
1095
|
95
|
|
|
|
|
172
|
$content .= $tok->{content}; |
|
1096
|
95
|
50
|
|
|
|
301
|
if ($args{debug}) { |
|
1097
|
0
|
|
|
|
|
0
|
print STDERR "tok-content = ", $tok->{content}, "\n"; |
|
1098
|
0
|
|
|
|
|
0
|
print STDERR "content = $content\n"; |
|
1099
|
|
|
|
|
|
|
} |
|
1100
|
|
|
|
|
|
|
# Anchor |
|
1101
|
|
|
|
|
|
|
} elsif (!$adone && $tok->{type} eq 'starttag' |
|
1102
|
|
|
|
|
|
|
&& $tok->{content} =~ /^A/i) |
|
1103
|
|
|
|
|
|
|
{ |
|
1104
|
61
|
50
|
|
|
|
380
|
if ($tok->{content} =~ /NAME\s*=\s*(['"])/i) { |
|
1105
|
61
|
|
|
|
|
142
|
my $q = $1; |
|
1106
|
61
|
|
|
|
|
473
|
($name) = $tok->{content} =~ m/NAME\s*=\s*$q([^$q]*)$q/i; |
|
1107
|
61
|
|
|
|
|
193
|
$adone = 1; |
|
1108
|
|
|
|
|
|
|
} |
|
1109
|
|
|
|
|
|
|
} elsif ($tok->{type} eq 'starttag' |
|
1110
|
|
|
|
|
|
|
|| $tok->{type} eq 'endtag') |
|
1111
|
|
|
|
|
|
|
{ # Tag |
|
1112
|
154
|
50
|
|
|
|
376
|
if ($args{debug}) { |
|
1113
|
0
|
|
|
|
|
0
|
print STDERR "file = ", $infile, |
|
1114
|
|
|
|
|
|
|
" tag = $tag, endtag = '$endtag", |
|
1115
|
|
|
|
|
|
|
"' tok-type = ", $tok->{type}, |
|
1116
|
|
|
|
|
|
|
" tok-content = '", $tok->{content}, "'\n"; |
|
1117
|
|
|
|
|
|
|
} |
|
1118
|
154
|
100
|
|
|
|
735
|
last if $tok->{content} =~ m#$endtag#i; |
|
1119
|
85
|
50
|
33
|
|
|
661
|
$content .= $hp->execute($tok) |
|
1120
|
|
|
|
|
|
|
unless $args{textonly} |
|
1121
|
|
|
|
|
|
|
|| $tok->{content} =~ m#/?(hr|p|a|img)#i; |
|
1122
|
|
|
|
|
|
|
} |
|
1123
|
|
|
|
|
|
|
|
|
1124
|
|
|
|
|
|
|
} |
|
1125
|
69
|
50
|
|
|
|
144
|
if ($args{debug}) { |
|
1126
|
0
|
|
|
|
|
0
|
print STDERR "Chosen content:'$content'\n"; |
|
1127
|
|
|
|
|
|
|
} |
|
1128
|
|
|
|
|
|
|
|
|
1129
|
69
|
50
|
|
|
|
218
|
if ($content =~ /^\s*$/) { # Check for empty content |
|
1130
|
0
|
|
|
|
|
0
|
warn "Warning: A $tag in $infile has no content; $tag skipped\n"; |
|
1131
|
0
|
|
|
|
|
0
|
next; |
|
1132
|
|
|
|
|
|
|
} else { |
|
1133
|
69
|
|
|
|
|
168
|
$content =~ s/^\s+//; # Strip beginning whitespace |
|
1134
|
69
|
|
|
|
|
171
|
$content =~ s/\s+$//; # Strip end whitespace |
|
1135
|
69
|
|
|
|
|
137
|
$content = $before . $content . $after; |
|
1136
|
|
|
|
|
|
|
} |
|
1137
|
|
|
|
|
|
|
# figure out the anchor link needed |
|
1138
|
69
|
|
|
|
|
84
|
my $link = ''; |
|
1139
|
69
|
100
|
66
|
|
|
239
|
if ($args{inline} and $args{first_file} eq $infile) |
|
1140
|
|
|
|
|
|
|
{ |
|
1141
|
19
|
50
|
|
|
|
57
|
$link = (!$is_title ? qq|#$name| : ''); |
|
1142
|
|
|
|
|
|
|
} |
|
1143
|
|
|
|
|
|
|
else |
|
1144
|
|
|
|
|
|
|
{ |
|
1145
|
50
|
50
|
|
|
|
197
|
$link .= join('', |
|
1146
|
|
|
|
|
|
|
qq|$infile|, |
|
1147
|
|
|
|
|
|
|
!$is_title ? qq|#$name| : ''); |
|
1148
|
|
|
|
|
|
|
} |
|
1149
|
|
|
|
|
|
|
# Assert: we know the info about this TOC entry |
|
1150
|
69
|
|
|
|
|
266
|
push @list_of_paths, { |
|
1151
|
|
|
|
|
|
|
level=>$level, |
|
1152
|
|
|
|
|
|
|
path=>$link, |
|
1153
|
|
|
|
|
|
|
}; |
|
1154
|
69
|
|
|
|
|
181
|
$labels->{$link} = $content; |
|
1155
|
69
|
|
|
|
|
134
|
$level_count{$level}++; |
|
1156
|
|
|
|
|
|
|
|
|
1157
|
69
|
|
|
|
|
80
|
$name = 'NOTOC'; |
|
1158
|
69
|
|
|
|
|
171
|
$prevnoli = $noli; |
|
1159
|
|
|
|
|
|
|
} # while tree |
|
1160
|
|
|
|
|
|
|
|
|
1161
|
|
|
|
|
|
|
# If we want to ignore the first H1 if there's only one of them |
|
1162
|
|
|
|
|
|
|
# if the first item is a level-0 item |
|
1163
|
|
|
|
|
|
|
# and there is only one of them |
|
1164
|
|
|
|
|
|
|
# then remove it and readjust levels |
|
1165
|
17
|
100
|
66
|
|
|
155
|
if ($args{ignore_sole_first} |
|
|
|
100
|
66
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
1166
|
|
|
|
|
|
|
and $level_count{"1"} == 1 |
|
1167
|
|
|
|
|
|
|
and $list_of_paths[0]->{level} == 1) |
|
1168
|
|
|
|
|
|
|
{ |
|
1169
|
1
|
|
|
|
|
2
|
shift @list_of_paths; |
|
1170
|
1
|
|
|
|
|
6
|
for (my $i = 0; $i < @list_of_paths; $i++) |
|
1171
|
|
|
|
|
|
|
{ |
|
1172
|
1
|
|
|
|
|
4
|
$list_of_paths[$i]->{level}--; |
|
1173
|
|
|
|
|
|
|
} |
|
1174
|
|
|
|
|
|
|
} |
|
1175
|
|
|
|
|
|
|
elsif ($args{ignore_only_one} |
|
1176
|
|
|
|
|
|
|
and @list_of_paths == 1) |
|
1177
|
|
|
|
|
|
|
{ |
|
1178
|
1
|
|
|
|
|
12
|
return (); |
|
1179
|
|
|
|
|
|
|
} |
|
1180
|
|
|
|
|
|
|
|
|
1181
|
16
|
|
|
|
|
39
|
my @list_of_lists = (); |
|
1182
|
16
|
|
|
|
|
90
|
@list_of_lists = $self->build_lol( |
|
1183
|
|
|
|
|
|
|
paths=>\@list_of_paths); |
|
1184
|
|
|
|
|
|
|
|
|
1185
|
16
|
|
|
|
|
956
|
return @list_of_lists; |
|
1186
|
|
|
|
|
|
|
} # make_toc_list |
|
1187
|
|
|
|
|
|
|
|
|
1188
|
|
|
|
|
|
|
=head2 build_lol |
|
1189
|
|
|
|
|
|
|
|
|
1190
|
|
|
|
|
|
|
Build a list of lists of paths, given a list |
|
1191
|
|
|
|
|
|
|
of hashes with info about paths. |
|
1192
|
|
|
|
|
|
|
|
|
1193
|
|
|
|
|
|
|
=cut |
|
1194
|
|
|
|
|
|
|
sub build_lol { |
|
1195
|
36
|
|
|
36
|
1
|
53
|
my $self = shift; |
|
1196
|
36
|
|
|
|
|
229
|
my %args = ( |
|
1197
|
|
|
|
|
|
|
paths=>undef, |
|
1198
|
|
|
|
|
|
|
depth=>1, |
|
1199
|
|
|
|
|
|
|
prepend_list=>undef, |
|
1200
|
|
|
|
|
|
|
append_list=>undef, |
|
1201
|
|
|
|
|
|
|
@_ |
|
1202
|
|
|
|
|
|
|
); |
|
1203
|
36
|
|
|
|
|
66
|
my $paths_ref = $args{paths}; |
|
1204
|
36
|
|
|
|
|
47
|
my $depth = $args{depth}; |
|
1205
|
|
|
|
|
|
|
|
|
1206
|
36
|
|
|
|
|
54
|
my @list_of_lists = (); |
|
1207
|
36
|
|
|
|
|
40
|
while (@{$paths_ref}) |
|
|
123
|
|
|
|
|
300
|
|
|
1208
|
|
|
|
|
|
|
{ |
|
1209
|
96
|
|
|
|
|
149
|
my $toc_entry = $paths_ref->[0]; |
|
1210
|
96
|
|
|
|
|
130
|
my $path_depth = $toc_entry->{level}; |
|
1211
|
96
|
|
|
|
|
124
|
my $path = $toc_entry->{path}; |
|
1212
|
96
|
100
|
|
|
|
428
|
if ($path_depth == $depth) |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
1213
|
|
|
|
|
|
|
{ |
|
1214
|
67
|
|
|
|
|
75
|
shift @{$paths_ref}; # use this path |
|
|
67
|
|
|
|
|
96
|
|
|
1215
|
67
|
|
|
|
|
159
|
push @list_of_lists, $path; |
|
1216
|
|
|
|
|
|
|
} |
|
1217
|
|
|
|
|
|
|
elsif ($path_depth > $depth) |
|
1218
|
|
|
|
|
|
|
{ |
|
1219
|
20
|
|
|
|
|
117
|
push @list_of_lists, [$self->build_lol( |
|
1220
|
|
|
|
|
|
|
%args, |
|
1221
|
|
|
|
|
|
|
prepend_list=>undef, |
|
1222
|
|
|
|
|
|
|
append_list=>undef, |
|
1223
|
|
|
|
|
|
|
paths=>$paths_ref, |
|
1224
|
|
|
|
|
|
|
depth=>$path_depth, |
|
1225
|
|
|
|
|
|
|
)]; |
|
1226
|
|
|
|
|
|
|
} |
|
1227
|
|
|
|
|
|
|
elsif ($path_depth < $depth) |
|
1228
|
|
|
|
|
|
|
{ |
|
1229
|
9
|
|
|
|
|
69
|
return @list_of_lists; |
|
1230
|
|
|
|
|
|
|
} |
|
1231
|
|
|
|
|
|
|
} |
|
1232
|
|
|
|
|
|
|
# prepend the given list to the top level |
|
1233
|
27
|
50
|
33
|
|
|
90
|
if (defined $args{prepend_list} and @{$args{prepend_list}}) |
|
|
0
|
|
|
|
|
0
|
|
|
1234
|
|
|
|
|
|
|
{ |
|
1235
|
|
|
|
|
|
|
# if the list of lists is a single item which is a list |
|
1236
|
|
|
|
|
|
|
# then add the extra list to that item |
|
1237
|
0
|
0
|
0
|
|
|
0
|
if ($#list_of_lists == 0 |
|
1238
|
|
|
|
|
|
|
and ref($list_of_lists[0]) eq "ARRAY") |
|
1239
|
|
|
|
|
|
|
{ |
|
1240
|
0
|
|
|
|
|
0
|
unshift @{$list_of_lists[0]}, @{$args{prepend_list}}; |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
1241
|
|
|
|
|
|
|
} |
|
1242
|
|
|
|
|
|
|
else |
|
1243
|
|
|
|
|
|
|
{ |
|
1244
|
0
|
|
|
|
|
0
|
unshift @list_of_lists, @{$args{prepend_list}}; |
|
|
0
|
|
|
|
|
0
|
|
|
1245
|
|
|
|
|
|
|
} |
|
1246
|
|
|
|
|
|
|
} |
|
1247
|
|
|
|
|
|
|
# append the given list to the top level |
|
1248
|
27
|
50
|
33
|
|
|
94
|
if (defined $args{append_list} and @{$args{append_list}}) |
|
|
0
|
|
|
|
|
0
|
|
|
1249
|
|
|
|
|
|
|
{ |
|
1250
|
|
|
|
|
|
|
# if the list of lists is a single item which is a list |
|
1251
|
|
|
|
|
|
|
# then add the extra list to that item |
|
1252
|
0
|
0
|
0
|
|
|
0
|
if ($#list_of_lists == 0 |
|
1253
|
|
|
|
|
|
|
and ref($list_of_lists[0]) eq "ARRAY") |
|
1254
|
|
|
|
|
|
|
{ |
|
1255
|
0
|
|
|
|
|
0
|
push @{$list_of_lists[0]}, @{$args{append_list}}; |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
1256
|
|
|
|
|
|
|
} |
|
1257
|
|
|
|
|
|
|
else |
|
1258
|
|
|
|
|
|
|
{ |
|
1259
|
0
|
|
|
|
|
0
|
push @list_of_lists, @{$args{append_list}}; |
|
|
0
|
|
|
|
|
0
|
|
|
1260
|
|
|
|
|
|
|
} |
|
1261
|
|
|
|
|
|
|
} |
|
1262
|
27
|
|
|
|
|
132
|
return @list_of_lists; |
|
1263
|
|
|
|
|
|
|
} # build_lol |
|
1264
|
|
|
|
|
|
|
|
|
1265
|
|
|
|
|
|
|
=head2 output_toc |
|
1266
|
|
|
|
|
|
|
|
|
1267
|
|
|
|
|
|
|
$self->output_toc(toc=>$toc_str, |
|
1268
|
|
|
|
|
|
|
input=>\@input, |
|
1269
|
|
|
|
|
|
|
filenames=>\@filenames); |
|
1270
|
|
|
|
|
|
|
|
|
1271
|
|
|
|
|
|
|
Put the output (whether to file, STDOUT or string). |
|
1272
|
|
|
|
|
|
|
The "output" in this case could be the ToC, the modified |
|
1273
|
|
|
|
|
|
|
(anchors added) HTML, or both. |
|
1274
|
|
|
|
|
|
|
|
|
1275
|
|
|
|
|
|
|
=cut |
|
1276
|
|
|
|
|
|
|
sub output_toc ($%) { |
|
1277
|
27
|
|
|
27
|
1
|
58
|
my $self = shift; |
|
1278
|
27
|
|
|
|
|
667
|
my %args = ( |
|
1279
|
|
|
|
|
|
|
toc=>'', |
|
1280
|
|
|
|
|
|
|
input=>undef, |
|
1281
|
|
|
|
|
|
|
filenames=>undef, |
|
1282
|
|
|
|
|
|
|
bak=>$self->{bak}, |
|
1283
|
|
|
|
|
|
|
useorg=>$self->{useorg}, |
|
1284
|
|
|
|
|
|
|
inline=>$self->{inline}, |
|
1285
|
|
|
|
|
|
|
overwrite=>$self->{overwrite}, |
|
1286
|
|
|
|
|
|
|
to_string=>$self->{to_string}, |
|
1287
|
|
|
|
|
|
|
header=>$self->{header}, |
|
1288
|
|
|
|
|
|
|
footer=>$self->{footer}, |
|
1289
|
|
|
|
|
|
|
toc_only=>$self->{toc_only}, |
|
1290
|
|
|
|
|
|
|
title=>$self->{title}, |
|
1291
|
|
|
|
|
|
|
toclabel=>$self->{toclabel}, |
|
1292
|
|
|
|
|
|
|
outfile=>$self->{outfile}, |
|
1293
|
|
|
|
|
|
|
debug=>$self->{debug}, |
|
1294
|
|
|
|
|
|
|
quiet=>$self->{quiet}, |
|
1295
|
|
|
|
|
|
|
@_ |
|
1296
|
|
|
|
|
|
|
); |
|
1297
|
|
|
|
|
|
|
|
|
1298
|
|
|
|
|
|
|
# |
|
1299
|
|
|
|
|
|
|
# Output to the files if we were making anchors |
|
1300
|
|
|
|
|
|
|
# |
|
1301
|
27
|
50
|
100
|
|
|
168
|
if ($args{make_anchors} |
|
|
|
|
66
|
|
|
|
|
|
1302
|
|
|
|
|
|
|
&& !$args{to_string} |
|
1303
|
|
|
|
|
|
|
&& $args{overwrite}) |
|
1304
|
|
|
|
|
|
|
{ |
|
1305
|
0
|
|
|
|
|
0
|
my $ofh; |
|
1306
|
|
|
|
|
|
|
# start from 1 if we're going to be inlining the toc |
|
1307
|
|
|
|
|
|
|
# in the first file and not to an output file |
|
1308
|
0
|
0
|
0
|
|
|
0
|
my $start_from = (($args{make_toc} |
|
1309
|
|
|
|
|
|
|
&& $args{inline} |
|
1310
|
|
|
|
|
|
|
&& !$args{outfile}) |
|
1311
|
|
|
|
|
|
|
? 1 : 0); |
|
1312
|
0
|
|
|
|
|
0
|
for (my $i=$start_from; $i < @{$args{filenames}}; $i++) |
|
|
0
|
|
|
|
|
0
|
|
|
1313
|
|
|
|
|
|
|
{ |
|
1314
|
0
|
|
|
|
|
0
|
my $filename = $args{filenames}->[$i]; |
|
1315
|
0
|
|
|
|
|
0
|
my $bakfile = $filename . "." . $args{bak}; |
|
1316
|
0
|
0
|
0
|
|
|
0
|
if ($args{bak} |
|
|
|
|
0
|
|
|
|
|
|
1317
|
|
|
|
|
|
|
&& !($args{useorg} && -e $bakfile)) |
|
1318
|
|
|
|
|
|
|
{ |
|
1319
|
|
|
|
|
|
|
# copy the file to a backup |
|
1320
|
0
|
0
|
|
|
|
0
|
print STDERR "Backing up ", $filename, " to ", |
|
1321
|
|
|
|
|
|
|
$bakfile, "\n" |
|
1322
|
|
|
|
|
|
|
unless $args{quiet}; |
|
1323
|
0
|
|
|
|
|
0
|
cp($filename, $bakfile); |
|
1324
|
|
|
|
|
|
|
} |
|
1325
|
0
|
0
|
|
|
|
0
|
open($ofh, "> $filename") |
|
1326
|
|
|
|
|
|
|
|| die "Error: unable to open ", $filename, ": $!\n"; |
|
1327
|
0
|
0
|
|
|
|
0
|
print STDERR "Overwriting ToC to ", $filename, "\n" |
|
1328
|
|
|
|
|
|
|
unless $args{quiet}; |
|
1329
|
0
|
|
|
|
|
0
|
print $ofh $args{input}->[$i]; |
|
1330
|
0
|
|
|
|
|
0
|
close($ofh); |
|
1331
|
|
|
|
|
|
|
} |
|
1332
|
|
|
|
|
|
|
} |
|
1333
|
|
|
|
|
|
|
|
|
1334
|
|
|
|
|
|
|
# |
|
1335
|
|
|
|
|
|
|
# Construct and output the ToC |
|
1336
|
|
|
|
|
|
|
# |
|
1337
|
27
|
|
|
|
|
46
|
my $output = ''; |
|
1338
|
27
|
100
|
33
|
|
|
141
|
if ($args{make_toc}) |
|
|
|
50
|
33
|
|
|
|
|
|
1339
|
|
|
|
|
|
|
{ |
|
1340
|
17
|
100
|
|
|
|
45
|
if ($args{toc}) |
|
1341
|
|
|
|
|
|
|
{ |
|
1342
|
16
|
|
|
|
|
33
|
my @toc = (); |
|
1343
|
|
|
|
|
|
|
# put the header at the start of the ToC if there is one |
|
1344
|
16
|
50
|
66
|
|
|
138
|
if ($args{header}) { |
|
|
|
100
|
|
|
|
|
|
|
1345
|
0
|
0
|
|
|
|
0
|
if (-f $args{header}) |
|
1346
|
|
|
|
|
|
|
{ |
|
1347
|
0
|
0
|
|
|
|
0
|
open(HEADER, $args{header}) |
|
1348
|
|
|
|
|
|
|
|| die "Error: unable to open ", $args{header}, ": $!\n"; |
|
1349
|
0
|
|
|
|
|
0
|
push @toc, |
|
1350
|
0
|
|
|
|
|
0
|
close (HEADER); |
|
1351
|
|
|
|
|
|
|
} |
|
1352
|
|
|
|
|
|
|
else # not a file |
|
1353
|
|
|
|
|
|
|
{ |
|
1354
|
0
|
|
|
|
|
0
|
push @toc, $args{header}; |
|
1355
|
|
|
|
|
|
|
} |
|
1356
|
|
|
|
|
|
|
} |
|
1357
|
|
|
|
|
|
|
# if we are outputing a standalone page, |
|
1358
|
|
|
|
|
|
|
# then make sure it can stand |
|
1359
|
|
|
|
|
|
|
elsif (!$args{toc_only} |
|
1360
|
|
|
|
|
|
|
&& !$args{inline}) { |
|
1361
|
|
|
|
|
|
|
|
|
1362
|
12
|
|
|
|
|
35
|
push @toc, qq|\n|, |
|
1363
|
|
|
|
|
|
|
"\n", |
|
1364
|
|
|
|
|
|
|
"\n"; |
|
1365
|
12
|
50
|
|
|
|
46
|
push @toc, "", $args{title}, "\n" if $args{title}; |
|
1366
|
12
|
|
|
|
|
30
|
push @toc, "\n", |
|
1367
|
|
|
|
|
|
|
"\n"; |
|
1368
|
|
|
|
|
|
|
} |
|
1369
|
|
|
|
|
|
|
|
|
1370
|
|
|
|
|
|
|
# start the ToC with the ToC label |
|
1371
|
16
|
100
|
|
|
|
51
|
if ($args{toclabel}) { |
|
1372
|
15
|
|
|
|
|
31
|
push @toc, $args{toclabel}; |
|
1373
|
|
|
|
|
|
|
} |
|
1374
|
|
|
|
|
|
|
|
|
1375
|
|
|
|
|
|
|
# and the actual ToC |
|
1376
|
16
|
|
|
|
|
35
|
push @toc, "\n", $args{toc}, "\n"; |
|
1377
|
|
|
|
|
|
|
|
|
1378
|
|
|
|
|
|
|
# add the footer, if there is one |
|
1379
|
16
|
50
|
66
|
|
|
118
|
if ($args{footer}) { |
|
|
|
100
|
|
|
|
|
|
|
1380
|
0
|
0
|
|
|
|
0
|
if (-f $args{footer}) |
|
1381
|
|
|
|
|
|
|
{ |
|
1382
|
0
|
0
|
|
|
|
0
|
open(FOOTER, $args{footer}) |
|
1383
|
|
|
|
|
|
|
|| die "Error: unable to open ", $args{footer}, ": $!\n"; |
|
1384
|
0
|
|
|
|
|
0
|
push @toc, |
|
1385
|
0
|
|
|
|
|
0
|
close (FOOTER); |
|
1386
|
|
|
|
|
|
|
} |
|
1387
|
|
|
|
|
|
|
else |
|
1388
|
|
|
|
|
|
|
{ |
|
1389
|
0
|
|
|
|
|
0
|
push @toc, $args{footer}; |
|
1390
|
|
|
|
|
|
|
} |
|
1391
|
|
|
|
|
|
|
} |
|
1392
|
|
|
|
|
|
|
# if we are outputing a standalone page, |
|
1393
|
|
|
|
|
|
|
# then make sure it can stand |
|
1394
|
|
|
|
|
|
|
elsif (!$args{toc_only} |
|
1395
|
|
|
|
|
|
|
&& !$args{inline}) { |
|
1396
|
|
|
|
|
|
|
|
|
1397
|
12
|
|
|
|
|
28
|
push @toc, "\n", "\n"; |
|
1398
|
|
|
|
|
|
|
} |
|
1399
|
|
|
|
|
|
|
|
|
1400
|
16
|
|
|
|
|
74
|
$output = join '', @toc; |
|
1401
|
|
|
|
|
|
|
} |
|
1402
|
|
|
|
|
|
|
else |
|
1403
|
|
|
|
|
|
|
{ |
|
1404
|
1
|
|
|
|
|
2
|
$output = "\n"; |
|
1405
|
|
|
|
|
|
|
} |
|
1406
|
|
|
|
|
|
|
} |
|
1407
|
|
|
|
|
|
|
elsif ($args{make_anchors} && (!$args{overwrite} || $args{to_string})) |
|
1408
|
|
|
|
|
|
|
{ |
|
1409
|
|
|
|
|
|
|
# if we're just making anchors, and we aren't overwriting |
|
1410
|
|
|
|
|
|
|
# the original file, we need to output it |
|
1411
|
10
|
|
|
|
|
25
|
$output = $args{input}->[0]; |
|
1412
|
|
|
|
|
|
|
} |
|
1413
|
|
|
|
|
|
|
|
|
1414
|
27
|
50
|
|
|
|
65
|
if ($output) |
|
1415
|
|
|
|
|
|
|
{ |
|
1416
|
|
|
|
|
|
|
# |
|
1417
|
|
|
|
|
|
|
# Sent the outfile to its final destination |
|
1418
|
|
|
|
|
|
|
# |
|
1419
|
27
|
|
|
|
|
40
|
my $file_needs_closing = 0; |
|
1420
|
27
|
|
|
|
|
46
|
my $ofh; |
|
1421
|
27
|
100
|
100
|
|
|
158
|
if ($args{to_string}) |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
1422
|
|
|
|
|
|
|
{ |
|
1423
|
8
|
|
|
|
|
11
|
$ofh = undef; |
|
1424
|
|
|
|
|
|
|
} |
|
1425
|
|
|
|
|
|
|
elsif ($args{outfile} && $args{outfile} ne "-") { |
|
1426
|
17
|
50
|
|
|
|
2986
|
open($ofh, "> " . $args{outfile}) |
|
1427
|
|
|
|
|
|
|
|| die "Error: unable to open ", $args{outfile}, ": $!\n"; |
|
1428
|
17
|
|
|
|
|
55
|
$file_needs_closing = 1; |
|
1429
|
|
|
|
|
|
|
} |
|
1430
|
|
|
|
|
|
|
elsif (!$args{overwrite}) { |
|
1431
|
0
|
|
|
|
|
0
|
$ofh = *STDOUT; |
|
1432
|
0
|
|
|
|
|
0
|
$file_needs_closing = 0; |
|
1433
|
|
|
|
|
|
|
} |
|
1434
|
27
|
100
|
|
|
|
84
|
if ($args{inline}) { |
|
1435
|
|
|
|
|
|
|
# create the modified version of the first set of input |
|
1436
|
5
|
|
|
|
|
13
|
my $first_file = $args{filenames}->[0]; |
|
1437
|
5
|
|
|
|
|
14
|
my $bakfile = $first_file . "." . $args{bak}; |
|
1438
|
5
|
|
|
|
|
48
|
$output = $self->put_toc_inline(%args, |
|
1439
|
|
|
|
|
|
|
toc_str=>$output, |
|
1440
|
|
|
|
|
|
|
in_string=>$args{input}->[0], |
|
1441
|
|
|
|
|
|
|
filename=>$args{filenames}->[0], |
|
1442
|
|
|
|
|
|
|
); |
|
1443
|
|
|
|
|
|
|
|
|
1444
|
5
|
100
|
0
|
|
|
37
|
if ($args{to_string}) |
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
1445
|
|
|
|
|
|
|
{ |
|
1446
|
|
|
|
|
|
|
# just send to string, don't print anything |
|
1447
|
3
|
50
|
|
|
|
13
|
if ($args{debug}) |
|
1448
|
|
|
|
|
|
|
{ |
|
1449
|
0
|
|
|
|
|
0
|
print STDERR "======== to_string output_toc ========\n"; |
|
1450
|
0
|
|
|
|
|
0
|
print STDERR $output; |
|
1451
|
0
|
|
|
|
|
0
|
print STDERR "========----------------------========\n"; |
|
1452
|
|
|
|
|
|
|
} |
|
1453
|
|
|
|
|
|
|
} |
|
1454
|
|
|
|
|
|
|
elsif ($args{overwrite}) { |
|
1455
|
2
|
50
|
33
|
|
|
23
|
if ($args{bak} |
|
|
|
|
33
|
|
|
|
|
|
1456
|
|
|
|
|
|
|
&& !($args{useorg} && -e $bakfile)) |
|
1457
|
|
|
|
|
|
|
{ |
|
1458
|
|
|
|
|
|
|
# copy the file to a backup |
|
1459
|
2
|
50
|
|
|
|
8
|
print STDERR "Backing up ", $first_file, " to ", |
|
1460
|
|
|
|
|
|
|
$bakfile, "\n" |
|
1461
|
|
|
|
|
|
|
unless $args{quiet}; |
|
1462
|
2
|
|
|
|
|
10
|
cp($first_file, $bakfile); |
|
1463
|
|
|
|
|
|
|
} |
|
1464
|
2
|
50
|
|
|
|
236
|
open($ofh, "> $first_file") |
|
1465
|
|
|
|
|
|
|
|| die "Error: unable to open ", $first_file, ": $!\n"; |
|
1466
|
2
|
|
|
|
|
8
|
$file_needs_closing = 1; |
|
1467
|
2
|
50
|
|
|
|
7
|
print STDERR "Overwriting ToC to ", $first_file, "\n" |
|
1468
|
|
|
|
|
|
|
unless $args{quiet}; |
|
1469
|
2
|
|
|
|
|
14
|
print $ofh $output; |
|
1470
|
|
|
|
|
|
|
} |
|
1471
|
|
|
|
|
|
|
elsif ($args{outfile} |
|
1472
|
|
|
|
|
|
|
&& $args{outfile} ne "-") { |
|
1473
|
0
|
0
|
|
|
|
0
|
print STDERR "Writing Inline ToC to ", $args{outfile}, "\n" |
|
1474
|
|
|
|
|
|
|
unless $args{quiet}; |
|
1475
|
0
|
|
|
|
|
0
|
print $ofh $output; |
|
1476
|
|
|
|
|
|
|
} |
|
1477
|
|
|
|
|
|
|
elsif ($args{outfile}) |
|
1478
|
|
|
|
|
|
|
{ |
|
1479
|
0
|
|
|
|
|
0
|
print $ofh $output; |
|
1480
|
|
|
|
|
|
|
} |
|
1481
|
|
|
|
|
|
|
} else { |
|
1482
|
22
|
100
|
33
|
|
|
135
|
if ($args{to_string}) |
|
|
|
50
|
|
|
|
|
|
|
1483
|
|
|
|
|
|
|
{ |
|
1484
|
|
|
|
|
|
|
# just send to string, don't print anything |
|
1485
|
|
|
|
|
|
|
} |
|
1486
|
|
|
|
|
|
|
elsif ($args{outfile} && $args{outfile} ne "-") { |
|
1487
|
17
|
50
|
|
|
|
60
|
print STDERR "Writing ToC to ", $args{outfile}, "\n" |
|
1488
|
|
|
|
|
|
|
unless $args{quiet}; |
|
1489
|
17
|
|
|
|
|
358
|
print $ofh $output; |
|
1490
|
|
|
|
|
|
|
} |
|
1491
|
|
|
|
|
|
|
else |
|
1492
|
|
|
|
|
|
|
{ |
|
1493
|
0
|
|
|
|
|
0
|
print $ofh $output; |
|
1494
|
|
|
|
|
|
|
} |
|
1495
|
|
|
|
|
|
|
} |
|
1496
|
27
|
100
|
|
|
|
79
|
if ($file_needs_closing) { |
|
1497
|
19
|
|
|
|
|
1228
|
close($ofh); |
|
1498
|
|
|
|
|
|
|
} |
|
1499
|
|
|
|
|
|
|
} |
|
1500
|
|
|
|
|
|
|
|
|
1501
|
27
|
100
|
|
|
|
109
|
if ($args{to_string}) |
|
1502
|
|
|
|
|
|
|
{ |
|
1503
|
8
|
|
|
|
|
43
|
return $output; |
|
1504
|
|
|
|
|
|
|
} |
|
1505
|
|
|
|
|
|
|
else |
|
1506
|
|
|
|
|
|
|
{ |
|
1507
|
19
|
|
|
|
|
137
|
return 1; |
|
1508
|
|
|
|
|
|
|
} |
|
1509
|
|
|
|
|
|
|
} # output_toc |
|
1510
|
|
|
|
|
|
|
|
|
1511
|
|
|
|
|
|
|
=head2 put_toc_inline |
|
1512
|
|
|
|
|
|
|
|
|
1513
|
|
|
|
|
|
|
my $newhtml = $toc->put_toc_inline(toc_str=>$toc_str, |
|
1514
|
|
|
|
|
|
|
filename=>$filename, in_string=>$in_string); |
|
1515
|
|
|
|
|
|
|
|
|
1516
|
|
|
|
|
|
|
Puts the given toc_str into the given input string; |
|
1517
|
|
|
|
|
|
|
returns a string. |
|
1518
|
|
|
|
|
|
|
|
|
1519
|
|
|
|
|
|
|
=cut |
|
1520
|
|
|
|
|
|
|
|
|
1521
|
|
|
|
|
|
|
sub put_toc_inline ($) { |
|
1522
|
5
|
|
|
5
|
1
|
11
|
my $self = shift; |
|
1523
|
5
|
|
|
|
|
69
|
my %args = ( |
|
1524
|
|
|
|
|
|
|
toc_str=>'', |
|
1525
|
|
|
|
|
|
|
filename=>'', |
|
1526
|
|
|
|
|
|
|
in_string=>'', |
|
1527
|
|
|
|
|
|
|
toc_tag=>$self->{toc_tag}, |
|
1528
|
|
|
|
|
|
|
toc_tag_replace=>$self->{toc_tag_replace}, |
|
1529
|
|
|
|
|
|
|
@_ |
|
1530
|
|
|
|
|
|
|
); |
|
1531
|
5
|
|
|
|
|
9
|
my $toc_str = $args{toc_str}; |
|
1532
|
5
|
|
|
|
|
10
|
my $infile = $args{filename}; |
|
1533
|
|
|
|
|
|
|
|
|
1534
|
5
|
|
|
|
|
9
|
my $html_str = ""; |
|
1535
|
|
|
|
|
|
|
|
|
1536
|
5
|
50
|
|
|
|
15
|
if ($args{in_string}) # use input string, not file |
|
1537
|
|
|
|
|
|
|
{ |
|
1538
|
5
|
|
|
|
|
10
|
$html_str = $args{in_string}; |
|
1539
|
|
|
|
|
|
|
} |
|
1540
|
|
|
|
|
|
|
else |
|
1541
|
|
|
|
|
|
|
{ |
|
1542
|
0
|
|
|
|
|
0
|
local $/; |
|
1543
|
0
|
0
|
|
|
|
0
|
open (FILE, $infile) || |
|
1544
|
|
|
|
|
|
|
die "Error: unable to open ", $infile, ": $!\n"; |
|
1545
|
|
|
|
|
|
|
|
|
1546
|
0
|
|
|
|
|
0
|
$html_str = ; |
|
1547
|
0
|
|
|
|
|
0
|
close (FILE); |
|
1548
|
|
|
|
|
|
|
} |
|
1549
|
|
|
|
|
|
|
|
|
1550
|
|
|
|
|
|
|
|
|
1551
|
|
|
|
|
|
|
# parse the file |
|
1552
|
5
|
|
|
|
|
28
|
my $hp = new HTML::SimpleParse(); |
|
1553
|
5
|
|
|
|
|
71
|
$hp->text($html_str); |
|
1554
|
5
|
|
|
|
|
38
|
$hp->parse(); |
|
1555
|
|
|
|
|
|
|
|
|
1556
|
5
|
|
|
|
|
4899
|
my $toc_tag = $args{toc_tag}; |
|
1557
|
5
|
|
|
|
|
12
|
my @newhtml = (); |
|
1558
|
|
|
|
|
|
|
|
|
1559
|
5
|
|
|
|
|
10
|
my $toc_done = 0; |
|
1560
|
|
|
|
|
|
|
# go through the HTML |
|
1561
|
5
|
|
|
|
|
12
|
my $tok; |
|
1562
|
|
|
|
|
|
|
my $i; |
|
1563
|
5
|
|
|
|
|
22
|
my @tree = $hp->tree(); |
|
1564
|
5
|
|
|
|
|
147
|
while (@tree) { |
|
1565
|
580
|
|
|
|
|
5547
|
$tok = shift @tree; |
|
1566
|
|
|
|
|
|
|
# look for the ToC tag in tags or comments |
|
1567
|
580
|
100
|
100
|
|
|
7564
|
if ($tok->{type} eq 'starttag' |
|
|
|
|
100
|
|
|
|
|
|
1568
|
|
|
|
|
|
|
|| $tok->{type} eq 'endtag' |
|
1569
|
|
|
|
|
|
|
|| $tok->{type} eq 'comment') |
|
1570
|
|
|
|
|
|
|
{ |
|
1571
|
315
|
100
|
100
|
|
|
981
|
if (!$toc_done |
|
1572
|
|
|
|
|
|
|
&& $tok->{content} =~ m|$toc_tag|i) { |
|
1573
|
|
|
|
|
|
|
# some tags need to be preserved, with the ToC put after, |
|
1574
|
|
|
|
|
|
|
# while others need to be replaced |
|
1575
|
5
|
100
|
|
|
|
21
|
if (!$args{toc_tag_replace}) { |
|
1576
|
4
|
|
|
|
|
13
|
push @newhtml, $hp->execute($tok); |
|
1577
|
|
|
|
|
|
|
} |
|
1578
|
|
|
|
|
|
|
# put the ToC in |
|
1579
|
5
|
|
|
|
|
45
|
push @newhtml, $toc_str; |
|
1580
|
5
|
|
|
|
|
14
|
$toc_done = 1; |
|
1581
|
|
|
|
|
|
|
} |
|
1582
|
|
|
|
|
|
|
else { |
|
1583
|
310
|
|
|
|
|
786
|
push @newhtml, $hp->execute($tok); |
|
1584
|
|
|
|
|
|
|
} |
|
1585
|
|
|
|
|
|
|
} |
|
1586
|
|
|
|
|
|
|
else |
|
1587
|
|
|
|
|
|
|
{ |
|
1588
|
265
|
|
|
|
|
677
|
push @newhtml, $hp->execute($tok); |
|
1589
|
265
|
|
|
|
|
2473
|
next; |
|
1590
|
|
|
|
|
|
|
} |
|
1591
|
|
|
|
|
|
|
} |
|
1592
|
|
|
|
|
|
|
|
|
1593
|
5
|
|
|
|
|
475
|
return join('', @newhtml); |
|
1594
|
|
|
|
|
|
|
} |
|
1595
|
|
|
|
|
|
|
|
|
1596
|
|
|
|
|
|
|
=head2 cp |
|
1597
|
|
|
|
|
|
|
|
|
1598
|
|
|
|
|
|
|
cp($src, $dst); |
|
1599
|
|
|
|
|
|
|
|
|
1600
|
|
|
|
|
|
|
Copies file $src to $dst. |
|
1601
|
|
|
|
|
|
|
Used for making backups of files. |
|
1602
|
|
|
|
|
|
|
|
|
1603
|
|
|
|
|
|
|
=cut |
|
1604
|
|
|
|
|
|
|
|
|
1605
|
|
|
|
|
|
|
sub cp ($$) { |
|
1606
|
2
|
|
|
2
|
1
|
6
|
my($src, $dst) = @_; |
|
1607
|
2
|
50
|
|
|
|
172
|
open (SRC, $src) || |
|
1608
|
|
|
|
|
|
|
die "Error: unable to open ", $src, ": $!\n"; |
|
1609
|
2
|
50
|
|
|
|
252
|
open (DST, "> $dst") || |
|
1610
|
|
|
|
|
|
|
die "Error: unable to open ", $dst, ": $!\n"; |
|
1611
|
2
|
|
|
|
|
248
|
print DST ; |
|
1612
|
2
|
|
|
|
|
46
|
close(SRC); |
|
1613
|
2
|
|
|
|
|
126
|
close(DST); |
|
1614
|
|
|
|
|
|
|
} |
|
1615
|
|
|
|
|
|
|
|
|
1616
|
|
|
|
|
|
|
1; |
|
1617
|
|
|
|
|
|
|
|
|
1618
|
|
|
|
|
|
|
=head1 FILE FORMATS |
|
1619
|
|
|
|
|
|
|
|
|
1620
|
|
|
|
|
|
|
=head2 Formatting the ToC |
|
1621
|
|
|
|
|
|
|
|
|
1622
|
|
|
|
|
|
|
The B and other related options give you control on how the |
|
1623
|
|
|
|
|
|
|
ToC entries may look, but there are other options to affect the final |
|
1624
|
|
|
|
|
|
|
appearance of the ToC file created. |
|
1625
|
|
|
|
|
|
|
|
|
1626
|
|
|
|
|
|
|
With the B option, the contents of the given file (or string) |
|
1627
|
|
|
|
|
|
|
will be prepended before the generated ToC. This allows you to have |
|
1628
|
|
|
|
|
|
|
introductory text, or any other text, before the ToC. |
|
1629
|
|
|
|
|
|
|
|
|
1630
|
|
|
|
|
|
|
=over |
|
1631
|
|
|
|
|
|
|
|
|
1632
|
|
|
|
|
|
|
=item Note: |
|
1633
|
|
|
|
|
|
|
|
|
1634
|
|
|
|
|
|
|
If you use the B option, make sure the file specified |
|
1635
|
|
|
|
|
|
|
contains the opening HTML tag, the HEAD element (containing the |
|
1636
|
|
|
|
|
|
|
TITLE element), and the opening BODY tag. However, these |
|
1637
|
|
|
|
|
|
|
tags/elements should not be in the header file if the B |
|
1638
|
|
|
|
|
|
|
option is used. See L for information on what |
|
1639
|
|
|
|
|
|
|
the header file should contain for inlining the ToC. |
|
1640
|
|
|
|
|
|
|
|
|
1641
|
|
|
|
|
|
|
=back |
|
1642
|
|
|
|
|
|
|
|
|
1643
|
|
|
|
|
|
|
With the B option, the contents of the given string will be |
|
1644
|
|
|
|
|
|
|
prepended before the generated ToC (but after any text taken from a |
|
1645
|
|
|
|
|
|
|
B |
|
1646
|
|
|
|
|
|
|
|
|
1647
|
|
|
|
|
|
|
With the B |
|
1648
|
|
|
|
|
|
|
after the generated ToC. |
|
1649
|
|
|
|
|
|
|
|
|
1650
|
|
|
|
|
|
|
=over |
|
1651
|
|
|
|
|
|
|
|
|
1652
|
|
|
|
|
|
|
=item Note: |
|
1653
|
|
|
|
|
|
|
|
|
1654
|
|
|
|
|
|
|
If you use the B |
|
1655
|
|
|
|
|
|
|
and HTML tags (unless, of course, you are using the B option). |
|
1656
|
|
|
|
|
|
|
|
|
1657
|
|
|
|
|
|
|
=back |
|
1658
|
|
|
|
|
|
|
|
|
1659
|
|
|
|
|
|
|
If the B option is not specified, the appropriate starting |
|
1660
|
|
|
|
|
|
|
HTML markup will be added, unless the B option is specified. |
|
1661
|
|
|
|
|
|
|
If the B |
|
1662
|
|
|
|
|
|
|
HTML markup will be added, unless the B option is specified. |
|
1663
|
|
|
|
|
|
|
|
|
1664
|
|
|
|
|
|
|
If you do not want/need to deal with header, and footer, files, then |
|
1665
|
|
|
|
|
|
|
you are allowed to specify the title, B option, of the ToC file; |
|
1666
|
|
|
|
|
|
|
and it allows you to specify a heading, or label, to put before ToC |
|
1667
|
|
|
|
|
|
|
entries' list, the B option. Both options have default values. |
|
1668
|
|
|
|
|
|
|
|
|
1669
|
|
|
|
|
|
|
If you do not want HTML page tags to be supplied, and just want |
|
1670
|
|
|
|
|
|
|
the ToC itself, then specify the B option. |
|
1671
|
|
|
|
|
|
|
If there are no B or B |
|
1672
|
|
|
|
|
|
|
output the contents of B and the ToC itself. |
|
1673
|
|
|
|
|
|
|
|
|
1674
|
|
|
|
|
|
|
=head2 Inlining the ToC |
|
1675
|
|
|
|
|
|
|
|
|
1676
|
|
|
|
|
|
|
The ability to incorporate the ToC directly into an HTML document |
|
1677
|
|
|
|
|
|
|
is supported via the B option. |
|
1678
|
|
|
|
|
|
|
|
|
1679
|
|
|
|
|
|
|
Inlining will be done on the first file in the list of files processed, |
|
1680
|
|
|
|
|
|
|
and will only be done if that file contains an opening tag matching the |
|
1681
|
|
|
|
|
|
|
B value. |
|
1682
|
|
|
|
|
|
|
|
|
1683
|
|
|
|
|
|
|
If B is true, then the first file in the list will be |
|
1684
|
|
|
|
|
|
|
overwritten, with the generated ToC inserted at the appropriate spot. |
|
1685
|
|
|
|
|
|
|
Otherwise a modified version of the first file is output to either STDOUT |
|
1686
|
|
|
|
|
|
|
or to the output file defined by the B option. |
|
1687
|
|
|
|
|
|
|
|
|
1688
|
|
|
|
|
|
|
The options B and B are used to determine where |
|
1689
|
|
|
|
|
|
|
and how the ToC is inserted into the output. |
|
1690
|
|
|
|
|
|
|
|
|
1691
|
|
|
|
|
|
|
B |
|
1692
|
|
|
|
|
|
|
|
|
1693
|
|
|
|
|
|
|
$toc->generate_toc(inline=>1, |
|
1694
|
|
|
|
|
|
|
toc_tag => 'BODY', |
|
1695
|
|
|
|
|
|
|
toc_tag_replace => 0, |
|
1696
|
|
|
|
|
|
|
... |
|
1697
|
|
|
|
|
|
|
); |
|
1698
|
|
|
|
|
|
|
|
|
1699
|
|
|
|
|
|
|
This will put the generated ToC after the BODY tag of the first file. |
|
1700
|
|
|
|
|
|
|
If the B option is specified, then the contents of the specified |
|
1701
|
|
|
|
|
|
|
file are inserted after the BODY tag. If the B option is not |
|
1702
|
|
|
|
|
|
|
empty, then the text specified by the B option is inserted. |
|
1703
|
|
|
|
|
|
|
Then the ToC is inserted, and finally, if the B |
|
1704
|
|
|
|
|
|
|
specified, it inserts the footer. Then the rest of the input file |
|
1705
|
|
|
|
|
|
|
follows as it was before. |
|
1706
|
|
|
|
|
|
|
|
|
1707
|
|
|
|
|
|
|
B |
|
1708
|
|
|
|
|
|
|
|
|
1709
|
|
|
|
|
|
|
$toc->generate_toc(inline=>1, |
|
1710
|
|
|
|
|
|
|
toc_tag => '!--toc--', |
|
1711
|
|
|
|
|
|
|
toc_tag_replace => 1, |
|
1712
|
|
|
|
|
|
|
... |
|
1713
|
|
|
|
|
|
|
); |
|
1714
|
|
|
|
|
|
|
|
|
1715
|
|
|
|
|
|
|
This will put the generated ToC after the first comment of the form |
|
1716
|
|
|
|
|
|
|
, and that comment will be replaced by the ToC |
|
1717
|
|
|
|
|
|
|
(in the order |
|
1718
|
|
|
|
|
|
|
B |
|
1719
|
|
|
|
|
|
|
B |
|
1720
|
|
|
|
|
|
|
ToC |
|
1721
|
|
|
|
|
|
|
B |
|
1722
|
|
|
|
|
|
|
followed by the rest of the input file. |
|
1723
|
|
|
|
|
|
|
|
|
1724
|
|
|
|
|
|
|
=over |
|
1725
|
|
|
|
|
|
|
|
|
1726
|
|
|
|
|
|
|
=item Note: |
|
1727
|
|
|
|
|
|
|
|
|
1728
|
|
|
|
|
|
|
The header file should not contain the beginning HTML tag |
|
1729
|
|
|
|
|
|
|
and HEAD element since the HTML file being processed should |
|
1730
|
|
|
|
|
|
|
already contain these tags/elements. |
|
1731
|
|
|
|
|
|
|
|
|
1732
|
|
|
|
|
|
|
=back |
|
1733
|
|
|
|
|
|
|
|
|
1734
|
|
|
|
|
|
|
=head1 NOTES |
|
1735
|
|
|
|
|
|
|
|
|
1736
|
|
|
|
|
|
|
=over |
|
1737
|
|
|
|
|
|
|
|
|
1738
|
|
|
|
|
|
|
=item * |
|
1739
|
|
|
|
|
|
|
|
|
1740
|
|
|
|
|
|
|
HTML::GenToc is smart enough to detect anchors inside significant |
|
1741
|
|
|
|
|
|
|
elements. If the anchor defines the NAME attribute, HTML::GenToc uses |
|
1742
|
|
|
|
|
|
|
the value. Else, it adds its own NAME attribute to the anchor. |
|
1743
|
|
|
|
|
|
|
If B is true, then it likewise checks for and uses IDs. |
|
1744
|
|
|
|
|
|
|
|
|
1745
|
|
|
|
|
|
|
=item * |
|
1746
|
|
|
|
|
|
|
|
|
1747
|
|
|
|
|
|
|
The TITLE element is treated specially if specified in the B |
|
1748
|
|
|
|
|
|
|
option. It is illegal to insert anchors (A) into TITLE elements. |
|
1749
|
|
|
|
|
|
|
Therefore, HTML::GenToc will actually link to the filename itself |
|
1750
|
|
|
|
|
|
|
instead of the TITLE element of the document. |
|
1751
|
|
|
|
|
|
|
|
|
1752
|
|
|
|
|
|
|
=item * |
|
1753
|
|
|
|
|
|
|
|
|
1754
|
|
|
|
|
|
|
HTML::GenToc will ignore a significant element if it does not contain |
|
1755
|
|
|
|
|
|
|
any non-whitespace characters. A warning message is generated if |
|
1756
|
|
|
|
|
|
|
such a condition exists. |
|
1757
|
|
|
|
|
|
|
|
|
1758
|
|
|
|
|
|
|
=item * |
|
1759
|
|
|
|
|
|
|
|
|
1760
|
|
|
|
|
|
|
If you have a sequence of significant elements that change in a slightly |
|
1761
|
|
|
|
|
|
|
disordered fashion, such as H1 -> H3 -> H2 or even H2 -> H1, though |
|
1762
|
|
|
|
|
|
|
HTML::GenToc deals with this to create a list which is still good HTML, if |
|
1763
|
|
|
|
|
|
|
you are using an ordered list to that depth, then you will get strange |
|
1764
|
|
|
|
|
|
|
numbering, as an extra list element will have been inserted to nest the |
|
1765
|
|
|
|
|
|
|
elements at the correct level. |
|
1766
|
|
|
|
|
|
|
|
|
1767
|
|
|
|
|
|
|
For example (H2 -> H1 with ol_num_levels=1): |
|
1768
|
|
|
|
|
|
|
|
|
1769
|
|
|
|
|
|
|
1. |
|
1770
|
|
|
|
|
|
|
* My H2 Header |
|
1771
|
|
|
|
|
|
|
2. My H1 Header |
|
1772
|
|
|
|
|
|
|
|
|
1773
|
|
|
|
|
|
|
For example (H1 -> H3 -> H2 with ol_num_levels=0 and H3 also being |
|
1774
|
|
|
|
|
|
|
significant): |
|
1775
|
|
|
|
|
|
|
|
|
1776
|
|
|
|
|
|
|
1. My H1 Header |
|
1777
|
|
|
|
|
|
|
1. |
|
1778
|
|
|
|
|
|
|
1. My H3 Header |
|
1779
|
|
|
|
|
|
|
2. My H2 Header |
|
1780
|
|
|
|
|
|
|
2. My Second H1 Header |
|
1781
|
|
|
|
|
|
|
|
|
1782
|
|
|
|
|
|
|
In cases such as this it may be better not to use the B option. |
|
1783
|
|
|
|
|
|
|
|
|
1784
|
|
|
|
|
|
|
=back |
|
1785
|
|
|
|
|
|
|
|
|
1786
|
|
|
|
|
|
|
=head1 CAVEATS |
|
1787
|
|
|
|
|
|
|
|
|
1788
|
|
|
|
|
|
|
=over |
|
1789
|
|
|
|
|
|
|
|
|
1790
|
|
|
|
|
|
|
=item * |
|
1791
|
|
|
|
|
|
|
|
|
1792
|
|
|
|
|
|
|
Version 3.10 (and above) generates more verbose (SEO-friendly) anchors |
|
1793
|
|
|
|
|
|
|
than prior versions. Thus anchors generated with earlier versions will |
|
1794
|
|
|
|
|
|
|
not match version 3.10 anchors. |
|
1795
|
|
|
|
|
|
|
|
|
1796
|
|
|
|
|
|
|
=item * |
|
1797
|
|
|
|
|
|
|
|
|
1798
|
|
|
|
|
|
|
Version 3.00 (and above) of HTML::GenToc is not compatible with |
|
1799
|
|
|
|
|
|
|
Version 2.x of HTML::GenToc. It is now designed to do everything |
|
1800
|
|
|
|
|
|
|
in one pass, and has dropped certain options: the B option |
|
1801
|
|
|
|
|
|
|
is no longer used (it has been replaced with the B option); |
|
1802
|
|
|
|
|
|
|
the B option no longer exists; use the B option |
|
1803
|
|
|
|
|
|
|
instead; the B option is no longer supported. Also the old |
|
1804
|
|
|
|
|
|
|
array-parsing of arguments is no longer supported. There is no longer |
|
1805
|
|
|
|
|
|
|
a B method; everything is done with B. |
|
1806
|
|
|
|
|
|
|
|
|
1807
|
|
|
|
|
|
|
It now generates lower-case tags rather than upper-case ones. |
|
1808
|
|
|
|
|
|
|
|
|
1809
|
|
|
|
|
|
|
=item * |
|
1810
|
|
|
|
|
|
|
|
|
1811
|
|
|
|
|
|
|
HTML::GenToc is not very efficient (memory and speed), and can be |
|
1812
|
|
|
|
|
|
|
slow for large documents. |
|
1813
|
|
|
|
|
|
|
|
|
1814
|
|
|
|
|
|
|
=item * |
|
1815
|
|
|
|
|
|
|
|
|
1816
|
|
|
|
|
|
|
Now that generation of anchors and of the ToC are done in one pass, |
|
1817
|
|
|
|
|
|
|
even more memory is used than was the case before. This is more notable |
|
1818
|
|
|
|
|
|
|
when processing multiple files, since all files are read into memory |
|
1819
|
|
|
|
|
|
|
before processing them. |
|
1820
|
|
|
|
|
|
|
|
|
1821
|
|
|
|
|
|
|
=item * |
|
1822
|
|
|
|
|
|
|
|
|
1823
|
|
|
|
|
|
|
Invalid markup will be generated if a significant element is |
|
1824
|
|
|
|
|
|
|
contained inside of an anchor. For example: |
|
1825
|
|
|
|
|
|
|
|
|
1826
|
|
|
|
|
|
|
The FOO command |
|
1827
|
|
|
|
|
|
|
|
|
1828
|
|
|
|
|
|
|
will be converted to (if H1 is a significant element), |
|
1829
|
|
|
|
|
|
|
|
|
1830
|
|
|
|
|
|
|
The FOO command |
|
1831
|
|
|
|
|
|
|
|
|
1832
|
|
|
|
|
|
|
which is illegal since anchors cannot be nested. |
|
1833
|
|
|
|
|
|
|
|
|
1834
|
|
|
|
|
|
|
It is better style to put anchor statements within the element to |
|
1835
|
|
|
|
|
|
|
be anchored. For example, the following is preferred: |
|
1836
|
|
|
|
|
|
|
|
|
1837
|
|
|
|
|
|
|
|
|
1838
|
|
|
|
|
|
|
|
|
1839
|
|
|
|
|
|
|
HTML::GenToc will detect the "foo" name and use it. |
|
1840
|
|
|
|
|
|
|
|
|
1841
|
|
|
|
|
|
|
=item * |
|
1842
|
|
|
|
|
|
|
|
|
1843
|
|
|
|
|
|
|
name attributes without quotes are not recognized. |
|
1844
|
|
|
|
|
|
|
|
|
1845
|
|
|
|
|
|
|
=back |
|
1846
|
|
|
|
|
|
|
|
|
1847
|
|
|
|
|
|
|
=head1 BUGS |
|
1848
|
|
|
|
|
|
|
|
|
1849
|
|
|
|
|
|
|
Tell me about them. |
|
1850
|
|
|
|
|
|
|
|
|
1851
|
|
|
|
|
|
|
=head1 REQUIRES |
|
1852
|
|
|
|
|
|
|
|
|
1853
|
|
|
|
|
|
|
The installation of this module requires C. The module |
|
1854
|
|
|
|
|
|
|
depends on C, C and C and uses |
|
1855
|
|
|
|
|
|
|
C for debugging purposes. The hypertoc script depends on |
|
1856
|
|
|
|
|
|
|
C, C and C. Testing of this |
|
1857
|
|
|
|
|
|
|
distribution depends on C. |
|
1858
|
|
|
|
|
|
|
|
|
1859
|
|
|
|
|
|
|
=head1 INSTALLATION |
|
1860
|
|
|
|
|
|
|
|
|
1861
|
|
|
|
|
|
|
To install this module, run the following commands: |
|
1862
|
|
|
|
|
|
|
|
|
1863
|
|
|
|
|
|
|
perl Build.PL |
|
1864
|
|
|
|
|
|
|
./Build |
|
1865
|
|
|
|
|
|
|
./Build test |
|
1866
|
|
|
|
|
|
|
./Build install |
|
1867
|
|
|
|
|
|
|
|
|
1868
|
|
|
|
|
|
|
Or, if you're on a platform (like DOS or Windows) that doesn't like the |
|
1869
|
|
|
|
|
|
|
"./" notation, you can do this: |
|
1870
|
|
|
|
|
|
|
|
|
1871
|
|
|
|
|
|
|
perl Build.PL |
|
1872
|
|
|
|
|
|
|
perl Build |
|
1873
|
|
|
|
|
|
|
perl Build test |
|
1874
|
|
|
|
|
|
|
perl Build install |
|
1875
|
|
|
|
|
|
|
|
|
1876
|
|
|
|
|
|
|
In order to install somewhere other than the default, such as |
|
1877
|
|
|
|
|
|
|
in a directory under your home directory, like "/home/fred/perl" |
|
1878
|
|
|
|
|
|
|
go |
|
1879
|
|
|
|
|
|
|
|
|
1880
|
|
|
|
|
|
|
perl Build.PL --install_base /home/fred/perl |
|
1881
|
|
|
|
|
|
|
|
|
1882
|
|
|
|
|
|
|
as the first step instead. |
|
1883
|
|
|
|
|
|
|
|
|
1884
|
|
|
|
|
|
|
This will install the files underneath /home/fred/perl. |
|
1885
|
|
|
|
|
|
|
|
|
1886
|
|
|
|
|
|
|
You will then need to make sure that you alter the PERL5LIB variable to |
|
1887
|
|
|
|
|
|
|
find the modules, and the PATH variable to find the script. |
|
1888
|
|
|
|
|
|
|
|
|
1889
|
|
|
|
|
|
|
Therefore you will need to change: |
|
1890
|
|
|
|
|
|
|
your path, to include /home/fred/perl/script (where the script will be) |
|
1891
|
|
|
|
|
|
|
|
|
1892
|
|
|
|
|
|
|
PATH=/home/fred/perl/script:${PATH} |
|
1893
|
|
|
|
|
|
|
|
|
1894
|
|
|
|
|
|
|
the PERL5LIB variable to add /home/fred/perl/lib |
|
1895
|
|
|
|
|
|
|
|
|
1896
|
|
|
|
|
|
|
PERL5LIB=/home/fred/perl/lib:${PERL5LIB} |
|
1897
|
|
|
|
|
|
|
|
|
1898
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
1899
|
|
|
|
|
|
|
|
|
1900
|
|
|
|
|
|
|
perl(1) |
|
1901
|
|
|
|
|
|
|
htmltoc(1) |
|
1902
|
|
|
|
|
|
|
hypertoc(1) |
|
1903
|
|
|
|
|
|
|
|
|
1904
|
|
|
|
|
|
|
=head1 AUTHOR |
|
1905
|
|
|
|
|
|
|
|
|
1906
|
|
|
|
|
|
|
Kathryn Andersen (RUBYKAT) http://www.katspace.org/tools/hypertoc/ |
|
1907
|
|
|
|
|
|
|
|
|
1908
|
|
|
|
|
|
|
Based on htmltoc by Earl Hood ehood AT medusa.acs.uci.edu |
|
1909
|
|
|
|
|
|
|
|
|
1910
|
|
|
|
|
|
|
Contributions by Dan Dascalescu, |
|
1911
|
|
|
|
|
|
|
|
|
1912
|
|
|
|
|
|
|
=head1 COPYRIGHT |
|
1913
|
|
|
|
|
|
|
|
|
1914
|
|
|
|
|
|
|
Copyright (C) 1994-1997 Earl Hood, ehood AT medusa.acs.uci.edu |
|
1915
|
|
|
|
|
|
|
Copyright (C) 2002-2008 Kathryn Andersen |
|
1916
|
|
|
|
|
|
|
|
|
1917
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify |
|
1918
|
|
|
|
|
|
|
it under the terms of the GNU General Public License as published by |
|
1919
|
|
|
|
|
|
|
the Free Software Foundation; either version 2 of the License, or |
|
1920
|
|
|
|
|
|
|
(at your option) any later version. |
|
1921
|
|
|
|
|
|
|
|
|
1922
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful, |
|
1923
|
|
|
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
1924
|
|
|
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
1925
|
|
|
|
|
|
|
GNU General Public License for more details. |
|
1926
|
|
|
|
|
|
|
|
|
1927
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License |
|
1928
|
|
|
|
|
|
|
along with this program; if not, write to the Free Software |
|
1929
|
|
|
|
|
|
|
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
|
1930
|
|
|
|
|
|
|
|
|
1931
|
|
|
|
|
|
|
=cut |