line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package HTML::GenToc; |
2
|
|
|
|
|
|
|
BEGIN { |
3
|
3
|
|
|
3
|
|
68896
|
$HTML::GenToc::VERSION = '3.20'; |
4
|
|
|
|
|
|
|
} |
5
|
3
|
|
|
3
|
|
29
|
use strict; |
|
3
|
|
|
|
|
7
|
|
|
3
|
|
|
|
|
156
|
|
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 NAME |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
HTML::GenToc - Generate a Table of Contents for HTML documents. |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
=head1 VERSION |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
version 3.20 |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=head1 SYNOPSIS |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
use HTML::GenToc; |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
# create a new object |
20
|
|
|
|
|
|
|
my $toc = new HTML::GenToc(); |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
my $toc = new HTML::GenToc(title=>"Table of Contents", |
23
|
|
|
|
|
|
|
toc_entry=>{ |
24
|
|
|
|
|
|
|
H1=>1, |
25
|
|
|
|
|
|
|
H2=>2 |
26
|
|
|
|
|
|
|
}, |
27
|
|
|
|
|
|
|
toc_end=>{ |
28
|
|
|
|
|
|
|
H1=>'/H1', |
29
|
|
|
|
|
|
|
H2=>'/H2' |
30
|
|
|
|
|
|
|
} |
31
|
|
|
|
|
|
|
); |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
# generate a ToC from a file |
34
|
|
|
|
|
|
|
$toc->generate_toc(input=>$html_file, |
35
|
|
|
|
|
|
|
footer=>$footer_file, |
36
|
|
|
|
|
|
|
header=>$header_file |
37
|
|
|
|
|
|
|
); |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
=head1 DESCRIPTION |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
HTML::GenToc generates anchors and a table of contents for |
43
|
|
|
|
|
|
|
HTML documents. Depending on the arguments, it will insert |
44
|
|
|
|
|
|
|
the information it generates, or output to a string, a separate file |
45
|
|
|
|
|
|
|
or STDOUT. |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
While it defaults to taking H1 and H2 elements as the significant |
48
|
|
|
|
|
|
|
elements to put into the table of contents, any tag can be defined |
49
|
|
|
|
|
|
|
as a significant element. Also, it doesn't matter if the input |
50
|
|
|
|
|
|
|
HTML code is complete, pure HTML, one can input pseudo-html |
51
|
|
|
|
|
|
|
or page-fragments, which makes it suitable for using on templates |
52
|
|
|
|
|
|
|
and HTML meta-languages such as WML. |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
Also included in the distribution is hypertoc, a script which uses the |
55
|
|
|
|
|
|
|
module so that one can process files on the command-line in a |
56
|
|
|
|
|
|
|
user-friendly manner. |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
=head1 DETAILS |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
The ToC generated is a multi-level list containing links to the |
61
|
|
|
|
|
|
|
significant elements. HTML::GenToc inserts the links into the ToC to |
62
|
|
|
|
|
|
|
significant elements at a level specified by the user. |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
B |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
If H1s are specified as level 1, then they appear in the first |
67
|
|
|
|
|
|
|
level list of the ToC. If H2s are specified as a level 2, then |
68
|
|
|
|
|
|
|
they appear in a second level list in the ToC. |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
Information on the significant elements and what level they should occur |
71
|
|
|
|
|
|
|
are passed in to the methods used by this object, or one can use the |
72
|
|
|
|
|
|
|
defaults. |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
There are two phases to the ToC generation. The first phase is to |
75
|
|
|
|
|
|
|
put suitable anchors into the HTML documents, and the second phase |
76
|
|
|
|
|
|
|
is to generate the ToC from HTML documents which have anchors |
77
|
|
|
|
|
|
|
in them for the ToC to link to. |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
For more information on controlling the contents of the created ToC, see |
80
|
|
|
|
|
|
|
L. |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
HTML::GenToc also supports the ability to incorporate the ToC into the HTML |
83
|
|
|
|
|
|
|
document itself via the B option. See L for more |
84
|
|
|
|
|
|
|
information. |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
In order for HTML::GenToc to support linking to significant elements, |
87
|
|
|
|
|
|
|
HTML::GenToc inserts anchors into the significant elements. One can |
88
|
|
|
|
|
|
|
use HTML::GenToc as a filter, outputting the result to another file, |
89
|
|
|
|
|
|
|
or one can overwrite the original file, with the original backed |
90
|
|
|
|
|
|
|
up with a suffix (default: "org") appended to the filename. |
91
|
|
|
|
|
|
|
One can also output the result to a string. |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
=head1 METHODS |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
Default arguments can be set when the object is created, and overridden |
96
|
|
|
|
|
|
|
by setting arguments when the generate_toc method is called. |
97
|
|
|
|
|
|
|
Arguments are given as a hash of arguments. |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
=cut |
100
|
|
|
|
|
|
|
|
101
|
3
|
|
|
3
|
|
295009
|
use Data::Dumper; |
|
3
|
|
|
|
|
52080
|
|
|
3
|
|
|
|
|
250
|
|
102
|
3
|
|
|
3
|
|
3014
|
use HTML::SimpleParse; |
|
3
|
|
|
|
|
9713
|
|
|
3
|
|
|
|
|
123
|
|
103
|
3
|
|
|
3
|
|
3089
|
use HTML::Entities; |
|
3
|
|
|
|
|
31445
|
|
|
3
|
|
|
|
|
359
|
|
104
|
3
|
|
|
3
|
|
3614
|
use HTML::LinkList; |
|
3
|
|
|
|
|
18412
|
|
|
3
|
|
|
|
|
17519
|
|
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
################################################################# |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
#---------------------------------------------------------------# |
109
|
|
|
|
|
|
|
# Object interface |
110
|
|
|
|
|
|
|
#---------------------------------------------------------------# |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
=head2 Method -- new |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
$toc = new HTML::GenToc(); |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
$toc = new HTML::GenToc(toc_entry=>\%my_toc_entry, |
117
|
|
|
|
|
|
|
toc_end=>\%my_toc_end, |
118
|
|
|
|
|
|
|
bak=>'bak', |
119
|
|
|
|
|
|
|
... |
120
|
|
|
|
|
|
|
); |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
Creates a new HTML::GenToc object. |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
These arguments will be used as defaults in invocations of other methods. |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
See L for possible arguments. |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
=cut |
129
|
|
|
|
|
|
|
# new -- construct an HTML::GenToc object.
#
# May be invoked on the class name or on an existing object; in the
# latter case the new object is blessed into that object's class.
# Any key=>value pairs passed in are merged over the built-in defaults
# listed below (a key given by the caller replaces the default).
# Returns the blessed hash reference.
sub new {
    my ($invocant, @overrides) = @_;
    my $class = ref($invocant) || $invocant;    # Object or class name

    # Built-in defaults.  The nested toc_entry/toc_end hashes are
    # created afresh on every call, so objects never share them.
    my %defaults = (
        debug           => 0,
        bak             => 'org',
        entrysep        => ', ',
        footer          => '',
        inline          => 0,
        header          => '',
        input           => '',
        notoc_match     => 'class="notoc"',
        ol              => 0,
        ol_num_levels   => 1,
        overwrite       => 0,
        outfile         => '-',
        quiet           => 0,
        textonly        => 0,
        title           => 'Table of Contents',
        toclabel        => 'Table of Contents',
        toc_tag         => '^BODY',
        toc_tag_replace => 0,
        toc_only        => 0,
        # define TOC entry elements
        toc_entry       => {
            'H1' => 1,
            'H2' => 2,
        },
        # TOC entry element terminators
        toc_end         => {
            'H1' => '/H1',
            'H2' => '/H2',
        },
        useorg          => 0,
    );

    # Caller-supplied pairs come last so that they win over the defaults.
    my $self = bless({ %defaults, @overrides }, $class);

    # With debugging switched on, dump the fully merged settings.
    if ($self->{debug})
    {
        print STDERR Dumper($self);
    }

    return $self;
} # new
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
=head2 generate_toc |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
$toc->generate_toc(outfile=>"index2.html"); |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
my $result_str = $toc->generate_toc(to_string=>1); |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
Generates a table of contents for the significant elements in the HTML |
185
|
|
|
|
|
|
|
documents, optionally generating anchors for them first. |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
B |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
=over |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
=item bak |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
bak => I |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
If the input file/files is/are being overwritten (B<overwrite> is on), copy |
196
|
|
|
|
|
|
|
the original file to "I.I". If the value is empty, B |
197
|
|
|
|
|
|
|
backup file will be created. |
198
|
|
|
|
|
|
|
(default:org) |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
=item debug |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
debug => 1 |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
Enable verbose debugging output. Used for debugging this module; |
205
|
|
|
|
|
|
|
in other words, don't bother. |
206
|
|
|
|
|
|
|
(default:off) |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
=item entrysep |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
entrysep => I |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
Separator string for non- item entries |
213
|
|
|
|
|
|
|
(default: ", ") |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
=item filenames |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
filenames => \@filenames |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
The filenames to use when creating table-of-contents links. |
220
|
|
|
|
|
|
|
This overrides the filenames given in the B<input> option, |
221
|
|
|
|
|
|
|
and is expected to have exactly the same number of elements. |
222
|
|
|
|
|
|
|
This can also be used when passing in string-content to the B<input> |
223
|
|
|
|
|
|
|
option, to give a (fake) filename to use for the links relating |
224
|
|
|
|
|
|
|
to that content. |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
=item footer |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
footer => I |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
Either the filename of the file containing footer text for ToC; |
231
|
|
|
|
|
|
|
or a string containing the footer text. |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
=item header |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
header => I |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
Either the filename of the file containing header text for ToC; |
238
|
|
|
|
|
|
|
or a string containing the header text. |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
=item ignore_only_one |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
ignore_only_one => 1 |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
If there would be only one item in the ToC, don't make a ToC. |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
=item ignore_sole_first |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
ignore_sole_first => 1 |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
If the first item in the ToC is of the highest level, |
251
|
|
|
|
|
|
|
AND it is the only one of that level, ignore it. |
252
|
|
|
|
|
|
|
This is useful in web-pages where there is only one H1 header |
253
|
|
|
|
|
|
|
but one doesn't know beforehand whether there will be only one. |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
=item inline |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
inline => 1 |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
Put ToC in document at a given point. |
260
|
|
|
|
|
|
|
See L for more information. |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
=item input |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
input => \@filenames |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
input => $content |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
This is expected to be either a reference to an array of filenames, |
269
|
|
|
|
|
|
|
or a string containing content to process. |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
The three main uses would be: |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
=over |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
=item (a) |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
you have more than one file to process, so pass in multiple filenames |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
=item (b) |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
you have one file to process, so pass in its filename as the only array item |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
=item (c) |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
you have HTML content to process, so pass in just the content as a string |
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
=back |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
(default:undefined) |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
=item notoc_match |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
notoc_match => I |
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
If there are certain individual tags you don't wish to include in the |
296
|
|
|
|
|
|
|
table of contents, even though they match the "significant elements", |
297
|
|
|
|
|
|
|
then if this pattern matches contents inside the tag (not the body), |
298
|
|
|
|
|
|
|
then that tag will not be included, either in generating anchors nor in |
299
|
|
|
|
|
|
|
generating the ToC. (default: C<class="notoc">) |
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
=item ol |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
ol => 1 |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
Use an ordered list for level 1 ToC entries. |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
=item ol_num_levels |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
ol_num_levels => 2 |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
The number of levels deep the OL listing will go if B<ol> is true. |
312
|
|
|
|
|
|
|
If set to zero, will use an ordered list for all levels. |
313
|
|
|
|
|
|
|
(default:1) |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
=item overwrite |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
overwrite => 1 |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
Overwrite the input file with the output. |
320
|
|
|
|
|
|
|
(default:off) |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
=item outfile |
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
outfile => I |
325
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
File to write the output to. This is where the modified HTML |
327
|
|
|
|
|
|
|
output goes to. Note that it doesn't make sense to use this option if you |
328
|
|
|
|
|
|
|
are processing more than one file. If you give '-' as the filename, then |
329
|
|
|
|
|
|
|
output will go to STDOUT. |
330
|
|
|
|
|
|
|
(default: STDOUT) |
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
=item quiet |
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
quiet => 1 |
335
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
Suppress informative messages. (default: off) |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
=item textonly |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
textonly => 1 |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
Use only text content in significant elements. |
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
=item title |
345
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
title => I |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
Title for ToC page (if not using B |
349
|
|
|
|
|
|
|
(default: "Table of Contents") |
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
=item toc_after |
352
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
toc_after => \%toc_after_data |
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
%toc_after_data = { I => I, |
356
|
|
|
|
|
|
|
I => I |
357
|
|
|
|
|
|
|
}; |
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
toc_after => { H2=>'' } |
360
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
For defining layout of significant elements in the ToC. |
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
This expects a reference to a hash of |
364
|
|
|
|
|
|
|
tag=>suffix pairs. |
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
The I is the HTML tag which marks the start of the element. The |
367
|
|
|
|
|
|
|
I is what is required to be appended to the Table of Contents |
368
|
|
|
|
|
|
|
entry generated for that tag. |
369
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
(default: undefined) |
371
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
=item toc_before |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
toc_before => \%toc_before_data |
375
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
%toc_before_data = { I => I, |
377
|
|
|
|
|
|
|
I => I |
378
|
|
|
|
|
|
|
}; |
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
toc_before=>{ H2=>'' } |
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
For defining the layout of significant elements in the ToC. The I |
383
|
|
|
|
|
|
|
is the HTML tag which marks the start of the element. The I is |
384
|
|
|
|
|
|
|
what is required to be prepended to the Table of Contents entry |
385
|
|
|
|
|
|
|
generated for that tag. |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
(default: undefined) |
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
=item toc_end |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
toc_end => \%toc_end_data |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
%toc_end_data = { I => I, |
394
|
|
|
|
|
|
|
I => I |
395
|
|
|
|
|
|
|
}; |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
toc_end => { H1 => '/H1', H2 => '/H2' } |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
For defining significant elements. The I is the HTML tag which |
400
|
|
|
|
|
|
|
marks the start of the element. The I the HTML tag which marks |
401
|
|
|
|
|
|
|
the end of the element. When matching in the input file, case is |
402
|
|
|
|
|
|
|
ignored (but make sure that all your I options referring to the |
403
|
|
|
|
|
|
|
same tag are exactly the same!). |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
=item toc_entry |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
toc_entry => \%toc_entry_data |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
%toc_entry_data = { I => I, |
410
|
|
|
|
|
|
|
I => I |
411
|
|
|
|
|
|
|
}; |
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
toc_entry => { H1 => 1, H2 => 2 } |
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
For defining significant elements. The I is the HTML tag which marks |
416
|
|
|
|
|
|
|
the start of the element. The I is what level the tag is considered |
417
|
|
|
|
|
|
|
to be. The value of I must be numeric, and non-zero. If the value |
418
|
|
|
|
|
|
|
is negative, consecutive entries represented by the significant_element will |
419
|
|
|
|
|
|
|
be separated by the value set by B<entrysep> option. |
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
=item toclabel |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
toclabel => I |
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
HTML text that labels the ToC. Always used. |
426
|
|
|
|
|
|
|
(default: "Table of Contents") |
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
=item toc_tag |
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
toc_tag => I |
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
If a ToC is to be included inline, this is the pattern which is used to |
433
|
|
|
|
|
|
|
match the tag where the ToC should be put. This can be a start-tag, an |
434
|
|
|
|
|
|
|
end-tag or a comment, but the E<lt> should be left out; that is, if you |
435
|
|
|
|
|
|
|
want the ToC to be placed after the BODY tag, then give "BODY". If you |
436
|
|
|
|
|
|
|
want a special comment tag to make where the ToC should go, then include |
437
|
|
|
|
|
|
|
the comment marks, for example: "!--toc--" (default:BODY) |
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
=item toc_tag_replace |
440
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
toc_tag_replace => 1 |
442
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
In conjunction with B<toc_tag>, this is a flag to say whether the given tag |
444
|
|
|
|
|
|
|
should be replaced, or if the ToC should be put after the tag. |
445
|
|
|
|
|
|
|
This can be useful if your toc_tag is a comment and you don't need it |
446
|
|
|
|
|
|
|
after you have the ToC in place. |
447
|
|
|
|
|
|
|
(default:false) |
448
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
=item toc_only |
450
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
toc_only => 1 |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
Output only the Table of Contents, that is, the Table of Contents plus |
454
|
|
|
|
|
|
|
the toclabel. If there is a B |
455
|
|
|
|
|
|
|
output. |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
If B is false then if there is no B |
458
|
|
|
|
|
|
|
not true, then a suitable HTML page header will be output, and if there |
459
|
|
|
|
|
|
|
is no B |
460
|
|
|
|
|
|
|
be output. |
461
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
(default:false) |
463
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
=item to_string |
465
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
to_string => 1 |
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
Return the modified HTML output as a string. This I override |
469
|
|
|
|
|
|
|
other methods of output (unlike version 3.00). If I<to_string> is false, |
470
|
|
|
|
|
|
|
the method will return 1 rather than a string. |
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
=item use_id |
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
use_id => 1 |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
Use id="I" for anchors rather than anchors. |
477
|
|
|
|
|
|
|
However if an anchor already exists for a Significant Element, this |
478
|
|
|
|
|
|
|
won't make an id for that particular element. |
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
=item useorg |
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
useorg => 1 |
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
Use pre-existing backup files as the input source; that is, files of the |
485
|
|
|
|
|
|
|
form I.I (see B and B). |
486
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
=back |
488
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
=cut |
490
|
|
|
|
|
|
|
sub generate_toc ($%) { |
491
|
27
|
|
|
27
|
1
|
42718
|
my $self = shift; |
492
|
27
|
|
|
|
|
564
|
my %args = ( |
493
|
|
|
|
|
|
|
make_anchors=>1, |
494
|
|
|
|
|
|
|
make_toc=>1, |
495
|
|
|
|
|
|
|
input=>undef, |
496
|
|
|
|
|
|
|
filenames=>undef, |
497
|
|
|
|
|
|
|
bak=>$self->{bak}, |
498
|
|
|
|
|
|
|
debug=>$self->{debug}, |
499
|
|
|
|
|
|
|
useorg=>$self->{useorg}, |
500
|
|
|
|
|
|
|
use_id=>$self->{use_id}, |
501
|
|
|
|
|
|
|
notoc_match=>$self->{notoc_match}, |
502
|
|
|
|
|
|
|
toc_entry=>$self->{toc_entry}, |
503
|
|
|
|
|
|
|
toc_end=>$self->{toc_end}, |
504
|
|
|
|
|
|
|
overwrite=>$self->{overwrite}, |
505
|
|
|
|
|
|
|
ol=>$self->{ol}, |
506
|
|
|
|
|
|
|
ol_num_levels=>$self->{ol_num_levels}, |
507
|
|
|
|
|
|
|
entrysep=>$self->{entrysep}, |
508
|
|
|
|
|
|
|
ignore_only_one=>$self->{ignore_only_one}, |
509
|
|
|
|
|
|
|
@_ |
510
|
|
|
|
|
|
|
); |
511
|
|
|
|
|
|
|
|
512
|
27
|
50
|
|
|
|
125
|
if ($args{debug}) |
513
|
|
|
|
|
|
|
{ |
514
|
0
|
|
|
|
|
0
|
print STDERR Dumper(\%args); |
515
|
|
|
|
|
|
|
} |
516
|
27
|
50
|
|
|
|
92
|
if (!$args{input}) |
517
|
|
|
|
|
|
|
{ |
518
|
0
|
|
|
|
|
0
|
warn "generate_toc: no input given\n"; |
519
|
0
|
|
|
|
|
0
|
return ''; |
520
|
|
|
|
|
|
|
} |
521
|
|
|
|
|
|
|
# |
522
|
|
|
|
|
|
|
# get the input |
523
|
|
|
|
|
|
|
# |
524
|
27
|
|
|
|
|
62
|
my @filenames = (); |
525
|
27
|
|
|
|
|
47
|
my @input = (); |
526
|
27
|
100
|
|
|
|
89
|
if (ref $args{input} eq "ARRAY") |
527
|
|
|
|
|
|
|
{ |
528
|
19
|
|
|
|
|
26
|
@filenames = @{$args{input}}; |
|
19
|
|
|
|
|
43
|
|
529
|
19
|
|
|
|
|
28
|
my $i = 0; |
530
|
19
|
|
|
|
|
29
|
my $fh_needs_closing = 0; |
531
|
19
|
|
|
|
|
45
|
foreach my $fn (@filenames) |
532
|
|
|
|
|
|
|
{ |
533
|
19
|
|
|
|
|
27
|
my $infn = $fn; |
534
|
19
|
|
|
|
|
57
|
my $bakfile = $fn . "." . $args{bak}; |
535
|
19
|
0
|
33
|
|
|
71
|
if ($args{useorg} |
|
|
|
33
|
|
|
|
|
536
|
|
|
|
|
|
|
&& $args{bak} |
537
|
|
|
|
|
|
|
&& -e $bakfile) |
538
|
|
|
|
|
|
|
{ |
539
|
|
|
|
|
|
|
# use the old backup files as source |
540
|
0
|
|
|
|
|
0
|
$infn = $bakfile; |
541
|
|
|
|
|
|
|
} |
542
|
19
|
|
|
|
|
29
|
my $fh = undef; |
543
|
|
|
|
|
|
|
# using '-' means STDIN |
544
|
19
|
50
|
|
|
|
41
|
if ($infn eq '-') |
545
|
|
|
|
|
|
|
{ |
546
|
0
|
|
|
|
|
0
|
$fh = *STDIN; |
547
|
0
|
|
|
|
|
0
|
$fh_needs_closing = 0; |
548
|
|
|
|
|
|
|
} |
549
|
|
|
|
|
|
|
else |
550
|
|
|
|
|
|
|
{ |
551
|
19
|
50
|
|
|
|
6198
|
open ($fh, $infn) || |
552
|
|
|
|
|
|
|
die "Error: unable to open ", $infn, ": $!\n"; |
553
|
19
|
|
|
|
|
51
|
$fh_needs_closing = 1; |
554
|
|
|
|
|
|
|
} |
555
|
|
|
|
|
|
|
|
556
|
19
|
|
|
|
|
33
|
my $content = ''; |
557
|
|
|
|
|
|
|
{ |
558
|
19
|
|
|
|
|
37
|
local $/; # slurp entire file |
|
19
|
|
|
|
|
78
|
|
559
|
19
|
|
|
|
|
553
|
$content = <$fh>; |
560
|
19
|
50
|
|
|
|
245
|
close ($fh) if ($fh_needs_closing); |
561
|
|
|
|
|
|
|
} |
562
|
19
|
|
|
|
|
42
|
$input[$i] = $content; |
563
|
|
|
|
|
|
|
|
564
|
19
|
|
|
|
|
94
|
$i++; |
565
|
|
|
|
|
|
|
} |
566
|
|
|
|
|
|
|
} |
567
|
|
|
|
|
|
|
else |
568
|
|
|
|
|
|
|
{ |
569
|
8
|
|
|
|
|
14
|
$filenames[0] = ''; |
570
|
8
|
|
|
|
|
16
|
$input[0] = $args{input}; |
571
|
|
|
|
|
|
|
} |
572
|
|
|
|
|
|
|
# overwrite the filenames array if a replacement |
573
|
|
|
|
|
|
|
# was passed in and has the same length |
574
|
27
|
50
|
66
|
|
|
127
|
if (defined $args{filenames} |
|
6
|
|
66
|
|
|
27
|
|
575
|
6
|
|
|
|
|
41
|
&& @{$args{filenames}} |
576
|
|
|
|
|
|
|
&& $#{$args{filenames}} == $#{filenames} |
577
|
|
|
|
|
|
|
) |
578
|
|
|
|
|
|
|
{ |
579
|
6
|
|
|
|
|
8
|
@filenames = @{$args{filenames}}; |
|
6
|
|
|
|
|
14
|
|
580
|
|
|
|
|
|
|
} |
581
|
|
|
|
|
|
|
|
582
|
|
|
|
|
|
|
# |
583
|
|
|
|
|
|
|
# make the anchors |
584
|
|
|
|
|
|
|
# |
585
|
27
|
100
|
|
|
|
89
|
if ($args{make_anchors}) |
586
|
|
|
|
|
|
|
{ |
587
|
12
|
|
|
|
|
22
|
my $i = 0; |
588
|
12
|
|
|
|
|
27
|
foreach my $fn (@filenames) |
589
|
|
|
|
|
|
|
{ |
590
|
12
|
|
|
|
|
20
|
my $html_str = $input[$i]; |
591
|
12
|
|
|
|
|
103
|
$input[$i] = $self->make_anchors(%args, |
592
|
|
|
|
|
|
|
filename=>$fn, |
593
|
|
|
|
|
|
|
input=>$html_str); |
594
|
12
|
|
|
|
|
59
|
$i++; |
595
|
|
|
|
|
|
|
} |
596
|
|
|
|
|
|
|
} |
597
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
# |
599
|
|
|
|
|
|
|
# make the ToC |
600
|
|
|
|
|
|
|
# |
601
|
27
|
|
|
|
|
53
|
my $toc_str = ''; |
602
|
27
|
100
|
|
|
|
86
|
if ($args{make_toc}) |
603
|
|
|
|
|
|
|
{ |
604
|
17
|
|
|
|
|
38
|
my %labels = (); |
605
|
17
|
|
|
|
|
29
|
my @list_of_lists = (); |
606
|
17
|
|
|
|
|
28
|
my $i = 0; |
607
|
17
|
|
|
|
|
58
|
for (my $i = 0; $i < @filenames; $i++) |
608
|
|
|
|
|
|
|
{ |
609
|
17
|
|
|
|
|
149
|
my @the_list = $self->make_toc_list(%args, |
610
|
|
|
|
|
|
|
first_file=>$filenames[0], |
611
|
|
|
|
|
|
|
labels=>\%labels, |
612
|
|
|
|
|
|
|
filename=>$filenames[$i], |
613
|
|
|
|
|
|
|
input=>$input[$i]); |
614
|
17
|
100
|
66
|
|
|
119
|
if (!($args{ignore_only_one} |
615
|
|
|
|
|
|
|
and @the_list <= 1)) |
616
|
|
|
|
|
|
|
{ |
617
|
16
|
|
|
|
|
92
|
push @list_of_lists, @the_list; |
618
|
|
|
|
|
|
|
} |
619
|
|
|
|
|
|
|
} |
620
|
17
|
100
|
|
|
|
55
|
if (@list_of_lists > 0) |
621
|
|
|
|
|
|
|
{ |
622
|
|
|
|
|
|
|
# |
623
|
|
|
|
|
|
|
# create the appropriate format |
624
|
|
|
|
|
|
|
# |
625
|
16
|
|
|
|
|
84
|
my %formats = (); |
626
|
|
|
|
|
|
|
# check for non-list entries, flagged by negative levels |
627
|
16
|
|
|
|
|
29
|
while (my ($key, $val) = each %{$args{toc_entry}}) |
|
51
|
|
|
|
|
176
|
|
628
|
|
|
|
|
|
|
{ |
629
|
35
|
100
|
|
|
|
88
|
if ($val < 0) |
630
|
|
|
|
|
|
|
{ |
631
|
1
|
|
|
|
|
4
|
$formats{abs($val) - 1} = {}; |
632
|
1
|
|
|
|
|
4
|
$formats{abs($val) - 1}->{tree_head} = ' |
633
|
1
|
|
|
|
|
2
|
$formats{abs($val) - 1}->{tree_foot} = "\n\n"; |
634
|
1
|
|
|
|
|
4
|
$formats{abs($val) - 1}->{item_sep} = $args{entrysep}; |
635
|
1
|
|
|
|
|
2
|
$formats{abs($val) - 1}->{pre_item} = ''; |
636
|
1
|
|
|
|
|
3
|
$formats{abs($val) - 1}->{post_item} = ''; |
637
|
|
|
|
|
|
|
} |
638
|
|
|
|
|
|
|
} |
639
|
|
|
|
|
|
|
# check for OL |
640
|
16
|
100
|
|
|
|
51
|
if ($args{ol}) |
641
|
|
|
|
|
|
|
{ |
642
|
4
|
|
|
|
|
12
|
$formats{0} = {}; |
643
|
4
|
|
|
|
|
14
|
$formats{0}->{tree_head} = ''; |
644
|
4
|
|
|
|
|
11
|
$formats{0}->{tree_foot} = "\n"; |
645
|
4
|
100
|
|
|
|
11
|
if ($args{ol_num_levels} > 0) |
646
|
|
|
|
|
|
|
{ |
647
|
3
|
|
|
|
|
9
|
$formats{$args{ol_num_levels}} = {}; |
648
|
3
|
|
|
|
|
10
|
$formats{$args{ol_num_levels}}->{tree_head} = ' |
649
|
3
|
|
|
|
|
9
|
$formats{$args{ol_num_levels}}->{tree_foot} = "\n"; |
650
|
|
|
|
|
|
|
} |
651
|
|
|
|
|
|
|
} |
652
|
16
|
|
|
|
|
160
|
$toc_str = HTML::LinkList::link_tree( |
653
|
|
|
|
|
|
|
%args, |
654
|
|
|
|
|
|
|
link_tree=>\@list_of_lists, |
655
|
|
|
|
|
|
|
labels=>\%labels, |
656
|
|
|
|
|
|
|
formats=>\%formats, |
657
|
|
|
|
|
|
|
); |
658
|
|
|
|
|
|
|
} |
659
|
|
|
|
|
|
|
} |
660
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
# |
662
|
|
|
|
|
|
|
# put the output |
663
|
|
|
|
|
|
|
# |
664
|
27
|
|
|
|
|
18523
|
my $ret = $self->output_toc( |
665
|
|
|
|
|
|
|
%args, |
666
|
|
|
|
|
|
|
toc=>$toc_str, |
667
|
|
|
|
|
|
|
input=>\@input, |
668
|
|
|
|
|
|
|
filenames=>\@filenames, |
669
|
|
|
|
|
|
|
); |
670
|
|
|
|
|
|
|
|
671
|
27
|
|
|
|
|
267
|
return $ret; |
672
|
|
|
|
|
|
|
|
673
|
|
|
|
|
|
|
} # generate_toc |
674
|
|
|
|
|
|
|
|
675
|
|
|
|
|
|
|
=head1 INTERNAL METHODS |
676
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
These methods are documented for developer purposes and aren't intended |
678
|
|
|
|
|
|
|
to be used externally. |
679
|
|
|
|
|
|
|
|
680
|
|
|
|
|
|
|
=head2 make_anchor_name |
681
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
$toc->make_anchor_name(content=>$content, |
683
|
|
|
|
|
|
|
anchors=>\%anchors); |
684
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
Makes the anchor-name for one anchor. |
686
|
|
|
|
|
|
|
Bases the anchor on the content of the significant element. |
687
|
|
|
|
|
|
|
Ensures that anchors are unique. |
688
|
|
|
|
|
|
|
|
689
|
|
|
|
|
|
|
=cut |
690
|
|
|
|
|
|
|
|
691
|
|
|
|
|
|
|
# make_anchor_name -- derive a unique anchor name from element content.
#
# Arguments (key=>value pairs):
#   content  the text of the significant element
#   anchors  reference to a hash of anchor names already in use,
#            keyed by the lower-cased name
# Returns the anchor name.  This method only reads the anchors hash;
# it does not add the new name to it.
#
# The ($%) prototype is retained so the declared interface is unchanged;
# Perl ignores prototypes on method calls.
sub make_anchor_name ($%) {
    my $self = shift;
    my %args = (
        content => '',      # will be overwritten by one of @_
        anchors => undef,
        @_
    );
    # the anchor name will most often be very close to the token content
    my $name = $args{content};

    if ($name =~ /\S/)
    {
        # generate a SEO-friendly anchor right from the token content
        # The allowed character set is limited first by the URI specification
        # for fragments, http://tools.ietf.org/html/rfc3986#section-2:
        # characters then by the limitations of the values of 'id' and 'name'
        # attributes: http://www.w3.org/TR/REC-html40/types.html#type-name
        # Eventually, the only punctuation allowed in id values is [_.:-]

        # we need to replace [#&;] only when they are NOT part of an HTML
        # entity. decode_entities saves us from crafting a nasty regexp
        decode_entities($name);

        # Every character outside whitespace, word characters and [_.:-]
        # becomes '.' followed by its code point in hex.
        # MediaWiki also uses the period, see
        # http://en.wikipedia.org/wiki/Hierarchies#Ethics.2C_behavioral_psychology.2C_philosophies_of_identity
        $name =~ s/([^\s\w_.:-])/'.'.sprintf('%02X', ord($1))/eg;

        # runs of whitespace collapse to a single underscore
        $name =~ s/\s+/_/g;

        # "ID and NAME tokens must begin with a letter ([A-Za-z])"
        $name =~ s/^[^a-zA-Z]+//;
    }
    else
    {
        # blank content: fall back to a fixed stem
        $name = 'id';
    }
    # stripping the leading non-letters may have left nothing at all
    $name = 'id' if $name eq '';

    # check if it already exists; if so, add a number
    # Reference: http://www.w3.org/TR/REC-html40/struct/links.html#h-12.2.1
    # Anchor names must be unique within a document. Anchor names that differ
    # only in case may not appear in the same document.
    my $word_name = $name;
    my $anch_num  = 1;
    while ($args{anchors}->{lc $name})
    {
        $name = $word_name . "_$anch_num";
        $anch_num++;
    }

    return $name;
} # make_anchor_name
742
|
|
|
|
|
|
|
|
743
|
|
|
|
|
|
|
=head2 make_anchors |
744
|
|
|
|
|
|
|
|
745
|
|
|
|
|
|
|
my $new_html = $toc->make_anchors(input=>$html, |
746
|
|
|
|
|
|
|
notoc_match=>$notoc_match, |
747
|
|
|
|
|
|
|
use_id=>$use_id, |
748
|
|
|
|
|
|
|
toc_entry=>\%toc_entries, |
749
|
|
|
|
|
|
|
toc_end=>\%toc_ends, |
750
|
|
|
|
|
|
|
); |
751
|
|
|
|
|
|
|
|
752
|
|
|
|
|
|
|
Makes the anchors for the given input string. |
753
|
|
|
|
|
|
|
Returns a string. |
754
|
|
|
|
|
|
|
|
755
|
|
|
|
|
|
|
=cut |
756
|
|
|
|
|
|
|
|
757
|
|
|
|
|
|
|
sub make_anchors ($%) { |
758
|
12
|
|
|
12
|
1
|
19
|
my $self = shift; |
759
|
12
|
|
|
|
|
183
|
my %args = ( |
760
|
|
|
|
|
|
|
input=>'', |
761
|
|
|
|
|
|
|
notoc_match=>$self->{notoc_match}, |
762
|
|
|
|
|
|
|
use_id=>$self->{use_id}, |
763
|
|
|
|
|
|
|
toc_entry=>$self->{toc_entry}, |
764
|
|
|
|
|
|
|
toc_end=>$self->{toc_end}, |
765
|
|
|
|
|
|
|
debug=>$self->{debug}, |
766
|
|
|
|
|
|
|
quiet=>$self->{quiet}, |
767
|
|
|
|
|
|
|
@_ |
768
|
|
|
|
|
|
|
); |
769
|
12
|
|
|
|
|
26
|
my $html_str = $args{input}; |
770
|
|
|
|
|
|
|
|
771
|
12
|
50
|
66
|
|
|
67
|
print STDERR "Making anchors for ", $args{filename}, "...\n" |
772
|
|
|
|
|
|
|
if (!$args{quiet} && $args{filename}); |
773
|
|
|
|
|
|
|
|
774
|
12
|
|
|
|
|
27
|
my @newhtml = (); |
775
|
12
|
|
|
|
|
17
|
my %anchors = (); |
776
|
|
|
|
|
|
|
# Note that the keys to the anchors hash should be lower-cased |
777
|
|
|
|
|
|
|
# since anchor names that differ only in case are not allowed. |
778
|
|
|
|
|
|
|
|
779
|
|
|
|
|
|
|
# parse the HTML |
780
|
12
|
|
|
|
|
120
|
my $hp = new HTML::SimpleParse(); |
781
|
12
|
|
|
|
|
181
|
$hp->text($html_str); |
782
|
12
|
|
|
|
|
113
|
$hp->parse(); |
783
|
|
|
|
|
|
|
|
784
|
12
|
|
|
|
|
10428
|
my $tag; |
785
|
|
|
|
|
|
|
my $endtag; |
786
|
12
|
|
|
|
|
21
|
my $level = 0; |
787
|
12
|
|
|
|
|
20
|
my $tmp; |
788
|
12
|
|
|
|
|
21
|
my $adone = 0; |
789
|
12
|
|
|
|
|
22
|
my $name = ''; |
790
|
|
|
|
|
|
|
# go through the HTML |
791
|
12
|
|
|
|
|
20
|
my $tok; |
792
|
|
|
|
|
|
|
my $next_tok; |
793
|
0
|
|
|
|
|
0
|
my $i; |
794
|
12
|
|
|
|
|
23
|
my $notoc = $args{notoc_match}; |
795
|
12
|
|
|
|
|
77
|
my @tree = $hp->tree(); |
796
|
12
|
|
|
|
|
321
|
while (@tree) { |
797
|
1012
|
|
|
|
|
1276
|
$tok = shift @tree; |
798
|
1012
|
|
|
|
|
1187
|
$next_tok = $tree[0]; |
799
|
1012
|
100
|
|
|
|
19738
|
if ($tok->{type} ne 'starttag') |
800
|
|
|
|
|
|
|
{ |
801
|
698
|
|
|
|
|
1657
|
push @newhtml, $hp->execute($tok); |
802
|
698
|
|
|
|
|
7160
|
next; |
803
|
|
|
|
|
|
|
} |
804
|
|
|
|
|
|
|
# assert: we have a start tag |
805
|
314
|
|
|
|
|
381
|
$level = 0; |
806
|
|
|
|
|
|
|
|
807
|
|
|
|
|
|
|
# check if tag included in TOC (significant element) |
808
|
314
|
|
|
|
|
324
|
foreach my $key (keys %{$args{toc_entry}}) { |
|
314
|
|
|
|
|
833
|
|
809
|
613
|
100
|
66
|
|
|
6435
|
if ($tok->{content} =~ /^$key/i |
|
|
|
100
|
|
|
|
|
810
|
|
|
|
|
|
|
&& (!$notoc |
811
|
|
|
|
|
|
|
|| $tok->{content} !~ /$notoc/)) { |
812
|
48
|
|
|
|
|
61
|
$tag = $key; |
813
|
|
|
|
|
|
|
# level of significant element |
814
|
48
|
|
|
|
|
88
|
$level = abs($args{toc_entry}->{$key}); |
815
|
|
|
|
|
|
|
# End tag of significant element |
816
|
48
|
|
|
|
|
92
|
$endtag = $args{toc_end}->{$key}; |
817
|
48
|
|
|
|
|
82
|
last; |
818
|
|
|
|
|
|
|
} |
819
|
|
|
|
|
|
|
} |
820
|
|
|
|
|
|
|
# if $level is not set, we didn't find a Significant tag |
821
|
314
|
100
|
|
|
|
8978
|
if (!$level) { |
822
|
266
|
|
|
|
|
739
|
push @newhtml, $hp->execute($tok); |
823
|
266
|
|
|
|
|
2858
|
next; |
824
|
|
|
|
|
|
|
} |
825
|
|
|
|
|
|
|
# assert: current tag is a Significant tag |
826
|
|
|
|
|
|
|
|
827
|
|
|
|
|
|
|
# |
828
|
|
|
|
|
|
|
# Add A element or ID to document |
829
|
|
|
|
|
|
|
# |
830
|
48
|
|
|
|
|
59
|
my $name_in_anchor = 0; |
831
|
48
|
|
|
|
|
54
|
$adone = 0; |
832
|
48
|
|
|
|
|
59
|
$name = ''; |
833
|
48
|
|
|
|
|
60
|
my $sig_tok = $tok; |
834
|
48
|
50
|
|
|
|
123
|
if ($tag =~ /title/i) { # TITLE tag is a special case |
835
|
0
|
|
|
|
|
0
|
$adone = 1; |
836
|
|
|
|
|
|
|
} |
837
|
48
|
100
|
|
|
|
88
|
if ($args{use_id}) |
838
|
|
|
|
|
|
|
{ |
839
|
|
|
|
|
|
|
# is there an existing ID? |
840
|
9
|
100
|
|
|
|
49
|
if ($sig_tok->{content} =~ /ID\s*=\s*(['"])/i) { |
841
|
1
|
|
|
|
|
7
|
my $q = $1; |
842
|
1
|
|
|
|
|
33
|
($name) = $sig_tok->{content} =~ m/ID\s*=\s*$q([^$q]*)$q/i; |
843
|
1
|
50
|
|
|
|
7
|
if ($name) |
844
|
|
|
|
|
|
|
{ |
845
|
1
|
|
|
|
|
5
|
$anchors{lc $name} = $name; |
846
|
1
|
|
|
|
|
6
|
push @newhtml, $hp->execute($sig_tok); |
847
|
1
|
|
|
|
|
13
|
$adone = 1; |
848
|
|
|
|
|
|
|
} |
849
|
|
|
|
|
|
|
else # if the ID has no name, remove it! |
850
|
|
|
|
|
|
|
{ |
851
|
0
|
|
|
|
|
0
|
$sig_tok->{content} =~ s/ID\s*=\s*$q$q//i; |
852
|
|
|
|
|
|
|
} |
853
|
|
|
|
|
|
|
} |
854
|
|
|
|
|
|
|
} |
855
|
|
|
|
|
|
|
else # not adding ID, move right along |
856
|
|
|
|
|
|
|
{ |
857
|
39
|
|
|
|
|
131
|
push @newhtml, $hp->execute($tok); |
858
|
|
|
|
|
|
|
} |
859
|
|
|
|
|
|
|
# Find the "name" of the significant element |
860
|
|
|
|
|
|
|
# Don't consume the tree, because ID behaves differently from A |
861
|
48
|
|
|
|
|
412
|
my $i = 0; |
862
|
48
|
|
66
|
|
|
527
|
while (!$name && $i < @tree) |
863
|
|
|
|
|
|
|
{ |
864
|
47
|
|
|
|
|
76
|
$tok = $tree[$i]; |
865
|
47
|
|
|
|
|
109
|
$next_tok = $tree[$i + 1]; |
866
|
47
|
100
|
33
|
|
|
226
|
if ($tok->{type} eq 'text') { |
|
|
50
|
33
|
|
|
|
|
|
|
0
|
0
|
|
|
|
|
867
|
44
|
|
|
|
|
153
|
$name = $self->make_anchor_name(content=>$tok->{content}, |
868
|
|
|
|
|
|
|
anchors=>\%anchors); |
869
|
|
|
|
|
|
|
# Anchor |
870
|
|
|
|
|
|
|
} elsif (!$adone && $tok->{type} eq 'starttag' |
871
|
|
|
|
|
|
|
&& $tok->{content} =~ /^A/i) |
872
|
|
|
|
|
|
|
{ |
873
|
3
|
50
|
|
|
|
24
|
if ($tok->{content} =~ /NAME\s*=\s*(['"])/i) { |
|
|
0
|
|
|
|
|
|
874
|
3
|
|
|
|
|
8
|
my $q = $1; |
875
|
3
|
|
|
|
|
43
|
($name) = $tok->{content} =~ m/NAME\s*=\s*$q([^$q]*)$q/i; |
876
|
3
|
|
|
|
|
6
|
$name_in_anchor = 1; |
877
|
|
|
|
|
|
|
} elsif ($next_tok->{type} eq 'text') { |
878
|
0
|
|
|
|
|
0
|
$name = $self->make_anchor_name(content=>$next_tok->{content}, |
879
|
|
|
|
|
|
|
anchors=>\%anchors); |
880
|
|
|
|
|
|
|
} |
881
|
|
|
|
|
|
|
} elsif ($tok->{type} eq 'starttag' |
882
|
|
|
|
|
|
|
|| $tok->{type} eq 'endtag') |
883
|
|
|
|
|
|
|
{ # Tag |
884
|
0
|
0
|
|
|
|
0
|
last if $tok->{content} =~ m|$endtag|i; |
885
|
|
|
|
|
|
|
} |
886
|
47
|
|
|
|
|
128
|
$i++; |
887
|
|
|
|
|
|
|
} |
888
|
|
|
|
|
|
|
# assert: there is a name, or there is no name to be found |
889
|
48
|
50
|
|
|
|
95
|
if (!$name) |
890
|
|
|
|
|
|
|
{ |
891
|
|
|
|
|
|
|
# make up a name |
892
|
0
|
|
|
|
|
0
|
$name = $self->make_anchor_name(content=>"TOC", |
893
|
|
|
|
|
|
|
anchors=>\%anchors); |
894
|
|
|
|
|
|
|
} |
895
|
48
|
100
|
100
|
|
|
221
|
if (!$adone && $args{use_id}) |
896
|
|
|
|
|
|
|
{ |
897
|
8
|
50
|
|
|
|
17
|
if (!$name_in_anchor) |
898
|
|
|
|
|
|
|
{ |
899
|
8
|
|
|
|
|
23
|
$anchors{lc $name} = $name; |
900
|
|
|
|
|
|
|
# add the ID |
901
|
8
|
|
|
|
|
22
|
$sig_tok->{content} .= " id='$name'"; |
902
|
8
|
|
|
|
|
26
|
push @newhtml, $hp->execute($sig_tok); |
903
|
8
|
|
|
|
|
79
|
$adone = 1; |
904
|
|
|
|
|
|
|
} |
905
|
|
|
|
|
|
|
else |
906
|
|
|
|
|
|
|
{ |
907
|
|
|
|
|
|
|
# we have an already-named anchor, so don't add an ID |
908
|
0
|
|
|
|
|
0
|
push @newhtml, $hp->execute($sig_tok); |
909
|
|
|
|
|
|
|
} |
910
|
|
|
|
|
|
|
} |
911
|
|
|
|
|
|
|
|
912
|
48
|
|
|
|
|
106
|
while (@tree) { |
913
|
102
|
|
|
|
|
231
|
$tok = shift @tree; |
914
|
102
|
|
|
|
|
130
|
$next_tok = $tree[0]; |
915
|
|
|
|
|
|
|
# Text |
916
|
102
|
100
|
66
|
|
|
519
|
if ($tok->{type} eq 'text') { |
|
|
100
|
66
|
|
|
|
|
|
|
50
|
33
|
|
|
|
|
917
|
48
|
100
|
66
|
|
|
240
|
if (!$adone && $tok->{content} !~ /^\s*$/) { |
918
|
36
|
|
|
|
|
94
|
$anchors{lc $name} = $name; |
919
|
|
|
|
|
|
|
# replace the text with an anchor containing the text |
920
|
36
|
|
|
|
|
96
|
push(@newhtml, qq|$tok->{content}|); |
921
|
36
|
|
|
|
|
83
|
$adone = 1; |
922
|
|
|
|
|
|
|
} else { |
923
|
12
|
|
|
|
|
35
|
push @newhtml, $hp->execute($tok); |
924
|
|
|
|
|
|
|
} |
925
|
|
|
|
|
|
|
# Anchor |
926
|
|
|
|
|
|
|
} elsif (!$adone && $tok->{type} eq 'starttag' |
927
|
|
|
|
|
|
|
&& $tok->{content} =~ /^A/i) |
928
|
|
|
|
|
|
|
{ |
929
|
|
|
|
|
|
|
# is there an existing NAME anchor? |
930
|
3
|
50
|
|
|
|
6
|
if ($name_in_anchor) { |
931
|
3
|
|
|
|
|
14
|
$anchors{lc $name} = $name; |
932
|
3
|
|
|
|
|
9
|
push @newhtml, $hp->execute($tok); |
933
|
|
|
|
|
|
|
} else { |
934
|
|
|
|
|
|
|
# add the current name anchor |
935
|
0
|
|
|
|
|
0
|
$tmp = $hp->execute($tok); |
936
|
0
|
|
|
|
|
0
|
$tmp =~ s/^(
|
937
|
0
|
|
|
|
|
0
|
push @newhtml, $tmp; |
938
|
0
|
|
|
|
|
0
|
$anchors{lc $name} = $name; |
939
|
|
|
|
|
|
|
} |
940
|
3
|
|
|
|
|
32
|
$adone = 1; |
941
|
|
|
|
|
|
|
} elsif ($tok->{type} eq 'starttag' |
942
|
|
|
|
|
|
|
|| $tok->{type} eq 'endtag') |
943
|
|
|
|
|
|
|
{ # Tag |
944
|
51
|
|
|
|
|
150
|
push @newhtml, $hp->execute($tok); |
945
|
51
|
100
|
|
|
|
873
|
last if $tok->{content} =~ m|$endtag|i; |
946
|
|
|
|
|
|
|
} |
947
|
|
|
|
|
|
|
else { |
948
|
0
|
|
|
|
|
0
|
push @newhtml, $hp->execute($tok); |
949
|
|
|
|
|
|
|
} |
950
|
|
|
|
|
|
|
} |
951
|
|
|
|
|
|
|
} |
952
|
12
|
|
|
|
|
179
|
my $out = join('', @newhtml); |
953
|
|
|
|
|
|
|
|
954
|
12
|
|
|
|
|
720
|
return $out; |
955
|
|
|
|
|
|
|
} # make_anchors |
956
|
|
|
|
|
|
|
|
957
|
|
|
|
|
|
|
=head2 make_toc_list |
958
|
|
|
|
|
|
|
|
959
|
|
|
|
|
|
|
my @toc_list = $toc->make_toc_list(input=>$html, |
960
|
|
|
|
|
|
|
labels=>\%labels, |
961
|
|
|
|
|
|
|
notoc_match=>$notoc_match, |
962
|
|
|
|
|
|
|
toc_entry=>\%toc_entry, |
963
|
|
|
|
|
|
|
toc_end=>\%toc_end, |
964
|
|
|
|
|
|
|
filename=>$filename); |
965
|
|
|
|
|
|
|
|
966
|
|
|
|
|
|
|
Makes a list of lists which represents the structure and content |
967
|
|
|
|
|
|
|
of (a portion of) the ToC from one file. |
968
|
|
|
|
|
|
|
Also updates a list of labels for the ToC entries. |
969
|
|
|
|
|
|
|
|
970
|
|
|
|
|
|
|
=cut |
971
|
|
|
|
|
|
|
|
972
|
|
|
|
|
|
|
sub make_toc_list ($%) { |
973
|
17
|
|
|
17
|
1
|
32
|
my $self = shift; |
974
|
17
|
|
|
|
|
322
|
my %args = ( |
975
|
|
|
|
|
|
|
input=>'', |
976
|
|
|
|
|
|
|
filename=>'', |
977
|
|
|
|
|
|
|
labels=>undef, |
978
|
|
|
|
|
|
|
notoc_match=>$self->{notoc_match}, |
979
|
|
|
|
|
|
|
toc_entry=>$self->{toc_entry}, |
980
|
|
|
|
|
|
|
toc_end=>$self->{toc_end}, |
981
|
|
|
|
|
|
|
inline=>$self->{inline}, |
982
|
|
|
|
|
|
|
debug=>$self->{debug}, |
983
|
|
|
|
|
|
|
toc_before=>$self->{toc_before}, |
984
|
|
|
|
|
|
|
toc_after=>$self->{toc_after}, |
985
|
|
|
|
|
|
|
textonly=>$self->{textonly}, |
986
|
|
|
|
|
|
|
ignore_sole_first=>$self->{ignore_sole_first}, |
987
|
|
|
|
|
|
|
ignore_only_one=>$self->{ignore_only_one}, |
988
|
|
|
|
|
|
|
@_ |
989
|
|
|
|
|
|
|
); |
990
|
17
|
|
|
|
|
35
|
my $html_str = $args{input}; |
991
|
17
|
|
|
|
|
28
|
my $infile = $args{filename}; |
992
|
17
|
|
|
|
|
24
|
my $labels = $args{labels}; |
993
|
|
|
|
|
|
|
|
994
|
17
|
|
|
|
|
29
|
my $toc_str = ""; |
995
|
17
|
|
|
|
|
33
|
my @toc = (); |
996
|
17
|
|
|
|
|
28
|
my @list_of_paths = (); |
997
|
17
|
|
|
|
|
28
|
my %level_count = (); |
998
|
|
|
|
|
|
|
|
999
|
|
|
|
|
|
|
# parse the HTML |
1000
|
17
|
|
|
|
|
133
|
my $hp = new HTML::SimpleParse(); |
1001
|
17
|
|
|
|
|
224
|
$hp->text($html_str); |
1002
|
17
|
|
|
|
|
132
|
$hp->parse(); |
1003
|
|
|
|
|
|
|
|
1004
|
17
|
|
|
|
|
21029
|
my $noli; |
1005
|
|
|
|
|
|
|
my $prevnoli; |
1006
|
17
|
|
|
|
|
39
|
my $before = ""; |
1007
|
17
|
|
|
|
|
29
|
my $after = ""; |
1008
|
17
|
|
|
|
|
35
|
my $tag; |
1009
|
|
|
|
|
|
|
my $endtag; |
1010
|
17
|
|
|
|
|
129
|
my $level = 0; |
1011
|
17
|
|
|
|
|
27
|
my $levelopen; |
1012
|
|
|
|
|
|
|
my $tmp; |
1013
|
0
|
|
|
|
|
0
|
my $content; |
1014
|
17
|
|
|
|
|
39
|
my $adone = 0; |
1015
|
17
|
|
|
|
|
34
|
my $name = "NOTOC"; # if no anchor is found... |
1016
|
17
|
|
|
|
|
22
|
my $is_title; |
1017
|
17
|
|
|
|
|
23
|
my $found_title = 0; |
1018
|
17
|
|
|
|
|
33
|
my $notoc = $args{notoc_match}; |
1019
|
|
|
|
|
|
|
# go through the HTML |
1020
|
17
|
|
|
|
|
19
|
my $tok; |
1021
|
17
|
|
|
|
|
73
|
my @tree = $hp->tree(); |
1022
|
17
|
|
|
|
|
573
|
while (@tree) { |
1023
|
1600
|
|
|
|
|
2087
|
$tok = shift @tree; |
1024
|
1600
|
|
|
|
|
1816
|
$level = 0; |
1025
|
1600
|
|
|
|
|
1584
|
$is_title = 0; |
1026
|
1600
|
|
|
|
|
1799
|
$tag = ''; |
1027
|
1600
|
100
|
|
|
|
3479
|
if ($tok->{type} eq 'starttag') |
1028
|
|
|
|
|
|
|
{ |
1029
|
|
|
|
|
|
|
# check if tag included in TOC |
1030
|
548
|
|
|
|
|
572
|
foreach my $key (keys %{$args{toc_entry}}) { |
|
548
|
|
|
|
|
1396
|
|
1031
|
1185
|
100
|
66
|
|
|
11489
|
if ($tok->{content} =~ /^$key/i |
|
|
|
100
|
|
|
|
|
1032
|
|
|
|
|
|
|
&& (!$notoc |
1033
|
|
|
|
|
|
|
|| $tok->{content} !~ /$notoc/)) { |
1034
|
69
|
|
|
|
|
98
|
$tag = $key; |
1035
|
69
|
50
|
|
|
|
250
|
if ($args{debug}) { |
1036
|
0
|
|
|
|
|
0
|
print STDERR "============\n"; |
1037
|
0
|
|
|
|
|
0
|
print STDERR "key = $key "; |
1038
|
0
|
|
|
|
|
0
|
print STDERR "tok->content = '", $tok->{content}, "' "; |
1039
|
0
|
|
|
|
|
0
|
print STDERR "tag = $tag"; |
1040
|
0
|
|
|
|
|
0
|
print STDERR "\n============\n"; |
1041
|
|
|
|
|
|
|
} |
1042
|
|
|
|
|
|
|
# level of significant element |
1043
|
69
|
|
|
|
|
115
|
$level = abs($args{toc_entry}->{$key}); |
1044
|
|
|
|
|
|
|
# no used in ToC listing |
1045
|
69
|
|
|
|
|
117
|
$noli = $args{toc_entry}->{$key} < 0; |
1046
|
|
|
|
|
|
|
# End tag of significant element |
1047
|
69
|
|
|
|
|
147
|
$endtag = $args{toc_end}->{$key}; |
1048
|
69
|
50
|
|
|
|
252
|
if (defined $args{toc_before}->{$key}) { |
1049
|
0
|
|
|
|
|
0
|
$before = $args{toc_before}->{$key}; |
1050
|
|
|
|
|
|
|
} else { |
1051
|
69
|
|
|
|
|
99
|
$before = ""; |
1052
|
|
|
|
|
|
|
} |
1053
|
69
|
50
|
|
|
|
153
|
if (defined $args{toc_after}->{$key}) { |
1054
|
0
|
|
|
|
|
0
|
$after = $args{toc_after}->{$key}; |
1055
|
|
|
|
|
|
|
} else { |
1056
|
69
|
|
|
|
|
200
|
$after = ""; |
1057
|
|
|
|
|
|
|
} |
1058
|
|
|
|
|
|
|
} |
1059
|
|
|
|
|
|
|
} |
1060
|
|
|
|
|
|
|
} |
1061
|
1600
|
100
|
|
|
|
3080
|
if (!$level) { |
1062
|
1531
|
|
|
|
|
2960
|
next; |
1063
|
|
|
|
|
|
|
} |
1064
|
69
|
50
|
|
|
|
149
|
if ($args{debug}) { |
1065
|
0
|
|
|
|
|
0
|
print STDERR "Chosen tag:$tag\n"; |
1066
|
|
|
|
|
|
|
} |
1067
|
|
|
|
|
|
|
# assert: we are at a Significant tag |
1068
|
|
|
|
|
|
|
|
1069
|
|
|
|
|
|
|
# get A element or ID from document |
1070
|
|
|
|
|
|
|
# This assumes that there is one there |
1071
|
69
|
|
|
|
|
89
|
$content = ''; |
1072
|
69
|
|
|
|
|
75
|
$adone = 0; |
1073
|
69
|
50
|
|
|
|
135
|
if ($tag =~ /title/i) { # TITLE tag is a special case |
1074
|
0
|
0
|
|
|
|
0
|
if ($found_title) { |
1075
|
|
|
|
|
|
|
# don't need to find a title again, we found it |
1076
|
0
|
|
|
|
|
0
|
next; |
1077
|
|
|
|
|
|
|
} else { |
1078
|
0
|
|
|
|
|
0
|
$is_title = 1; $adone = 1; |
|
0
|
|
|
|
|
0
|
|
1079
|
0
|
|
|
|
|
0
|
$found_title = 1; |
1080
|
|
|
|
|
|
|
} |
1081
|
|
|
|
|
|
|
} |
1082
|
69
|
50
|
|
|
|
160
|
if ($args{debug}) { |
1083
|
0
|
|
|
|
|
0
|
print STDERR "is_title:$is_title\n"; |
1084
|
|
|
|
|
|
|
} |
1085
|
|
|
|
|
|
|
# check for an ID before we skip this tag |
1086
|
69
|
100
|
|
|
|
187
|
if ($tok->{content} =~ /ID\s*=\s*(['"])/i) { |
1087
|
8
|
|
|
|
|
18
|
my $q = $1; |
1088
|
8
|
|
|
|
|
95
|
($name) = $tok->{content} =~ m/ID\s*=\s*$q([^$q]*)$q/i; |
1089
|
8
|
|
|
|
|
18
|
$adone = 1; |
1090
|
|
|
|
|
|
|
} |
1091
|
69
|
|
|
|
|
152
|
while (@tree) { |
1092
|
310
|
|
|
|
|
421
|
$tok = shift @tree; |
1093
|
|
|
|
|
|
|
# Text |
1094
|
310
|
100
|
66
|
|
|
1914
|
if ($tok->{type} eq 'text') { |
|
|
100
|
66
|
|
|
|
|
|
|
50
|
66
|
|
|
|
|
1095
|
95
|
|
|
|
|
172
|
$content .= $tok->{content}; |
1096
|
95
|
50
|
|
|
|
301
|
if ($args{debug}) { |
1097
|
0
|
|
|
|
|
0
|
print STDERR "tok-content = ", $tok->{content}, "\n"; |
1098
|
0
|
|
|
|
|
0
|
print STDERR "content = $content\n"; |
1099
|
|
|
|
|
|
|
} |
1100
|
|
|
|
|
|
|
# Anchor |
1101
|
|
|
|
|
|
|
} elsif (!$adone && $tok->{type} eq 'starttag' |
1102
|
|
|
|
|
|
|
&& $tok->{content} =~ /^A/i) |
1103
|
|
|
|
|
|
|
{ |
1104
|
61
|
50
|
|
|
|
380
|
if ($tok->{content} =~ /NAME\s*=\s*(['"])/i) { |
1105
|
61
|
|
|
|
|
142
|
my $q = $1; |
1106
|
61
|
|
|
|
|
473
|
($name) = $tok->{content} =~ m/NAME\s*=\s*$q([^$q]*)$q/i; |
1107
|
61
|
|
|
|
|
193
|
$adone = 1; |
1108
|
|
|
|
|
|
|
} |
1109
|
|
|
|
|
|
|
} elsif ($tok->{type} eq 'starttag' |
1110
|
|
|
|
|
|
|
|| $tok->{type} eq 'endtag') |
1111
|
|
|
|
|
|
|
{ # Tag |
1112
|
154
|
50
|
|
|
|
376
|
if ($args{debug}) { |
1113
|
0
|
|
|
|
|
0
|
print STDERR "file = ", $infile, |
1114
|
|
|
|
|
|
|
" tag = $tag, endtag = '$endtag", |
1115
|
|
|
|
|
|
|
"' tok-type = ", $tok->{type}, |
1116
|
|
|
|
|
|
|
" tok-content = '", $tok->{content}, "'\n"; |
1117
|
|
|
|
|
|
|
} |
1118
|
154
|
100
|
|
|
|
735
|
last if $tok->{content} =~ m#$endtag#i; |
1119
|
85
|
50
|
33
|
|
|
661
|
$content .= $hp->execute($tok) |
1120
|
|
|
|
|
|
|
unless $args{textonly} |
1121
|
|
|
|
|
|
|
|| $tok->{content} =~ m#/?(hr|p|a|img)#i; |
1122
|
|
|
|
|
|
|
} |
1123
|
|
|
|
|
|
|
|
1124
|
|
|
|
|
|
|
} |
1125
|
69
|
50
|
|
|
|
144
|
if ($args{debug}) { |
1126
|
0
|
|
|
|
|
0
|
print STDERR "Chosen content:'$content'\n"; |
1127
|
|
|
|
|
|
|
} |
1128
|
|
|
|
|
|
|
|
1129
|
69
|
50
|
|
|
|
218
|
if ($content =~ /^\s*$/) { # Check for empty content |
1130
|
0
|
|
|
|
|
0
|
warn "Warning: A $tag in $infile has no content; $tag skipped\n"; |
1131
|
0
|
|
|
|
|
0
|
next; |
1132
|
|
|
|
|
|
|
} else { |
1133
|
69
|
|
|
|
|
168
|
$content =~ s/^\s+//; # Strip beginning whitespace |
1134
|
69
|
|
|
|
|
171
|
$content =~ s/\s+$//; # Strip end whitespace |
1135
|
69
|
|
|
|
|
137
|
$content = $before . $content . $after; |
1136
|
|
|
|
|
|
|
} |
1137
|
|
|
|
|
|
|
# figure out the anchor link needed |
1138
|
69
|
|
|
|
|
84
|
my $link = ''; |
1139
|
69
|
100
|
66
|
|
|
239
|
if ($args{inline} and $args{first_file} eq $infile) |
1140
|
|
|
|
|
|
|
{ |
1141
|
19
|
50
|
|
|
|
57
|
$link = (!$is_title ? qq|#$name| : ''); |
1142
|
|
|
|
|
|
|
} |
1143
|
|
|
|
|
|
|
else |
1144
|
|
|
|
|
|
|
{ |
1145
|
50
|
50
|
|
|
|
197
|
$link .= join('', |
1146
|
|
|
|
|
|
|
qq|$infile|, |
1147
|
|
|
|
|
|
|
!$is_title ? qq|#$name| : ''); |
1148
|
|
|
|
|
|
|
} |
1149
|
|
|
|
|
|
|
# Assert: we know the info about this TOC entry |
1150
|
69
|
|
|
|
|
266
|
push @list_of_paths, { |
1151
|
|
|
|
|
|
|
level=>$level, |
1152
|
|
|
|
|
|
|
path=>$link, |
1153
|
|
|
|
|
|
|
}; |
1154
|
69
|
|
|
|
|
181
|
$labels->{$link} = $content; |
1155
|
69
|
|
|
|
|
134
|
$level_count{$level}++; |
1156
|
|
|
|
|
|
|
|
1157
|
69
|
|
|
|
|
80
|
$name = 'NOTOC'; |
1158
|
69
|
|
|
|
|
171
|
$prevnoli = $noli; |
1159
|
|
|
|
|
|
|
} # while tree |
1160
|
|
|
|
|
|
|
|
1161
|
|
|
|
|
|
|
# If we want to ignore the first H1 if there's only one of them |
1162
|
|
|
|
|
|
|
# if the first item is a level-0 item |
1163
|
|
|
|
|
|
|
# and there is only one of them |
1164
|
|
|
|
|
|
|
# then remove it and readjust levels |
1165
|
17
|
100
|
66
|
|
|
155
|
if ($args{ignore_sole_first} |
|
|
100
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
1166
|
|
|
|
|
|
|
and $level_count{"1"} == 1 |
1167
|
|
|
|
|
|
|
and $list_of_paths[0]->{level} == 1) |
1168
|
|
|
|
|
|
|
{ |
1169
|
1
|
|
|
|
|
2
|
shift @list_of_paths; |
1170
|
1
|
|
|
|
|
6
|
for (my $i = 0; $i < @list_of_paths; $i++) |
1171
|
|
|
|
|
|
|
{ |
1172
|
1
|
|
|
|
|
4
|
$list_of_paths[$i]->{level}--; |
1173
|
|
|
|
|
|
|
} |
1174
|
|
|
|
|
|
|
} |
1175
|
|
|
|
|
|
|
elsif ($args{ignore_only_one} |
1176
|
|
|
|
|
|
|
and @list_of_paths == 1) |
1177
|
|
|
|
|
|
|
{ |
1178
|
1
|
|
|
|
|
12
|
return (); |
1179
|
|
|
|
|
|
|
} |
1180
|
|
|
|
|
|
|
|
1181
|
16
|
|
|
|
|
39
|
my @list_of_lists = (); |
1182
|
16
|
|
|
|
|
90
|
@list_of_lists = $self->build_lol( |
1183
|
|
|
|
|
|
|
paths=>\@list_of_paths); |
1184
|
|
|
|
|
|
|
|
1185
|
16
|
|
|
|
|
956
|
return @list_of_lists; |
1186
|
|
|
|
|
|
|
} # make_toc_list |
1187
|
|
|
|
|
|
|
|
1188
|
|
|
|
|
|
|
=head2 build_lol |
1189
|
|
|
|
|
|
|
|
1190
|
|
|
|
|
|
|
Build a list of lists of paths, given a list |
1191
|
|
|
|
|
|
|
of hashes with info about paths. |
1192
|
|
|
|
|
|
|
|
1193
|
|
|
|
|
|
|
=cut |
1194
|
|
|
|
|
|
|
sub build_lol { |
1195
|
36
|
|
|
36
|
1
|
53
|
my $self = shift; |
1196
|
36
|
|
|
|
|
229
|
my %args = ( |
1197
|
|
|
|
|
|
|
paths=>undef, |
1198
|
|
|
|
|
|
|
depth=>1, |
1199
|
|
|
|
|
|
|
prepend_list=>undef, |
1200
|
|
|
|
|
|
|
append_list=>undef, |
1201
|
|
|
|
|
|
|
@_ |
1202
|
|
|
|
|
|
|
); |
1203
|
36
|
|
|
|
|
66
|
my $paths_ref = $args{paths}; |
1204
|
36
|
|
|
|
|
47
|
my $depth = $args{depth}; |
1205
|
|
|
|
|
|
|
|
1206
|
36
|
|
|
|
|
54
|
my @list_of_lists = (); |
1207
|
36
|
|
|
|
|
40
|
while (@{$paths_ref}) |
|
123
|
|
|
|
|
300
|
|
1208
|
|
|
|
|
|
|
{ |
1209
|
96
|
|
|
|
|
149
|
my $toc_entry = $paths_ref->[0]; |
1210
|
96
|
|
|
|
|
130
|
my $path_depth = $toc_entry->{level}; |
1211
|
96
|
|
|
|
|
124
|
my $path = $toc_entry->{path}; |
1212
|
96
|
100
|
|
|
|
428
|
if ($path_depth == $depth) |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
1213
|
|
|
|
|
|
|
{ |
1214
|
67
|
|
|
|
|
75
|
shift @{$paths_ref}; # use this path |
|
67
|
|
|
|
|
96
|
|
1215
|
67
|
|
|
|
|
159
|
push @list_of_lists, $path; |
1216
|
|
|
|
|
|
|
} |
1217
|
|
|
|
|
|
|
elsif ($path_depth > $depth) |
1218
|
|
|
|
|
|
|
{ |
1219
|
20
|
|
|
|
|
117
|
push @list_of_lists, [$self->build_lol( |
1220
|
|
|
|
|
|
|
%args, |
1221
|
|
|
|
|
|
|
prepend_list=>undef, |
1222
|
|
|
|
|
|
|
append_list=>undef, |
1223
|
|
|
|
|
|
|
paths=>$paths_ref, |
1224
|
|
|
|
|
|
|
depth=>$path_depth, |
1225
|
|
|
|
|
|
|
)]; |
1226
|
|
|
|
|
|
|
} |
1227
|
|
|
|
|
|
|
elsif ($path_depth < $depth) |
1228
|
|
|
|
|
|
|
{ |
1229
|
9
|
|
|
|
|
69
|
return @list_of_lists; |
1230
|
|
|
|
|
|
|
} |
1231
|
|
|
|
|
|
|
} |
1232
|
|
|
|
|
|
|
# prepend the given list to the top level |
1233
|
27
|
50
|
33
|
|
|
90
|
if (defined $args{prepend_list} and @{$args{prepend_list}}) |
|
0
|
|
|
|
|
0
|
|
1234
|
|
|
|
|
|
|
{ |
1235
|
|
|
|
|
|
|
# if the list of lists is a single item which is a list |
1236
|
|
|
|
|
|
|
# then add the extra list to that item |
1237
|
0
|
0
|
0
|
|
|
0
|
if ($#list_of_lists == 0 |
1238
|
|
|
|
|
|
|
and ref($list_of_lists[0]) eq "ARRAY") |
1239
|
|
|
|
|
|
|
{ |
1240
|
0
|
|
|
|
|
0
|
unshift @{$list_of_lists[0]}, @{$args{prepend_list}}; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
1241
|
|
|
|
|
|
|
} |
1242
|
|
|
|
|
|
|
else |
1243
|
|
|
|
|
|
|
{ |
1244
|
0
|
|
|
|
|
0
|
unshift @list_of_lists, @{$args{prepend_list}}; |
|
0
|
|
|
|
|
0
|
|
1245
|
|
|
|
|
|
|
} |
1246
|
|
|
|
|
|
|
} |
1247
|
|
|
|
|
|
|
# append the given list to the top level |
1248
|
27
|
50
|
33
|
|
|
94
|
if (defined $args{append_list} and @{$args{append_list}}) |
|
0
|
|
|
|
|
0
|
|
1249
|
|
|
|
|
|
|
{ |
1250
|
|
|
|
|
|
|
# if the list of lists is a single item which is a list |
1251
|
|
|
|
|
|
|
# then add the extra list to that item |
1252
|
0
|
0
|
0
|
|
|
0
|
if ($#list_of_lists == 0 |
1253
|
|
|
|
|
|
|
and ref($list_of_lists[0]) eq "ARRAY") |
1254
|
|
|
|
|
|
|
{ |
1255
|
0
|
|
|
|
|
0
|
push @{$list_of_lists[0]}, @{$args{append_list}}; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
1256
|
|
|
|
|
|
|
} |
1257
|
|
|
|
|
|
|
else |
1258
|
|
|
|
|
|
|
{ |
1259
|
0
|
|
|
|
|
0
|
push @list_of_lists, @{$args{append_list}}; |
|
0
|
|
|
|
|
0
|
|
1260
|
|
|
|
|
|
|
} |
1261
|
|
|
|
|
|
|
} |
1262
|
27
|
|
|
|
|
132
|
return @list_of_lists; |
1263
|
|
|
|
|
|
|
} # build_lol |
1264
|
|
|
|
|
|
|
|
1265
|
|
|
|
|
|
|
=head2 output_toc |
1266
|
|
|
|
|
|
|
|
1267
|
|
|
|
|
|
|
$self->output_toc(toc=>$toc_str, |
1268
|
|
|
|
|
|
|
input=>\@input, |
1269
|
|
|
|
|
|
|
filenames=>\@filenames); |
1270
|
|
|
|
|
|
|
|
1271
|
|
|
|
|
|
|
Put the output (whether to file, STDOUT or string). |
1272
|
|
|
|
|
|
|
The "output" in this case could be the ToC, the modified |
1273
|
|
|
|
|
|
|
(anchors added) HTML, or both. |
1274
|
|
|
|
|
|
|
|
1275
|
|
|
|
|
|
|
=cut |
1276
|
|
|
|
|
|
|
sub output_toc ($%) { |
1277
|
27
|
|
|
27
|
1
|
58
|
my $self = shift; |
1278
|
27
|
|
|
|
|
667
|
my %args = ( |
1279
|
|
|
|
|
|
|
toc=>'', |
1280
|
|
|
|
|
|
|
input=>undef, |
1281
|
|
|
|
|
|
|
filenames=>undef, |
1282
|
|
|
|
|
|
|
bak=>$self->{bak}, |
1283
|
|
|
|
|
|
|
useorg=>$self->{useorg}, |
1284
|
|
|
|
|
|
|
inline=>$self->{inline}, |
1285
|
|
|
|
|
|
|
overwrite=>$self->{overwrite}, |
1286
|
|
|
|
|
|
|
to_string=>$self->{to_string}, |
1287
|
|
|
|
|
|
|
header=>$self->{header}, |
1288
|
|
|
|
|
|
|
footer=>$self->{footer}, |
1289
|
|
|
|
|
|
|
toc_only=>$self->{toc_only}, |
1290
|
|
|
|
|
|
|
title=>$self->{title}, |
1291
|
|
|
|
|
|
|
toclabel=>$self->{toclabel}, |
1292
|
|
|
|
|
|
|
outfile=>$self->{outfile}, |
1293
|
|
|
|
|
|
|
debug=>$self->{debug}, |
1294
|
|
|
|
|
|
|
quiet=>$self->{quiet}, |
1295
|
|
|
|
|
|
|
@_ |
1296
|
|
|
|
|
|
|
); |
1297
|
|
|
|
|
|
|
|
1298
|
|
|
|
|
|
|
# |
1299
|
|
|
|
|
|
|
# Output to the files if we were making anchors |
1300
|
|
|
|
|
|
|
# |
1301
|
27
|
50
|
100
|
|
|
168
|
if ($args{make_anchors} |
|
|
|
66
|
|
|
|
|
1302
|
|
|
|
|
|
|
&& !$args{to_string} |
1303
|
|
|
|
|
|
|
&& $args{overwrite}) |
1304
|
|
|
|
|
|
|
{ |
1305
|
0
|
|
|
|
|
0
|
my $ofh; |
1306
|
|
|
|
|
|
|
# start from 1 if we're going to be inlining the toc |
1307
|
|
|
|
|
|
|
# in the first file and not to an output file |
1308
|
0
|
0
|
0
|
|
|
0
|
my $start_from = (($args{make_toc} |
1309
|
|
|
|
|
|
|
&& $args{inline} |
1310
|
|
|
|
|
|
|
&& !$args{outfile}) |
1311
|
|
|
|
|
|
|
? 1 : 0); |
1312
|
0
|
|
|
|
|
0
|
for (my $i=$start_from; $i < @{$args{filenames}}; $i++) |
|
0
|
|
|
|
|
0
|
|
1313
|
|
|
|
|
|
|
{ |
1314
|
0
|
|
|
|
|
0
|
my $filename = $args{filenames}->[$i]; |
1315
|
0
|
|
|
|
|
0
|
my $bakfile = $filename . "." . $args{bak}; |
1316
|
0
|
0
|
0
|
|
|
0
|
if ($args{bak} |
|
|
|
0
|
|
|
|
|
1317
|
|
|
|
|
|
|
&& !($args{useorg} && -e $bakfile)) |
1318
|
|
|
|
|
|
|
{ |
1319
|
|
|
|
|
|
|
# copy the file to a backup |
1320
|
0
|
0
|
|
|
|
0
|
print STDERR "Backing up ", $filename, " to ", |
1321
|
|
|
|
|
|
|
$bakfile, "\n" |
1322
|
|
|
|
|
|
|
unless $args{quiet}; |
1323
|
0
|
|
|
|
|
0
|
cp($filename, $bakfile); |
1324
|
|
|
|
|
|
|
} |
1325
|
0
|
0
|
|
|
|
0
|
open($ofh, "> $filename") |
1326
|
|
|
|
|
|
|
|| die "Error: unable to open ", $filename, ": $!\n"; |
1327
|
0
|
0
|
|
|
|
0
|
print STDERR "Overwriting ToC to ", $filename, "\n" |
1328
|
|
|
|
|
|
|
unless $args{quiet}; |
1329
|
0
|
|
|
|
|
0
|
print $ofh $args{input}->[$i]; |
1330
|
0
|
|
|
|
|
0
|
close($ofh); |
1331
|
|
|
|
|
|
|
} |
1332
|
|
|
|
|
|
|
} |
1333
|
|
|
|
|
|
|
|
1334
|
|
|
|
|
|
|
# |
1335
|
|
|
|
|
|
|
# Construct and output the ToC |
1336
|
|
|
|
|
|
|
# |
1337
|
27
|
|
|
|
|
46
|
my $output = ''; |
1338
|
27
|
100
|
33
|
|
|
141
|
if ($args{make_toc}) |
|
|
50
|
33
|
|
|
|
|
1339
|
|
|
|
|
|
|
{ |
1340
|
17
|
100
|
|
|
|
45
|
if ($args{toc}) |
1341
|
|
|
|
|
|
|
{ |
1342
|
16
|
|
|
|
|
33
|
my @toc = (); |
1343
|
|
|
|
|
|
|
# put the header at the start of the ToC if there is one |
1344
|
16
|
50
|
66
|
|
|
138
|
if ($args{header}) { |
|
|
100
|
|
|
|
|
|
1345
|
0
|
0
|
|
|
|
0
|
if (-f $args{header}) |
1346
|
|
|
|
|
|
|
{ |
1347
|
0
|
0
|
|
|
|
0
|
open(HEADER, $args{header}) |
1348
|
|
|
|
|
|
|
|| die "Error: unable to open ", $args{header}, ": $!\n"; |
1349
|
0
|
|
|
|
|
0
|
push @toc, |
1350
|
0
|
|
|
|
|
0
|
close (HEADER); |
1351
|
|
|
|
|
|
|
} |
1352
|
|
|
|
|
|
|
else # not a file |
1353
|
|
|
|
|
|
|
{ |
1354
|
0
|
|
|
|
|
0
|
push @toc, $args{header}; |
1355
|
|
|
|
|
|
|
} |
1356
|
|
|
|
|
|
|
} |
1357
|
|
|
|
|
|
|
# if we are outputing a standalone page, |
1358
|
|
|
|
|
|
|
# then make sure it can stand |
1359
|
|
|
|
|
|
|
elsif (!$args{toc_only} |
1360
|
|
|
|
|
|
|
&& !$args{inline}) { |
1361
|
|
|
|
|
|
|
|
1362
|
12
|
|
|
|
|
35
|
push @toc, qq|\n|, |
1363
|
|
|
|
|
|
|
"\n", |
1364
|
|
|
|
|
|
|
"\n"; |
1365
|
12
|
50
|
|
|
|
46
|
push @toc, "", $args{title}, "\n" if $args{title}; |
1366
|
12
|
|
|
|
|
30
|
push @toc, "\n", |
1367
|
|
|
|
|
|
|
"\n"; |
1368
|
|
|
|
|
|
|
} |
1369
|
|
|
|
|
|
|
|
1370
|
|
|
|
|
|
|
# start the ToC with the ToC label |
1371
|
16
|
100
|
|
|
|
51
|
if ($args{toclabel}) { |
1372
|
15
|
|
|
|
|
31
|
push @toc, $args{toclabel}; |
1373
|
|
|
|
|
|
|
} |
1374
|
|
|
|
|
|
|
|
1375
|
|
|
|
|
|
|
# and the actual ToC |
1376
|
16
|
|
|
|
|
35
|
push @toc, "\n", $args{toc}, "\n"; |
1377
|
|
|
|
|
|
|
|
1378
|
|
|
|
|
|
|
# add the footer, if there is one |
1379
|
16
|
50
|
66
|
|
|
118
|
if ($args{footer}) { |
|
|
100
|
|
|
|
|
|
1380
|
0
|
0
|
|
|
|
0
|
if (-f $args{footer}) |
1381
|
|
|
|
|
|
|
{ |
1382
|
0
|
0
|
|
|
|
0
|
open(FOOTER, $args{footer}) |
1383
|
|
|
|
|
|
|
|| die "Error: unable to open ", $args{footer}, ": $!\n"; |
1384
|
0
|
|
|
|
|
0
|
push @toc, |
1385
|
0
|
|
|
|
|
0
|
close (FOOTER); |
1386
|
|
|
|
|
|
|
} |
1387
|
|
|
|
|
|
|
else |
1388
|
|
|
|
|
|
|
{ |
1389
|
0
|
|
|
|
|
0
|
push @toc, $args{footer}; |
1390
|
|
|
|
|
|
|
} |
1391
|
|
|
|
|
|
|
} |
1392
|
|
|
|
|
|
|
# if we are outputing a standalone page, |
1393
|
|
|
|
|
|
|
# then make sure it can stand |
1394
|
|
|
|
|
|
|
elsif (!$args{toc_only} |
1395
|
|
|
|
|
|
|
&& !$args{inline}) { |
1396
|
|
|
|
|
|
|
|
1397
|
12
|
|
|
|
|
28
|
push @toc, "\n", "\n"; |
1398
|
|
|
|
|
|
|
} |
1399
|
|
|
|
|
|
|
|
1400
|
16
|
|
|
|
|
74
|
$output = join '', @toc; |
1401
|
|
|
|
|
|
|
} |
1402
|
|
|
|
|
|
|
else |
1403
|
|
|
|
|
|
|
{ |
1404
|
1
|
|
|
|
|
2
|
$output = "\n"; |
1405
|
|
|
|
|
|
|
} |
1406
|
|
|
|
|
|
|
} |
1407
|
|
|
|
|
|
|
elsif ($args{make_anchors} && (!$args{overwrite} || $args{to_string})) |
1408
|
|
|
|
|
|
|
{ |
1409
|
|
|
|
|
|
|
# if we're just making anchors, and we aren't overwriting |
1410
|
|
|
|
|
|
|
# the original file, we need to output it |
1411
|
10
|
|
|
|
|
25
|
$output = $args{input}->[0]; |
1412
|
|
|
|
|
|
|
} |
1413
|
|
|
|
|
|
|
|
1414
|
27
|
50
|
|
|
|
65
|
if ($output) |
1415
|
|
|
|
|
|
|
{ |
1416
|
|
|
|
|
|
|
# |
1417
|
|
|
|
|
|
|
# Sent the outfile to its final destination |
1418
|
|
|
|
|
|
|
# |
1419
|
27
|
|
|
|
|
40
|
my $file_needs_closing = 0; |
1420
|
27
|
|
|
|
|
46
|
my $ofh; |
1421
|
27
|
100
|
100
|
|
|
158
|
if ($args{to_string}) |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
1422
|
|
|
|
|
|
|
{ |
1423
|
8
|
|
|
|
|
11
|
$ofh = undef; |
1424
|
|
|
|
|
|
|
} |
1425
|
|
|
|
|
|
|
elsif ($args{outfile} && $args{outfile} ne "-") { |
1426
|
17
|
50
|
|
|
|
2986
|
open($ofh, "> " . $args{outfile}) |
1427
|
|
|
|
|
|
|
|| die "Error: unable to open ", $args{outfile}, ": $!\n"; |
1428
|
17
|
|
|
|
|
55
|
$file_needs_closing = 1; |
1429
|
|
|
|
|
|
|
} |
1430
|
|
|
|
|
|
|
elsif (!$args{overwrite}) { |
1431
|
0
|
|
|
|
|
0
|
$ofh = *STDOUT; |
1432
|
0
|
|
|
|
|
0
|
$file_needs_closing = 0; |
1433
|
|
|
|
|
|
|
} |
1434
|
27
|
100
|
|
|
|
84
|
if ($args{inline}) { |
1435
|
|
|
|
|
|
|
# create the modified version of the first set of input |
1436
|
5
|
|
|
|
|
13
|
my $first_file = $args{filenames}->[0]; |
1437
|
5
|
|
|
|
|
14
|
my $bakfile = $first_file . "." . $args{bak}; |
1438
|
5
|
|
|
|
|
48
|
$output = $self->put_toc_inline(%args, |
1439
|
|
|
|
|
|
|
toc_str=>$output, |
1440
|
|
|
|
|
|
|
in_string=>$args{input}->[0], |
1441
|
|
|
|
|
|
|
filename=>$args{filenames}->[0], |
1442
|
|
|
|
|
|
|
); |
1443
|
|
|
|
|
|
|
|
1444
|
5
|
100
|
0
|
|
|
37
|
if ($args{to_string}) |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1445
|
|
|
|
|
|
|
{ |
1446
|
|
|
|
|
|
|
# just send to string, don't print anything |
1447
|
3
|
50
|
|
|
|
13
|
if ($args{debug}) |
1448
|
|
|
|
|
|
|
{ |
1449
|
0
|
|
|
|
|
0
|
print STDERR "======== to_string output_toc ========\n"; |
1450
|
0
|
|
|
|
|
0
|
print STDERR $output; |
1451
|
0
|
|
|
|
|
0
|
print STDERR "========----------------------========\n"; |
1452
|
|
|
|
|
|
|
} |
1453
|
|
|
|
|
|
|
} |
1454
|
|
|
|
|
|
|
elsif ($args{overwrite}) { |
1455
|
2
|
50
|
33
|
|
|
23
|
if ($args{bak} |
|
|
|
33
|
|
|
|
|
1456
|
|
|
|
|
|
|
&& !($args{useorg} && -e $bakfile)) |
1457
|
|
|
|
|
|
|
{ |
1458
|
|
|
|
|
|
|
# copy the file to a backup |
1459
|
2
|
50
|
|
|
|
8
|
print STDERR "Backing up ", $first_file, " to ", |
1460
|
|
|
|
|
|
|
$bakfile, "\n" |
1461
|
|
|
|
|
|
|
unless $args{quiet}; |
1462
|
2
|
|
|
|
|
10
|
cp($first_file, $bakfile); |
1463
|
|
|
|
|
|
|
} |
1464
|
2
|
50
|
|
|
|
236
|
open($ofh, "> $first_file") |
1465
|
|
|
|
|
|
|
|| die "Error: unable to open ", $first_file, ": $!\n"; |
1466
|
2
|
|
|
|
|
8
|
$file_needs_closing = 1; |
1467
|
2
|
50
|
|
|
|
7
|
print STDERR "Overwriting ToC to ", $first_file, "\n" |
1468
|
|
|
|
|
|
|
unless $args{quiet}; |
1469
|
2
|
|
|
|
|
14
|
print $ofh $output; |
1470
|
|
|
|
|
|
|
} |
1471
|
|
|
|
|
|
|
elsif ($args{outfile} |
1472
|
|
|
|
|
|
|
&& $args{outfile} ne "-") { |
1473
|
0
|
0
|
|
|
|
0
|
print STDERR "Writing Inline ToC to ", $args{outfile}, "\n" |
1474
|
|
|
|
|
|
|
unless $args{quiet}; |
1475
|
0
|
|
|
|
|
0
|
print $ofh $output; |
1476
|
|
|
|
|
|
|
} |
1477
|
|
|
|
|
|
|
elsif ($args{outfile}) |
1478
|
|
|
|
|
|
|
{ |
1479
|
0
|
|
|
|
|
0
|
print $ofh $output; |
1480
|
|
|
|
|
|
|
} |
1481
|
|
|
|
|
|
|
} else { |
1482
|
22
|
100
|
33
|
|
|
135
|
if ($args{to_string}) |
|
|
50
|
|
|
|
|
|
1483
|
|
|
|
|
|
|
{ |
1484
|
|
|
|
|
|
|
# just send to string, don't print anything |
1485
|
|
|
|
|
|
|
} |
1486
|
|
|
|
|
|
|
elsif ($args{outfile} && $args{outfile} ne "-") { |
1487
|
17
|
50
|
|
|
|
60
|
print STDERR "Writing ToC to ", $args{outfile}, "\n" |
1488
|
|
|
|
|
|
|
unless $args{quiet}; |
1489
|
17
|
|
|
|
|
358
|
print $ofh $output; |
1490
|
|
|
|
|
|
|
} |
1491
|
|
|
|
|
|
|
else |
1492
|
|
|
|
|
|
|
{ |
1493
|
0
|
|
|
|
|
0
|
print $ofh $output; |
1494
|
|
|
|
|
|
|
} |
1495
|
|
|
|
|
|
|
} |
1496
|
27
|
100
|
|
|
|
79
|
if ($file_needs_closing) { |
1497
|
19
|
|
|
|
|
1228
|
close($ofh); |
1498
|
|
|
|
|
|
|
} |
1499
|
|
|
|
|
|
|
} |
1500
|
|
|
|
|
|
|
|
1501
|
27
|
100
|
|
|
|
109
|
if ($args{to_string}) |
1502
|
|
|
|
|
|
|
{ |
1503
|
8
|
|
|
|
|
43
|
return $output; |
1504
|
|
|
|
|
|
|
} |
1505
|
|
|
|
|
|
|
else |
1506
|
|
|
|
|
|
|
{ |
1507
|
19
|
|
|
|
|
137
|
return 1; |
1508
|
|
|
|
|
|
|
} |
1509
|
|
|
|
|
|
|
} # output_toc |
1510
|
|
|
|
|
|
|
|
1511
|
|
|
|
|
|
|
=head2 put_toc_inline |
1512
|
|
|
|
|
|
|
|
1513
|
|
|
|
|
|
|
my $newhtml = $toc->put_toc_inline(toc_str=>$toc_str, |
1514
|
|
|
|
|
|
|
filename=>$filename, in_string=>$in_string); |
1515
|
|
|
|
|
|
|
|
1516
|
|
|
|
|
|
|
Puts the given toc_str into the given input string; |
1517
|
|
|
|
|
|
|
returns a string. |
1518
|
|
|
|
|
|
|
|
1519
|
|
|
|
|
|
|
=cut |
1520
|
|
|
|
|
|
|
|
1521
|
|
|
|
|
|
|
# put_toc_inline: insert a ToC string into an HTML document.
#
# Named arguments (key => value pairs following $self):
#   toc_str         - the ToC text to insert (default '')
#   filename        - file to read the HTML from when in_string is empty
#   in_string       - the HTML to process; takes precedence over filename
#   toc_tag         - pattern matched, case-insensitively, against the
#                     content of start tags, end tags and comments to find
#                     the insertion point (default $self->{toc_tag})
#   toc_tag_replace - if true, the matching token is dropped and the ToC
#                     takes its place; otherwise the token is kept and the
#                     ToC is placed directly after it
#                     (default $self->{toc_tag_replace})
#
# Returns the resulting HTML as a single string.  Only the first matching
# token receives the ToC; when nothing matches, the re-emitted document
# contains no ToC.  Dies if the input file cannot be opened.
#
# The ($) prototype is kept only so the declaration stays unchanged; Perl
# ignores prototypes on method calls.
sub put_toc_inline ($) {
    my $self = shift;
    my %args = (
        toc_str         => '',
        filename        => '',
        in_string       => '',
        toc_tag         => $self->{toc_tag},
        toc_tag_replace => $self->{toc_tag_replace},
        @_
    );
    my $toc_str = $args{toc_str};
    my $toc_tag = $args{toc_tag};

    my $html_str = "";
    if ($args{in_string}) {
        # use input string, not file
        $html_str = $args{in_string};
    }
    else {
        my $infile = $args{filename};
        # Three-argument open with a lexical handle: the file name can no
        # longer be interpreted as an open mode or a command pipe.
        open(my $in_fh, '<', $infile)
            or die "Error: unable to open ", $infile, ": $!\n";
        # Slurp the whole file; $/ is undefined only inside the do-block.
        $html_str = do { local $/; <$in_fh> };
        $html_str = "" unless defined $html_str;
        close($in_fh);
    }

    # parse the file
    my $hp = HTML::SimpleParse->new();
    $hp->text($html_str);
    $hp->parse();

    my @newhtml  = ();
    my $toc_done = 0;

    # go through the HTML, one parsed token at a time
    for my $tok ($hp->tree()) {
        # look for the ToC tag in tags or comments
        my $is_markup = $tok->{type} eq 'starttag'
            || $tok->{type} eq 'endtag'
            || $tok->{type} eq 'comment';

        if ($is_markup && !$toc_done && $tok->{content} =~ m|$toc_tag|i) {
            # some tags need to be preserved, with the ToC put after,
            # while others need to be replaced
            push @newhtml, $hp->execute($tok) unless $args{toc_tag_replace};
            # put the ToC in
            push @newhtml, $toc_str;
            $toc_done = 1;
            next;
        }

        # every other token is re-emitted through the parser
        push @newhtml, $hp->execute($tok);
    }

    return join('', @newhtml);
}
1595
|
|
|
|
|
|
|
|
1596
|
|
|
|
|
|
|
=head2 cp |
1597
|
|
|
|
|
|
|
|
1598
|
|
|
|
|
|
|
cp($src, $dst); |
1599
|
|
|
|
|
|
|
|
1600
|
|
|
|
|
|
|
Copies file $src to $dst. |
1601
|
|
|
|
|
|
|
Used for making backups of files. |
1602
|
|
|
|
|
|
|
|
1603
|
|
|
|
|
|
|
=cut |
1604
|
|
|
|
|
|
|
|
1605
|
|
|
|
|
|
|
# cp: copy the contents of file $src to file $dst.
#
# $dst is created, or truncated if it already exists.  Dies with an
# "Error: unable to open ..." message if either file cannot be opened, and
# with an "Error: unable to write ..." message if the copy cannot be
# flushed to $dst.  Returns 1 on success.
#
# The ($$) prototype is kept so the declaration stays unchanged.
sub cp ($$) {
    my ($src, $dst) = @_;

    # Three-argument open with lexical handles: file names are taken
    # literally and no package-global SRC/DST handles are left behind.
    open(my $src_fh, '<', $src)
        or die "Error: unable to open ", $src, ": $!\n";
    open(my $dst_fh, '>', $dst)
        or die "Error: unable to open ", $dst, ": $!\n";

    # Copy line by line so the whole file is never held in memory at once.
    while (my $line = <$src_fh>) {
        print {$dst_fh} $line;
    }

    close($src_fh);
    # Buffered write errors only surface when the handle is closed.
    close($dst_fh)
        or die "Error: unable to write ", $dst, ": $!\n";
    return 1;
}
1615
|
|
|
|
|
|
|
|
1616
|
|
|
|
|
|
|
1; |
1617
|
|
|
|
|
|
|
|
1618
|
|
|
|
|
|
|
=head1 FILE FORMATS |
1619
|
|
|
|
|
|
|
|
1620
|
|
|
|
|
|
|
=head2 Formatting the ToC |
1621
|
|
|
|
|
|
|
|
1622
|
|
|
|
|
|
|
The B<toc_entry> and other related options give you control on how the
1623
|
|
|
|
|
|
|
ToC entries may look, but there are other options to affect the final |
1624
|
|
|
|
|
|
|
appearance of the ToC file created. |
1625
|
|
|
|
|
|
|
|
1626
|
|
|
|
|
|
|
With the B<header> option, the contents of the given file (or string)
1627
|
|
|
|
|
|
|
will be prepended before the generated ToC. This allows you to have |
1628
|
|
|
|
|
|
|
introductory text, or any other text, before the ToC. |
1629
|
|
|
|
|
|
|
|
1630
|
|
|
|
|
|
|
=over |
1631
|
|
|
|
|
|
|
|
1632
|
|
|
|
|
|
|
=item Note: |
1633
|
|
|
|
|
|
|
|
1634
|
|
|
|
|
|
|
If you use the B<header> option, make sure the file specified
1635
|
|
|
|
|
|
|
contains the opening HTML tag, the HEAD element (containing the |
1636
|
|
|
|
|
|
|
TITLE element), and the opening BODY tag. However, these |
1637
|
|
|
|
|
|
|
tags/elements should not be in the header file if the B<inline>
1638
|
|
|
|
|
|
|
option is used. See L</Inlining the ToC> for information on what
1639
|
|
|
|
|
|
|
the header file should contain for inlining the ToC. |
1640
|
|
|
|
|
|
|
|
1641
|
|
|
|
|
|
|
=back |
1642
|
|
|
|
|
|
|
|
1643
|
|
|
|
|
|
|
With the B<toc_label> option, the contents of the given string will be
1644
|
|
|
|
|
|
|
prepended before the generated ToC (but after any text taken from a |
1645
|
|
|
|
|
|
|
B<header> file).
1646
|
|
|
|
|
|
|
|
1647
|
|
|
|
|
|
|
With the B<footer> option, the contents of the file will be appended
1648
|
|
|
|
|
|
|
after the generated ToC. |
1649
|
|
|
|
|
|
|
|
1650
|
|
|
|
|
|
|
=over |
1651
|
|
|
|
|
|
|
|
1652
|
|
|
|
|
|
|
=item Note: |
1653
|
|
|
|
|
|
|
|
1654
|
|
|
|
|
|
|
If you use the B<footer> option, be sure it includes the closing BODY
1655
|
|
|
|
|
|
|
and HTML tags (unless, of course, you are using the B<inline> option).
1656
|
|
|
|
|
|
|
|
1657
|
|
|
|
|
|
|
=back |
1658
|
|
|
|
|
|
|
|
1659
|
|
|
|
|
|
|
If the B<header> option is not specified, the appropriate starting
1660
|
|
|
|
|
|
|
HTML markup will be added, unless the B<toc_only> option is specified.
1661
|
|
|
|
|
|
|
If the B<footer> option is not specified, the appropriate closing
1662
|
|
|
|
|
|
|
HTML markup will be added, unless the B<toc_only> option is specified.
1663
|
|
|
|
|
|
|
|
1664
|
|
|
|
|
|
|
If you do not want/need to deal with header, and footer, files, then |
1665
|
|
|
|
|
|
|
you are allowed to specify the title, B<title> option, of the ToC file;
1666
|
|
|
|
|
|
|
and it allows you to specify a heading, or label, to put before ToC |
1667
|
|
|
|
|
|
|
entries' list, the B<toc_label> option. Both options have default values.
1668
|
|
|
|
|
|
|
|
1669
|
|
|
|
|
|
|
If you do not want HTML page tags to be supplied, and just want |
1670
|
|
|
|
|
|
|
the ToC itself, then specify the B<toc_only> option.
1671
|
|
|
|
|
|
|
If there are no B<header> or B<footer> files, then this will simply
1672
|
|
|
|
|
|
|
output the contents of B<toc_label> and the ToC itself.
1673
|
|
|
|
|
|
|
|
1674
|
|
|
|
|
|
|
=head2 Inlining the ToC |
1675
|
|
|
|
|
|
|
|
1676
|
|
|
|
|
|
|
The ability to incorporate the ToC directly into an HTML document |
1677
|
|
|
|
|
|
|
is supported via the B<inline> option.
1678
|
|
|
|
|
|
|
|
1679
|
|
|
|
|
|
|
Inlining will be done on the first file in the list of files processed, |
1680
|
|
|
|
|
|
|
and will only be done if that file contains an opening tag matching the |
1681
|
|
|
|
|
|
|
B<toc_tag> value.
1682
|
|
|
|
|
|
|
|
1683
|
|
|
|
|
|
|
If B<overwrite> is true, then the first file in the list will be
1684
|
|
|
|
|
|
|
overwritten, with the generated ToC inserted at the appropriate spot. |
1685
|
|
|
|
|
|
|
Otherwise a modified version of the first file is output to either STDOUT |
1686
|
|
|
|
|
|
|
or to the output file defined by the B<outfile> option.
1687
|
|
|
|
|
|
|
|
1688
|
|
|
|
|
|
|
The options B<toc_tag> and B<toc_tag_replace> are used to determine where
1689
|
|
|
|
|
|
|
and how the ToC is inserted into the output. |
1690
|
|
|
|
|
|
|
|
1691
|
|
|
|
|
|
|
B<Example 1>
1692
|
|
|
|
|
|
|
|
1693
|
|
|
|
|
|
|
$toc->generate_toc(inline=>1, |
1694
|
|
|
|
|
|
|
toc_tag => 'BODY', |
1695
|
|
|
|
|
|
|
toc_tag_replace => 0, |
1696
|
|
|
|
|
|
|
... |
1697
|
|
|
|
|
|
|
); |
1698
|
|
|
|
|
|
|
|
1699
|
|
|
|
|
|
|
This will put the generated ToC after the BODY tag of the first file. |
1700
|
|
|
|
|
|
|
If the B<header> option is specified, then the contents of the specified
1701
|
|
|
|
|
|
|
file are inserted after the BODY tag. If the B<toc_label> option is not
1702
|
|
|
|
|
|
|
empty, then the text specified by the B<toc_label> option is inserted.
1703
|
|
|
|
|
|
|
Then the ToC is inserted, and finally, if the B<footer> option is
1704
|
|
|
|
|
|
|
specified, it inserts the footer. Then the rest of the input file |
1705
|
|
|
|
|
|
|
follows as it was before. |
1706
|
|
|
|
|
|
|
|
1707
|
|
|
|
|
|
|
B<Example 2>
1708
|
|
|
|
|
|
|
|
1709
|
|
|
|
|
|
|
$toc->generate_toc(inline=>1, |
1710
|
|
|
|
|
|
|
toc_tag => '!--toc--', |
1711
|
|
|
|
|
|
|
toc_tag_replace => 1, |
1712
|
|
|
|
|
|
|
... |
1713
|
|
|
|
|
|
|
); |
1714
|
|
|
|
|
|
|
|
1715
|
|
|
|
|
|
|
This will put the generated ToC after the first comment of the form |
1716
|
|
|
|
|
|
|
<!--toc-->, and that comment will be replaced by the ToC
1717
|
|
|
|
|
|
|
(in the order |
1718
|
|
|
|
|
|
|
B<header>
1719
|
|
|
|
|
|
|
B<toc_label>
1720
|
|
|
|
|
|
|
ToC |
1721
|
|
|
|
|
|
|
B<footer>)
1722
|
|
|
|
|
|
|
followed by the rest of the input file. |
1723
|
|
|
|
|
|
|
|
1724
|
|
|
|
|
|
|
=over |
1725
|
|
|
|
|
|
|
|
1726
|
|
|
|
|
|
|
=item Note: |
1727
|
|
|
|
|
|
|
|
1728
|
|
|
|
|
|
|
The header file should not contain the beginning HTML tag |
1729
|
|
|
|
|
|
|
and HEAD element since the HTML file being processed should |
1730
|
|
|
|
|
|
|
already contain these tags/elements. |
1731
|
|
|
|
|
|
|
|
1732
|
|
|
|
|
|
|
=back |
1733
|
|
|
|
|
|
|
|
1734
|
|
|
|
|
|
|
=head1 NOTES |
1735
|
|
|
|
|
|
|
|
1736
|
|
|
|
|
|
|
=over |
1737
|
|
|
|
|
|
|
|
1738
|
|
|
|
|
|
|
=item * |
1739
|
|
|
|
|
|
|
|
1740
|
|
|
|
|
|
|
HTML::GenToc is smart enough to detect anchors inside significant |
1741
|
|
|
|
|
|
|
elements. If the anchor defines the NAME attribute, HTML::GenToc uses |
1742
|
|
|
|
|
|
|
the value. Else, it adds its own NAME attribute to the anchor. |
1743
|
|
|
|
|
|
|
If B<use_id> is true, then it likewise checks for and uses IDs.
1744
|
|
|
|
|
|
|
|
1745
|
|
|
|
|
|
|
=item * |
1746
|
|
|
|
|
|
|
|
1747
|
|
|
|
|
|
|
The TITLE element is treated specially if specified in the B<toc_entry>
1748
|
|
|
|
|
|
|
option. It is illegal to insert anchors (A) into TITLE elements. |
1749
|
|
|
|
|
|
|
Therefore, HTML::GenToc will actually link to the filename itself |
1750
|
|
|
|
|
|
|
instead of the TITLE element of the document. |
1751
|
|
|
|
|
|
|
|
1752
|
|
|
|
|
|
|
=item * |
1753
|
|
|
|
|
|
|
|
1754
|
|
|
|
|
|
|
HTML::GenToc will ignore a significant element if it does not contain |
1755
|
|
|
|
|
|
|
any non-whitespace characters. A warning message is generated if |
1756
|
|
|
|
|
|
|
such a condition exists. |
1757
|
|
|
|
|
|
|
|
1758
|
|
|
|
|
|
|
=item * |
1759
|
|
|
|
|
|
|
|
1760
|
|
|
|
|
|
|
If you have a sequence of significant elements that change in a slightly |
1761
|
|
|
|
|
|
|
disordered fashion, such as H1 -> H3 -> H2 or even H2 -> H1, though |
1762
|
|
|
|
|
|
|
HTML::GenToc deals with this to create a list which is still good HTML, if |
1763
|
|
|
|
|
|
|
you are using an ordered list to that depth, then you will get strange |
1764
|
|
|
|
|
|
|
numbering, as an extra list element will have been inserted to nest the |
1765
|
|
|
|
|
|
|
elements at the correct level. |
1766
|
|
|
|
|
|
|
|
1767
|
|
|
|
|
|
|
For example (H2 -> H1 with ol_num_levels=1): |
1768
|
|
|
|
|
|
|
|
1769
|
|
|
|
|
|
|
1. |
1770
|
|
|
|
|
|
|
* My H2 Header |
1771
|
|
|
|
|
|
|
2. My H1 Header |
1772
|
|
|
|
|
|
|
|
1773
|
|
|
|
|
|
|
For example (H1 -> H3 -> H2 with ol_num_levels=0 and H3 also being |
1774
|
|
|
|
|
|
|
significant): |
1775
|
|
|
|
|
|
|
|
1776
|
|
|
|
|
|
|
1. My H1 Header |
1777
|
|
|
|
|
|
|
1. |
1778
|
|
|
|
|
|
|
1. My H3 Header |
1779
|
|
|
|
|
|
|
2. My H2 Header |
1780
|
|
|
|
|
|
|
2. My Second H1 Header |
1781
|
|
|
|
|
|
|
|
1782
|
|
|
|
|
|
|
In cases such as this it may be better not to use the B<ol> option.
1783
|
|
|
|
|
|
|
|
1784
|
|
|
|
|
|
|
=back |
1785
|
|
|
|
|
|
|
|
1786
|
|
|
|
|
|
|
=head1 CAVEATS |
1787
|
|
|
|
|
|
|
|
1788
|
|
|
|
|
|
|
=over |
1789
|
|
|
|
|
|
|
|
1790
|
|
|
|
|
|
|
=item * |
1791
|
|
|
|
|
|
|
|
1792
|
|
|
|
|
|
|
Version 3.10 (and above) generates more verbose (SEO-friendly) anchors |
1793
|
|
|
|
|
|
|
than prior versions. Thus anchors generated with earlier versions will |
1794
|
|
|
|
|
|
|
not match version 3.10 anchors. |
1795
|
|
|
|
|
|
|
|
1796
|
|
|
|
|
|
|
=item * |
1797
|
|
|
|
|
|
|
|
1798
|
|
|
|
|
|
|
Version 3.00 (and above) of HTML::GenToc is not compatible with |
1799
|
|
|
|
|
|
|
Version 2.x of HTML::GenToc. It is now designed to do everything |
1800
|
|
|
|
|
|
|
in one pass, and has dropped certain options: the B option |
1801
|
|
|
|
|
|
|
is no longer used (it has been replaced with the B option); |
1802
|
|
|
|
|
|
|
the B option no longer exists; use the B option |
1803
|
|
|
|
|
|
|
instead; the B option is no longer supported. Also the old |
1804
|
|
|
|
|
|
|
array-parsing of arguments is no longer supported. There is no longer |
1805
|
|
|
|
|
|
|
a B method; everything is done with B. |
1806
|
|
|
|
|
|
|
|
1807
|
|
|
|
|
|
|
It now generates lower-case tags rather than upper-case ones. |
1808
|
|
|
|
|
|
|
|
1809
|
|
|
|
|
|
|
=item * |
1810
|
|
|
|
|
|
|
|
1811
|
|
|
|
|
|
|
HTML::GenToc is not very efficient (memory and speed), and can be |
1812
|
|
|
|
|
|
|
slow for large documents. |
1813
|
|
|
|
|
|
|
|
1814
|
|
|
|
|
|
|
=item * |
1815
|
|
|
|
|
|
|
|
1816
|
|
|
|
|
|
|
Now that generation of anchors and of the ToC are done in one pass, |
1817
|
|
|
|
|
|
|
even more memory is used than was the case before. This is more notable |
1818
|
|
|
|
|
|
|
when processing multiple files, since all files are read into memory |
1819
|
|
|
|
|
|
|
before processing them. |
1820
|
|
|
|
|
|
|
|
1821
|
|
|
|
|
|
|
=item * |
1822
|
|
|
|
|
|
|
|
1823
|
|
|
|
|
|
|
Invalid markup will be generated if a significant element is |
1824
|
|
|
|
|
|
|
contained inside of an anchor. For example: |
1825
|
|
|
|
|
|
|
|
1826
|
|
|
|
|
|
|
    <a name="foo"><h1>The FOO command</h1></a>
1827
|
|
|
|
|
|
|
|
1828
|
|
|
|
|
|
|
will be converted to (if H1 is a significant element), |
1829
|
|
|
|
|
|
|
|
1830
|
|
|
|
|
|
|
The FOO command |
1831
|
|
|
|
|
|
|
|
1832
|
|
|
|
|
|
|
which is illegal since anchors cannot be nested. |
1833
|
|
|
|
|
|
|
|
1834
|
|
|
|
|
|
|
It is better style to put anchor statements within the element to |
1835
|
|
|
|
|
|
|
be anchored. For example, the following is preferred: |
1836
|
|
|
|
|
|
|
|
1837
|
|
|
|
|
|
|
    <h1><a name="foo">The FOO command</a></h1>
1838
|
|
|
|
|
|
|
|
1839
|
|
|
|
|
|
|
HTML::GenToc will detect the "foo" name and use it. |
1840
|
|
|
|
|
|
|
|
1841
|
|
|
|
|
|
|
=item * |
1842
|
|
|
|
|
|
|
|
1843
|
|
|
|
|
|
|
name attributes without quotes are not recognized. |
1844
|
|
|
|
|
|
|
|
1845
|
|
|
|
|
|
|
=back |
1846
|
|
|
|
|
|
|
|
1847
|
|
|
|
|
|
|
=head1 BUGS |
1848
|
|
|
|
|
|
|
|
1849
|
|
|
|
|
|
|
Tell me about them. |
1850
|
|
|
|
|
|
|
|
1851
|
|
|
|
|
|
|
=head1 REQUIRES |
1852
|
|
|
|
|
|
|
|
1853
|
|
|
|
|
|
|
The installation of this module requires C. The module |
1854
|
|
|
|
|
|
|
depends on C, C and C and uses |
1855
|
|
|
|
|
|
|
C for debugging purposes. The hypertoc script depends on |
1856
|
|
|
|
|
|
|
C, C and C. Testing of this |
1857
|
|
|
|
|
|
|
distribution depends on C. |
1858
|
|
|
|
|
|
|
|
1859
|
|
|
|
|
|
|
=head1 INSTALLATION |
1860
|
|
|
|
|
|
|
|
1861
|
|
|
|
|
|
|
To install this module, run the following commands: |
1862
|
|
|
|
|
|
|
|
1863
|
|
|
|
|
|
|
perl Build.PL |
1864
|
|
|
|
|
|
|
./Build |
1865
|
|
|
|
|
|
|
./Build test |
1866
|
|
|
|
|
|
|
./Build install |
1867
|
|
|
|
|
|
|
|
1868
|
|
|
|
|
|
|
Or, if you're on a platform (like DOS or Windows) that doesn't like the |
1869
|
|
|
|
|
|
|
"./" notation, you can do this: |
1870
|
|
|
|
|
|
|
|
1871
|
|
|
|
|
|
|
perl Build.PL |
1872
|
|
|
|
|
|
|
perl Build |
1873
|
|
|
|
|
|
|
perl Build test |
1874
|
|
|
|
|
|
|
perl Build install |
1875
|
|
|
|
|
|
|
|
1876
|
|
|
|
|
|
|
In order to install somewhere other than the default, such as |
1877
|
|
|
|
|
|
|
in a directory under your home directory, like "/home/fred/perl" |
1878
|
|
|
|
|
|
|
go |
1879
|
|
|
|
|
|
|
|
1880
|
|
|
|
|
|
|
perl Build.PL --install_base /home/fred/perl |
1881
|
|
|
|
|
|
|
|
1882
|
|
|
|
|
|
|
as the first step instead. |
1883
|
|
|
|
|
|
|
|
1884
|
|
|
|
|
|
|
This will install the files underneath /home/fred/perl. |
1885
|
|
|
|
|
|
|
|
1886
|
|
|
|
|
|
|
You will then need to make sure that you alter the PERL5LIB variable to |
1887
|
|
|
|
|
|
|
find the modules, and the PATH variable to find the script. |
1888
|
|
|
|
|
|
|
|
1889
|
|
|
|
|
|
|
Therefore you will need to change: |
1890
|
|
|
|
|
|
|
your path, to include /home/fred/perl/script (where the script will be) |
1891
|
|
|
|
|
|
|
|
1892
|
|
|
|
|
|
|
PATH=/home/fred/perl/script:${PATH} |
1893
|
|
|
|
|
|
|
|
1894
|
|
|
|
|
|
|
the PERL5LIB variable to add /home/fred/perl/lib |
1895
|
|
|
|
|
|
|
|
1896
|
|
|
|
|
|
|
PERL5LIB=/home/fred/perl/lib:${PERL5LIB} |
1897
|
|
|
|
|
|
|
|
1898
|
|
|
|
|
|
|
=head1 SEE ALSO |
1899
|
|
|
|
|
|
|
|
1900
|
|
|
|
|
|
|
perl(1) |
1901
|
|
|
|
|
|
|
htmltoc(1) |
1902
|
|
|
|
|
|
|
hypertoc(1) |
1903
|
|
|
|
|
|
|
|
1904
|
|
|
|
|
|
|
=head1 AUTHOR |
1905
|
|
|
|
|
|
|
|
1906
|
|
|
|
|
|
|
Kathryn Andersen (RUBYKAT) http://www.katspace.org/tools/hypertoc/ |
1907
|
|
|
|
|
|
|
|
1908
|
|
|
|
|
|
|
Based on htmltoc by Earl Hood ehood AT medusa.acs.uci.edu |
1909
|
|
|
|
|
|
|
|
1910
|
|
|
|
|
|
|
Contributions by Dan Dascalescu, |
1911
|
|
|
|
|
|
|
|
1912
|
|
|
|
|
|
|
=head1 COPYRIGHT |
1913
|
|
|
|
|
|
|
|
1914
|
|
|
|
|
|
|
Copyright (C) 1994-1997 Earl Hood, ehood AT medusa.acs.uci.edu |
1915
|
|
|
|
|
|
|
Copyright (C) 2002-2008 Kathryn Andersen |
1916
|
|
|
|
|
|
|
|
1917
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify |
1918
|
|
|
|
|
|
|
it under the terms of the GNU General Public License as published by |
1919
|
|
|
|
|
|
|
the Free Software Foundation; either version 2 of the License, or |
1920
|
|
|
|
|
|
|
(at your option) any later version. |
1921
|
|
|
|
|
|
|
|
1922
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful, |
1923
|
|
|
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of |
1924
|
|
|
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
1925
|
|
|
|
|
|
|
GNU General Public License for more details. |
1926
|
|
|
|
|
|
|
|
1927
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License |
1928
|
|
|
|
|
|
|
along with this program; if not, write to the Free Software |
1929
|
|
|
|
|
|
|
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
1930
|
|
|
|
|
|
|
|
1931
|
|
|
|
|
|
|
=cut |