| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package HTML::TextToHTML; |
|
2
|
|
|
|
|
|
|
{ |
|
3
|
|
|
|
|
|
|
$HTML::TextToHTML::VERSION = '2.5201'; |
|
4
|
|
|
|
|
|
|
} |
|
5
|
6
|
|
|
6
|
|
148975
|
use 5.8.1; |
|
|
6
|
|
|
|
|
89
|
|
|
|
6
|
|
|
|
|
308
|
|
|
6
|
6
|
|
|
6
|
|
33
|
use strict; |
|
|
6
|
|
|
|
|
10
|
|
|
|
6
|
|
|
|
|
1136
|
|
|
7
|
|
|
|
|
|
|
#------------------------------------------------------------------------ |
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
=head1 NAME |
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
HTML::TextToHTML - convert plain text file to HTML. |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
=head1 VERSION |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
version 2.5201 |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
From the command line: |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
txt2html I |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
From Scripts: |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
use HTML::TextToHTML; |
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
# create a new object |
|
28
|
|
|
|
|
|
|
my $conv = new HTML::TextToHTML(); |
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
# convert a file |
|
31
|
|
|
|
|
|
|
$conv->txt2html(infile=>[$text_file], |
|
32
|
|
|
|
|
|
|
outfile=>$html_file, |
|
33
|
|
|
|
|
|
|
title=>"Wonderful Things", |
|
34
|
|
|
|
|
|
|
mail=>1, |
|
35
|
|
|
|
|
|
|
]); |
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
# reset arguments |
|
38
|
|
|
|
|
|
|
$conv->args(infile=>[], mail=>0); |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
# convert a string |
|
41
|
|
|
|
|
|
|
$newstring = $conv->process_chunk($mystring) |
|
42
|
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
44
|
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
HTML::TextToHTML converts plain text files to HTML. The txt2html script |
|
46
|
|
|
|
|
|
|
uses this module to do the same from the command-line. |
|
47
|
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
It supports headings, tables, lists, simple character markup, and |
|
49
|
|
|
|
|
|
|
hyperlinking, and is highly customizable. It recognizes some of the |
|
50
|
|
|
|
|
|
|
apparent structure of the source document (mostly whitespace and |
|
51
|
|
|
|
|
|
|
typographic layout), and attempts to mark that structure explicitly |
|
52
|
|
|
|
|
|
|
using HTML. The purpose for this tool is to provide an easier way of |
|
53
|
|
|
|
|
|
|
converting existing text documents to HTML format, giving something nicer |
|
54
|
|
|
|
|
|
|
than just whapping the text into a big PRE block. |
|
55
|
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
=head2 History |
|
57
|
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
The original txt2html script was written by Seth Golub (see |
|
59
|
|
|
|
|
|
|
http://www.aigeek.com/txt2html/), and converted to a perl module by |
|
60
|
|
|
|
|
|
|
Kathryn Andersen (see http://www.katspace.com/tools/text_to_html/) and |
|
61
|
|
|
|
|
|
|
made into a sourceforge project by Sun Tong (see |
|
62
|
|
|
|
|
|
|
http://sourceforge.net/projects/txt2html/). Earlier versions of the |
|
63
|
|
|
|
|
|
|
HTML::TextToHTML module called the included script texthyper so as not |
|
64
|
|
|
|
|
|
|
to clash with the original txt2html script, but now the projects have |
|
65
|
|
|
|
|
|
|
all been merged. |
|
66
|
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=head1 OPTIONS |
|
68
|
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
All arguments can be set when the object is created, and further options |
|
70
|
|
|
|
|
|
|
can be set when calling the actual txt2html method. Arguments |
|
71
|
|
|
|
|
|
|
to methods can take a hash of arguments. |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
Note that all option-names must match exactly -- no abbreviations are |
|
74
|
|
|
|
|
|
|
allowed. The argument-keys are expected to have values matching those |
|
75
|
|
|
|
|
|
|
required for that argument -- whether that be a boolean, a string, a |
|
76
|
|
|
|
|
|
|
reference to an array or a reference to a hash. These will replace any |
|
77
|
|
|
|
|
|
|
value for that argument that might have been there before. |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=over |
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
=item append_file |
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
append_file=>I |
|
84
|
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
If you want something appended by default, put the filename here. |
|
86
|
|
|
|
|
|
|
The appended text will not be processed at all, so make sure it's |
|
87
|
|
|
|
|
|
|
plain text or correct HTML. i.e. do not have things like: |
|
88
|
|
|
|
|
|
|
Mary Andersen Ekitty@example.comE |
|
89
|
|
|
|
|
|
|
but instead, have: |
|
90
|
|
|
|
|
|
|
Mary Andersen <kitty@example.com> |
|
91
|
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
(default: nothing) |
|
93
|
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
=item append_head |
|
95
|
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
append_head=>I |
|
97
|
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
If you want something appended to the head by default, put the filename here. |
|
99
|
|
|
|
|
|
|
The appended text will not be processed at all, so make sure it's |
|
100
|
|
|
|
|
|
|
plain text or correct HTML. i.e. do not have things like: |
|
101
|
|
|
|
|
|
|
Mary Andersen Ekitty@example.comE |
|
102
|
|
|
|
|
|
|
but instead, have: |
|
103
|
|
|
|
|
|
|
Mary Andersen <kitty@example.com> |
|
104
|
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
(default: nothing) |
|
106
|
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
=item body_deco |
|
108
|
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
body_deco=>I |
|
110
|
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
Body decoration string: a string to be added to the BODY tag so that |
|
112
|
|
|
|
|
|
|
one can set attributes to the BODY (such as class, style, bgcolor etc) |
|
113
|
|
|
|
|
|
|
For example, "class='withimage'". |
|
114
|
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=item bold_delimiter |
|
116
|
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
bold_delimiter=>I |
|
118
|
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
This defines what character (or string) is taken to be the delimiter of |
|
120
|
|
|
|
|
|
|
text which is to be interpreted as bold (that is, to be given a STRONG |
|
121
|
|
|
|
|
|
|
tag). If this is empty, then no bolding of text will be done. |
|
122
|
|
|
|
|
|
|
(default: #) |
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=item bullets |
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
bullets=>I |
|
127
|
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
This defines what single characters are taken to be "bullet" characters |
|
129
|
|
|
|
|
|
|
for unordered lists. Note that because this is used as a character |
|
130
|
|
|
|
|
|
|
class, if you use '-' it must come first. |
|
131
|
|
|
|
|
|
|
(default:-=o*\267) |
|
132
|
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
=item bullets_ordered |
|
134
|
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
bullets_ordered=>I |
|
136
|
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
This defines what single characters are taken to be "bullet" placeholder |
|
138
|
|
|
|
|
|
|
characters for ordered lists. Ordered lists are normally marked by |
|
139
|
|
|
|
|
|
|
a number or letter followed by '.' or ')' or ']' or ':'. If an ordered |
|
140
|
|
|
|
|
|
|
bullet is used, then it simply indicates that this is an ordered list, |
|
141
|
|
|
|
|
|
|
without giving explicit numbers. |
|
142
|
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
Note that because this is used as a character class, if you use '-' it |
|
144
|
|
|
|
|
|
|
must come first. |
|
145
|
|
|
|
|
|
|
(default:nothing) |
|
146
|
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=item caps_tag |
|
148
|
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
caps_tag=>I |
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
Tag to put around all-caps lines |
|
152
|
|
|
|
|
|
|
(default: STRONG) |
|
153
|
|
|
|
|
|
|
If an empty tag is given, then no tag will be put around all-caps lines. |
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
=item custom_heading_regexp |
|
156
|
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
custom_heading_regexp=>\@custom_headings |
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
Add patterns for headings. Header levels are assigned by regexp in the |
|
160
|
|
|
|
|
|
|
order seen in the input text. When a line matches a custom header |
|
161
|
|
|
|
|
|
|
regexp, it is tagged as a header. If it's the first time that |
|
162
|
|
|
|
|
|
|
particular regexp has matched, the next available header level is |
|
163
|
|
|
|
|
|
|
associated with it and applied to the line. Any later matches of that |
|
164
|
|
|
|
|
|
|
regexp will use the same header level. Therefore, if you want to match |
|
165
|
|
|
|
|
|
|
numbered header lines, you could use something like this: |
|
166
|
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
my @custom_headings = ('^ *\d+\. \w+', |
|
168
|
|
|
|
|
|
|
'^ *\d+\.\d+\. \w+', |
|
169
|
|
|
|
|
|
|
'^ *\d+\.\d+\.\d+\. \w+'); |
|
170
|
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
... |
|
172
|
|
|
|
|
|
|
custom_heading_regexp=>\@custom_headings, |
|
173
|
|
|
|
|
|
|
... |
|
174
|
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
Then lines like |
|
176
|
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
" 1. Examples " |
|
178
|
|
|
|
|
|
|
" 1.1. Things" |
|
179
|
|
|
|
|
|
|
and " 4.2.5. Cold Fusion" |
|
180
|
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
Would be marked as H1, H2, and H3 (assuming they were found in that |
|
182
|
|
|
|
|
|
|
order, and that no other header styles were encountered). |
|
183
|
|
|
|
|
|
|
If you prefer that the first one specified always be H1, the second |
|
184
|
|
|
|
|
|
|
always be H2, the third H3, etc, then use the "explicit_headings" |
|
185
|
|
|
|
|
|
|
option. |
|
186
|
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
This expects a reference to an array of strings. |
|
188
|
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
(default: none) |
|
190
|
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
=item default_link_dict |
|
192
|
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
default_link_dict=>I |
|
194
|
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
The name of the default "user" link dictionary. |
|
196
|
|
|
|
|
|
|
(default: "$ENV{'HOME'}/.txt2html.dict" -- this is the same as for |
|
197
|
|
|
|
|
|
|
the txt2html script. If there is no $ENV{HOME} then it is just '.txt2html.dict') |
|
198
|
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
=item demoronize |
|
200
|
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
demoronize=>1 |
|
202
|
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
Convert Microsoft-generated character codes that are non-ISO codes into |
|
204
|
|
|
|
|
|
|
something more reasonable. |
|
205
|
|
|
|
|
|
|
(default:true) |
|
206
|
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
=item doctype |
|
208
|
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
doctype=>I |
|
210
|
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
This gets put in the DOCTYPE field at the top of the document, unless it's |
|
212
|
|
|
|
|
|
|
empty. |
|
213
|
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
Default : |
|
215
|
|
|
|
|
|
|
'-//W3C//DTD HTML 4.01//EN" |
|
216
|
|
|
|
|
|
|
"http://www.w3.org/TR/html4/strict.dtd' |
|
217
|
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
If B is true, the contents of this is ignored, unless it's |
|
219
|
|
|
|
|
|
|
empty, in which case no DOCTYPE declaration is output. |
|
220
|
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
=item eight_bit_clean |
|
222
|
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
eight_bit_clean=>1 |
|
224
|
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
If false, convert Latin-1 characters to HTML entities. |
|
226
|
|
|
|
|
|
|
If true, this conversion is disabled; also "demoronize" is set to |
|
227
|
|
|
|
|
|
|
false, since this also changes 8-bit characters. |
|
228
|
|
|
|
|
|
|
(default: false) |
|
229
|
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
=item escape_HTML_chars |
|
231
|
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
escape_HTML_chars=>1 |
|
233
|
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
turn & E E into & > < |
|
235
|
|
|
|
|
|
|
(default: true) |
|
236
|
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
=item explicit_headings |
|
238
|
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
explicit_headings=>1 |
|
240
|
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
Don't try to find any headings except the ones specified in the |
|
242
|
|
|
|
|
|
|
--custom_heading_regexp option. |
|
243
|
|
|
|
|
|
|
Also, the custom headings will not be assigned levels in the order they |
|
244
|
|
|
|
|
|
|
are encountered in the document, but in the order they are specified on |
|
245
|
|
|
|
|
|
|
the custom_heading_regexp option. |
|
246
|
|
|
|
|
|
|
(default: false) |
|
247
|
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
=item extract |
|
249
|
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
extract=>1 |
|
251
|
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
Extract Mode; don't put HTML headers or footers on the result, just |
|
253
|
|
|
|
|
|
|
the plain HTML (thus making the result suitable for inserting into |
|
254
|
|
|
|
|
|
|
another document (or as part of the output of a CGI script). |
|
255
|
|
|
|
|
|
|
(default: false) |
|
256
|
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
=item hrule_min |
|
258
|
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
hrule_min=>I |
|
260
|
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
Min number of ---s for an HRule. |
|
262
|
|
|
|
|
|
|
(default: 4) |
|
263
|
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
=item indent_width |
|
265
|
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
indent_width=>I |
|
267
|
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
Indents this many spaces for each level of a list. |
|
269
|
|
|
|
|
|
|
(default: 2) |
|
270
|
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
=item indent_par_break |
|
272
|
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
indent_par_break=>1 |
|
274
|
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
Treat paragraphs marked solely by indents as breaks with indents. |
|
276
|
|
|
|
|
|
|
That is, instead of taking a three-space indent as a new paragraph, |
|
277
|
|
|
|
|
|
|
put in a and three non-breaking spaces instead. |
|
278
|
|
|
|
|
|
|
(see also --preserve_indent) |
|
279
|
|
|
|
|
|
|
(default: false) |
|
280
|
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
=item infile |
|
282
|
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
infile=>\@my_files |
|
284
|
|
|
|
|
|
|
infile=>['chapter1.txt', 'chapter2.txt'] |
|
285
|
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
The name of the input file(s). |
|
287
|
|
|
|
|
|
|
This expects a reference to an array of filenames. |
|
288
|
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
The special filename '-' designates STDIN. |
|
290
|
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
See also L and L. |
|
292
|
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
(default:-) |
|
294
|
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
=item inhandle |
|
296
|
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
inhandle=>\@my_handles |
|
298
|
|
|
|
|
|
|
inhandle=>[\*MYINHANDLE, \*STDIN] |
|
299
|
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
An array of input filehandles; use this instead of |
|
301
|
|
|
|
|
|
|
L or L to use a filehandle or filehandles |
|
302
|
|
|
|
|
|
|
as input. |
|
303
|
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
=item instring |
|
305
|
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
instring=>\@my_strings |
|
307
|
|
|
|
|
|
|
instring=>[$string1, $string2] |
|
308
|
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
An array of input strings; use this instead of |
|
310
|
|
|
|
|
|
|
L or L to use a string or strings |
|
311
|
|
|
|
|
|
|
as input. |
|
312
|
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
=item italic_delimiter |
|
314
|
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
italic_delimiter=>I |
|
316
|
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
This defines what character (or string) is taken to be the delimiter of |
|
318
|
|
|
|
|
|
|
text which is to be interpreted as italic (that is, to be given a EM |
|
319
|
|
|
|
|
|
|
tag). If this is empty, no italicising of text will be done. |
|
320
|
|
|
|
|
|
|
(default: *) |
|
321
|
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
=item underline_delimiter |
|
323
|
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
underline_delimiter=>I |
|
325
|
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
This defines what character (or string) is taken to be the delimiter of |
|
327
|
|
|
|
|
|
|
text which is to be interpreted as underlined (that is, to be given a U |
|
328
|
|
|
|
|
|
|
tag). If this is empty, no underlining of text will be done. |
|
329
|
|
|
|
|
|
|
(default: _) |
|
330
|
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
=item links_dictionaries |
|
332
|
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
links_dictionaries=>\@my_link_dicts |
|
334
|
|
|
|
|
|
|
links_dictionaries=>['url_links.dict', 'format_links.dict'] |
|
335
|
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
File(s) to use as a link-dictionary. There can be more than one of |
|
337
|
|
|
|
|
|
|
these. These are in addition to the Global Link Dictionary and the User |
|
338
|
|
|
|
|
|
|
Link Dictionary. This expects a reference to an array of filenames. |
|
339
|
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
=item link_only |
|
341
|
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
link_only=>1 |
|
343
|
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
Do no escaping or marking up at all, except for processing the links |
|
345
|
|
|
|
|
|
|
dictionary file and applying it. This is useful if you want to use |
|
346
|
|
|
|
|
|
|
the linking feature on an HTML document. If the HTML is a |
|
347
|
|
|
|
|
|
|
complete document (includes HTML,HEAD,BODY tags, etc) then you'll |
|
348
|
|
|
|
|
|
|
probably want to use the --extract option also. |
|
349
|
|
|
|
|
|
|
(default: false) |
|
350
|
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
=item lower_case_tags |
|
352
|
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
lower_case_tags=>1 |
|
354
|
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
Force all tags to be in lower-case. |
|
356
|
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
=item mailmode |
|
358
|
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
mailmode=>1 |
|
360
|
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
Deal with mail headers & quoted text. The mail header paragraph is |
|
362
|
|
|
|
|
|
|
given the class 'mail_header', and mail-quoted text is given the class |
|
363
|
|
|
|
|
|
|
'quote_mail'. |
|
364
|
|
|
|
|
|
|
(default: false) |
|
365
|
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
=item make_anchors |
|
367
|
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
make_anchors=>0 |
|
369
|
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
Should we try to make anchors in headings? |
|
371
|
|
|
|
|
|
|
(default: true) |
|
372
|
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
=item make_links |
|
374
|
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
make_links=>0 |
|
376
|
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
Should we try to build links? If this is false, then the links |
|
378
|
|
|
|
|
|
|
dictionaries are not consulted and only structural text-to-HTML |
|
379
|
|
|
|
|
|
|
conversion is done. (default: true) |
|
380
|
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
=item make_tables |
|
382
|
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
make_tables=>1 |
|
384
|
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
Should we try to build tables? If true, spots tables and marks them up |
|
386
|
|
|
|
|
|
|
appropriately. See L for information on how tables |
|
387
|
|
|
|
|
|
|
should be formatted. |
|
388
|
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
This overrides the detection of lists; if something looks like a table, |
|
390
|
|
|
|
|
|
|
it is taken as a table, and list-checking is not done for that |
|
391
|
|
|
|
|
|
|
paragraph. |
|
392
|
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
(default: false) |
|
394
|
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
=item min_caps_length |
|
396
|
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
min_caps_length=>I |
|
398
|
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
min sequential CAPS for an all-caps line |
|
400
|
|
|
|
|
|
|
(default: 3) |
|
401
|
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
=item outfile |
|
403
|
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
outfile=>I |
|
405
|
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
The name of the output file. If it is "-" then the output goes |
|
407
|
|
|
|
|
|
|
to Standard Output. |
|
408
|
|
|
|
|
|
|
(default: - ) |
|
409
|
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
=item outhandle |
|
411
|
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
The output filehandle; if this is given then the output goes |
|
413
|
|
|
|
|
|
|
to this filehandle instead of to the file given in L. |
|
414
|
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
=item par_indent |
|
416
|
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
par_indent=>I |
|
418
|
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
Minumum number of spaces indented in first lines of paragraphs. |
|
420
|
|
|
|
|
|
|
Only used when there's no blank line |
|
421
|
|
|
|
|
|
|
preceding the new paragraph. |
|
422
|
|
|
|
|
|
|
(default: 2) |
|
423
|
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
=item preformat_trigger_lines |
|
425
|
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
preformat_trigger_lines=>I |
|
427
|
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
How many lines of preformatted-looking text are needed to switch to |
|
429
|
|
|
|
|
|
|
<= 0 : Preformat entire document |
|
430
|
|
|
|
|
|
|
1 : one line triggers |
|
431
|
|
|
|
|
|
|
>= 2 : two lines trigger |
|
432
|
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
(default: 2) |
|
434
|
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
=item endpreformat_trigger_lines |
|
436
|
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
endpreformat_trigger_lines=>I |
|
438
|
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
How many lines of unpreformatted-looking text are needed to switch from |
|
440
|
|
|
|
|
|
|
<= 0 : Never preformat within document |
|
441
|
|
|
|
|
|
|
1 : one line triggers |
|
442
|
|
|
|
|
|
|
>= 2 : two lines trigger |
|
443
|
|
|
|
|
|
|
(default: 2) |
|
444
|
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
NOTE for preformat_trigger_lines and endpreformat_trigger_lines: |
|
446
|
|
|
|
|
|
|
A zero takes precedence. If one is zero, the other is ignored. |
|
447
|
|
|
|
|
|
|
If both are zero, entire document is preformatted. |
|
448
|
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
=item preformat_start_marker |
|
450
|
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
preformat_start_marker=>I |
|
452
|
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
What flags the start of a preformatted section if --use_preformat_marker |
|
454
|
|
|
|
|
|
|
is true. |
|
455
|
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
(default: "^(:?(:?<)|<)PRE(:?(:?>)|>)\$") |
|
457
|
|
|
|
|
|
|
|
|
458
|
|
|
|
|
|
|
=item preformat_end_marker |
|
459
|
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
preformat_end_marker=>I |
|
461
|
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
What flags the end of a preformatted section if --use_preformat_marker |
|
463
|
|
|
|
|
|
|
is true. |
|
464
|
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
(default: "^(:?(:?<)|<)/PRE(:?(:?>)|>)\$") |
|
466
|
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
=item preformat_whitespace_min |
|
468
|
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
preformat_whitespace_min=>I |
|
470
|
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
Minimum number of consecutive whitespace characters to trigger |
|
472
|
|
|
|
|
|
|
normal preformatting. |
|
473
|
|
|
|
|
|
|
NOTE: Tabs are expanded to spaces before this check is made. |
|
474
|
|
|
|
|
|
|
That means if B is 8 and this is 5, then one tab may be |
|
475
|
|
|
|
|
|
|
expanded to 8 spaces, which is enough to trigger preformatting. |
|
476
|
|
|
|
|
|
|
(default: 5) |
|
477
|
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
=item prepend_file |
|
479
|
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
prepend_file=>I |
|
481
|
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
If you want something prepended to the processed body text, put the |
|
483
|
|
|
|
|
|
|
filename here. The prepended text will not be processed at all, so make |
|
484
|
|
|
|
|
|
|
sure it's plain text or correct HTML. |
|
485
|
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
(default: nothing) |
|
487
|
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
=item preserve_indent |
|
489
|
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
preserve_indent=>1 |
|
491
|
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
Preserve the first-line indentation of paragraphs marked with indents |
|
493
|
|
|
|
|
|
|
by replacing the spaces of the first line with non-breaking spaces. |
|
494
|
|
|
|
|
|
|
(default: false) |
|
495
|
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
=item short_line_length |
|
497
|
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
short_line_length=>I |
|
499
|
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
Lines this short (or shorter) must be intentionally broken and are kept |
|
501
|
|
|
|
|
|
|
that short. |
|
502
|
|
|
|
|
|
|
(default: 40) |
|
503
|
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
=item style_url |
|
505
|
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
style_url=>I |
|
507
|
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
This gives the URL of a stylesheet; a LINK tag will be added to the |
|
509
|
|
|
|
|
|
|
output. |
|
510
|
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
=item tab_width |
|
512
|
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
tab_width=>I |
|
514
|
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
How many spaces equal a tab? |
|
516
|
|
|
|
|
|
|
(default: 8) |
|
517
|
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
=item table_type |
|
519
|
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
table_type=>{ ALIGN=>0, PGSQL=>0, BORDER=>1, DELIM=>0 } |
|
521
|
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
This determines which types of tables will be recognised when "make_tables" |
|
523
|
|
|
|
|
|
|
is true. The possible types are ALIGN, PGSQL, BORDER and DELIM. |
|
524
|
|
|
|
|
|
|
(default: all types are true) |
|
525
|
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
=item title |
|
527
|
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
title=>I |
|
529
|
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
You can specify a title. Otherwise it will use a blank one. |
|
531
|
|
|
|
|
|
|
(default: nothing) |
|
532
|
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
=item titlefirst |
|
534
|
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
titlefirst=>1 |
|
536
|
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
Use the first non-blank line as the title. (See also "title") |
|
538
|
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
=item underline_length_tolerance |
|
540
|
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
underline_length_tolerance=>I |
|
542
|
|
|
|
|
|
|
|
|
543
|
|
|
|
|
|
|
How much longer or shorter can underlines be and still be underlines? |
|
544
|
|
|
|
|
|
|
(default: 1) |
|
545
|
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
=item underline_offset_tolerance |
|
547
|
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
underline_offset_tolerance=>I |
|
549
|
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
How far offset can underlines be and still be underlines? |
|
551
|
|
|
|
|
|
|
(default: 1) |
|
552
|
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
=item unhyphenation |
|
554
|
|
|
|
|
|
|
|
|
555
|
|
|
|
|
|
|
unhyphenation=>0 |
|
556
|
|
|
|
|
|
|
|
|
557
|
|
|
|
|
|
|
Enables unhyphenation of text. |
|
558
|
|
|
|
|
|
|
(default: true) |
|
559
|
|
|
|
|
|
|
|
|
560
|
|
|
|
|
|
|
=item use_mosaic_header |
|
561
|
|
|
|
|
|
|
|
|
562
|
|
|
|
|
|
|
use_mosaic_header=>1 |
|
563
|
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
Use this option if you want to force the heading styles to match what Mosaic |
|
565
|
|
|
|
|
|
|
outputs. (Underlined with "***"s is H1, |
|
566
|
|
|
|
|
|
|
with "==="s is H2, with "+++" is H3, with "---" is H4, with "~~~" is H5 |
|
567
|
|
|
|
|
|
|
and with "..." is H6) |
|
568
|
|
|
|
|
|
|
This was the behavior of txt2html up to version 1.10. |
|
569
|
|
|
|
|
|
|
(default: false) |
|
570
|
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
=item use_preformat_marker |
|
572
|
|
|
|
|
|
|
|
|
573
|
|
|
|
|
|
|
use_preformat_marker=>1 |
|
574
|
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
Turn on preformatting when encountering "" on a line by itself, and turn |
|
576
|
|
|
|
|
|
|
it off when there's a line containing only "". |
|
577
|
|
|
|
|
|
|
When such preformatted text is detected, the PRE tag will be given the |
|
578
|
|
|
|
|
|
|
class 'quote_explicit'. |
|
579
|
|
|
|
|
|
|
(default: off) |
|
580
|
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
=item xhtml |
|
582
|
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
xhtml=>1 |
|
584
|
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
Try to make the output conform to the XHTML standard, including |
|
586
|
|
|
|
|
|
|
closing all open tags and marking empty tags correctly. This |
|
587
|
|
|
|
|
|
|
turns on --lower_case_tags and overrides the --doctype option. |
|
588
|
|
|
|
|
|
|
Note that if you add a header or a footer file, it is up to you |
|
589
|
|
|
|
|
|
|
to make it conform; the header/footer isn't touched by this. |
|
590
|
|
|
|
|
|
|
Likewise, if you make link-dictionary entries that break XHTML, |
|
591
|
|
|
|
|
|
|
then this won't fix them, except to the degree of putting all tags |
|
592
|
|
|
|
|
|
|
into lower-case. |
|
593
|
|
|
|
|
|
|
|
|
594
|
|
|
|
|
|
|
(default: true) |
|
595
|
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
=back |
|
597
|
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
=head1 DEBUGGING |
|
599
|
|
|
|
|
|
|
|
|
600
|
|
|
|
|
|
|
There are global variables for setting types and levels |
|
601
|
|
|
|
|
|
|
of debugging. These should only be used by developers. |
|
602
|
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
=over |
|
604
|
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
=item $HTML::TextToHTML::Debug |
|
606
|
|
|
|
|
|
|
|
|
607
|
|
|
|
|
|
|
$HTML::TextToHTML::Debug = 1; |
|
608
|
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
Enable copious debugging output. |
|
610
|
|
|
|
|
|
|
(default: false) |
|
611
|
|
|
|
|
|
|
|
|
612
|
|
|
|
|
|
|
=item $HTML::TextToHTML::DictDebug |
|
613
|
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
$HTML::TextToHTML::DictDebug = I; |
|
615
|
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
Debug mode for link dictionaries. Bitwise-Or what you want to see: |
|
617
|
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
1: The parsing of the dictionary |
|
619
|
|
|
|
|
|
|
2: The code that will make the links |
|
620
|
|
|
|
|
|
|
4: When each rule matches something |
|
621
|
|
|
|
|
|
|
8: When each tag is created |
|
622
|
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
(default: 0) |
|
624
|
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
=back |
|
626
|
|
|
|
|
|
|
|
|
627
|
|
|
|
|
|
|
=cut |
|
628
|
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
our $Debug = 0; |
|
630
|
|
|
|
|
|
|
our $DictDebug = 0; |
|
631
|
|
|
|
|
|
|
|
|
632
|
|
|
|
|
|
|
=head1 METHODS |
|
633
|
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
=cut |
|
635
|
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
#------------------------------------------------------------------------ |
|
637
|
6
|
|
|
6
|
|
5318
|
use YAML::Syck; |
|
|
6
|
|
|
|
|
22693
|
|
|
|
6
|
|
|
|
|
2434
|
|
|
638
|
|
|
|
|
|
|
|
|
639
|
|
|
|
|
|
|
our $PROG = 'HTML::TextToHTML'; |
|
640
|
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
#------------------------------------------------------------------------ |
|
642
|
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
######################################## |
|
644
|
|
|
|
|
|
|
# Definitions (Don't change these) |
|
645
|
|
|
|
|
|
|
# |
|
646
|
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
# These are just constants I use for making bit vectors to keep track |
|
648
|
|
|
|
|
|
|
# of what modes I'm in and what actions I've taken on the current and |
|
649
|
|
|
|
|
|
|
# previous lines. |
|
650
|
|
|
|
|
|
|
|
|
651
|
|
|
|
|
|
|
our $NONE = 0; |
|
652
|
|
|
|
|
|
|
our $LIST = 1; |
|
653
|
|
|
|
|
|
|
our $HRULE = 2; |
|
654
|
|
|
|
|
|
|
our $PAR = 4; |
|
655
|
|
|
|
|
|
|
our $PRE = 8; |
|
656
|
|
|
|
|
|
|
our $END = 16; |
|
657
|
|
|
|
|
|
|
our $BREAK = 32; |
|
658
|
|
|
|
|
|
|
our $HEADER = 64; |
|
659
|
|
|
|
|
|
|
our $MAILHEADER = 128; |
|
660
|
|
|
|
|
|
|
our $MAILQUOTE = 256; |
|
661
|
|
|
|
|
|
|
our $CAPS = 512; |
|
662
|
|
|
|
|
|
|
our $LINK = 1024; |
|
663
|
|
|
|
|
|
|
our $PRE_EXPLICIT = 2048; |
|
664
|
|
|
|
|
|
|
our $TABLE = 4096; |
|
665
|
|
|
|
|
|
|
our $IND_BREAK = 8192; |
|
666
|
|
|
|
|
|
|
our $LIST_START = 16384; |
|
667
|
|
|
|
|
|
|
our $LIST_ITEM = 32768; |
|
668
|
|
|
|
|
|
|
|
|
669
|
|
|
|
|
|
|
# Constants for Link-processing |
|
670
|
|
|
|
|
|
|
# bit-vectors for what to do with a particular link-dictionary entry |
|
671
|
|
|
|
|
|
|
our $LINK_NOCASE = 1; |
|
672
|
|
|
|
|
|
|
our $LINK_EVAL = 2; |
|
673
|
|
|
|
|
|
|
our $LINK_HTML = 4; |
|
674
|
|
|
|
|
|
|
our $LINK_ONCE = 8; |
|
675
|
|
|
|
|
|
|
our $LINK_SECT_ONCE = 16; |
|
676
|
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
# Constants for Ordered Lists and Unordered Lists. |
|
678
|
|
|
|
|
|
|
# And Definition Lists. |
|
679
|
|
|
|
|
|
|
# I use this in the list stack to keep track of what's what. |
|
680
|
|
|
|
|
|
|
|
|
681
|
|
|
|
|
|
|
our $OL = 1; |
|
682
|
|
|
|
|
|
|
our $UL = 2; |
|
683
|
|
|
|
|
|
|
our $DL = 3; |
|
684
|
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
# Constants for table types |
|
686
|
|
|
|
|
|
|
our $TAB_ALIGN = 1; |
|
687
|
|
|
|
|
|
|
our $TAB_PGSQL = 2; |
|
688
|
|
|
|
|
|
|
our $TAB_BORDER = 3; |
|
689
|
|
|
|
|
|
|
our $TAB_DELIM = 4; |
|
690
|
|
|
|
|
|
|
|
|
691
|
|
|
|
|
|
|
# Constants for tags |
|
692
|
|
|
|
|
|
|
use constant { |
|
693
|
6
|
|
|
|
|
174798
|
TAG_START => 1, |
|
694
|
|
|
|
|
|
|
TAG_END => 2, |
|
695
|
|
|
|
|
|
|
TAG_EMPTY => 3, |
|
696
|
6
|
|
|
6
|
|
51
|
}; |
|
|
6
|
|
|
|
|
10
|
|
|
697
|
|
|
|
|
|
|
|
|
698
|
|
|
|
|
|
|
# Character entity names |
|
699
|
|
|
|
|
|
|
# characters to replace with entities |
|
700
|
|
|
|
|
|
|
our %char_entities = ( |
|
701
|
|
|
|
|
|
|
"\241", "¡", "\242", "¢", "\243", "£", |
|
702
|
|
|
|
|
|
|
"\244", "¤", "\245", "¥", "\246", "¦", |
|
703
|
|
|
|
|
|
|
"\247", "§", "\250", "¨", "\251", "©", |
|
704
|
|
|
|
|
|
|
"\252", "ª", "\253", "«", "\254", "¬", |
|
705
|
|
|
|
|
|
|
"\255", "", "\256", "®", "\257", "&hibar;", |
|
706
|
|
|
|
|
|
|
"\260", "°", "\261", "±", "\262", "²", |
|
707
|
|
|
|
|
|
|
"\263", "³", "\264", "´", "\265", "µ", |
|
708
|
|
|
|
|
|
|
"\266", "¶", "\270", "¸", "\271", "¹", |
|
709
|
|
|
|
|
|
|
"\272", "º", "\273", "»", "\274", "¼", |
|
710
|
|
|
|
|
|
|
"\275", "½", "\276", "¾", "\277", "¿", |
|
711
|
|
|
|
|
|
|
"\300", "À", "\301", "Á", "\302", "Â", |
|
712
|
|
|
|
|
|
|
"\303", "Ã", "\304", "Ä", "\305", "Å", |
|
713
|
|
|
|
|
|
|
"\306", "Æ", "\307", "Ç", "\310", "È", |
|
714
|
|
|
|
|
|
|
"\311", "É", "\312", "Ê", "\313", "Ë", |
|
715
|
|
|
|
|
|
|
"\314", "Ì", "\315", "Í", "\316", "Î", |
|
716
|
|
|
|
|
|
|
"\317", "Ï", "\320", "Ð", "\321", "Ñ", |
|
717
|
|
|
|
|
|
|
"\322", "Ò", "\323", "Ó", "\324", "Ô", |
|
718
|
|
|
|
|
|
|
"\325", "Õ", "\326", "Ö", "\327", "×", |
|
719
|
|
|
|
|
|
|
"\330", "Ø", "\331", "Ù", "\332", "Ú", |
|
720
|
|
|
|
|
|
|
"\333", "Û", "\334", "Ü", "\335", "Ý", |
|
721
|
|
|
|
|
|
|
"\336", "Þ", "\337", "ß", "\340", "à", |
|
722
|
|
|
|
|
|
|
"\341", "á", "\342", "â", "\343", "ã", |
|
723
|
|
|
|
|
|
|
"\344", "ä", "\345", "å", "\346", "æ", |
|
724
|
|
|
|
|
|
|
"\347", "ç", "\350", "è", "\351", "é", |
|
725
|
|
|
|
|
|
|
"\352", "ê", "\353", "ë", "\354", "ì", |
|
726
|
|
|
|
|
|
|
"\355", "í", "\356", "î", "\357", "ï", |
|
727
|
|
|
|
|
|
|
"\360", "ð", "\361", "ñ", "\362", "ò", |
|
728
|
|
|
|
|
|
|
"\363", "ó", "\364", "ô", "\365", "õ", |
|
729
|
|
|
|
|
|
|
"\366", "ö", "\367", "÷", "\370", "ø", |
|
730
|
|
|
|
|
|
|
"\371", "ù", "\372", "ú", "\373", "û", |
|
731
|
|
|
|
|
|
|
"\374", "ü", "\375", "ý", "\376", "þ", |
|
732
|
|
|
|
|
|
|
"\377", "ÿ", "\267", "·", |
|
733
|
|
|
|
|
|
|
); |
|
734
|
|
|
|
|
|
|
|
|
735
|
|
|
|
|
|
|
# alignments for tables |
|
736
|
|
|
|
|
|
|
our @alignments = ('', '', ' ALIGN="RIGHT"', ' ALIGN="CENTER"'); |
|
737
|
|
|
|
|
|
|
our @lc_alignments = ('', '', ' align="right"', ' align="center"'); |
|
738
|
|
|
|
|
|
|
our @xhtml_alignments = |
|
739
|
|
|
|
|
|
|
('', '', ' style="text-align: right;"', ' style="text-align: center;"'); |
|
740
|
|
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
#---------------------------------------------------------------# |
|
742
|
|
|
|
|
|
|
# Object interface |
|
743
|
|
|
|
|
|
|
#---------------------------------------------------------------# |
|
744
|
|
|
|
|
|
|
|
|
745
|
|
|
|
|
|
|
=head2 new |
|
746
|
|
|
|
|
|
|
|
|
747
|
|
|
|
|
|
|
$conv = new HTML::TextToHTML() |
|
748
|
|
|
|
|
|
|
|
|
749
|
|
|
|
|
|
|
$conv = new HTML::TextToHTML(titlefirst=>1, |
|
750
|
|
|
|
|
|
|
... |
|
751
|
|
|
|
|
|
|
); |
|
752
|
|
|
|
|
|
|
|
|
753
|
|
|
|
|
|
|
Create a new object with new. If arguments are given, these arguments |
|
754
|
|
|
|
|
|
|
will be used in invocations of other methods. |
|
755
|
|
|
|
|
|
|
|
|
756
|
|
|
|
|
|
|
See L for the possible values of the arguments. |
|
757
|
|
|
|
|
|
|
|
|
758
|
|
|
|
|
|
|
=cut |
|
759
|
|
|
|
|
|
|
|
|
760
|
|
|
|
|
|
|
sub new |
|
761
|
|
|
|
|
|
|
{ |
|
762
|
26
|
|
|
26
|
1
|
39005
|
my $invocant = shift; |
|
763
|
26
|
|
|
|
|
74
|
my $self = {}; |
|
764
|
|
|
|
|
|
|
|
|
765
|
26
|
|
33
|
|
|
205
|
my $class = ref($invocant) || $invocant; # Object or class name |
|
766
|
26
|
|
|
|
|
113
|
init_our_data($self); |
|
767
|
|
|
|
|
|
|
|
|
768
|
|
|
|
|
|
|
# bless self |
|
769
|
26
|
|
|
|
|
70
|
bless($self, $class); |
|
770
|
|
|
|
|
|
|
|
|
771
|
26
|
|
|
|
|
118
|
$self->args(@_); |
|
772
|
|
|
|
|
|
|
|
|
773
|
26
|
|
|
|
|
93
|
return $self; |
|
774
|
|
|
|
|
|
|
} # new |
|
775
|
|
|
|
|
|
|
|
|
776
|
|
|
|
|
|
|
=head2 args |
|
777
|
|
|
|
|
|
|
|
|
778
|
|
|
|
|
|
|
$conv->args(short_line_length=>60, |
|
779
|
|
|
|
|
|
|
titlefirst=>1, |
|
780
|
|
|
|
|
|
|
.... |
|
781
|
|
|
|
|
|
|
); |
|
782
|
|
|
|
|
|
|
|
|
783
|
|
|
|
|
|
|
Updates the current arguments/options of the HTML::TextToHTML object. |
|
784
|
|
|
|
|
|
|
Takes hash of arguments, which will be used in invocations of other |
|
785
|
|
|
|
|
|
|
methods. |
|
786
|
|
|
|
|
|
|
See L for the possible values of the arguments. |
|
787
|
|
|
|
|
|
|
|
|
788
|
|
|
|
|
|
|
=cut |
|
789
|
|
|
|
|
|
|
|
|
790
|
|
|
|
|
|
|
sub args |
|
791
|
|
|
|
|
|
|
{ |
|
792
|
96
|
|
|
96
|
1
|
4877
|
my $self = shift; |
|
793
|
96
|
|
|
|
|
461
|
my %args = @_; |
|
794
|
|
|
|
|
|
|
|
|
795
|
96
|
100
|
|
|
|
281
|
if (%args) |
|
796
|
|
|
|
|
|
|
{ |
|
797
|
71
|
50
|
|
|
|
187
|
if ($Debug) |
|
798
|
|
|
|
|
|
|
{ |
|
799
|
0
|
|
|
|
|
0
|
print STDERR "========args(hash)========\n"; |
|
800
|
0
|
|
|
|
|
0
|
print STDERR Dump(%args); |
|
801
|
|
|
|
|
|
|
} |
|
802
|
71
|
|
|
|
|
96
|
my $arg; |
|
803
|
|
|
|
|
|
|
my $val; |
|
804
|
71
|
|
|
|
|
308
|
while (($arg, $val) = each %args) |
|
805
|
|
|
|
|
|
|
{ |
|
806
|
281
|
50
|
|
|
|
673
|
if (defined $val) |
|
807
|
|
|
|
|
|
|
{ |
|
808
|
281
|
50
|
|
|
|
595
|
if ($arg =~ /^-/) |
|
809
|
|
|
|
|
|
|
{ |
|
810
|
0
|
|
|
|
|
0
|
$arg =~ s/^-//; # get rid of first dash |
|
811
|
0
|
|
|
|
|
0
|
$arg =~ s/^-//; # get rid of possible second dash |
|
812
|
|
|
|
|
|
|
} |
|
813
|
281
|
50
|
|
|
|
632
|
if ($Debug) |
|
814
|
|
|
|
|
|
|
{ |
|
815
|
0
|
|
|
|
|
0
|
print STDERR "--", $arg; |
|
816
|
|
|
|
|
|
|
} |
|
817
|
281
|
|
|
|
|
491
|
$self->{$arg} = $val; |
|
818
|
281
|
50
|
|
|
|
1053
|
if ($Debug) |
|
819
|
|
|
|
|
|
|
{ |
|
820
|
0
|
|
|
|
|
0
|
print STDERR " ", $val, "\n"; |
|
821
|
|
|
|
|
|
|
} |
|
822
|
|
|
|
|
|
|
} |
|
823
|
|
|
|
|
|
|
} |
|
824
|
|
|
|
|
|
|
} |
|
825
|
96
|
|
|
|
|
290
|
$self->deal_with_options(); |
|
826
|
96
|
50
|
|
|
|
225
|
if ($Debug) |
|
827
|
|
|
|
|
|
|
{ |
|
828
|
0
|
|
|
|
|
0
|
print STDERR Dump($self); |
|
829
|
|
|
|
|
|
|
} |
|
830
|
|
|
|
|
|
|
|
|
831
|
96
|
|
|
|
|
313
|
return 1; |
|
832
|
|
|
|
|
|
|
} # args |
|
833
|
|
|
|
|
|
|
|
|
834
|
|
|
|
|
|
|
=head2 process_chunk |
|
835
|
|
|
|
|
|
|
|
|
836
|
|
|
|
|
|
|
$newstring = $conv->process_chunk($mystring); |
|
837
|
|
|
|
|
|
|
|
|
838
|
|
|
|
|
|
|
Convert a string to a HTML fragment. This assumes that this string is |
|
839
|
|
|
|
|
|
|
at the least, a single paragraph, but it can contain more than that. |
|
840
|
|
|
|
|
|
|
This returns the processed string. If you want to pass arguments to |
|
841
|
|
|
|
|
|
|
alter the behaviour of this conversion, you need to do that earlier, |
|
842
|
|
|
|
|
|
|
either when you create the object, or with the L method. |
|
843
|
|
|
|
|
|
|
|
|
844
|
|
|
|
|
|
|
$newstring = $conv->process_chunk($mystring, |
|
845
|
|
|
|
|
|
|
close_tags=>0); |
|
846
|
|
|
|
|
|
|
|
|
847
|
|
|
|
|
|
|
If there are open tags (such as lists) in the input string, |
|
848
|
|
|
|
|
|
|
process_chunk will automatically close them, unless you specify not |
|
849
|
|
|
|
|
|
|
to, with the close_tags option. |
|
850
|
|
|
|
|
|
|
|
|
851
|
|
|
|
|
|
|
$newstring = $conv->process_chunk($mystring, |
|
852
|
|
|
|
|
|
|
is_fragment=>1); |
|
853
|
|
|
|
|
|
|
|
|
854
|
|
|
|
|
|
|
If you want this string to be treated as a fragment, and not assumed to |
|
855
|
|
|
|
|
|
|
be a paragraph, set is_fragment to true. If there is more than one |
|
856
|
|
|
|
|
|
|
paragraph in the string (ie it contains blank lines) then this option |
|
857
|
|
|
|
|
|
|
will be ignored. |
|
858
|
|
|
|
|
|
|
|
|
859
|
|
|
|
|
|
|
=cut |
|
860
|
|
|
|
|
|
|
|
|
861
|
|
|
|
|
|
|
sub process_chunk ($$;%) |
|
862
|
|
|
|
|
|
|
{ |
|
863
|
345
|
|
|
345
|
1
|
5661
|
my $self = shift; |
|
864
|
345
|
|
|
|
|
750
|
my $chunk = shift; |
|
865
|
345
|
|
|
|
|
1219
|
my %args = ( |
|
866
|
|
|
|
|
|
|
close_tags => 1, |
|
867
|
|
|
|
|
|
|
is_fragment => 0, |
|
868
|
|
|
|
|
|
|
@_ |
|
869
|
|
|
|
|
|
|
); |
|
870
|
|
|
|
|
|
|
|
|
871
|
345
|
|
|
|
|
639
|
my $ret_str = ''; |
|
872
|
345
|
|
|
|
|
2347
|
my @paras = split(/\r?\n\r?\n/, $chunk); |
|
873
|
345
|
|
|
|
|
486
|
my $ind = 0; |
|
874
|
345
|
100
|
|
|
|
943
|
if (@paras == 1) # just one paragraph |
|
875
|
|
|
|
|
|
|
{ |
|
876
|
342
|
|
|
|
|
1240
|
$ret_str .= $self->process_para( |
|
877
|
|
|
|
|
|
|
$chunk, |
|
878
|
|
|
|
|
|
|
close_tags => $args{close_tags}, |
|
879
|
|
|
|
|
|
|
is_fragment => $args{is_fragment} |
|
880
|
|
|
|
|
|
|
); |
|
881
|
|
|
|
|
|
|
} |
|
882
|
|
|
|
|
|
|
else |
|
883
|
|
|
|
|
|
|
{ |
|
884
|
3
|
|
|
|
|
9
|
my $ind = 0; |
|
885
|
3
|
|
|
|
|
8
|
foreach my $para (@paras) |
|
886
|
|
|
|
|
|
|
{ |
|
887
|
|
|
|
|
|
|
# if the paragraph doesn't end with a newline, add one |
|
888
|
96
|
100
|
|
|
|
395
|
$para .= "\n" if ($para !~ /\n$/); |
|
889
|
96
|
100
|
|
|
|
330
|
if ($ind == @paras - 1) # last one |
|
890
|
|
|
|
|
|
|
{ |
|
891
|
2
|
|
|
|
|
14
|
$ret_str .= $self->process_para( |
|
892
|
|
|
|
|
|
|
$para, |
|
893
|
|
|
|
|
|
|
close_tags => $args{close_tags}, |
|
894
|
|
|
|
|
|
|
is_fragment => 0 |
|
895
|
|
|
|
|
|
|
); |
|
896
|
|
|
|
|
|
|
} |
|
897
|
|
|
|
|
|
|
else |
|
898
|
|
|
|
|
|
|
{ |
|
899
|
94
|
|
|
|
|
375
|
$ret_str .= $self->process_para( |
|
900
|
|
|
|
|
|
|
$para, |
|
901
|
|
|
|
|
|
|
close_tags => 0, |
|
902
|
|
|
|
|
|
|
is_fragment => 0 |
|
903
|
|
|
|
|
|
|
); |
|
904
|
|
|
|
|
|
|
} |
|
905
|
96
|
|
|
|
|
299
|
$ind++; |
|
906
|
|
|
|
|
|
|
} |
|
907
|
|
|
|
|
|
|
} |
|
908
|
345
|
|
|
|
|
1418
|
$ret_str; |
|
909
|
|
|
|
|
|
|
} # process_chunk |
|
910
|
|
|
|
|
|
|
|
|
911
|
|
|
|
|
|
|
=head2 process_para |
|
912
|
|
|
|
|
|
|
|
|
913
|
|
|
|
|
|
|
$newstring = $conv->process_para($mystring); |
|
914
|
|
|
|
|
|
|
|
|
915
|
|
|
|
|
|
|
Convert a string to a HTML fragment. This assumes that this string is |
|
916
|
|
|
|
|
|
|
at the most a single paragraph, with no blank lines in it. If you don't |
|
917
|
|
|
|
|
|
|
know whether your string will contain blank lines or not, use the |
|
918
|
|
|
|
|
|
|
L method instead. |
|
919
|
|
|
|
|
|
|
|
|
920
|
|
|
|
|
|
|
This returns the processed string. If you want to pass arguments to |
|
921
|
|
|
|
|
|
|
alter the behaviour of this conversion, you need to do that earlier, |
|
922
|
|
|
|
|
|
|
either when you create the object, or with the L method. |
|
923
|
|
|
|
|
|
|
|
|
924
|
|
|
|
|
|
|
$newstring = $conv->process_para($mystring, |
|
925
|
|
|
|
|
|
|
close_tags=>0); |
|
926
|
|
|
|
|
|
|
|
|
927
|
|
|
|
|
|
|
If there are open tags (such as lists) in the input string, process_para |
|
928
|
|
|
|
|
|
|
will automatically close them, unless you specify not to, with the |
|
929
|
|
|
|
|
|
|
close_tags option. |
|
930
|
|
|
|
|
|
|
|
|
931
|
|
|
|
|
|
|
$newstring = $conv->process_para($mystring, |
|
932
|
|
|
|
|
|
|
is_fragment=>1); |
|
933
|
|
|
|
|
|
|
|
|
934
|
|
|
|
|
|
|
If you want this string to be treated as a fragment, and not assumed to be |
|
935
|
|
|
|
|
|
|
a paragraph, set is_fragment to true. |
|
936
|
|
|
|
|
|
|
|
|
937
|
|
|
|
|
|
|
=cut |
|
938
|
|
|
|
|
|
|
|
|
939
|
|
|
|
|
|
|
sub process_para ($$;%) |
|
940
|
|
|
|
|
|
|
{ |
|
941
|
440
|
|
|
440
|
1
|
2573
|
my $self = shift; |
|
942
|
440
|
|
|
|
|
884
|
my $para = shift; |
|
943
|
440
|
|
|
|
|
1745
|
my %args = ( |
|
944
|
|
|
|
|
|
|
close_tags => 1, |
|
945
|
|
|
|
|
|
|
is_fragment => 0, |
|
946
|
|
|
|
|
|
|
@_ |
|
947
|
|
|
|
|
|
|
); |
|
948
|
|
|
|
|
|
|
|
|
949
|
|
|
|
|
|
|
# if this is an external call, do certain initializations |
|
950
|
440
|
|
|
|
|
1168
|
$self->do_init_call(); |
|
951
|
|
|
|
|
|
|
|
|
952
|
440
|
|
|
|
|
702
|
my $para_action = $NONE; |
|
953
|
|
|
|
|
|
|
|
|
954
|
|
|
|
|
|
|
# tables and mailheaders don't carry over from one para to the next |
|
955
|
440
|
100
|
|
|
|
1172
|
if ($self->{__mode} & $TABLE) |
|
956
|
|
|
|
|
|
|
{ |
|
957
|
6
|
|
|
|
|
16
|
$self->{__mode} ^= $TABLE; |
|
958
|
|
|
|
|
|
|
} |
|
959
|
440
|
100
|
|
|
|
1218
|
if ($self->{__mode} & $MAILHEADER) |
|
960
|
|
|
|
|
|
|
{ |
|
961
|
11
|
|
|
|
|
18
|
$self->{__mode} ^= $MAILHEADER; |
|
962
|
|
|
|
|
|
|
} |
|
963
|
|
|
|
|
|
|
|
|
964
|
|
|
|
|
|
|
# convert Microsoft character codes into sensible characters |
|
965
|
440
|
50
|
|
|
|
1059
|
if ($self->{demoronize}) |
|
966
|
|
|
|
|
|
|
{ |
|
967
|
440
|
|
|
|
|
1234
|
demoronize_char($para); |
|
968
|
|
|
|
|
|
|
} |
|
969
|
|
|
|
|
|
|
|
|
970
|
|
|
|
|
|
|
# if we are not just linking, we are discerning structure |
|
971
|
440
|
50
|
|
|
|
1092
|
if (!$self->{link_only}) |
|
972
|
|
|
|
|
|
|
{ |
|
973
|
|
|
|
|
|
|
|
|
974
|
|
|
|
|
|
|
# Chop trailing whitespace and DOS CRs |
|
975
|
440
|
|
|
|
|
731
|
$para =~ s/[ \011]*\015$//; |
|
976
|
|
|
|
|
|
|
# Chop leading whitespace and DOS CRs |
|
977
|
440
|
|
|
|
|
1072
|
$para =~ s/^[ \011]*\015//; |
|
978
|
440
|
|
|
|
|
1015
|
$para =~ s/\r//g; # remove any stray carriage returns |
|
979
|
|
|
|
|
|
|
|
|
980
|
440
|
|
|
|
|
694
|
my @done_lines = (); # lines which have been processed |
|
981
|
|
|
|
|
|
|
|
|
982
|
|
|
|
|
|
|
# The PRE_EXPLICIT structure can carry over from one |
|
983
|
|
|
|
|
|
|
# paragraph to the next, but it is ended with the |
|
984
|
|
|
|
|
|
|
# explicit end-tag designated for it. |
|
985
|
|
|
|
|
|
|
# Therefore we can shortcut for this by checking |
|
986
|
|
|
|
|
|
|
# for the end of the PRE_EXPLICIT and chomping off |
|
987
|
|
|
|
|
|
|
# the preformatted string part of this para before |
|
988
|
|
|
|
|
|
|
# we have to split it into lines. |
|
989
|
|
|
|
|
|
|
# Note that after this check, we could *still* be |
|
990
|
|
|
|
|
|
|
# in PRE_EXPLICIT mode. |
|
991
|
440
|
50
|
|
|
|
1335
|
if ($self->{__mode} & $PRE_EXPLICIT) |
|
992
|
|
|
|
|
|
|
{ |
|
993
|
0
|
|
|
|
|
0
|
my $pre_str = |
|
994
|
|
|
|
|
|
|
$self->split_end_explicit_preformat(para_ref => \$para); |
|
995
|
0
|
0
|
|
|
|
0
|
if ($pre_str) |
|
996
|
|
|
|
|
|
|
{ |
|
997
|
0
|
|
|
|
|
0
|
push @done_lines, $pre_str; |
|
998
|
|
|
|
|
|
|
} |
|
999
|
|
|
|
|
|
|
} |
|
1000
|
|
|
|
|
|
|
|
|
1001
|
440
|
50
|
33
|
|
|
2197
|
if (defined $para && $para ne "") |
|
1002
|
|
|
|
|
|
|
{ |
|
1003
|
|
|
|
|
|
|
# |
|
1004
|
|
|
|
|
|
|
# Now we split the paragraph into lines |
|
1005
|
|
|
|
|
|
|
# |
|
1006
|
440
|
|
|
|
|
750
|
my $para_len = length($para); |
|
1007
|
440
|
|
|
|
|
1743
|
my @para_lines = split(/^/, $para); |
|
1008
|
440
|
|
|
|
|
694
|
my @para_line_len = (); |
|
1009
|
440
|
|
|
|
|
770
|
my @para_line_indent = (); |
|
1010
|
440
|
|
|
|
|
603
|
my @para_line_action = (); |
|
1011
|
440
|
|
|
|
|
773
|
my $i = 0; |
|
1012
|
440
|
|
|
|
|
967
|
foreach my $line (@para_lines) |
|
1013
|
|
|
|
|
|
|
{ |
|
1014
|
|
|
|
|
|
|
# Change all tabs to spaces |
|
1015
|
1603
|
|
|
|
|
3879
|
while ($line =~ /\011/) |
|
1016
|
|
|
|
|
|
|
{ |
|
1017
|
124
|
|
|
|
|
198
|
my $tw = $self->{tab_width}; |
|
1018
|
124
|
|
|
|
|
347
|
$line =~ s/\011/" " x ($tw - (length($`) % $tw))/e; |
|
|
124
|
|
|
|
|
622
|
|
|
1019
|
|
|
|
|
|
|
} |
|
1020
|
1603
|
|
|
|
|
2127
|
push @para_line_len, length($line); |
|
1021
|
1603
|
100
|
|
|
|
4022
|
if ($line =~ /^\s*$/) |
|
1022
|
|
|
|
|
|
|
{ |
|
1023
|
|
|
|
|
|
|
# if the line is blank, use the previous indent |
|
1024
|
|
|
|
|
|
|
# if there is one |
|
1025
|
26
|
100
|
|
|
|
88
|
push @para_line_indent, |
|
1026
|
|
|
|
|
|
|
($i == 0 ? 0 : $para_line_indent[$i - 1]); |
|
1027
|
|
|
|
|
|
|
} |
|
1028
|
|
|
|
|
|
|
else |
|
1029
|
|
|
|
|
|
|
{ |
|
1030
|
|
|
|
|
|
|
# count the number of leading spaces |
|
1031
|
1577
|
|
|
|
|
4243
|
my ($ws) = $line =~ /^( *)[^ ]/; |
|
1032
|
1577
|
|
|
|
|
2655
|
push @para_line_indent, length($ws); |
|
1033
|
|
|
|
|
|
|
} |
|
1034
|
1603
|
|
|
|
|
1785
|
push @para_line_action, $NONE; |
|
1035
|
1603
|
|
|
|
|
2473
|
$i++; |
|
1036
|
|
|
|
|
|
|
} |
|
1037
|
|
|
|
|
|
|
|
|
1038
|
|
|
|
|
|
|
# There are two more structures which carry over from one |
|
1039
|
|
|
|
|
|
|
# paragraph to the next: LIST, PRE |
|
1040
|
|
|
|
|
|
|
# There are also certain things which will immediately end |
|
1041
|
|
|
|
|
|
|
# multi-paragraph LIST and PRE, if found at the start |
|
1042
|
|
|
|
|
|
|
# of a paragraph: |
|
1043
|
|
|
|
|
|
|
# A list will be ended by |
|
1044
|
|
|
|
|
|
|
# TABLE, MAILHEADER, HEADER, custom-header |
|
1045
|
|
|
|
|
|
|
# A PRE will be ended by |
|
1046
|
|
|
|
|
|
|
# TABLE, MAILHEADER and non-pre text |
|
1047
|
|
|
|
|
|
|
|
|
1048
|
440
|
|
|
|
|
713
|
my $is_table = 0; |
|
1049
|
440
|
|
|
|
|
461
|
my $table_type = 0; |
|
1050
|
440
|
|
|
|
|
476
|
my $is_mailheader = 0; |
|
1051
|
440
|
|
|
|
|
494
|
my $is_header = 0; |
|
1052
|
440
|
|
|
|
|
512
|
my $is_custom_header = 0; |
|
1053
|
440
|
100
|
|
|
|
496
|
if (@{$self->{custom_heading_regexp}}) |
|
|
440
|
|
|
|
|
1348
|
|
|
1054
|
|
|
|
|
|
|
{ |
|
1055
|
155
|
|
|
|
|
428
|
$is_custom_header = |
|
1056
|
|
|
|
|
|
|
$self->is_custom_heading(line => $para_lines[0]); |
|
1057
|
|
|
|
|
|
|
} |
|
1058
|
440
|
100
|
100
|
|
|
1756
|
if ( $self->{make_tables} |
|
1059
|
|
|
|
|
|
|
&& @para_lines > 1) |
|
1060
|
|
|
|
|
|
|
{ |
|
1061
|
123
|
|
|
|
|
408
|
$table_type = $self->get_table_type( |
|
1062
|
|
|
|
|
|
|
rows_ref => \@para_lines, |
|
1063
|
|
|
|
|
|
|
para_len => $para_len |
|
1064
|
|
|
|
|
|
|
); |
|
1065
|
123
|
|
|
|
|
216
|
$is_table = ($table_type != 0); |
|
1066
|
|
|
|
|
|
|
} |
|
1067
|
440
|
100
|
66
|
|
|
2916
|
if ( !$self->{explicit_headings} |
|
|
|
|
100
|
|
|
|
|
|
1068
|
|
|
|
|
|
|
&& @para_lines > 1 |
|
1069
|
|
|
|
|
|
|
&& !$is_table) |
|
1070
|
|
|
|
|
|
|
{ |
|
1071
|
269
|
|
|
|
|
1357
|
$is_header = $self->is_heading( |
|
1072
|
|
|
|
|
|
|
line_ref => \$para_lines[0], |
|
1073
|
|
|
|
|
|
|
next_ref => \$para_lines[1] |
|
1074
|
|
|
|
|
|
|
); |
|
1075
|
|
|
|
|
|
|
} |
|
1076
|
|
|
|
|
|
|
# Note that it is concievable that someone has |
|
1077
|
|
|
|
|
|
|
# partially disabled mailmode by making a custom header |
|
1078
|
|
|
|
|
|
|
# which matches the start of mail. |
|
1079
|
|
|
|
|
|
|
# This is stupid, but allowable, so we check. |
|
1080
|
440
|
100
|
100
|
|
|
2395
|
if ( $self->{mailmode} |
|
|
|
|
100
|
|
|
|
|
|
1081
|
|
|
|
|
|
|
&& !$is_table |
|
1082
|
|
|
|
|
|
|
&& !$is_custom_header) |
|
1083
|
|
|
|
|
|
|
{ |
|
1084
|
196
|
|
|
|
|
528
|
$is_mailheader = $self->is_mailheader(rows_ref => \@para_lines); |
|
1085
|
|
|
|
|
|
|
} |
|
1086
|
|
|
|
|
|
|
|
|
1087
|
|
|
|
|
|
|
# end the list if we can end it |
|
1088
|
440
|
100
|
66
|
|
|
1652
|
if ( |
|
|
|
|
66
|
|
|
|
|
|
1089
|
|
|
|
|
|
|
($self->{__mode} & $LIST) |
|
1090
|
|
|
|
|
|
|
&& ( $is_table |
|
1091
|
|
|
|
|
|
|
|| $is_mailheader |
|
1092
|
|
|
|
|
|
|
|| $is_header |
|
1093
|
|
|
|
|
|
|
|| $is_custom_header) |
|
1094
|
|
|
|
|
|
|
) |
|
1095
|
|
|
|
|
|
|
{ |
|
1096
|
7
|
|
|
|
|
14
|
my $list_end = ''; |
|
1097
|
7
|
|
|
|
|
14
|
my $action = 0; |
|
1098
|
7
|
|
|
|
|
59
|
$self->endlist( |
|
1099
|
|
|
|
|
|
|
num_lists => $self->{__listnum}, |
|
1100
|
|
|
|
|
|
|
prev_ref => \$list_end, |
|
1101
|
|
|
|
|
|
|
line_action_ref => \$action |
|
1102
|
|
|
|
|
|
|
); |
|
1103
|
7
|
|
|
|
|
11
|
push @done_lines, $list_end; |
|
1104
|
7
|
|
|
|
|
20
|
$self->{__prev_para_action} |= $END; |
|
1105
|
|
|
|
|
|
|
} |
|
1106
|
|
|
|
|
|
|
|
|
1107
|
|
|
|
|
|
|
# end the PRE if we can end it |
|
1108
|
440
|
100
|
66
|
|
|
1465
|
if ( |
|
|
|
|
100
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
1109
|
|
|
|
|
|
|
($self->{__mode} & $PRE) |
|
1110
|
|
|
|
|
|
|
&& !($self->{__mode} & $PRE_EXPLICIT) |
|
1111
|
|
|
|
|
|
|
&& ( $is_table |
|
1112
|
|
|
|
|
|
|
|| $is_mailheader |
|
1113
|
|
|
|
|
|
|
|| !$self->is_preformatted($para_lines[0])) |
|
1114
|
|
|
|
|
|
|
&& ($self->{preformat_trigger_lines} != 0) |
|
1115
|
|
|
|
|
|
|
) |
|
1116
|
|
|
|
|
|
|
{ |
|
1117
|
14
|
|
|
|
|
23
|
my $pre_end = ''; |
|
1118
|
14
|
|
|
|
|
44
|
my $tag = $self->close_tag('pre'); |
|
1119
|
14
|
|
|
|
|
30
|
$pre_end = "${tag}\n"; |
|
1120
|
14
|
|
|
|
|
31
|
$self->{__mode} ^= ($PRE & $self->{__mode}); |
|
1121
|
14
|
|
|
|
|
27
|
push @done_lines, $pre_end; |
|
1122
|
14
|
|
|
|
|
26
|
$self->{__prev_para_action} |= $END; |
|
1123
|
|
|
|
|
|
|
} |
|
1124
|
|
|
|
|
|
|
|
|
1125
|
|
|
|
|
|
|
# The PRE and PRE_EXPLICIT structure can carry over |
|
1126
|
|
|
|
|
|
|
# from one paragraph to the next, but because we don't |
|
1127
|
|
|
|
|
|
|
# want trailing newlines, such newlines would have been |
|
1128
|
|
|
|
|
|
|
# gotten rid of in the previous call. However, with |
|
1129
|
|
|
|
|
|
|
# a preformatted text, we do want the blank lines in it |
|
1130
|
|
|
|
|
|
|
# to be preserved, so let's add a blank line in here. |
|
1131
|
440
|
100
|
|
|
|
1032
|
if ($self->{__mode} & $PRE) |
|
1132
|
|
|
|
|
|
|
{ |
|
1133
|
6
|
|
|
|
|
11
|
push @done_lines, "\n"; |
|
1134
|
|
|
|
|
|
|
} |
|
1135
|
|
|
|
|
|
|
|
|
1136
|
|
|
|
|
|
|
# Now, we do certain things which are only found at the |
|
1137
|
|
|
|
|
|
|
# start of a paragraph: |
|
1138
|
|
|
|
|
|
|
# HEADER, custom-header, TABLE and MAILHEADER |
|
1139
|
|
|
|
|
|
|
# These could concievably eat the rest of the paragraph. |
|
1140
|
|
|
|
|
|
|
|
|
1141
|
440
|
100
|
|
|
|
1218
|
if ($is_custom_header) |
|
|
|
100
|
|
|
|
|
|
|
1142
|
|
|
|
|
|
|
{ |
|
1143
|
|
|
|
|
|
|
# custom header eats the first line |
|
1144
|
9
|
|
|
|
|
18
|
my $header = shift @para_lines; |
|
1145
|
9
|
|
|
|
|
12
|
shift @para_line_len; |
|
1146
|
9
|
|
|
|
|
15
|
shift @para_line_indent; |
|
1147
|
9
|
|
|
|
|
11
|
shift @para_line_action; |
|
1148
|
9
|
|
|
|
|
31
|
$self->custom_heading(line_ref => \$header); |
|
1149
|
9
|
|
|
|
|
25
|
push @done_lines, $header; |
|
1150
|
9
|
|
|
|
|
24
|
$self->{__prev_para_action} |= $HEADER; |
|
1151
|
|
|
|
|
|
|
} |
|
1152
|
|
|
|
|
|
|
elsif ($is_header) |
|
1153
|
|
|
|
|
|
|
{ |
|
1154
|
|
|
|
|
|
|
# normal header eats the first two lines |
|
1155
|
54
|
|
|
|
|
98
|
my $header = shift @para_lines; |
|
1156
|
54
|
|
|
|
|
74
|
shift @para_line_len; |
|
1157
|
54
|
|
|
|
|
77
|
shift @para_line_indent; |
|
1158
|
54
|
|
|
|
|
88
|
shift @para_line_action; |
|
1159
|
54
|
|
|
|
|
71
|
my $underline = shift @para_lines; |
|
1160
|
54
|
|
|
|
|
69
|
shift @para_line_len; |
|
1161
|
54
|
|
|
|
|
84
|
shift @para_line_indent; |
|
1162
|
54
|
|
|
|
|
73
|
shift @para_line_action; |
|
1163
|
54
|
|
|
|
|
213
|
$self->heading( |
|
1164
|
|
|
|
|
|
|
line_ref => \$header, |
|
1165
|
|
|
|
|
|
|
next_ref => \$underline |
|
1166
|
|
|
|
|
|
|
); |
|
1167
|
54
|
|
|
|
|
121
|
push @done_lines, $header; |
|
1168
|
54
|
|
|
|
|
151
|
$self->{__prev_para_action} |= $HEADER; |
|
1169
|
|
|
|
|
|
|
} |
|
1170
|
|
|
|
|
|
|
|
|
1171
|
|
|
|
|
|
|
# do the table stuff on the array of lines |
|
1172
|
440
|
100
|
100
|
|
|
1495
|
if ($self->{make_tables} && $is_table) |
|
1173
|
|
|
|
|
|
|
{ |
|
1174
|
22
|
50
|
|
|
|
93
|
if ( |
|
1175
|
|
|
|
|
|
|
$self->tablestuff( |
|
1176
|
|
|
|
|
|
|
table_type => $table_type, |
|
1177
|
|
|
|
|
|
|
rows_ref => \@para_lines, |
|
1178
|
|
|
|
|
|
|
para_len => $para_len |
|
1179
|
|
|
|
|
|
|
) |
|
1180
|
|
|
|
|
|
|
) |
|
1181
|
|
|
|
|
|
|
{ |
|
1182
|
|
|
|
|
|
|
# this has used up all the lines |
|
1183
|
22
|
|
|
|
|
75
|
push @done_lines, @para_lines; |
|
1184
|
22
|
|
|
|
|
56
|
@para_lines = (); |
|
1185
|
|
|
|
|
|
|
} |
|
1186
|
|
|
|
|
|
|
} |
|
1187
|
|
|
|
|
|
|
|
|
1188
|
|
|
|
|
|
|
# check of this para is a mail-header |
|
1189
|
440
|
50
|
66
|
|
|
1163
|
if ( $is_mailheader |
|
|
|
|
66
|
|
|
|
|
|
1190
|
|
|
|
|
|
|
&& !($self->{__mode} & $TABLE) |
|
1191
|
|
|
|
|
|
|
&& @para_lines) |
|
1192
|
|
|
|
|
|
|
{ |
|
1193
|
11
|
|
|
|
|
39
|
$self->mailheader(rows_ref => \@para_lines); |
|
1194
|
|
|
|
|
|
|
# this has used up all the lines |
|
1195
|
11
|
|
|
|
|
165
|
push @done_lines, @para_lines; |
|
1196
|
11
|
|
|
|
|
25
|
@para_lines = (); |
|
1197
|
|
|
|
|
|
|
} |
|
1198
|
|
|
|
|
|
|
|
|
1199
|
|
|
|
|
|
|
# |
|
1200
|
|
|
|
|
|
|
# Now go through the paragraph lines one at a time |
|
1201
|
|
|
|
|
|
|
# Note that we won't have TABLE, MAILHEADER, HEADER modes |
|
1202
|
|
|
|
|
|
|
# because they would have eaten the lines |
|
1203
|
|
|
|
|
|
|
# |
|
1204
|
440
|
|
|
|
|
614
|
my $prev = ''; |
|
1205
|
440
|
|
|
|
|
699
|
my $prev_action = $self->{__prev_para_action}; |
|
1206
|
440
|
|
|
|
|
1129
|
for (my $i = 0; $i < @para_lines; $i++) |
|
1207
|
|
|
|
|
|
|
{ |
|
1208
|
1166
|
|
|
|
|
1396
|
my $prev_ref; |
|
1209
|
|
|
|
|
|
|
my $prev_action_ref; |
|
1210
|
0
|
|
|
|
|
0
|
my $prev_line_indent; |
|
1211
|
0
|
|
|
|
|
0
|
my $prev_line_len; |
|
1212
|
1166
|
100
|
|
|
|
2157
|
if ($i == 0) |
|
1213
|
|
|
|
|
|
|
{ |
|
1214
|
350
|
|
|
|
|
523
|
$prev_ref = \$prev; |
|
1215
|
350
|
|
|
|
|
498
|
$prev_action_ref = \$prev_action; |
|
1216
|
350
|
|
|
|
|
441
|
$prev_line_indent = 0; |
|
1217
|
350
|
|
|
|
|
461
|
$prev_line_len = 0; |
|
1218
|
|
|
|
|
|
|
} |
|
1219
|
|
|
|
|
|
|
else |
|
1220
|
|
|
|
|
|
|
{ |
|
1221
|
816
|
|
|
|
|
1256
|
$prev_ref = \$para_lines[$i - 1]; |
|
1222
|
816
|
|
|
|
|
1098
|
$prev_action_ref = \$para_line_action[$i - 1]; |
|
1223
|
816
|
|
|
|
|
1001
|
$prev_line_indent = $para_line_indent[$i - 1]; |
|
1224
|
816
|
|
|
|
|
1171
|
$prev_line_len = $para_line_len[$i - 1]; |
|
1225
|
|
|
|
|
|
|
} |
|
1226
|
1166
|
|
|
|
|
1159
|
my $next_ref; |
|
1227
|
1166
|
100
|
|
|
|
1996
|
if ($i == $#para_lines) |
|
1228
|
|
|
|
|
|
|
{ |
|
1229
|
350
|
|
|
|
|
488
|
$next_ref = undef; |
|
1230
|
|
|
|
|
|
|
} |
|
1231
|
|
|
|
|
|
|
else |
|
1232
|
|
|
|
|
|
|
{ |
|
1233
|
816
|
|
|
|
|
1340
|
$next_ref = \$para_lines[$i + 1]; |
|
1234
|
|
|
|
|
|
|
} |
|
1235
|
|
|
|
|
|
|
|
|
1236
|
1166
|
50
|
|
|
|
3954
|
$para_lines[$i] = escape($para_lines[$i]) |
|
1237
|
|
|
|
|
|
|
if ($self->{escape_HTML_chars}); |
|
1238
|
|
|
|
|
|
|
|
|
1239
|
1166
|
100
|
100
|
|
|
4347
|
if ($self->{mailmode} |
|
1240
|
|
|
|
|
|
|
&& !($self->{__mode} & ($PRE_EXPLICIT))) |
|
1241
|
|
|
|
|
|
|
{ |
|
1242
|
519
|
|
|
|
|
1385
|
$self->mailquote( |
|
1243
|
|
|
|
|
|
|
line_ref => \$para_lines[$i], |
|
1244
|
|
|
|
|
|
|
line_action_ref => \$para_line_action[$i], |
|
1245
|
|
|
|
|
|
|
prev_ref => $prev_ref, |
|
1246
|
|
|
|
|
|
|
prev_action_ref => $prev_action_ref, |
|
1247
|
|
|
|
|
|
|
next_ref => $next_ref |
|
1248
|
|
|
|
|
|
|
); |
|
1249
|
|
|
|
|
|
|
} |
|
1250
|
|
|
|
|
|
|
|
|
1251
|
1166
|
100
|
66
|
|
|
3439
|
if ( ($self->{__mode} & $PRE) |
|
1252
|
|
|
|
|
|
|
&& ($self->{preformat_trigger_lines} != 0)) |
|
1253
|
|
|
|
|
|
|
{ |
|
1254
|
85
|
|
|
|
|
248
|
$self->endpreformat( |
|
1255
|
|
|
|
|
|
|
para_lines_ref => \@para_lines, |
|
1256
|
|
|
|
|
|
|
para_action_ref => \@para_line_action, |
|
1257
|
|
|
|
|
|
|
ind => $i, |
|
1258
|
|
|
|
|
|
|
prev_ref => $prev_ref |
|
1259
|
|
|
|
|
|
|
); |
|
1260
|
|
|
|
|
|
|
} |
|
1261
|
|
|
|
|
|
|
|
|
1262
|
1166
|
100
|
|
|
|
2702
|
if (!($self->{__mode} & $PRE)) |
|
1263
|
|
|
|
|
|
|
{ |
|
1264
|
1086
|
|
|
|
|
2922
|
$self->hrule( |
|
1265
|
|
|
|
|
|
|
para_lines_ref => \@para_lines, |
|
1266
|
|
|
|
|
|
|
para_action_ref => \@para_line_action, |
|
1267
|
|
|
|
|
|
|
ind => $i |
|
1268
|
|
|
|
|
|
|
); |
|
1269
|
|
|
|
|
|
|
} |
|
1270
|
1166
|
100
|
66
|
|
|
6679
|
if (!($self->{__mode} & ($PRE)) |
|
1271
|
|
|
|
|
|
|
&& ($para_lines[$i] !~ /^\s*$/)) |
|
1272
|
|
|
|
|
|
|
{ |
|
1273
|
1060
|
|
|
|
|
3083
|
$self->liststuff( |
|
1274
|
|
|
|
|
|
|
para_lines_ref => \@para_lines, |
|
1275
|
|
|
|
|
|
|
para_action_ref => \@para_line_action, |
|
1276
|
|
|
|
|
|
|
para_line_indent_ref => \@para_line_indent, |
|
1277
|
|
|
|
|
|
|
ind => $i, |
|
1278
|
|
|
|
|
|
|
prev_ref => $prev_ref |
|
1279
|
|
|
|
|
|
|
); |
|
1280
|
|
|
|
|
|
|
} |
|
1281
|
1166
|
100
|
100
|
|
|
8049
|
if ( !($para_line_action[$i] & ($HEADER | $LIST)) |
|
|
|
|
66
|
|
|
|
|
|
1282
|
|
|
|
|
|
|
&& !($self->{__mode} & ($LIST | $PRE)) |
|
1283
|
|
|
|
|
|
|
&& $self->{__preformat_enabled}) |
|
1284
|
|
|
|
|
|
|
{ |
|
1285
|
823
|
|
|
|
|
2876
|
$self->preformat( |
|
1286
|
|
|
|
|
|
|
mode_ref => \$self->{__mode}, |
|
1287
|
|
|
|
|
|
|
line_ref => \$para_lines[$i], |
|
1288
|
|
|
|
|
|
|
line_action_ref => \$para_line_action[$i], |
|
1289
|
|
|
|
|
|
|
prev_ref => $prev_ref, |
|
1290
|
|
|
|
|
|
|
next_ref => $next_ref, |
|
1291
|
|
|
|
|
|
|
prev_action_ref => $prev_action_ref |
|
1292
|
|
|
|
|
|
|
); |
|
1293
|
|
|
|
|
|
|
} |
|
1294
|
1166
|
100
|
|
|
|
3127
|
if (!($self->{__mode} & ($PRE))) |
|
1295
|
|
|
|
|
|
|
{ |
|
1296
|
1065
|
|
|
|
|
3999
|
$self->paragraph( |
|
1297
|
|
|
|
|
|
|
line_ref => \$para_lines[$i], |
|
1298
|
|
|
|
|
|
|
line_action_ref => \$para_line_action[$i], |
|
1299
|
|
|
|
|
|
|
prev_ref => $prev_ref, |
|
1300
|
|
|
|
|
|
|
prev_action_ref => $prev_action_ref, |
|
1301
|
|
|
|
|
|
|
line_indent => $para_line_indent[$i], |
|
1302
|
|
|
|
|
|
|
prev_indent => $prev_line_indent, |
|
1303
|
|
|
|
|
|
|
is_fragment => $args{is_fragment}, |
|
1304
|
|
|
|
|
|
|
ind => $i, |
|
1305
|
|
|
|
|
|
|
); |
|
1306
|
|
|
|
|
|
|
} |
|
1307
|
1166
|
100
|
|
|
|
3215
|
if (!($self->{__mode} & ($PRE | $LIST))) |
|
1308
|
|
|
|
|
|
|
{ |
|
1309
|
802
|
|
|
|
|
2496
|
$self->shortline( |
|
1310
|
|
|
|
|
|
|
line_ref => \$para_lines[$i], |
|
1311
|
|
|
|
|
|
|
line_action_ref => \$para_line_action[$i], |
|
1312
|
|
|
|
|
|
|
prev_ref => $prev_ref, |
|
1313
|
|
|
|
|
|
|
prev_action_ref => $prev_action_ref, |
|
1314
|
|
|
|
|
|
|
prev_line_len => $prev_line_len |
|
1315
|
|
|
|
|
|
|
); |
|
1316
|
|
|
|
|
|
|
} |
|
1317
|
1166
|
100
|
|
|
|
2879
|
if (!($self->{__mode} & ($PRE))) |
|
1318
|
|
|
|
|
|
|
{ |
|
1319
|
1065
|
|
|
|
|
3272
|
$self->caps( |
|
1320
|
|
|
|
|
|
|
line_ref => \$para_lines[$i], |
|
1321
|
|
|
|
|
|
|
line_action_ref => \$para_line_action[$i] |
|
1322
|
|
|
|
|
|
|
); |
|
1323
|
|
|
|
|
|
|
} |
|
1324
|
|
|
|
|
|
|
|
|
1325
|
|
|
|
|
|
|
# put the "prev" line in front of the first line |
|
1326
|
1166
|
100
|
100
|
|
|
6802
|
$para_lines[$i] = $prev . $para_lines[$i] |
|
1327
|
|
|
|
|
|
|
if ($i == 0 && ($prev !~ /^\s*$/)); |
|
1328
|
|
|
|
|
|
|
} |
|
1329
|
|
|
|
|
|
|
|
|
1330
|
|
|
|
|
|
|
# para action is the action of the last line of the para |
|
1331
|
440
|
|
|
|
|
795
|
$para_action = $para_line_action[$#para_line_action]; |
|
1332
|
440
|
100
|
|
|
|
1069
|
$para_action = $NONE if (!defined $para_action); |
|
1333
|
|
|
|
|
|
|
|
|
1334
|
|
|
|
|
|
|
# push them on the done lines |
|
1335
|
440
|
|
|
|
|
903
|
push @done_lines, @para_lines; |
|
1336
|
440
|
|
|
|
|
1421
|
@para_lines = (); |
|
1337
|
|
|
|
|
|
|
|
|
1338
|
|
|
|
|
|
|
} |
|
1339
|
|
|
|
|
|
|
# now put the para back together as one string |
|
1340
|
440
|
|
|
|
|
1367
|
$para = join('', @done_lines); |
|
1341
|
|
|
|
|
|
|
|
|
1342
|
|
|
|
|
|
|
# if this is a paragraph, and we are in XHTML mode, |
|
1343
|
|
|
|
|
|
|
# close an open paragraph. |
|
1344
|
440
|
100
|
|
|
|
1056
|
if ($self->{xhtml}) |
|
1345
|
|
|
|
|
|
|
{ |
|
1346
|
367
|
|
|
|
|
468
|
my $open_tag = @{$self->{__tags}}[$#{$self->{__tags}}]; |
|
|
367
|
|
|
|
|
674
|
|
|
|
367
|
|
|
|
|
680
|
|
|
1347
|
367
|
100
|
100
|
|
|
1868
|
if (defined $open_tag && $open_tag eq 'p') |
|
1348
|
|
|
|
|
|
|
{ |
|
1349
|
233
|
|
|
|
|
620
|
$para .= $self->close_tag('p'); |
|
1350
|
|
|
|
|
|
|
} |
|
1351
|
|
|
|
|
|
|
} |
|
1352
|
|
|
|
|
|
|
|
|
1353
|
440
|
100
|
66
|
|
|
7267
|
if ( |
|
|
|
|
66
|
|
|
|
|
|
1354
|
|
|
|
|
|
|
$self->{unhyphenation} |
|
1355
|
|
|
|
|
|
|
|
|
1356
|
|
|
|
|
|
|
# ends in hyphen & next line starts w/letters |
|
1357
|
|
|
|
|
|
|
&& ($para =~ /[^\W\d_]\-\n\s*[^\W\d_]/s) && !( |
|
1358
|
|
|
|
|
|
|
$self->{__mode} & |
|
1359
|
|
|
|
|
|
|
($PRE | $HEADER | $MAILHEADER | $TABLE | $BREAK) |
|
1360
|
|
|
|
|
|
|
) |
|
1361
|
|
|
|
|
|
|
) |
|
1362
|
|
|
|
|
|
|
{ |
|
1363
|
8
|
|
|
|
|
37
|
$self->unhyphenate_para(\$para); |
|
1364
|
|
|
|
|
|
|
} |
|
1365
|
|
|
|
|
|
|
# chop trailing newlines for continuing lists and PRE |
|
1366
|
440
|
100
|
100
|
|
|
2311
|
if ( $self->{__mode} & $LIST |
|
1367
|
|
|
|
|
|
|
|| $self->{__mode} & $PRE) |
|
1368
|
|
|
|
|
|
|
{ |
|
1369
|
86
|
|
|
|
|
459
|
$para =~ s/\n$//g; |
|
1370
|
|
|
|
|
|
|
} |
|
1371
|
|
|
|
|
|
|
} |
|
1372
|
|
|
|
|
|
|
|
|
1373
|
|
|
|
|
|
|
# apply links and bold/italic/underline formatting |
|
1374
|
440
|
50
|
|
|
|
1507
|
if ($para !~ /^\s*$/) |
|
1375
|
|
|
|
|
|
|
{ |
|
1376
|
440
|
|
|
|
|
1429
|
$self->apply_links( |
|
1377
|
|
|
|
|
|
|
para_ref => \$para, |
|
1378
|
|
|
|
|
|
|
para_action_ref => \$para_action |
|
1379
|
|
|
|
|
|
|
); |
|
1380
|
|
|
|
|
|
|
} |
|
1381
|
|
|
|
|
|
|
|
|
1382
|
|
|
|
|
|
|
# close any open lists if required to |
|
1383
|
440
|
100
|
100
|
|
|
1394
|
if ( $args{close_tags} |
|
1384
|
|
|
|
|
|
|
&& $self->{__mode} & $LIST) # End all lists |
|
1385
|
|
|
|
|
|
|
{ |
|
1386
|
1
|
|
|
|
|
7
|
$self->endlist( |
|
1387
|
|
|
|
|
|
|
num_lists => $self->{__listnum}, |
|
1388
|
|
|
|
|
|
|
prev_ref => \$para, |
|
1389
|
|
|
|
|
|
|
line_action_ref => \$para_action |
|
1390
|
|
|
|
|
|
|
); |
|
1391
|
|
|
|
|
|
|
} |
|
1392
|
|
|
|
|
|
|
# close any open tags |
|
1393
|
440
|
100
|
66
|
|
|
1118
|
if ($args{close_tags} && $self->{xhtml}) |
|
1394
|
|
|
|
|
|
|
{ |
|
1395
|
7
|
|
|
|
|
11
|
while (@{$self->{__tags}}) |
|
|
7
|
|
|
|
|
22
|
|
|
1396
|
|
|
|
|
|
|
{ |
|
1397
|
0
|
|
|
|
|
0
|
$para .= $self->close_tag(''); |
|
1398
|
|
|
|
|
|
|
} |
|
1399
|
|
|
|
|
|
|
} |
|
1400
|
|
|
|
|
|
|
|
|
1401
|
|
|
|
|
|
|
# convert remaining Microsoft character codes into sensible HTML |
|
1402
|
440
|
100
|
66
|
|
|
2307
|
if ($self->{demoronize} && !$self->{eight_bit_clean}) |
|
1403
|
|
|
|
|
|
|
{ |
|
1404
|
433
|
|
|
|
|
1084
|
$para = demoronize_code($para); |
|
1405
|
|
|
|
|
|
|
} |
|
1406
|
|
|
|
|
|
|
# All the matching and formatting is done. Now we can |
|
1407
|
|
|
|
|
|
|
# replace non-ASCII characters with character entities. |
|
1408
|
440
|
100
|
|
|
|
1303
|
if (!$self->{eight_bit_clean}) |
|
1409
|
|
|
|
|
|
|
{ |
|
1410
|
433
|
|
|
|
|
28107
|
my @chars = split(//, $para); |
|
1411
|
433
|
|
|
|
|
3997
|
foreach $_ (@chars) |
|
1412
|
|
|
|
|
|
|
{ |
|
1413
|
100265
|
100
|
|
|
|
200944
|
$_ = $char_entities{$_} if defined($char_entities{$_}); |
|
1414
|
|
|
|
|
|
|
} |
|
1415
|
433
|
|
|
|
|
12184
|
$para = join('', @chars); |
|
1416
|
|
|
|
|
|
|
} |
|
1417
|
|
|
|
|
|
|
|
|
1418
|
440
|
|
|
|
|
988
|
$self->{__prev_para_action} = $para_action; |
|
1419
|
|
|
|
|
|
|
|
|
1420
|
440
|
|
|
|
|
2450
|
return $para; |
|
1421
|
|
|
|
|
|
|
} # process_para |
|
1422
|
|
|
|
|
|
|
|
|
1423
|
|
|
|
|
|
|
=head2 txt2html |
|
1424
|
|
|
|
|
|
|
|
|
1425
|
|
|
|
|
|
|
$conv->txt2html(%args); |
|
1426
|
|
|
|
|
|
|
|
|
1427
|
|
|
|
|
|
|
Convert a text file to HTML. Takes a hash of arguments. See |
|
1428
|
|
|
|
|
|
|
L for the possible values of the arguments. Arguments which |
|
1429
|
|
|
|
|
|
|
have already been set with B or B will remain as they are, |
|
1430
|
|
|
|
|
|
|
unless they are overridden. |
|
1431
|
|
|
|
|
|
|
|
|
1432
|
|
|
|
|
|
|
=cut |
|
1433
|
|
|
|
|
|
|
|
|
1434
|
|
|
|
|
|
|
sub txt2html ($;$) |
|
1435
|
|
|
|
|
|
|
{ |
|
1436
|
38
|
|
|
38
|
1
|
39948
|
my $self = shift; |
|
1437
|
|
|
|
|
|
|
|
|
1438
|
38
|
50
|
|
|
|
140
|
if (@_) |
|
1439
|
|
|
|
|
|
|
{ |
|
1440
|
38
|
|
|
|
|
138
|
$self->args(@_); |
|
1441
|
|
|
|
|
|
|
} |
|
1442
|
|
|
|
|
|
|
|
|
1443
|
38
|
|
|
|
|
158
|
$self->do_init_call(); |
|
1444
|
|
|
|
|
|
|
|
|
1445
|
38
|
|
|
|
|
91
|
my $outhandle; |
|
1446
|
|
|
|
|
|
|
my $outhandle_needs_closing; |
|
1447
|
|
|
|
|
|
|
|
|
1448
|
|
|
|
|
|
|
# set up the output |
|
1449
|
38
|
100
|
|
|
|
339
|
if ($self->{outhandle}) |
|
|
|
50
|
|
|
|
|
|
|
1450
|
|
|
|
|
|
|
{ |
|
1451
|
1
|
|
|
|
|
5
|
$outhandle = $self->{outhandle}; |
|
1452
|
1
|
|
|
|
|
4
|
$outhandle_needs_closing = 1; |
|
1453
|
|
|
|
|
|
|
} |
|
1454
|
|
|
|
|
|
|
elsif ($self->{outfile} eq "-") |
|
1455
|
|
|
|
|
|
|
{ |
|
1456
|
0
|
|
|
|
|
0
|
$outhandle = *STDOUT; |
|
1457
|
0
|
|
|
|
|
0
|
$outhandle_needs_closing = 0; |
|
1458
|
|
|
|
|
|
|
} |
|
1459
|
|
|
|
|
|
|
else |
|
1460
|
|
|
|
|
|
|
{ |
|
1461
|
37
|
50
|
|
|
|
5903
|
open($outhandle, "> " . $self->{outfile}) |
|
1462
|
|
|
|
|
|
|
|| die "Error: unable to open ", $self->{outfile}, ": $!\n"; |
|
1463
|
37
|
|
|
|
|
124
|
$outhandle_needs_closing = 1; |
|
1464
|
|
|
|
|
|
|
} |
|
1465
|
|
|
|
|
|
|
|
|
1466
|
|
|
|
|
|
|
# slurp up a paragraph at a time, a file at a time |
|
1467
|
38
|
|
|
|
|
298
|
local $/ = ""; |
|
1468
|
38
|
|
|
|
|
98
|
my $para = ''; |
|
1469
|
38
|
|
|
|
|
81
|
my $count = 0; |
|
1470
|
38
|
|
|
|
|
70
|
my $print_count = 0; |
|
1471
|
38
|
|
|
|
|
100
|
my @sources = (); |
|
1472
|
38
|
|
|
|
|
91
|
my $source_type; |
|
1473
|
38
|
100
|
66
|
|
|
269
|
if ($self->{infile} and @{$self->{infile}}) |
|
|
38
|
50
|
33
|
|
|
264
|
|
|
|
|
0
|
0
|
|
|
|
|
|
1474
|
2
|
|
|
|
|
11
|
{ |
|
1475
|
36
|
|
|
|
|
82
|
@sources = @{$self->{infile}}; |
|
|
36
|
|
|
|
|
149
|
|
|
1476
|
36
|
|
|
|
|
87
|
$source_type = 'file'; |
|
1477
|
|
|
|
|
|
|
} |
|
1478
|
|
|
|
|
|
|
elsif ($self->{inhandle} and @{$self->{inhandle}}) |
|
1479
|
0
|
|
|
|
|
0
|
{ |
|
1480
|
2
|
|
|
|
|
6
|
@sources = @{$self->{inhandle}}; |
|
|
2
|
|
|
|
|
6
|
|
|
1481
|
2
|
|
|
|
|
6
|
$source_type = 'filehandle'; |
|
1482
|
|
|
|
|
|
|
} |
|
1483
|
|
|
|
|
|
|
elsif ($self->{instring} and @{$self->{instring}}) |
|
1484
|
|
|
|
|
|
|
{ |
|
1485
|
0
|
|
|
|
|
0
|
@sources = @{$self->{instring}}; |
|
|
0
|
|
|
|
|
0
|
|
|
1486
|
0
|
|
|
|
|
0
|
$source_type = 'string'; |
|
1487
|
|
|
|
|
|
|
} |
|
1488
|
38
|
|
|
|
|
72
|
my $inhandle; |
|
1489
|
38
|
|
|
|
|
68
|
my $inhandle_needs_closing = 0; |
|
1490
|
38
|
|
|
|
|
124
|
foreach my $source (@sources) |
|
1491
|
|
|
|
|
|
|
{ |
|
1492
|
38
|
|
|
|
|
76
|
$inhandle = undef; |
|
1493
|
38
|
100
|
|
|
|
150
|
if ($source_type eq 'file') |
|
|
|
50
|
|
|
|
|
|
|
1494
|
|
|
|
|
|
|
{ |
|
1495
|
36
|
50
|
33
|
|
|
302
|
if (!$source or $source eq '-') |
|
1496
|
|
|
|
|
|
|
{ |
|
1497
|
0
|
|
|
|
|
0
|
$inhandle = *STDIN; |
|
1498
|
0
|
|
|
|
|
0
|
$inhandle_needs_closing = 0; |
|
1499
|
|
|
|
|
|
|
} |
|
1500
|
|
|
|
|
|
|
else |
|
1501
|
|
|
|
|
|
|
{ |
|
1502
|
36
|
50
|
33
|
|
|
2084
|
if (-f $source && open($inhandle, $source)) |
|
1503
|
|
|
|
|
|
|
{ |
|
1504
|
36
|
|
|
|
|
109
|
$inhandle_needs_closing = 1; |
|
1505
|
|
|
|
|
|
|
} |
|
1506
|
|
|
|
|
|
|
else # error |
|
1507
|
|
|
|
|
|
|
{ |
|
1508
|
0
|
|
|
|
|
0
|
warn "Could not open $source\n"; |
|
1509
|
0
|
|
|
|
|
0
|
next; |
|
1510
|
|
|
|
|
|
|
} |
|
1511
|
|
|
|
|
|
|
} |
|
1512
|
|
|
|
|
|
|
} |
|
1513
|
|
|
|
|
|
|
elsif ($source_type eq 'filehandle') |
|
1514
|
|
|
|
|
|
|
{ |
|
1515
|
2
|
|
|
|
|
6
|
$inhandle = $source; |
|
1516
|
2
|
|
|
|
|
5
|
$inhandle_needs_closing = 1; |
|
1517
|
|
|
|
|
|
|
} |
|
1518
|
38
|
50
|
|
|
|
142
|
if ($source_type eq 'string') |
|
1519
|
|
|
|
|
|
|
{ |
|
1520
|
|
|
|
|
|
|
# process the string |
|
1521
|
0
|
|
|
|
|
0
|
$para = $_; |
|
1522
|
0
|
|
|
|
|
0
|
$para =~ s/\n$//; # trim the endline |
|
1523
|
0
|
0
|
|
|
|
0
|
if ($count == 0) |
|
1524
|
|
|
|
|
|
|
{ |
|
1525
|
0
|
|
|
|
|
0
|
$self->do_file_start($outhandle, $para); |
|
1526
|
|
|
|
|
|
|
} |
|
1527
|
0
|
|
|
|
|
0
|
$self->{__done_with_sect_link} = []; |
|
1528
|
0
|
|
|
|
|
0
|
$para = $self->process_chunk($para, close_tags => 0); |
|
1529
|
0
|
|
|
|
|
0
|
print $outhandle $para, "\n"; |
|
1530
|
0
|
|
|
|
|
0
|
$print_count++; |
|
1531
|
0
|
|
|
|
|
0
|
$count++; |
|
1532
|
|
|
|
|
|
|
} |
|
1533
|
|
|
|
|
|
|
else # file or filehandle |
|
1534
|
|
|
|
|
|
|
{ |
|
1535
|
38
|
|
|
|
|
988
|
while (<$inhandle>) |
|
1536
|
|
|
|
|
|
|
{ |
|
1537
|
339
|
|
|
|
|
722
|
$para = $_; |
|
1538
|
339
|
|
|
|
|
1849
|
$para =~ s/\n$//; # trim the endline |
|
1539
|
339
|
100
|
|
|
|
1027
|
if ($count == 0) |
|
1540
|
|
|
|
|
|
|
{ |
|
1541
|
34
|
|
|
|
|
172
|
$self->do_file_start($outhandle, $para); |
|
1542
|
|
|
|
|
|
|
} |
|
1543
|
339
|
|
|
|
|
835
|
$self->{__done_with_sect_link} = []; |
|
1544
|
339
|
|
|
|
|
1068
|
$para = $self->process_chunk($para, close_tags => 0); |
|
1545
|
339
|
|
|
|
|
4768
|
print $outhandle $para, "\n"; |
|
1546
|
339
|
|
|
|
|
398
|
$print_count++; |
|
1547
|
339
|
|
|
|
|
2321
|
$count++; |
|
1548
|
|
|
|
|
|
|
} |
|
1549
|
38
|
50
|
|
|
|
139
|
if ($inhandle_needs_closing) |
|
1550
|
|
|
|
|
|
|
{ |
|
1551
|
38
|
|
|
|
|
1517
|
close($inhandle); |
|
1552
|
|
|
|
|
|
|
} |
|
1553
|
|
|
|
|
|
|
} |
|
1554
|
|
|
|
|
|
|
} # for each file |
|
1555
|
|
|
|
|
|
|
|
|
1556
|
38
|
|
|
|
|
144
|
$self->{__prev} = ""; |
|
1557
|
38
|
100
|
|
|
|
197
|
if ($self->{__mode} & $LIST) # End all lists |
|
1558
|
|
|
|
|
|
|
{ |
|
1559
|
6
|
|
|
|
|
48
|
$self->endlist( |
|
1560
|
|
|
|
|
|
|
num_lists => $self->{__listnum}, |
|
1561
|
|
|
|
|
|
|
prev_ref => \$self->{__prev}, |
|
1562
|
|
|
|
|
|
|
line_action_ref => \$self->{__line_action} |
|
1563
|
|
|
|
|
|
|
); |
|
1564
|
|
|
|
|
|
|
} |
|
1565
|
38
|
|
|
|
|
102
|
print $outhandle $self->{__prev}; |
|
1566
|
|
|
|
|
|
|
|
|
1567
|
|
|
|
|
|
|
# end open preformats |
|
1568
|
38
|
100
|
|
|
|
124
|
if ($self->{__mode} & $PRE) |
|
1569
|
|
|
|
|
|
|
{ |
|
1570
|
2
|
|
|
|
|
11
|
my $tag = $self->close_tag('pre'); |
|
1571
|
2
|
|
|
|
|
6
|
print $outhandle $tag; |
|
1572
|
|
|
|
|
|
|
} |
|
1573
|
|
|
|
|
|
|
|
|
1574
|
|
|
|
|
|
|
# close all open tags |
|
1575
|
38
|
100
|
100
|
|
|
287
|
if ( $self->{xhtml} |
|
|
13
|
|
100
|
|
|
70
|
|
|
1576
|
|
|
|
|
|
|
&& !$self->{extract} |
|
1577
|
|
|
|
|
|
|
&& @{$self->{__tags}}) |
|
1578
|
|
|
|
|
|
|
{ |
|
1579
|
12
|
50
|
|
|
|
39
|
if ($DictDebug & 8) |
|
1580
|
|
|
|
|
|
|
{ |
|
1581
|
0
|
|
|
|
|
0
|
print STDERR "closing all tags at end\n"; |
|
1582
|
|
|
|
|
|
|
} |
|
1583
|
|
|
|
|
|
|
# close any open tags (until we get to the body) |
|
1584
|
12
|
|
|
|
|
20
|
my $open_tag = @{$self->{__tags}}[$#{$self->{__tags}}]; |
|
|
12
|
|
|
|
|
32
|
|
|
|
12
|
|
|
|
|
27
|
|
|
1585
|
12
|
|
33
|
|
|
33
|
while (@{$self->{__tags}} |
|
|
12
|
|
33
|
|
|
94
|
|
|
1586
|
|
|
|
|
|
|
&& $open_tag ne 'body' |
|
1587
|
|
|
|
|
|
|
&& $open_tag ne 'html') |
|
1588
|
|
|
|
|
|
|
{ |
|
1589
|
0
|
|
|
|
|
0
|
print $outhandle $self->close_tag(''); |
|
1590
|
0
|
|
|
|
|
0
|
$open_tag = @{$self->{__tags}}[$#{$self->{__tags}}]; |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
1591
|
|
|
|
|
|
|
} |
|
1592
|
12
|
|
|
|
|
46
|
print $outhandle "\n"; |
|
1593
|
|
|
|
|
|
|
} |
|
1594
|
|
|
|
|
|
|
|
|
1595
|
38
|
100
|
|
|
|
140
|
if ($self->{append_file}) |
|
1596
|
|
|
|
|
|
|
{ |
|
1597
|
2
|
50
|
|
|
|
73
|
if (-r $self->{append_file}) |
|
1598
|
|
|
|
|
|
|
{ |
|
1599
|
2
|
|
|
|
|
83
|
open(APPEND, $self->{append_file}); |
|
1600
|
2
|
|
|
|
|
39
|
while () |
|
1601
|
|
|
|
|
|
|
{ |
|
1602
|
2
|
|
|
|
|
6
|
print $outhandle $_; |
|
1603
|
2
|
|
|
|
|
8
|
$print_count++; |
|
1604
|
|
|
|
|
|
|
} |
|
1605
|
2
|
|
|
|
|
20
|
close(APPEND); |
|
1606
|
|
|
|
|
|
|
} |
|
1607
|
|
|
|
|
|
|
else |
|
1608
|
|
|
|
|
|
|
{ |
|
1609
|
0
|
|
|
|
|
0
|
print STDERR "Can't find or read file ", $self->{append_file}, |
|
1610
|
|
|
|
|
|
|
" to append.\n"; |
|
1611
|
|
|
|
|
|
|
} |
|
1612
|
|
|
|
|
|
|
} |
|
1613
|
|
|
|
|
|
|
|
|
1614
|
|
|
|
|
|
|
# print the closing tags (if we have printed stuff at all) |
|
1615
|
38
|
100
|
100
|
|
|
289
|
if ($print_count && !$self->{extract}) |
|
1616
|
|
|
|
|
|
|
{ |
|
1617
|
13
|
|
|
|
|
46
|
print $outhandle $self->close_tag('body'), "\n"; |
|
1618
|
13
|
|
|
|
|
42
|
print $outhandle $self->close_tag('html'), "\n"; |
|
1619
|
|
|
|
|
|
|
} |
|
1620
|
38
|
50
|
|
|
|
110
|
if ($outhandle_needs_closing) |
|
1621
|
|
|
|
|
|
|
{ |
|
1622
|
38
|
|
|
|
|
3632
|
close($outhandle); |
|
1623
|
|
|
|
|
|
|
} |
|
1624
|
38
|
|
|
|
|
627
|
return 1; |
|
1625
|
|
|
|
|
|
|
} |
|
1626
|
|
|
|
|
|
|
|
|
1627
|
|
|
|
|
|
|
=head1 PRIVATE METHODS |
|
1628
|
|
|
|
|
|
|
|
|
1629
|
|
|
|
|
|
|
These are methods used internally, only of interest to developers. |
|
1630
|
|
|
|
|
|
|
|
|
1631
|
|
|
|
|
|
|
=cut |
|
1632
|
|
|
|
|
|
|
|
|
1633
|
|
|
|
|
|
|
#---------------------------------------------------------------# |
|
1634
|
|
|
|
|
|
|
# Init-related subroutines |
|
1635
|
|
|
|
|
|
|
|
|
1636
|
|
|
|
|
|
|
=head2 init_our_data |
|
1637
|
|
|
|
|
|
|
|
|
1638
|
|
|
|
|
|
|
$self->init_our_data(); |
|
1639
|
|
|
|
|
|
|
|
|
1640
|
|
|
|
|
|
|
Initializes the internal object data. |
|
1641
|
|
|
|
|
|
|
|
|
1642
|
|
|
|
|
|
|
=cut |
|
1643
|
|
|
|
|
|
|
sub init_our_data ($) |
|
1644
|
|
|
|
|
|
|
{ |
|
1645
|
26
|
|
|
26
|
1
|
191
|
my $self = shift; |
|
1646
|
|
|
|
|
|
|
|
|
1647
|
|
|
|
|
|
|
# |
|
1648
|
|
|
|
|
|
|
# All the options, in alphabetical order |
|
1649
|
|
|
|
|
|
|
# |
|
1650
|
26
|
|
|
|
|
101
|
$self->{append_file} = ''; |
|
1651
|
26
|
|
|
|
|
71
|
$self->{append_head} = ''; |
|
1652
|
26
|
|
|
|
|
70
|
$self->{body_deco} = ''; |
|
1653
|
26
|
|
|
|
|
81
|
$self->{bullets} = '-=o*\267'; |
|
1654
|
26
|
|
|
|
|
57
|
$self->{bullets_ordered} = ''; |
|
1655
|
26
|
|
|
|
|
62
|
$self->{bold_delimiter} = '#'; |
|
1656
|
26
|
|
|
|
|
65
|
$self->{caps_tag} = 'STRONG'; |
|
1657
|
26
|
|
|
|
|
104
|
$self->{custom_heading_regexp} = []; |
|
1658
|
26
|
50
|
|
|
|
262
|
$self->{default_link_dict} = |
|
1659
|
|
|
|
|
|
|
($ENV{HOME} ? "$ENV{HOME}/.txt2html.dict" : '.txt2html.dict'); |
|
1660
|
26
|
|
|
|
|
71
|
$self->{doctype} = '-//W3C//DTD HTML 4.01//EN" |
|
1661
|
|
|
|
|
|
|
"http://www.w3.org/TR/html4/strict.dtd'; |
|
1662
|
26
|
|
|
|
|
54
|
$self->{demoronize} = 1; |
|
1663
|
26
|
|
|
|
|
54
|
$self->{eight_bit_clean} = 0; |
|
1664
|
26
|
|
|
|
|
60
|
$self->{escape_HTML_chars} = 1; |
|
1665
|
26
|
|
|
|
|
59
|
$self->{explicit_headings} = 0; |
|
1666
|
26
|
|
|
|
|
52
|
$self->{extract} = 0; |
|
1667
|
26
|
|
|
|
|
94
|
$self->{hrule_min} = 4; |
|
1668
|
26
|
|
|
|
|
124
|
$self->{indent_width} = 2; |
|
1669
|
26
|
|
|
|
|
58
|
$self->{indent_par_break} = 0; |
|
1670
|
26
|
|
|
|
|
208
|
$self->{infile} = []; |
|
1671
|
26
|
|
|
|
|
139
|
$self->{inhandle} = []; |
|
1672
|
26
|
|
|
|
|
320
|
$self->{instring} = []; |
|
1673
|
26
|
|
|
|
|
59
|
$self->{italic_delimiter} = '*'; |
|
1674
|
26
|
|
|
|
|
68
|
$self->{links_dictionaries} = []; |
|
1675
|
26
|
|
|
|
|
58
|
$self->{link_only} = 0; |
|
1676
|
26
|
|
|
|
|
47
|
$self->{lower_case_tags} = 0; |
|
1677
|
26
|
|
|
|
|
69
|
$self->{mailmode} = 0; |
|
1678
|
26
|
|
|
|
|
54
|
$self->{make_anchors} = 1; |
|
1679
|
26
|
|
|
|
|
120
|
$self->{make_links} = 1; |
|
1680
|
26
|
|
|
|
|
56
|
$self->{make_tables} = 0; |
|
1681
|
26
|
|
|
|
|
42
|
$self->{min_caps_length} = 3; |
|
1682
|
26
|
|
|
|
|
58
|
$self->{outfile} = '-'; |
|
1683
|
26
|
|
|
|
|
128
|
$self->{par_indent} = 2; |
|
1684
|
26
|
|
|
|
|
65
|
$self->{preformat_trigger_lines} = 2; |
|
1685
|
26
|
|
|
|
|
57
|
$self->{endpreformat_trigger_lines} = 2; |
|
1686
|
26
|
|
|
|
|
68
|
$self->{preformat_start_marker} = "^(:?(:?<)|<)PRE(:?(:?>)|>)\$"; |
|
1687
|
26
|
|
|
|
|
53
|
$self->{preformat_end_marker} = "^(:?(:?<)|<)/PRE(:?(:?>)|>)\$"; |
|
1688
|
26
|
|
|
|
|
47
|
$self->{preformat_whitespace_min} = 5; |
|
1689
|
26
|
|
|
|
|
60
|
$self->{prepend_file} = ''; |
|
1690
|
26
|
|
|
|
|
51
|
$self->{preserve_indent} = 0; |
|
1691
|
26
|
|
|
|
|
53
|
$self->{short_line_length} = 40; |
|
1692
|
26
|
|
|
|
|
79
|
$self->{style_url} = ''; |
|
1693
|
26
|
|
|
|
|
60
|
$self->{tab_width} = 8; |
|
1694
|
26
|
|
|
|
|
184
|
$self->{table_type} = { |
|
1695
|
|
|
|
|
|
|
ALIGN => 1, |
|
1696
|
|
|
|
|
|
|
PGSQL => 1, |
|
1697
|
|
|
|
|
|
|
BORDER => 1, |
|
1698
|
|
|
|
|
|
|
DELIM => 1, |
|
1699
|
|
|
|
|
|
|
}; |
|
1700
|
26
|
|
|
|
|
76
|
$self->{title} = ''; |
|
1701
|
26
|
|
|
|
|
58
|
$self->{titlefirst} = 0; |
|
1702
|
26
|
|
|
|
|
62
|
$self->{underline_delimiter} = '_'; |
|
1703
|
26
|
|
|
|
|
65
|
$self->{underline_length_tolerance} = 1; |
|
1704
|
26
|
|
|
|
|
63
|
$self->{underline_offset_tolerance} = 1; |
|
1705
|
26
|
|
|
|
|
57
|
$self->{unhyphenation} = 1; |
|
1706
|
26
|
|
|
|
|
56
|
$self->{use_mosaic_header} = 0; |
|
1707
|
26
|
|
|
|
|
69
|
$self->{use_preformat_marker} = 0; |
|
1708
|
26
|
|
|
|
|
54
|
$self->{xhtml} = 1; |
|
1709
|
|
|
|
|
|
|
|
|
1710
|
|
|
|
|
|
|
# accumulation variables |
|
1711
|
26
|
|
|
|
|
48
|
$self->{__file} = ""; # Current file being processed |
|
1712
|
26
|
|
|
|
|
131
|
$self->{__heading_styles} = {}; |
|
1713
|
26
|
|
|
|
|
64
|
$self->{__num_heading_styles} = 0; |
|
1714
|
26
|
|
|
|
|
75
|
$self->{__links_table} = {}; |
|
1715
|
26
|
|
|
|
|
73
|
$self->{__links_table_order} = []; |
|
1716
|
26
|
|
|
|
|
67
|
$self->{__links_table_patterns} = {}; |
|
1717
|
26
|
|
|
|
|
64
|
$self->{__search_patterns} = []; |
|
1718
|
26
|
|
|
|
|
64
|
$self->{__repl_code} = []; |
|
1719
|
26
|
|
|
|
|
50
|
$self->{__prev_para_action} = 0; |
|
1720
|
26
|
|
|
|
|
48
|
$self->{__non_header_anchor} = 0; |
|
1721
|
26
|
|
|
|
|
46
|
$self->{__mode} = 0; |
|
1722
|
26
|
|
|
|
|
160
|
$self->{__listnum} = 0; |
|
1723
|
26
|
|
|
|
|
56
|
$self->{__list_nice_indent} = ""; |
|
1724
|
26
|
|
|
|
|
62
|
$self->{__list_indent} = []; |
|
1725
|
|
|
|
|
|
|
|
|
1726
|
26
|
|
|
|
|
53
|
$self->{__call_init_done} = 0; |
|
1727
|
|
|
|
|
|
|
|
|
1728
|
|
|
|
|
|
|
# |
|
1729
|
|
|
|
|
|
|
# The global links data |
|
1730
|
|
|
|
|
|
|
# |
|
1731
|
26
|
|
|
|
|
46
|
my $system_dict = <<'EOT'; |
|
1732
|
|
|
|
|
|
|
# |
|
1733
|
|
|
|
|
|
|
# Global links dictionary file for HTML::TextToHTML |
|
1734
|
|
|
|
|
|
|
# http://www.katspace.com/tools/text_to_html |
|
1735
|
|
|
|
|
|
|
# http://txt2html.sourceforge.net/ |
|
1736
|
|
|
|
|
|
|
# based on links dictionary for Seth Golub's txt2html |
|
1737
|
|
|
|
|
|
|
# http://www.aigeek.com/txt2html/ |
|
1738
|
|
|
|
|
|
|
# |
|
1739
|
|
|
|
|
|
|
# This dictionary contains some patterns for converting obvious URLs, |
|
1740
|
|
|
|
|
|
|
# ftp sites, hostnames, email addresses and the like to hrefs. |
|
1741
|
|
|
|
|
|
|
# |
|
1742
|
|
|
|
|
|
|
# Original adapted from the html.pl package by Oscar Nierstrasz in |
|
1743
|
|
|
|
|
|
|
# the Software Archive of the Software Composition Group |
|
1744
|
|
|
|
|
|
|
# http://iamwww.unibe.ch/~scg/Src/ |
|
1745
|
|
|
|
|
|
|
# |
|
1746
|
|
|
|
|
|
|
|
|
1747
|
|
|
|
|
|
|
# Some people even like to mark the URL label explicitly |
|
1748
|
|
|
|
|
|
|
/<URL:([-\w\.\/:~_\@]+):([a-zA-Z0-9'() ]+)>/ -h-> $2 |
|
1749
|
|
|
|
|
|
|
|
|
1750
|
|
|
|
|
|
|
# Some people like to mark URLs explicitly |
|
1751
|
|
|
|
|
|
|
/<URL:\s*(\S+?)\s*>/ -h-> $1 |
|
1752
|
|
|
|
|
|
|
|
|
1753
|
|
|
|
|
|
|
# |
|
1754
|
|
|
|
|
|
|
/<(http:\S+?)\s*>/ -h-> <$1> |
|
1755
|
|
|
|
|
|
|
|
|
1756
|
|
|
|
|
|
|
# Urls: : |
|
1757
|
|
|
|
|
|
|
|
|
1758
|
|
|
|
|
|
|
|snews:[\w\.]+| -> $& |
|
1759
|
|
|
|
|
|
|
|news:[\w\.]+| -> $& |
|
1760
|
|
|
|
|
|
|
|nntp:[\w/\.:+\-]+| -> $& |
|
1761
|
|
|
|
|
|
|
|http:[\w/\.:\@+\-~\%#?=&;,]+[\w/]| -> $& |
|
1762
|
|
|
|
|
|
|
|shttp:[\w/\.:+\-~\%#?=&;,]+| -> $& |
|
1763
|
|
|
|
|
|
|
|https:[\w/\.:+\-~\%#?=&;,]+| -> $& |
|
1764
|
|
|
|
|
|
|
|file:[\w/\.:+\-]+| -> $& |
|
1765
|
|
|
|
|
|
|
|ftp:[\w/\.:+\-]+| -> $& |
|
1766
|
|
|
|
|
|
|
|wais:[\w/\.:+\-]+| -> $& |
|
1767
|
|
|
|
|
|
|
|gopher:[\w/\.:+\-]+| -> $& |
|
1768
|
|
|
|
|
|
|
|telnet:[\w/\@\.:+\-]+| -> $& |
|
1769
|
|
|
|
|
|
|
|
|
1770
|
|
|
|
|
|
|
|
|
1771
|
|
|
|
|
|
|
# catch some newsgroups to avoid confusion with sites: |
|
1772
|
|
|
|
|
|
|
|([^\w\-/\.:\@>])(alt\.[\w\.+\-]+[\w+\-]+)| -h-> $1$2 |
|
1773
|
|
|
|
|
|
|
|([^\w\-/\.:\@>])(bionet\.[\w\.+\-]+[\w+\-]+)| -h-> $1$2 |
|
1774
|
|
|
|
|
|
|
|([^\w\-/\.:\@>])(bit\.[\w\.+\-]+[\w+\-]+)| -h-> $1$2 |
|
1775
|
|
|
|
|
|
|
|([^\w\-/\.:\@>])(biz\.[\w\.+\-]+[\w+\-]+)| -h-> $1$2 |
|
1776
|
|
|
|
|
|
|
|([^\w\-/\.:\@>])(clari\.[\w\.+\-]+[\w+\-]+)| -h-> $1$2 |
|
1777
|
|
|
|
|
|
|
|([^\w\-/\.:\@>])(comp\.[\w\.+\-]+[\w+\-]+)| -h-> $1$2 |
|
1778
|
|
|
|
|
|
|
|([^\w\-/\.:\@>])(gnu\.[\w\.+\-]+[\w+\-]+)| -h-> $1$2 |
|
1779
|
|
|
|
|
|
|
|([^\w\-/\.:\@>])(humanities\.[\w\.+\-]+[\w+\-]+)| |
|
1780
|
|
|
|
|
|
|
-h-> $1$2 |
|
1781
|
|
|
|
|
|
|
|([^\w\-/\.:\@>])(k12\.[\w\.+\-]+[\w+\-]+)| -h-> $1$2 |
|
1782
|
|
|
|
|
|
|
|([^\w\-/\.:\@>])(misc\.[\w\.+\-]+[\w+\-]+)| -h-> $1$2 |
|
1783
|
|
|
|
|
|
|
|([^\w\-/\.:\@>])(news\.[\w\.+\-]+[\w+\-]+)| -h-> $1$2 |
|
1784
|
|
|
|
|
|
|
|([^\w\-/\.:\@>])(rec\.[\w\.+\-]+[\w+\-]+)| -h-> $1$2 |
|
1785
|
|
|
|
|
|
|
|([^\w\-/\.:\@>])(soc\.[\w\.+\-]+[\w+\-]+)| -h-> $1$2 |
|
1786
|
|
|
|
|
|
|
|([^\w\-/\.:\@>])(talk\.[\w\.+\-]+[\w+\-]+)| -h-> $1$2 |
|
1787
|
|
|
|
|
|
|
|([^\w\-/\.:\@>])(us\.[\w\.+\-]+[\w+\-]+)| -h-> $1$2 |
|
1788
|
|
|
|
|
|
|
|([^\w\-/\.:\@>])(ch\.[\w\.+\-]+[\w+\-]+)| -h-> $1$2 |
|
1789
|
|
|
|
|
|
|
|([^\w\-/\.:\@>])(de\.[\w\.+\-]+[\w+\-]+)| -h-> $1$2 |
|
1790
|
|
|
|
|
|
|
|
|
1791
|
|
|
|
|
|
|
# FTP locations (with directory): |
|
1792
|
|
|
|
|
|
|
# anonymous@: |
|
1793
|
|
|
|
|
|
|
|(anonymous\@)([[:alpha:]][\w\.+\-]+\.[[:alpha:]]{2,}):(\s*)([\w\d+\-/\.]+)| |
|
1794
|
|
|
|
|
|
|
-h-> $1$2:$4$3 |
|
1795
|
|
|
|
|
|
|
|
|
1796
|
|
|
|
|
|
|
# ftp@: |
|
1797
|
|
|
|
|
|
|
|(ftp\@)([[:alpha:]][\w\.+\-]+\.[[:alpha:]]{2,}):(\s*)([\w\d+\-/\.]+)| |
|
1798
|
|
|
|
|
|
|
-h-> $1$2:$4$3 |
|
1799
|
|
|
|
|
|
|
|
|
1800
|
|
|
|
|
|
|
# Email address |
|
1801
|
|
|
|
|
|
|
|[[:alnum:]_\+\-\.]+\@([[:alnum:]][\w\.+\-]+\.[[:alpha:]]{2,})| |
|
1802
|
|
|
|
|
|
|
-> mailto:$& |
|
1803
|
|
|
|
|
|
|
|
|
1804
|
|
|
|
|
|
|
# : |
|
1805
|
|
|
|
|
|
|
|([^\w\-/\.:\@>])([[:alpha:]][\w\.+\-]+\.[[:alpha:]]{2,}):(\s*)([\w\d+\-/\.]+)| |
|
1806
|
|
|
|
|
|
|
-h-> $1$2:$4$3 |
|
1807
|
|
|
|
|
|
|
|
|
1808
|
|
|
|
|
|
|
# NB: don't confuse an http server with a port number for |
|
1809
|
|
|
|
|
|
|
# an FTP location! |
|
1810
|
|
|
|
|
|
|
# internet number version: : |
|
1811
|
|
|
|
|
|
|
|([^\w\-/\.:\@])(\d{2,}\.\d{2,}\.\d+\.\d+):([\w\d+\-/\.]+)| |
|
1812
|
|
|
|
|
|
|
-h-> $1$2:$3 |
|
1813
|
|
|
|
|
|
|
|
|
1814
|
|
|
|
|
|
|
# telnet |
|
1815
|
|
|
|
|
|
|
|telnet ([[:alpha:]][\w+\-]+(\.[\w\.+\-]+)+\.[[:alpha:]]{2,})\s+(\d{2,4})| |
|
1816
|
|
|
|
|
|
|
-h-> telnet $1 $3 |
|
1817
|
|
|
|
|
|
|
|
|
1818
|
|
|
|
|
|
|
# ftp |
|
1819
|
|
|
|
|
|
|
|ftp ([[:alpha:]][\w+\-]+(\.[\w\.+\-]+)+\.[[:alpha:]]{2,})| |
|
1820
|
|
|
|
|
|
|
-h-> ftp $1 |
|
1821
|
|
|
|
|
|
|
|
|
1822
|
|
|
|
|
|
|
# host with "ftp" in the machine name |
|
1823
|
|
|
|
|
|
|
|\b([[:alpha:]][\w])*ftp[\w]*(\.[\w+\-]+){2,}| -h-> ftp $& |
|
1824
|
|
|
|
|
|
|
|
|
1825
|
|
|
|
|
|
|
# ftp.foo.net/blah/ |
|
1826
|
|
|
|
|
|
|
|ftp(\.[\w\@:-]+)+/\S+| -> ftp://$& |
|
1827
|
|
|
|
|
|
|
|
|
1828
|
|
|
|
|
|
|
# www.thehouse.org/txt2html/ |
|
1829
|
|
|
|
|
|
|
|www(\.[\w\@:-]+)+/\S+| -> http://$& |
|
1830
|
|
|
|
|
|
|
|
|
1831
|
|
|
|
|
|
|
# host with "www" in the machine name |
|
1832
|
|
|
|
|
|
|
|\b([[:alpha:]][\w])*www[\w]*(\.[\w+\-]+){2,}| -> http://$&/ |
|
1833
|
|
|
|
|
|
|
|
|
1834
|
|
|
|
|
|
|
# |
|
1835
|
|
|
|
|
|
|
|([[:alpha:]][\w+\-]+\.[\w+\-]+\.[[:alpha:]]{2,})\s+(\d{2,4})| |
|
1836
|
|
|
|
|
|
|
-h-> $1 $2 |
|
1837
|
|
|
|
|
|
|
|
|
1838
|
|
|
|
|
|
|
# just internet numbers with port: |
|
1839
|
|
|
|
|
|
|
|([^\w\-/\.:\@])(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\s+(\d{1,4})| |
|
1840
|
|
|
|
|
|
|
-h-> $1$2 $3 |
|
1841
|
|
|
|
|
|
|
|
|
1842
|
|
|
|
|
|
|
# just internet numbers: |
|
1843
|
|
|
|
|
|
|
|([^\w\-/\.:\@])(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})| |
|
1844
|
|
|
|
|
|
|
-h-> $1$2 |
|
1845
|
|
|
|
|
|
|
|
|
1846
|
|
|
|
|
|
|
# RFCs |
|
1847
|
|
|
|
|
|
|
/RFC ?(\d+)/ -i-> http://www.cis.ohio-state.edu/rfc/rfc$1.txt |
|
1848
|
|
|
|
|
|
|
|
|
1849
|
|
|
|
|
|
|
# Mark _underlined stuff_ as underlined stuff |
|
1850
|
|
|
|
|
|
|
# Don't mistake variable names for underlines, and |
|
1851
|
|
|
|
|
|
|
# take account of possible trailing punctuation |
|
1852
|
|
|
|
|
|
|
#/([ \t\n])_([[:alpha:]][[:alnum:]\s-]*[[:alpha:]])_([\s\.;:,\!\?])/ -h-> $1$2$3 |
|
1853
|
|
|
|
|
|
|
|
|
1854
|
|
|
|
|
|
|
# Seth and his amazing conversion program :-) |
|
1855
|
|
|
|
|
|
|
|
|
1856
|
|
|
|
|
|
|
"Seth Golub" -o-> http://www.aigeek.com/ |
|
1857
|
|
|
|
|
|
|
"txt2html" -o-> http://txt2html.sourceforge.net/ |
|
1858
|
|
|
|
|
|
|
|
|
1859
|
|
|
|
|
|
|
# Kathryn and her amazing modules 8-) |
|
1860
|
|
|
|
|
|
|
"Kathryn Andersen" -o-> http://www.katspace.com/ |
|
1861
|
|
|
|
|
|
|
"HTML::TextToHTML" -o-> http://www.katspace.com/tools/text_to_html/ |
|
1862
|
|
|
|
|
|
|
"hypertoc" -o-> http://www.katspace.com/tools/hypertoc/ |
|
1863
|
|
|
|
|
|
|
"HTML::GenToc" -o-> http://www.katspace.com/tools/hypertoc/ |
|
1864
|
|
|
|
|
|
|
|
|
1865
|
|
|
|
|
|
|
# End of global dictionary |
|
1866
|
|
|
|
|
|
|
EOT |
|
1867
|
|
|
|
|
|
|
|
|
1868
|
|
|
|
|
|
|
# pre-parse the above data by removing unwanted lines |
|
1869
|
|
|
|
|
|
|
# skip lines that start with '#' |
|
1870
|
26
|
|
|
|
|
1412
|
$system_dict =~ s/^\#.*$//mg; |
|
1871
|
|
|
|
|
|
|
# skip lines that end with unescaped ':' |
|
1872
|
26
|
|
|
|
|
5923
|
$system_dict =~ s/^.*[^\\]:\s*$//mg; |
|
1873
|
|
|
|
|
|
|
|
|
1874
|
26
|
|
|
|
|
87
|
$self->{__global_links_data} = $system_dict; |
|
1875
|
|
|
|
|
|
|
|
|
1876
|
|
|
|
|
|
|
} # init_our_data |
|
1877
|
|
|
|
|
|
|
|
|
1878
|
|
|
|
|
|
|
#---------------------------------------------------------------# |
|
1879
|
|
|
|
|
|
|
# txt2html-related subroutines |
|
1880
|
|
|
|
|
|
|
|
|
1881
|
|
|
|
|
|
|
=head2 deal_with_options |
|
1882
|
|
|
|
|
|
|
|
|
1883
|
|
|
|
|
|
|
$self->deal_with_options(); |
|
1884
|
|
|
|
|
|
|
|
|
1885
|
|
|
|
|
|
|
do extra processing related to particular options |
|
1886
|
|
|
|
|
|
|
|
|
1887
|
|
|
|
|
|
|
=cut |
|
1888
|
|
|
|
|
|
|
sub deal_with_options ($) |
|
1889
|
|
|
|
|
|
|
{ |
|
1890
|
96
|
|
|
96
|
1
|
132
|
my $self = shift; |
|
1891
|
|
|
|
|
|
|
|
|
1892
|
96
|
50
|
|
|
|
305
|
if (!$self->{make_links}) |
|
1893
|
|
|
|
|
|
|
{ |
|
1894
|
0
|
|
|
|
|
0
|
$self->{'links_dictionaries'} = 0; |
|
1895
|
|
|
|
|
|
|
} |
|
1896
|
96
|
100
|
|
|
|
270
|
if ($self->{append_file}) |
|
1897
|
|
|
|
|
|
|
{ |
|
1898
|
4
|
50
|
|
|
|
86
|
if (!-r $self->{append_file}) |
|
1899
|
|
|
|
|
|
|
{ |
|
1900
|
0
|
|
|
|
|
0
|
print STDERR "Can't find or read ", $self->{append_file}, "\n"; |
|
1901
|
0
|
|
|
|
|
0
|
$self->{append_file} = ''; |
|
1902
|
|
|
|
|
|
|
} |
|
1903
|
|
|
|
|
|
|
} |
|
1904
|
96
|
50
|
|
|
|
222
|
if ($self->{prepend_file}) |
|
1905
|
|
|
|
|
|
|
{ |
|
1906
|
0
|
0
|
|
|
|
0
|
if (!-r $self->{prepend_file}) |
|
1907
|
|
|
|
|
|
|
{ |
|
1908
|
0
|
|
|
|
|
0
|
print STDERR "Can't find or read ", $self->{prepend_file}, "\n"; |
|
1909
|
0
|
|
|
|
|
0
|
$self->{'prepend_file'} = ''; |
|
1910
|
|
|
|
|
|
|
} |
|
1911
|
|
|
|
|
|
|
} |
|
1912
|
96
|
50
|
|
|
|
239
|
if ($self->{append_head}) |
|
1913
|
|
|
|
|
|
|
{ |
|
1914
|
0
|
0
|
|
|
|
0
|
if (!-r $self->{append_head}) |
|
1915
|
|
|
|
|
|
|
{ |
|
1916
|
0
|
|
|
|
|
0
|
print STDERR "Can't find or read ", $self->{append_head}, "\n"; |
|
1917
|
0
|
|
|
|
|
0
|
$self->{'append_head'} = ''; |
|
1918
|
|
|
|
|
|
|
} |
|
1919
|
|
|
|
|
|
|
} |
|
1920
|
|
|
|
|
|
|
|
|
1921
|
96
|
50
|
|
|
|
238
|
if (!$self->{outfile}) |
|
1922
|
|
|
|
|
|
|
{ |
|
1923
|
0
|
|
|
|
|
0
|
$self->{'outfile'} = "-"; |
|
1924
|
|
|
|
|
|
|
} |
|
1925
|
|
|
|
|
|
|
|
|
1926
|
96
|
50
|
|
|
|
244
|
$self->{'preformat_trigger_lines'} = 0 |
|
1927
|
|
|
|
|
|
|
if ($self->{preformat_trigger_lines} < 0); |
|
1928
|
96
|
50
|
|
|
|
226
|
$self->{'preformat_trigger_lines'} = 2 |
|
1929
|
|
|
|
|
|
|
if ($self->{preformat_trigger_lines} > 2); |
|
1930
|
|
|
|
|
|
|
|
|
1931
|
96
|
50
|
|
|
|
244
|
$self->{'endpreformat_trigger_lines'} = 1 |
|
1932
|
|
|
|
|
|
|
if ($self->{preformat_trigger_lines} == 0); |
|
1933
|
96
|
50
|
|
|
|
222
|
$self->{'endpreformat_trigger_lines'} = 0 |
|
1934
|
|
|
|
|
|
|
if ($self->{endpreformat_trigger_lines} < 0); |
|
1935
|
96
|
50
|
|
|
|
204
|
$self->{'endpreformat_trigger_lines'} = 2 |
|
1936
|
|
|
|
|
|
|
if ($self->{endpreformat_trigger_lines} > 2); |
|
1937
|
|
|
|
|
|
|
|
|
1938
|
96
|
|
33
|
|
|
322
|
$self->{__preformat_enabled} = |
|
1939
|
|
|
|
|
|
|
(($self->{endpreformat_trigger_lines} != 0) |
|
1940
|
|
|
|
|
|
|
|| $self->{use_preformat_marker}); |
|
1941
|
|
|
|
|
|
|
|
|
1942
|
96
|
50
|
|
|
|
1068
|
if ($self->{use_mosaic_header}) |
|
1943
|
|
|
|
|
|
|
{ |
|
1944
|
0
|
|
|
|
|
0
|
my $num_heading_styles = 0; |
|
1945
|
0
|
|
|
|
|
0
|
my %heading_styles = (); |
|
1946
|
0
|
|
|
|
|
0
|
$heading_styles{"*"} = ++$num_heading_styles; |
|
1947
|
0
|
|
|
|
|
0
|
$heading_styles{"="} = ++$num_heading_styles; |
|
1948
|
0
|
|
|
|
|
0
|
$heading_styles{"+"} = ++$num_heading_styles; |
|
1949
|
0
|
|
|
|
|
0
|
$heading_styles{"-"} = ++$num_heading_styles; |
|
1950
|
0
|
|
|
|
|
0
|
$heading_styles{"~"} = ++$num_heading_styles; |
|
1951
|
0
|
|
|
|
|
0
|
$heading_styles{"."} = ++$num_heading_styles; |
|
1952
|
0
|
|
|
|
|
0
|
$self->{__heading_styles} = \%heading_styles; |
|
1953
|
0
|
|
|
|
|
0
|
$self->{__num_heading_styles} = $num_heading_styles; |
|
1954
|
|
|
|
|
|
|
} |
|
1955
|
|
|
|
|
|
|
# XHTML implies lower case |
|
1956
|
96
|
100
|
|
|
|
325
|
$self->{'lower_case_tags'} = 1 if ($self->{xhtml}); |
|
1957
|
|
|
|
|
|
|
} |
|
1958
|
|
|
|
|
|
|
|
|
1959
|
|
|
|
|
|
|
=head2 escape |
|
1960
|
|
|
|
|
|
|
|
|
1961
|
|
|
|
|
|
|
$newtext = escape($text); |
|
1962
|
|
|
|
|
|
|
|
|
1963
|
|
|
|
|
|
|
Escape & < and > |
|
1964
|
|
|
|
|
|
|
|
|
1965
|
|
|
|
|
|
|
=cut |
|
1966
|
|
|
|
|
|
|
sub escape ($) |
|
1967
|
|
|
|
|
|
|
{ |
|
1968
|
2835
|
|
|
2835
|
1
|
4773
|
my ($text) = @_; |
|
1969
|
2835
|
|
|
|
|
3744
|
$text =~ s/&/&/g; |
|
1970
|
2835
|
|
|
|
|
3288
|
$text =~ s/>/>/g; |
|
1971
|
2835
|
|
|
|
|
3091
|
$text =~ s/</g; |
|
1972
|
2835
|
|
|
|
|
5594
|
return $text; |
|
1973
|
|
|
|
|
|
|
} |
|
1974
|
|
|
|
|
|
|
|
|
1975
|
|
|
|
|
|
|
=head2 demoronize_char |
|
1976
|
|
|
|
|
|
|
|
|
1977
|
|
|
|
|
|
|
$newtext = demoronize_char($text); |
|
1978
|
|
|
|
|
|
|
|
|
1979
|
|
|
|
|
|
|
Convert Microsoft character entities into characters. |
|
1980
|
|
|
|
|
|
|
|
|
1981
|
|
|
|
|
|
|
Added by Alan Jackson, alan at ajackson dot org, and based |
|
1982
|
|
|
|
|
|
|
on the demoronize script by John Walker, http://www.fourmilab.ch/ |
|
1983
|
|
|
|
|
|
|
|
|
1984
|
|
|
|
|
|
|
=cut |
|
1985
|
|
|
|
|
|
|
sub demoronize_char($) |
|
1986
|
|
|
|
|
|
|
{ |
|
1987
|
440
|
|
|
440
|
1
|
823
|
my $s = shift; |
|
1988
|
|
|
|
|
|
|
# Map strategically incompatible non-ISO characters in the |
|
1989
|
|
|
|
|
|
|
# range 0x82 -- 0x9F into plausible substitutes where |
|
1990
|
|
|
|
|
|
|
# possible. |
|
1991
|
|
|
|
|
|
|
|
|
1992
|
440
|
|
|
|
|
849
|
$s =~ s/\x82/,/g; |
|
1993
|
440
|
|
|
|
|
748
|
$s =~ s/\x84/,,/g; |
|
1994
|
440
|
|
|
|
|
1218
|
$s =~ s/\x85/.../g; |
|
1995
|
|
|
|
|
|
|
|
|
1996
|
440
|
|
|
|
|
853
|
$s =~ s/\x88/^/g; |
|
1997
|
|
|
|
|
|
|
|
|
1998
|
440
|
|
|
|
|
840
|
$s =~ s/\x8B/
|
|
1999
|
440
|
|
|
|
|
925
|
$s =~ s/\x8C/Oe/g; |
|
2000
|
|
|
|
|
|
|
|
|
2001
|
440
|
|
|
|
|
716
|
$s =~ s/\x91/`/g; |
|
2002
|
440
|
|
|
|
|
794
|
$s =~ s/\x92/'/g; |
|
2003
|
440
|
|
|
|
|
920
|
$s =~ s/\x93/"/g; |
|
2004
|
440
|
|
|
|
|
718
|
$s =~ s/\x94/"/g; |
|
2005
|
440
|
|
|
|
|
673
|
$s =~ s/\x95/*/g; |
|
2006
|
440
|
|
|
|
|
736
|
$s =~ s/\x96/-/g; |
|
2007
|
440
|
|
|
|
|
659
|
$s =~ s/\x97/--/g; |
|
2008
|
|
|
|
|
|
|
|
|
2009
|
440
|
|
|
|
|
678
|
$s =~ s/\x9B/>/g; |
|
2010
|
440
|
|
|
|
|
785
|
$s =~ s/\x9C/oe/g; |
|
2011
|
|
|
|
|
|
|
|
|
2012
|
440
|
|
|
|
|
931
|
return $s; |
|
2013
|
|
|
|
|
|
|
} |
|
2014
|
|
|
|
|
|
|
|
|
2015
|
|
|
|
|
|
|
=head2 demoronize_code |
|
2016
|
|
|
|
|
|
|
|
|
2017
|
|
|
|
|
|
|
$newtext = demoronize_code($text); |
|
2018
|
|
|
|
|
|
|
|
|
2019
|
|
|
|
|
|
|
convert Microsoft character entities into HTML code |
|
2020
|
|
|
|
|
|
|
|
|
2021
|
|
|
|
|
|
|
=cut |
|
2022
|
|
|
|
|
|
|
sub demoronize_code($) |
|
2023
|
|
|
|
|
|
|
{ |
|
2024
|
433
|
|
|
433
|
1
|
671
|
my $s = shift; |
|
2025
|
|
|
|
|
|
|
# Map strategically incompatible non-ISO characters in the |
|
2026
|
|
|
|
|
|
|
# range 0x82 -- 0x9F into plausible substitutes where |
|
2027
|
|
|
|
|
|
|
# possible. |
|
2028
|
|
|
|
|
|
|
|
|
2029
|
433
|
|
|
|
|
979
|
$s =~ s-\x83-f-g; |
|
2030
|
|
|
|
|
|
|
|
|
2031
|
433
|
|
|
|
|
728
|
$s =~ s-\x98-~-g; |
|
2032
|
433
|
|
|
|
|
786
|
$s =~ s-\x99-TM-g; |
|
2033
|
|
|
|
|
|
|
|
|
2034
|
433
|
|
|
|
|
1056
|
return $s; |
|
2035
|
|
|
|
|
|
|
} |
|
2036
|
|
|
|
|
|
|
|
|
2037
|
|
|
|
|
|
|
=head2 get_tag |
|
2038
|
|
|
|
|
|
|
|
|
2039
|
|
|
|
|
|
|
$tag = $self->get_tag($in_tag); |
|
2040
|
|
|
|
|
|
|
|
|
2041
|
|
|
|
|
|
|
$tag = $self->get_tag($in_tag, |
|
2042
|
|
|
|
|
|
|
tag_type=>TAG_START, |
|
2043
|
|
|
|
|
|
|
inside_tag=>''); |
|
2044
|
|
|
|
|
|
|
|
|
2045
|
|
|
|
|
|
|
output the tag wanted (add the <> and the / if necessary) |
|
2046
|
|
|
|
|
|
|
- output in lower or upper case |
|
2047
|
|
|
|
|
|
|
- do tag-related processing |
|
2048
|
|
|
|
|
|
|
options: |
|
2049
|
|
|
|
|
|
|
tag_type=>TAG_START | tag_type=>TAG_END | tag_type=>TAG_EMPTY |
|
2050
|
|
|
|
|
|
|
(default start) |
|
2051
|
|
|
|
|
|
|
inside_tag=>string (default empty) |
|
2052
|
|
|
|
|
|
|
|
|
2053
|
|
|
|
|
|
|
=cut |
|
2054
|
|
|
|
|
|
|
sub get_tag ($$;%) |
|
2055
|
|
|
|
|
|
|
{ |
|
2056
|
3522
|
|
|
3522
|
1
|
4143
|
my $self = shift; |
|
2057
|
3522
|
|
|
|
|
4032
|
my $in_tag = shift; |
|
2058
|
3522
|
|
|
|
|
9954
|
my %args = ( |
|
2059
|
|
|
|
|
|
|
tag_type => TAG_START, |
|
2060
|
|
|
|
|
|
|
inside_tag => '', |
|
2061
|
|
|
|
|
|
|
@_ |
|
2062
|
|
|
|
|
|
|
); |
|
2063
|
3522
|
|
|
|
|
4635
|
my $inside_tag = $args{inside_tag}; |
|
2064
|
|
|
|
|
|
|
|
|
2065
|
3522
|
|
|
|
|
3497
|
my $open_tag = @{$self->{__tags}}[$#{$self->{__tags}}]; |
|
|
3522
|
|
|
|
|
5817
|
|
|
|
3522
|
|
|
|
|
5702
|
|
|
2066
|
3522
|
100
|
|
|
|
8313
|
if (!defined $open_tag) |
|
2067
|
|
|
|
|
|
|
{ |
|
2068
|
116
|
|
|
|
|
164
|
$open_tag = ''; |
|
2069
|
|
|
|
|
|
|
} |
|
2070
|
|
|
|
|
|
|
# close any open tags that need closing |
|
2071
|
|
|
|
|
|
|
# Note that we only have to check for the structural tags we make, |
|
2072
|
|
|
|
|
|
|
# not every possible HTML tag |
|
2073
|
3522
|
|
|
|
|
5263
|
my $tag_prefix = ''; |
|
2074
|
3522
|
100
|
|
|
|
7216
|
if ($self->{xhtml}) |
|
2075
|
|
|
|
|
|
|
{ |
|
2076
|
3219
|
100
|
100
|
|
|
41525
|
if ( $open_tag eq 'p' |
|
|
|
100
|
100
|
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
|
50
|
66
|
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
2077
|
|
|
|
|
|
|
and $in_tag eq 'p' |
|
2078
|
|
|
|
|
|
|
and $args{tag_type} != TAG_END) |
|
2079
|
|
|
|
|
|
|
{ |
|
2080
|
14
|
|
|
|
|
39
|
$tag_prefix = $self->close_tag('p'); |
|
2081
|
|
|
|
|
|
|
} |
|
2082
|
|
|
|
|
|
|
elsif ( $open_tag eq 'p' |
|
2083
|
|
|
|
|
|
|
and $in_tag =~ /^(hr|ul|ol|dl|pre|table|h)/) |
|
2084
|
|
|
|
|
|
|
{ |
|
2085
|
9
|
|
|
|
|
30
|
$tag_prefix = $self->close_tag('p'); |
|
2086
|
|
|
|
|
|
|
} |
|
2087
|
|
|
|
|
|
|
elsif ( $open_tag eq 'li' |
|
2088
|
|
|
|
|
|
|
and $in_tag eq 'li' |
|
2089
|
|
|
|
|
|
|
and $args{tag_type} != TAG_END) |
|
2090
|
|
|
|
|
|
|
{ |
|
2091
|
|
|
|
|
|
|
# close a LI before the next LI |
|
2092
|
70
|
|
|
|
|
175
|
$tag_prefix = $self->close_tag('li'); |
|
2093
|
|
|
|
|
|
|
} |
|
2094
|
|
|
|
|
|
|
elsif ( $open_tag eq 'li' |
|
2095
|
|
|
|
|
|
|
and $in_tag =~ /^(ul|ol)$/ |
|
2096
|
|
|
|
|
|
|
and $args{tag_type} == TAG_END) |
|
2097
|
|
|
|
|
|
|
{ |
|
2098
|
|
|
|
|
|
|
# close the LI before the list closes |
|
2099
|
47
|
|
|
|
|
1307
|
$tag_prefix = $self->close_tag('li'); |
|
2100
|
|
|
|
|
|
|
} |
|
2101
|
|
|
|
|
|
|
elsif ( $open_tag eq 'dt' |
|
2102
|
|
|
|
|
|
|
and $in_tag eq 'dd' |
|
2103
|
|
|
|
|
|
|
and $args{tag_type} != TAG_END) |
|
2104
|
|
|
|
|
|
|
{ |
|
2105
|
|
|
|
|
|
|
# close a DT before the next DD |
|
2106
|
0
|
|
|
|
|
0
|
$tag_prefix = $self->close_tag('dt'); |
|
2107
|
|
|
|
|
|
|
} |
|
2108
|
|
|
|
|
|
|
elsif ( $open_tag eq 'dd' |
|
2109
|
|
|
|
|
|
|
and $in_tag eq 'dt' |
|
2110
|
|
|
|
|
|
|
and $args{tag_type} != TAG_END) |
|
2111
|
|
|
|
|
|
|
{ |
|
2112
|
|
|
|
|
|
|
# close a DD before the next DT |
|
2113
|
2
|
|
|
|
|
7
|
$tag_prefix = $self->close_tag('dd'); |
|
2114
|
|
|
|
|
|
|
} |
|
2115
|
|
|
|
|
|
|
elsif ( $open_tag eq 'dd' |
|
2116
|
|
|
|
|
|
|
and $in_tag eq 'dl' |
|
2117
|
|
|
|
|
|
|
and $args{tag_type} == TAG_END) |
|
2118
|
|
|
|
|
|
|
{ |
|
2119
|
|
|
|
|
|
|
# close the DD before the list closes |
|
2120
|
5
|
|
|
|
|
18
|
$tag_prefix = $self->close_tag('dd'); |
|
2121
|
|
|
|
|
|
|
} |
|
2122
|
|
|
|
|
|
|
} |
|
2123
|
|
|
|
|
|
|
|
|
2124
|
3522
|
|
|
|
|
4376
|
my $out_tag = $in_tag; |
|
2125
|
3522
|
100
|
|
|
|
6095
|
if ($args{tag_type} == TAG_END) |
|
2126
|
|
|
|
|
|
|
{ |
|
2127
|
91
|
|
|
|
|
214
|
$out_tag = $self->close_tag($in_tag); |
|
2128
|
|
|
|
|
|
|
} |
|
2129
|
|
|
|
|
|
|
else |
|
2130
|
|
|
|
|
|
|
{ |
|
2131
|
3431
|
100
|
|
|
|
5754
|
if ($self->{lower_case_tags}) |
|
2132
|
|
|
|
|
|
|
{ |
|
2133
|
3146
|
|
|
|
|
3937
|
$out_tag =~ tr/A-Z/a-z/; |
|
2134
|
|
|
|
|
|
|
} |
|
2135
|
|
|
|
|
|
|
else # upper case |
|
2136
|
|
|
|
|
|
|
{ |
|
2137
|
285
|
|
|
|
|
349
|
$out_tag =~ tr/a-z/A-Z/; |
|
2138
|
|
|
|
|
|
|
} |
|
2139
|
3431
|
100
|
|
|
|
6029
|
if ($args{tag_type} == TAG_EMPTY) |
|
2140
|
|
|
|
|
|
|
{ |
|
2141
|
901
|
100
|
|
|
|
1701
|
if ($self->{xhtml}) |
|
2142
|
|
|
|
|
|
|
{ |
|
2143
|
821
|
|
|
|
|
1593
|
$out_tag = "<${out_tag}${inside_tag}/>"; |
|
2144
|
|
|
|
|
|
|
} |
|
2145
|
|
|
|
|
|
|
else |
|
2146
|
|
|
|
|
|
|
{ |
|
2147
|
80
|
|
|
|
|
138
|
$out_tag = "<${out_tag}${inside_tag}>"; |
|
2148
|
|
|
|
|
|
|
} |
|
2149
|
|
|
|
|
|
|
} |
|
2150
|
|
|
|
|
|
|
else |
|
2151
|
|
|
|
|
|
|
{ |
|
2152
|
2530
|
|
|
|
|
2428
|
push @{$self->{__tags}}, $in_tag; |
|
|
2530
|
|
|
|
|
4446
|
|
|
2153
|
2530
|
|
|
|
|
4531
|
$out_tag = "<${out_tag}${inside_tag}>"; |
|
2154
|
|
|
|
|
|
|
} |
|
2155
|
|
|
|
|
|
|
} |
|
2156
|
3522
|
100
|
|
|
|
6410
|
$out_tag = $tag_prefix . $out_tag if $tag_prefix; |
|
2157
|
3522
|
50
|
|
|
|
6492
|
if ($DictDebug & 8) |
|
2158
|
|
|
|
|
|
|
{ |
|
2159
|
0
|
|
|
|
|
0
|
print STDERR |
|
2160
|
|
|
|
|
|
|
"open_tag = '${open_tag}', in_tag = '${in_tag}', tag_type = ", |
|
2161
|
|
|
|
|
|
|
$args{tag_type}, |
|
2162
|
|
|
|
|
|
|
", inside_tag = '${inside_tag}', out_tag = '$out_tag'\n"; |
|
2163
|
|
|
|
|
|
|
} |
|
2164
|
|
|
|
|
|
|
|
|
2165
|
3522
|
|
|
|
|
10155
|
return $out_tag; |
|
2166
|
|
|
|
|
|
|
} # get_tag |
|
2167
|
|
|
|
|
|
|
|
|
2168
|
|
|
|
|
|
|
=head2 close_tag |
|
2169
|
|
|
|
|
|
|
|
|
2170
|
|
|
|
|
|
|
$tag = $self->close_tag($in_tag); |
|
2171
|
|
|
|
|
|
|
|
|
2172
|
|
|
|
|
|
|
close the open tag |
|
2173
|
|
|
|
|
|
|
|
|
2174
|
|
|
|
|
|
|
=cut |
|
2175
|
|
|
|
|
|
|
sub close_tag ($$) |
|
2176
|
|
|
|
|
|
|
{ |
|
2177
|
2459
|
|
|
2459
|
1
|
2994
|
my $self = shift; |
|
2178
|
2459
|
|
|
|
|
3094
|
my $in_tag = shift; |
|
2179
|
|
|
|
|
|
|
|
|
2180
|
2459
|
|
|
|
|
2550
|
my $open_tag = pop @{$self->{__tags}}; |
|
|
2459
|
|
|
|
|
4213
|
|
|
2181
|
2459
|
|
33
|
|
|
5079
|
$in_tag ||= $open_tag; |
|
2182
|
|
|
|
|
|
|
# put the open tag back on the stack if the in-tag is not the same |
|
2183
|
2459
|
100
|
66
|
|
|
9397
|
if (defined $open_tag && $open_tag ne $in_tag) |
|
2184
|
|
|
|
|
|
|
{ |
|
2185
|
15
|
|
|
|
|
18
|
push @{$self->{__tags}}, $open_tag; |
|
|
15
|
|
|
|
|
32
|
|
|
2186
|
|
|
|
|
|
|
} |
|
2187
|
2459
|
|
|
|
|
2973
|
my $out_tag = $in_tag; |
|
2188
|
2459
|
100
|
|
|
|
4356
|
if ($self->{lower_case_tags}) |
|
2189
|
|
|
|
|
|
|
{ |
|
2190
|
2324
|
|
|
|
|
2766
|
$out_tag =~ tr/A-Z/a-z/; |
|
2191
|
|
|
|
|
|
|
} |
|
2192
|
|
|
|
|
|
|
else # upper case |
|
2193
|
|
|
|
|
|
|
{ |
|
2194
|
135
|
|
|
|
|
150
|
$out_tag =~ tr/a-z/A-Z/; |
|
2195
|
|
|
|
|
|
|
} |
|
2196
|
2459
|
|
|
|
|
3625
|
$out_tag = "<\/${out_tag}>"; |
|
2197
|
2459
|
50
|
|
|
|
4428
|
if ($DictDebug & 8) |
|
2198
|
|
|
|
|
|
|
{ |
|
2199
|
0
|
|
|
|
|
0
|
print STDERR |
|
2200
|
|
|
|
|
|
|
"close_tag: open_tag = '${open_tag}', in_tag = '${in_tag}', out_tag = '$out_tag'\n"; |
|
2201
|
|
|
|
|
|
|
} |
|
2202
|
|
|
|
|
|
|
|
|
2203
|
2459
|
|
|
|
|
5471
|
return $out_tag; |
|
2204
|
|
|
|
|
|
|
} |
|
2205
|
|
|
|
|
|
|
|
|
2206
|
|
|
|
|
|
|
=head2 hrule |
|
2207
|
|
|
|
|
|
|
|
|
2208
|
|
|
|
|
|
|
$self->hrule(para_lines_ref=>$para_lines, |
|
2209
|
|
|
|
|
|
|
para_action_ref=>$para_action, |
|
2210
|
|
|
|
|
|
|
ind=>0); |
|
2211
|
|
|
|
|
|
|
|
|
2212
|
|
|
|
|
|
|
Deal with horizontal rules. |
|
2213
|
|
|
|
|
|
|
|
|
2214
|
|
|
|
|
|
|
=cut |
|
2215
|
|
|
|
|
|
|
sub hrule ($%) |
|
2216
|
|
|
|
|
|
|
{ |
|
2217
|
1086
|
|
|
1086
|
1
|
1329
|
my $self = shift; |
|
2218
|
1086
|
|
|
|
|
4362
|
my %args = ( |
|
2219
|
|
|
|
|
|
|
para_lines_ref => undef, |
|
2220
|
|
|
|
|
|
|
para_action_ref => undef, |
|
2221
|
|
|
|
|
|
|
ind => 0, |
|
2222
|
|
|
|
|
|
|
@_ |
|
2223
|
|
|
|
|
|
|
); |
|
2224
|
1086
|
|
|
|
|
1428
|
my $para_lines_ref = $args{para_lines_ref}; |
|
2225
|
1086
|
|
|
|
|
1434
|
my $para_action_ref = $args{para_action_ref}; |
|
2226
|
1086
|
|
|
|
|
1261
|
my $ind = $args{ind}; |
|
2227
|
|
|
|
|
|
|
|
|
2228
|
1086
|
|
|
|
|
1459
|
my $hrmin = $self->{hrule_min}; |
|
2229
|
1086
|
100
|
|
|
|
9486
|
if ($para_lines_ref->[$ind] =~ /^\s*([-_~=\*]\s*){$hrmin,}$/) |
|
|
|
100
|
|
|
|
|
|
|
2230
|
|
|
|
|
|
|
{ |
|
2231
|
18
|
|
|
|
|
57
|
my $tag = $self->get_tag("hr", tag_type => TAG_EMPTY); |
|
2232
|
18
|
|
|
|
|
42
|
$para_lines_ref->[$ind] = "$tag\n"; |
|
2233
|
18
|
|
|
|
|
65
|
$para_action_ref->[$ind] |= $HRULE; |
|
2234
|
|
|
|
|
|
|
} |
|
2235
|
|
|
|
|
|
|
elsif ($para_lines_ref->[$ind] =~ /\014/) |
|
2236
|
|
|
|
|
|
|
{ |
|
2237
|
|
|
|
|
|
|
# Linefeeds become horizontal rules |
|
2238
|
1
|
|
|
|
|
4
|
$para_action_ref->[$ind] |= $HRULE; |
|
2239
|
1
|
|
|
|
|
4
|
my $tag = $self->get_tag("hr", tag_type => TAG_EMPTY); |
|
2240
|
1
|
|
|
|
|
9
|
$para_lines_ref->[$ind] =~ s/\014/\n${tag}\n/g; |
|
2241
|
|
|
|
|
|
|
} |
|
2242
|
|
|
|
|
|
|
} |
|
2243
|
|
|
|
|
|
|
|
|
2244
|
|
|
|
|
|
|
=head2 shortline |
|
2245
|
|
|
|
|
|
|
|
|
2246
|
|
|
|
|
|
|
$self->shortline(line_ref=>$line_ref, |
|
2247
|
|
|
|
|
|
|
line_action_ref=>$line_action_ref, |
|
2248
|
|
|
|
|
|
|
prev_ref=>$prev_ref, |
|
2249
|
|
|
|
|
|
|
prev_action_ref=>$prev_action_ref, |
|
2250
|
|
|
|
|
|
|
prev_line_len=>$prev_line_len); |
|
2251
|
|
|
|
|
|
|
|
|
2252
|
|
|
|
|
|
|
Deal with short lines. |
|
2253
|
|
|
|
|
|
|
|
|
2254
|
|
|
|
|
|
|
=cut |
|
2255
|
|
|
|
|
|
|
sub shortline ($%) |
|
2256
|
|
|
|
|
|
|
{ |
|
2257
|
802
|
|
|
802
|
1
|
989
|
my $self = shift; |
|
2258
|
802
|
|
|
|
|
4058
|
my %args = ( |
|
2259
|
|
|
|
|
|
|
line_ref => undef, |
|
2260
|
|
|
|
|
|
|
line_action_ref => undef, |
|
2261
|
|
|
|
|
|
|
prev_ref => undef, |
|
2262
|
|
|
|
|
|
|
prev_action_ref => undef, |
|
2263
|
|
|
|
|
|
|
prev_line_len => 0, |
|
2264
|
|
|
|
|
|
|
@_ |
|
2265
|
|
|
|
|
|
|
); |
|
2266
|
802
|
|
|
|
|
1147
|
my $mode_ref = $args{mode_ref}; |
|
2267
|
802
|
|
|
|
|
1095
|
my $line_ref = $args{line_ref}; |
|
2268
|
802
|
|
|
|
|
893
|
my $line_action_ref = $args{line_action_ref}; |
|
2269
|
802
|
|
|
|
|
898
|
my $prev_ref = $args{prev_ref}; |
|
2270
|
802
|
|
|
|
|
839
|
my $prev_action_ref = $args{prev_action_ref}; |
|
2271
|
802
|
|
|
|
|
2226
|
my $prev_line_len = $args{prev_line_len}; |
|
2272
|
|
|
|
|
|
|
|
|
2273
|
|
|
|
|
|
|
# Short lines should be broken even on list item lines iff the |
|
2274
|
|
|
|
|
|
|
# following line is more text. I haven't figured out how to do |
|
2275
|
|
|
|
|
|
|
# that yet. For now, I'll just not break on short lines in lists. |
|
2276
|
|
|
|
|
|
|
# (sorry) |
|
2277
|
|
|
|
|
|
|
|
|
2278
|
802
|
|
|
|
|
1592
|
my $tag = $self->get_tag('br', tag_type => TAG_EMPTY); |
|
2279
|
802
|
100
|
100
|
|
|
954
|
if ( |
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
2280
|
|
|
|
|
|
|
${$line_ref} !~ /^\s*$/ |
|
2281
|
320
|
|
|
|
|
1859
|
&& ${$prev_ref} !~ /^\s*$/ |
|
2282
|
|
|
|
|
|
|
&& ($prev_line_len < $self->{short_line_length}) |
|
2283
|
|
|
|
|
|
|
&& !( |
|
2284
|
54
|
|
|
|
|
188
|
${$line_action_ref} & |
|
2285
|
|
|
|
|
|
|
($END | $HEADER | $HRULE | $LIST | $IND_BREAK | $PAR) |
|
2286
|
|
|
|
|
|
|
) |
|
2287
|
|
|
|
|
|
|
&& !(${$prev_action_ref} & ($HEADER | $HRULE | $BREAK | $IND_BREAK)) |
|
2288
|
|
|
|
|
|
|
) |
|
2289
|
|
|
|
|
|
|
{ |
|
2290
|
48
|
|
|
|
|
53
|
${$prev_ref} .= $tag . chop(${$prev_ref}); |
|
|
48
|
|
|
|
|
73
|
|
|
|
48
|
|
|
|
|
108
|
|
|
2291
|
48
|
|
|
|
|
59
|
${$prev_action_ref} |= $BREAK; |
|
|
48
|
|
|
|
|
136
|
|
|
2292
|
|
|
|
|
|
|
} |
|
2293
|
|
|
|
|
|
|
} |
|
2294
|
|
|
|
|
|
|
|
|
2295
|
|
|
|
|
|
|
=head2 is_mailheader |
|
2296
|
|
|
|
|
|
|
|
|
2297
|
|
|
|
|
|
|
if ($self->is_mailheader(rows_ref=>$rows_ref)) |
|
2298
|
|
|
|
|
|
|
{ |
|
2299
|
|
|
|
|
|
|
... |
|
2300
|
|
|
|
|
|
|
} |
|
2301
|
|
|
|
|
|
|
|
|
2302
|
|
|
|
|
|
|
Is this a mailheader line? |
|
2303
|
|
|
|
|
|
|
|
|
2304
|
|
|
|
|
|
|
=cut |
|
2305
|
|
|
|
|
|
|
sub is_mailheader ($%) |
|
2306
|
|
|
|
|
|
|
{ |
|
2307
|
207
|
|
|
207
|
1
|
272
|
my $self = shift; |
|
2308
|
207
|
|
|
|
|
505
|
my %args = ( |
|
2309
|
|
|
|
|
|
|
rows_ref => undef, |
|
2310
|
|
|
|
|
|
|
@_ |
|
2311
|
|
|
|
|
|
|
); |
|
2312
|
207
|
|
|
|
|
272
|
my $rows_ref = $args{rows_ref}; |
|
2313
|
|
|
|
|
|
|
|
|
2314
|
|
|
|
|
|
|
# a mail header is assumed to be the whole |
|
2315
|
|
|
|
|
|
|
# paragraph which starts with a From , From: or Newsgroups: line |
|
2316
|
|
|
|
|
|
|
|
|
2317
|
207
|
100
|
|
|
|
1843
|
if ($rows_ref->[0] =~ /^(From:?)|(Newsgroups:) /) |
|
2318
|
|
|
|
|
|
|
{ |
|
2319
|
22
|
|
|
|
|
63
|
return 1; |
|
2320
|
|
|
|
|
|
|
} |
|
2321
|
185
|
|
|
|
|
386
|
return 0; |
|
2322
|
|
|
|
|
|
|
|
|
2323
|
|
|
|
|
|
|
} # is_mailheader |
|
2324
|
|
|
|
|
|
|
|
|
2325
|
|
|
|
|
|
|
=head2 mailheader |
|
2326
|
|
|
|
|
|
|
|
|
2327
|
|
|
|
|
|
|
$self->mailheader(rows_ref=>$rows_ref); |
|
2328
|
|
|
|
|
|
|
|
|
2329
|
|
|
|
|
|
|
Deal with a mailheader. |
|
2330
|
|
|
|
|
|
|
|
|
2331
|
|
|
|
|
|
|
=cut |
|
2332
|
|
|
|
|
|
|
sub mailheader ($%) |
|
2333
|
|
|
|
|
|
|
{ |
|
2334
|
11
|
|
|
11
|
1
|
18
|
my $self = shift; |
|
2335
|
11
|
|
|
|
|
35
|
my %args = ( |
|
2336
|
|
|
|
|
|
|
rows_ref => undef, |
|
2337
|
|
|
|
|
|
|
@_ |
|
2338
|
|
|
|
|
|
|
); |
|
2339
|
11
|
|
|
|
|
18
|
my $rows_ref = $args{rows_ref}; |
|
2340
|
|
|
|
|
|
|
|
|
2341
|
|
|
|
|
|
|
# a mail header is assumed to be the whole |
|
2342
|
|
|
|
|
|
|
# paragraph which starts with a From: or Newsgroups: line |
|
2343
|
11
|
|
|
|
|
19
|
my $tag = ''; |
|
2344
|
11
|
|
|
|
|
20
|
my @rows = @{$rows_ref}; |
|
|
11
|
|
|
|
|
34
|
|
|
2345
|
|
|
|
|
|
|
|
|
2346
|
11
|
50
|
|
|
|
46
|
if ($self->is_mailheader(%args)) |
|
2347
|
|
|
|
|
|
|
{ |
|
2348
|
11
|
|
|
|
|
21
|
$self->{__mode} |= $MAILHEADER; |
|
2349
|
11
|
50
|
|
|
|
36
|
if ($self->{escape_HTML_chars}) |
|
2350
|
|
|
|
|
|
|
{ |
|
2351
|
11
|
|
|
|
|
167
|
$rows[0] = escape($rows[0]); |
|
2352
|
|
|
|
|
|
|
} |
|
2353
|
11
|
|
|
|
|
50
|
$self->anchor_mail(\$rows[0]); |
|
2354
|
11
|
|
|
|
|
28
|
chomp ${rows}[0]; |
|
2355
|
11
|
|
|
|
|
46
|
$tag = $self->get_tag('p', inside_tag => " class='mail_header'"); |
|
2356
|
11
|
|
|
|
|
154
|
my $tag2 = $self->get_tag('br', tag_type => TAG_EMPTY); |
|
2357
|
11
|
|
|
|
|
36
|
$rows[0] = |
|
2358
|
|
|
|
|
|
|
join('', "\n", $tag, $rows[0], $tag2, "\n"); |
|
2359
|
|
|
|
|
|
|
# now put breaks on the rest of the paragraph |
|
2360
|
|
|
|
|
|
|
# apart from the last line |
|
2361
|
11
|
|
|
|
|
45
|
for (my $rn = 1; $rn < @rows; $rn++) |
|
2362
|
|
|
|
|
|
|
{ |
|
2363
|
43
|
50
|
|
|
|
98
|
if ($self->{escape_HTML_chars}) |
|
2364
|
|
|
|
|
|
|
{ |
|
2365
|
43
|
|
|
|
|
82
|
$rows[$rn] = escape($rows[$rn]); |
|
2366
|
|
|
|
|
|
|
} |
|
2367
|
43
|
100
|
|
|
|
129
|
if ($rn != (@rows - 1)) |
|
2368
|
|
|
|
|
|
|
{ |
|
2369
|
32
|
|
|
|
|
72
|
$tag = $self->get_tag('br', tag_type => TAG_EMPTY); |
|
2370
|
32
|
|
|
|
|
64
|
chomp $rows[$rn]; |
|
2371
|
32
|
|
|
|
|
196
|
$rows[$rn] =~ s/$/${tag}\n/; |
|
2372
|
|
|
|
|
|
|
} |
|
2373
|
|
|
|
|
|
|
} |
|
2374
|
|
|
|
|
|
|
} |
|
2375
|
11
|
|
|
|
|
25
|
@{$rows_ref} = @rows; |
|
|
11
|
|
|
|
|
68
|
|
|
2376
|
|
|
|
|
|
|
|
|
2377
|
|
|
|
|
|
|
} # mailheader |
|
2378
|
|
|
|
|
|
|
|
|
2379
|
|
|
|
|
|
|
=head2 mailquote |
|
2380
|
|
|
|
|
|
|
|
|
2381
|
|
|
|
|
|
|
$self->mailquote(line_ref=>$line_ref, |
|
2382
|
|
|
|
|
|
|
line_action_ref=>$line_action_ref, |
|
2383
|
|
|
|
|
|
|
prev_ref=>$prev_ref, |
|
2384
|
|
|
|
|
|
|
prev_action_ref=>$prev_action_ref, |
|
2385
|
|
|
|
|
|
|
next_ref=>$next_ref); |
|
2386
|
|
|
|
|
|
|
|
|
2387
|
|
|
|
|
|
|
Deal with quoted mail. |
|
2388
|
|
|
|
|
|
|
|
|
2389
|
|
|
|
|
|
|
=cut |
|
2390
|
|
|
|
|
|
|
sub mailquote ($%) |
|
2391
|
|
|
|
|
|
|
{ |
|
2392
|
519
|
|
|
519
|
1
|
613
|
my $self = shift; |
|
2393
|
519
|
|
|
|
|
2426
|
my %args = ( |
|
2394
|
|
|
|
|
|
|
line_ref => undef, |
|
2395
|
|
|
|
|
|
|
line_action_ref => undef, |
|
2396
|
|
|
|
|
|
|
prev_ref => undef, |
|
2397
|
|
|
|
|
|
|
prev_action_ref => undef, |
|
2398
|
|
|
|
|
|
|
next_ref => undef, |
|
2399
|
|
|
|
|
|
|
@_ |
|
2400
|
|
|
|
|
|
|
); |
|
2401
|
519
|
|
|
|
|
667
|
my $line_ref = $args{line_ref}; |
|
2402
|
519
|
|
|
|
|
707
|
my $line_action_ref = $args{line_action_ref}; |
|
2403
|
519
|
|
|
|
|
566
|
my $prev_ref = $args{prev_ref}; |
|
2404
|
519
|
|
|
|
|
595
|
my $prev_action_ref = $args{prev_action_ref}; |
|
2405
|
519
|
|
|
|
|
579
|
my $next_ref = $args{next_ref}; |
|
2406
|
|
|
|
|
|
|
|
|
2407
|
519
|
|
|
|
|
611
|
my $tag = ''; |
|
2408
|
519
|
100
|
66
|
|
|
652
|
if ( |
|
|
|
|
100
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
2409
|
|
|
|
|
|
|
( |
|
2410
|
|
|
|
|
|
|
(${$line_ref} =~ /^\w*>/) # Handle "FF> Werewolves." |
|
2411
|
24
|
|
|
|
|
97
|
|| (${$line_ref} =~ /^[\|:]/) |
|
2412
|
|
|
|
|
|
|
) # Handle "[|:] There wolves." |
|
2413
|
|
|
|
|
|
|
&& defined($next_ref) && (${$next_ref} !~ /^\s*$/) |
|
2414
|
|
|
|
|
|
|
) |
|
2415
|
|
|
|
|
|
|
{ |
|
2416
|
24
|
|
|
|
|
52
|
$tag = $self->get_tag('br', tag_type => TAG_EMPTY); |
|
2417
|
24
|
|
|
|
|
32
|
${$line_ref} =~ s/$/${tag}/; |
|
|
24
|
|
|
|
|
87
|
|
|
2418
|
24
|
|
|
|
|
31
|
${$line_action_ref} |= ($BREAK | $MAILQUOTE); |
|
|
24
|
|
|
|
|
31
|
|
|
2419
|
24
|
100
|
|
|
|
27
|
if (!(${$prev_action_ref} & ($BREAK | $MAILQUOTE))) |
|
|
24
|
|
|
|
|
102
|
|
|
2420
|
|
|
|
|
|
|
{ |
|
2421
|
10
|
|
|
|
|
24
|
$tag = $self->get_tag('p', inside_tag => " class='quote_mail'"); |
|
2422
|
10
|
|
|
|
|
12
|
${$prev_ref} .= $tag; |
|
|
10
|
|
|
|
|
16
|
|
|
2423
|
10
|
|
|
|
|
55
|
${$line_action_ref} |= $PAR; |
|
|
10
|
|
|
|
|
32
|
|
|
2424
|
|
|
|
|
|
|
} |
|
2425
|
|
|
|
|
|
|
} |
|
2426
|
|
|
|
|
|
|
} |
|
2427
|
|
|
|
|
|
|
|
|
2428
|
|
|
|
|
|
|
=head2 subtract_modes |
|
2429
|
|
|
|
|
|
|
|
|
2430
|
|
|
|
|
|
|
$newvector = subtract_modes($vector, $mask); |
|
2431
|
|
|
|
|
|
|
|
|
2432
|
|
|
|
|
|
|
Subtracts modes listed in $mask from $vector. |
|
2433
|
|
|
|
|
|
|
|
|
2434
|
|
|
|
|
|
|
=cut |
|
2435
|
|
|
|
|
|
|
sub subtract_modes ($$) |
|
2436
|
|
|
|
|
|
|
{ |
|
2437
|
1039
|
|
|
1039
|
1
|
1443
|
my ($vector, $mask) = @_; |
|
2438
|
1039
|
|
|
|
|
5131
|
return ($vector | $mask) - $mask; |
|
2439
|
|
|
|
|
|
|
} |
|
2440
|
|
|
|
|
|
|
|
|
2441
|
|
|
|
|
|
|
=head2 paragraph |
|
2442
|
|
|
|
|
|
|
|
|
2443
|
|
|
|
|
|
|
$self->paragraph(line_ref=>$line_ref, |
|
2444
|
|
|
|
|
|
|
line_action_ref=>$line_action_ref, |
|
2445
|
|
|
|
|
|
|
prev_ref=>$prev_ref, |
|
2446
|
|
|
|
|
|
|
prev_action_ref=>$prev_action_ref, |
|
2447
|
|
|
|
|
|
|
line_indent=>$line_indent, |
|
2448
|
|
|
|
|
|
|
prev_indent=>$prev_indent, |
|
2449
|
|
|
|
|
|
|
is_fragment=>$is_fragment, |
|
2450
|
|
|
|
|
|
|
ind=>$ind); |
|
2451
|
|
|
|
|
|
|
|
|
2452
|
|
|
|
|
|
|
Detect paragraph indentation. |
|
2453
|
|
|
|
|
|
|
|
|
2454
|
|
|
|
|
|
|
=cut |
|
2455
|
|
|
|
|
|
|
sub paragraph ($%) |
|
2456
|
|
|
|
|
|
|
{ |
|
2457
|
1065
|
|
|
1065
|
1
|
1352
|
my $self = shift; |
|
2458
|
1065
|
|
|
|
|
9948
|
my %args = ( |
|
2459
|
|
|
|
|
|
|
line_ref => undef, |
|
2460
|
|
|
|
|
|
|
line_action_ref => undef, |
|
2461
|
|
|
|
|
|
|
prev_ref => undef, |
|
2462
|
|
|
|
|
|
|
prev_action_ref => undef, |
|
2463
|
|
|
|
|
|
|
line_indent => 0, |
|
2464
|
|
|
|
|
|
|
prev_indent => 0, |
|
2465
|
|
|
|
|
|
|
is_fragment => 0, |
|
2466
|
|
|
|
|
|
|
ind => 0, |
|
2467
|
|
|
|
|
|
|
@_ |
|
2468
|
|
|
|
|
|
|
); |
|
2469
|
1065
|
|
|
|
|
1403
|
my $line_ref = $args{line_ref}; |
|
2470
|
1065
|
|
|
|
|
1180
|
my $line_action_ref = $args{line_action_ref}; |
|
2471
|
1065
|
|
|
|
|
1237
|
my $prev_ref = $args{prev_ref}; |
|
2472
|
1065
|
|
|
|
|
1221
|
my $prev_action_ref = $args{prev_action_ref}; |
|
2473
|
1065
|
|
|
|
|
1156
|
my $line_indent = $args{line_indent}; |
|
2474
|
1065
|
|
|
|
|
1172
|
my $prev_indent = $args{prev_indent}; |
|
2475
|
1065
|
|
|
|
|
1082
|
my $is_fragment = $args{is_fragment}; |
|
2476
|
1065
|
|
|
|
|
1235
|
my $line_no = $args{ind}; |
|
2477
|
|
|
|
|
|
|
|
|
2478
|
1065
|
|
|
|
|
1248
|
my $tag = ''; |
|
2479
|
1065
|
100
|
100
|
|
|
1115
|
if ( |
|
|
|
50
|
100
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2480
|
|
|
|
|
|
|
${$line_ref} !~ /^\s*$/ |
|
2481
|
|
|
|
|
|
|
&& !subtract_modes( |
|
2482
|
|
|
|
|
|
|
${$line_action_ref}, $END | $MAILQUOTE | $CAPS | $BREAK |
|
2483
|
|
|
|
|
|
|
) |
|
2484
|
|
|
|
|
|
|
&& ( ${$prev_ref} =~ /^\s*$/ |
|
2485
|
|
|
|
|
|
|
|| (${$line_action_ref} & $END) |
|
2486
|
|
|
|
|
|
|
|| ($line_indent > $prev_indent + $self->{par_indent})) |
|
2487
|
|
|
|
|
|
|
&& !($is_fragment && $line_no == 0) |
|
2488
|
|
|
|
|
|
|
) |
|
2489
|
0
|
|
|
|
|
0
|
{ |
|
2490
|
|
|
|
|
|
|
|
|
2491
|
259
|
50
|
33
|
|
|
1352
|
if ( $self->{indent_par_break} |
|
|
0
|
50
|
33
|
|
|
0
|
|
|
|
|
|
0
|
|
|
|
|
|
2492
|
0
|
|
|
|
|
0
|
&& ${$prev_ref} !~ /^\s*$/ |
|
2493
|
|
|
|
|
|
|
&& !(${$line_action_ref} & $END) |
|
2494
|
|
|
|
|
|
|
&& ($line_indent > $prev_indent + $self->{par_indent})) |
|
2495
|
|
|
|
|
|
|
{ |
|
2496
|
0
|
|
|
|
|
0
|
$tag = $self->get_tag('br', tag_type => TAG_EMPTY); |
|
2497
|
0
|
|
|
|
|
0
|
${$prev_ref} .= $tag; |
|
|
0
|
|
|
|
|
0
|
|
|
2498
|
0
|
|
|
|
|
0
|
${$prev_ref} .= " " x $line_indent; |
|
|
0
|
|
|
|
|
0
|
|
|
2499
|
0
|
|
|
|
|
0
|
${$line_ref} =~ s/^ {$line_indent}//; |
|
|
0
|
|
|
|
|
0
|
|
|
2500
|
0
|
|
|
|
|
0
|
${$prev_action_ref} |= $BREAK; |
|
|
0
|
|
|
|
|
0
|
|
|
2501
|
0
|
|
|
|
|
0
|
${$line_action_ref} |= $IND_BREAK; |
|
|
0
|
|
|
|
|
0
|
|
|
2502
|
|
|
|
|
|
|
} |
|
2503
|
|
|
|
|
|
|
elsif ($self->{preserve_indent}) |
|
2504
|
|
|
|
|
|
|
{ |
|
2505
|
0
|
|
|
|
|
0
|
$tag = $self->get_tag('p'); |
|
2506
|
0
|
|
|
|
|
0
|
${$prev_ref} .= $tag; |
|
|
0
|
|
|
|
|
0
|
|
|
2507
|
0
|
|
|
|
|
0
|
${$prev_ref} .= " " x $line_indent; |
|
|
0
|
|
|
|
|
0
|
|
|
2508
|
0
|
|
|
|
|
0
|
${$line_ref} =~ s/^ {$line_indent}//; |
|
|
0
|
|
|
|
|
0
|
|
|
2509
|
0
|
|
|
|
|
0
|
${$line_action_ref} |= $PAR; |
|
|
0
|
|
|
|
|
0
|
|
|
2510
|
|
|
|
|
|
|
} |
|
2511
|
|
|
|
|
|
|
else |
|
2512
|
|
|
|
|
|
|
{ |
|
2513
|
259
|
|
|
|
|
744
|
$tag = $self->get_tag('p'); |
|
2514
|
259
|
|
|
|
|
329
|
${$prev_ref} .= $tag; |
|
|
259
|
|
|
|
|
480
|
|
|
2515
|
259
|
|
|
|
|
334
|
${$line_action_ref} |= $PAR; |
|
|
259
|
|
|
|
|
981
|
|
|
2516
|
|
|
|
|
|
|
} |
|
2517
|
|
|
|
|
|
|
} |
|
2518
|
|
|
|
|
|
|
# detect also a continuing indentation at the same level |
|
2519
|
|
|
|
|
|
|
elsif ($self->{indent_par_break} |
|
2520
|
|
|
|
|
|
|
&& !($self->{__mode} & ($PRE | $TABLE | $LIST)) |
|
2521
|
0
|
|
|
|
|
0
|
&& ${$prev_ref} !~ /^\s*$/ |
|
2522
|
0
|
|
|
|
|
0
|
&& !(${$line_action_ref} & $END) |
|
2523
|
0
|
|
|
|
|
0
|
&& (${$prev_action_ref} & ($IND_BREAK | $PAR)) |
|
2524
|
|
|
|
|
|
|
&& !subtract_modes(${$line_action_ref}, $END | $MAILQUOTE | $CAPS) |
|
2525
|
|
|
|
|
|
|
&& ($line_indent > $self->{par_indent}) |
|
2526
|
|
|
|
|
|
|
&& ($line_indent == $prev_indent)) |
|
2527
|
|
|
|
|
|
|
{ |
|
2528
|
0
|
|
|
|
|
0
|
$tag = $self->get_tag('br', tag_type => TAG_EMPTY); |
|
2529
|
0
|
|
|
|
|
0
|
${$prev_ref} .= $tag; |
|
|
0
|
|
|
|
|
0
|
|
|
2530
|
0
|
|
|
|
|
0
|
${$prev_ref} .= " " x $line_indent; |
|
|
0
|
|
|
|
|
0
|
|
|
2531
|
0
|
|
|
|
|
0
|
${$line_ref} =~ s/^ {$line_indent}//; |
|
|
0
|
|
|
|
|
0
|
|
|
2532
|
0
|
|
|
|
|
0
|
${$prev_action_ref} |= $BREAK; |
|
|
0
|
|
|
|
|
0
|
|
|
2533
|
0
|
|
|
|
|
0
|
${$line_action_ref} |= $IND_BREAK; |
|
|
0
|
|
|
|
|
0
|
|
|
2534
|
|
|
|
|
|
|
} |
|
2535
|
|
|
|
|
|
|
} |
|
2536
|
|
|
|
|
|
|
|
|
2537
|
|
|
|
|
|
|
=head2 listprefix |
|
2538
|
|
|
|
|
|
|
|
|
2539
|
|
|
|
|
|
|
($prefix, $number, $rawprefix, $term) = $self->listprefix($line); |
|
2540
|
|
|
|
|
|
|
|
|
2541
|
|
|
|
|
|
|
Detect and parse a list item. |
|
2542
|
|
|
|
|
|
|
|
|
2543
|
|
|
|
|
|
|
=cut |
|
2544
|
|
|
|
|
|
|
sub listprefix ($$) |
|
2545
|
|
|
|
|
|
|
{ |
|
2546
|
1327
|
|
|
1327
|
1
|
1546
|
my $self = shift; |
|
2547
|
1327
|
|
|
|
|
1774
|
my $line = shift; |
|
2548
|
|
|
|
|
|
|
|
|
2549
|
1327
|
|
|
|
|
1420
|
my ($prefix, $number, $rawprefix, $term); |
|
2550
|
|
|
|
|
|
|
|
|
2551
|
1327
|
|
|
|
|
2007
|
my $bullets = $self->{bullets}; |
|
2552
|
1327
|
|
|
|
|
1745
|
my $bullets_ordered = $self->{bullets_ordered}; |
|
2553
|
1327
|
|
|
|
|
1637
|
my $number_match = '(\d+|[^\W\d])'; |
|
2554
|
1327
|
100
|
|
|
|
2513
|
if ($bullets_ordered) |
|
2555
|
|
|
|
|
|
|
{ |
|
2556
|
11
|
|
|
|
|
26
|
$number_match = '(\d+|[[:alpha:]]|[' . "${bullets_ordered}])"; |
|
2557
|
|
|
|
|
|
|
} |
|
2558
|
1327
|
|
|
|
|
2061
|
$self->{__number_match} = $number_match; |
|
2559
|
1327
|
|
|
|
|
1466
|
my $term_match = '(\w\w+)'; |
|
2560
|
1327
|
|
|
|
|
1701
|
$self->{__term_match} = $term_match; |
|
2561
|
1327
|
100
|
66
|
|
|
25180
|
return (0, 0, 0, 0) |
|
|
|
|
66
|
|
|
|
|
|
2562
|
|
|
|
|
|
|
if ( !($line =~ /^\s*[${bullets}]\s+\S/) |
|
2563
|
|
|
|
|
|
|
&& !($line =~ /^\s*${number_match}[\.\)\]:]\s+\S/) |
|
2564
|
|
|
|
|
|
|
&& !($line =~ /^\s*${term_match}:$/)); |
|
2565
|
|
|
|
|
|
|
|
|
2566
|
204
|
|
|
|
|
925
|
($term) = $line =~ /^\s*${term_match}:$/; |
|
2567
|
204
|
|
|
|
|
5893
|
($number) = $line =~ /^\s*${number_match}\S\s+\S/; |
|
2568
|
204
|
100
|
|
|
|
2020
|
$number = 0 unless defined($number); |
|
2569
|
204
|
100
|
100
|
|
|
679
|
if ( $bullets_ordered |
|
2570
|
|
|
|
|
|
|
&& $number =~ /[${bullets_ordered}]/) |
|
2571
|
|
|
|
|
|
|
{ |
|
2572
|
4
|
|
|
|
|
8
|
$number = 1; |
|
2573
|
|
|
|
|
|
|
} |
|
2574
|
|
|
|
|
|
|
|
|
2575
|
|
|
|
|
|
|
# That slippery exception of "o" as a bullet |
|
2576
|
|
|
|
|
|
|
# (This ought to be determined using the context of what lists |
|
2577
|
|
|
|
|
|
|
# we have in progress, but this will probably work well enough.) |
|
2578
|
204
|
100
|
66
|
|
|
1213
|
if ($bullets =~ /o/ && $line =~ /^\s*o\s/) |
|
2579
|
|
|
|
|
|
|
{ |
|
2580
|
8
|
|
|
|
|
13
|
$number = 0; |
|
2581
|
|
|
|
|
|
|
} |
|
2582
|
|
|
|
|
|
|
|
|
2583
|
204
|
100
|
|
|
|
512
|
if ($term) |
|
|
|
100
|
|
|
|
|
|
|
2584
|
|
|
|
|
|
|
{ |
|
2585
|
14
|
|
|
|
|
295
|
($rawprefix) = $line =~ /^(\s*${term_match}.)$/; |
|
2586
|
14
|
|
|
|
|
36
|
$prefix = $rawprefix; |
|
2587
|
14
|
|
|
|
|
76
|
$prefix =~ s/${term_match}//; # Take the term out |
|
2588
|
|
|
|
|
|
|
} |
|
2589
|
|
|
|
|
|
|
elsif ($number) |
|
2590
|
|
|
|
|
|
|
{ |
|
2591
|
75
|
|
|
|
|
4423
|
($rawprefix) = $line =~ /^(\s*${number_match}.)/; |
|
2592
|
75
|
|
|
|
|
1921
|
$prefix = $rawprefix; |
|
2593
|
75
|
|
|
|
|
3823
|
$prefix =~ s/${number_match}//; # Take the number out |
|
2594
|
|
|
|
|
|
|
} |
|
2595
|
|
|
|
|
|
|
else |
|
2596
|
|
|
|
|
|
|
{ |
|
2597
|
115
|
|
|
|
|
756
|
($rawprefix) = $line =~ /^(\s*[${bullets}].)/; |
|
2598
|
115
|
|
|
|
|
231
|
$prefix = $rawprefix; |
|
2599
|
|
|
|
|
|
|
} |
|
2600
|
204
|
|
|
|
|
2511
|
($prefix, $number, $rawprefix, $term); |
|
2601
|
|
|
|
|
|
|
} # listprefix |
|
2602
|
|
|
|
|
|
|
|
|
2603
|
|
|
|
|
|
|
=head2 startlist |
|
2604
|
|
|
|
|
|
|
|
|
2605
|
|
|
|
|
|
|
$self->startlist(prefix=>$prefix, |
|
2606
|
|
|
|
|
|
|
number=>0, |
|
2607
|
|
|
|
|
|
|
rawprefix=>$rawprefix, |
|
2608
|
|
|
|
|
|
|
term=>$term, |
|
2609
|
|
|
|
|
|
|
para_lines_ref=>$para_lines_ref, |
|
2610
|
|
|
|
|
|
|
para_action_ref=>$para_action_ref, |
|
2611
|
|
|
|
|
|
|
ind=>0, |
|
2612
|
|
|
|
|
|
|
prev_ref=>$prev_ref, |
|
2613
|
|
|
|
|
|
|
total_prefix=>$total_prefix); |
|
2614
|
|
|
|
|
|
|
|
|
2615
|
|
|
|
|
|
|
Start a list. |
|
2616
|
|
|
|
|
|
|
|
|
2617
|
|
|
|
|
|
|
=cut |
|
2618
|
|
|
|
|
|
|
sub startlist ($%) |
|
2619
|
|
|
|
|
|
|
{ |
|
2620
|
68
|
|
|
68
|
1
|
95
|
my $self = shift; |
|
2621
|
68
|
|
|
|
|
736
|
my %args = ( |
|
2622
|
|
|
|
|
|
|
prefix => '', |
|
2623
|
|
|
|
|
|
|
number => 0, |
|
2624
|
|
|
|
|
|
|
rawprefix => '', |
|
2625
|
|
|
|
|
|
|
term => '', |
|
2626
|
|
|
|
|
|
|
para_lines_ref => undef, |
|
2627
|
|
|
|
|
|
|
para_action_ref => undef, |
|
2628
|
|
|
|
|
|
|
ind => 0, |
|
2629
|
|
|
|
|
|
|
prev_ref => undef, |
|
2630
|
|
|
|
|
|
|
total_prefix => '', |
|
2631
|
|
|
|
|
|
|
@_ |
|
2632
|
|
|
|
|
|
|
); |
|
2633
|
68
|
|
|
|
|
129
|
my $prefix = $args{prefix}; |
|
2634
|
68
|
|
|
|
|
120
|
my $number = $args{number}; |
|
2635
|
68
|
|
|
|
|
100
|
my $rawprefix = $args{rawprefix}; |
|
2636
|
68
|
|
|
|
|
101
|
my $term = $args{term}; |
|
2637
|
68
|
|
|
|
|
95
|
my $para_lines_ref = $args{para_lines_ref}; |
|
2638
|
68
|
|
|
|
|
94
|
my $para_action_ref = $args{para_action_ref}; |
|
2639
|
68
|
|
|
|
|
86
|
my $ind = $args{ind}; |
|
2640
|
68
|
|
|
|
|
101
|
my $prev_ref = $args{prev_ref}; |
|
2641
|
|
|
|
|
|
|
|
|
2642
|
68
|
|
|
|
|
95
|
my $tag = ''; |
|
2643
|
68
|
|
|
|
|
171
|
$self->{__listprefix}->[$self->{__listnum}] = $prefix; |
|
2644
|
68
|
100
|
|
|
|
179
|
if ($number) |
|
|
|
100
|
|
|
|
|
|
|
2645
|
|
|
|
|
|
|
{ |
|
2646
|
|
|
|
|
|
|
|
|
2647
|
|
|
|
|
|
|
# It doesn't start with 1,a,A. Let's not screw with it. |
|
2648
|
28
|
100
|
100
|
|
|
159
|
if (($number ne "1") && ($number ne "a") && ($number ne "A")) |
|
|
|
|
100
|
|
|
|
|
|
2649
|
|
|
|
|
|
|
{ |
|
2650
|
4
|
|
|
|
|
19
|
return 0; |
|
2651
|
|
|
|
|
|
|
} |
|
2652
|
24
|
|
|
|
|
62
|
$tag = $self->get_tag('ol'); |
|
2653
|
24
|
|
|
|
|
40
|
${$prev_ref} .= join('', $self->{__list_nice_indent}, $tag, "\n"); |
|
|
24
|
|
|
|
|
81
|
|
|
2654
|
24
|
|
|
|
|
63
|
$self->{__list}->[$self->{__listnum}] = $OL; |
|
2655
|
|
|
|
|
|
|
} |
|
2656
|
|
|
|
|
|
|
elsif ($term) |
|
2657
|
|
|
|
|
|
|
{ |
|
2658
|
6
|
|
|
|
|
21
|
$tag = $self->get_tag('dl'); |
|
2659
|
6
|
|
|
|
|
11
|
${$prev_ref} .= join('', $self->{__list_nice_indent}, $tag, "\n"); |
|
|
6
|
|
|
|
|
23
|
|
|
2660
|
6
|
|
|
|
|
22
|
$self->{__list}->[$self->{__listnum}] = $DL; |
|
2661
|
|
|
|
|
|
|
} |
|
2662
|
|
|
|
|
|
|
else |
|
2663
|
|
|
|
|
|
|
{ |
|
2664
|
34
|
|
|
|
|
85
|
$tag = $self->get_tag('ul'); |
|
2665
|
34
|
|
|
|
|
54
|
${$prev_ref} .= join('', $self->{__list_nice_indent}, $tag, "\n"); |
|
|
34
|
|
|
|
|
104
|
|
|
2666
|
34
|
|
|
|
|
90
|
$self->{__list}->[$self->{__listnum}] = $UL; |
|
2667
|
|
|
|
|
|
|
} |
|
2668
|
|
|
|
|
|
|
|
|
2669
|
64
|
|
|
|
|
171
|
$self->{__list_indent}->[$self->{__listnum}] = length($args{total_prefix}); |
|
2670
|
64
|
|
|
|
|
98
|
$self->{__listnum}++; |
|
2671
|
64
|
|
|
|
|
181
|
$self->{__list_nice_indent} = |
|
2672
|
|
|
|
|
|
|
" " x $self->{__listnum} x $self->{indent_width}; |
|
2673
|
64
|
|
|
|
|
109
|
$para_action_ref->[$ind] |= $LIST; |
|
2674
|
64
|
|
|
|
|
95
|
$para_action_ref->[$ind] |= $LIST_START; |
|
2675
|
64
|
|
|
|
|
90
|
$self->{__mode} |= $LIST; |
|
2676
|
64
|
|
|
|
|
255
|
1; |
|
2677
|
|
|
|
|
|
|
} # startlist |
|
2678
|
|
|
|
|
|
|
|
|
2679
|
|
|
|
|
|
|
=head2 endlist |
|
2680
|
|
|
|
|
|
|
|
|
2681
|
|
|
|
|
|
|
$self->endlist(num_lists=>0, |
|
2682
|
|
|
|
|
|
|
prev_ref=>$prev_ref, |
|
2683
|
|
|
|
|
|
|
line_action_ref=>$line_action_ref); |
|
2684
|
|
|
|
|
|
|
|
|
2685
|
|
|
|
|
|
|
End N lists |
|
2686
|
|
|
|
|
|
|
|
|
2687
|
|
|
|
|
|
|
=cut |
|
2688
|
|
|
|
|
|
|
sub endlist ($%) |
|
2689
|
|
|
|
|
|
|
{ |
|
2690
|
53
|
|
|
53
|
1
|
82
|
my $self = shift; |
|
2691
|
53
|
|
|
|
|
285
|
my %args = ( |
|
2692
|
|
|
|
|
|
|
num_lists => 0, |
|
2693
|
|
|
|
|
|
|
prev_ref => undef, |
|
2694
|
|
|
|
|
|
|
line_action_ref => undef, |
|
2695
|
|
|
|
|
|
|
@_ |
|
2696
|
|
|
|
|
|
|
); |
|
2697
|
53
|
|
|
|
|
96
|
my $n = $args{num_lists}; |
|
2698
|
53
|
|
|
|
|
2368
|
my $prev_ref = $args{prev_ref}; |
|
2699
|
53
|
|
|
|
|
271
|
my $line_action_ref = $args{line_action_ref}; |
|
2700
|
|
|
|
|
|
|
|
|
2701
|
53
|
|
|
|
|
88
|
my $tag = ''; |
|
2702
|
53
|
|
|
|
|
148
|
for (; $n > 0; $n--, $self->{__listnum}--) |
|
2703
|
|
|
|
|
|
|
{ |
|
2704
|
64
|
|
|
|
|
207
|
$self->{__list_nice_indent} = |
|
2705
|
|
|
|
|
|
|
" " x ($self->{__listnum} - 1) x $self->{indent_width}; |
|
2706
|
64
|
100
|
|
|
|
338
|
if ($self->{__list}->[$self->{__listnum} - 1] == $UL) |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
2707
|
|
|
|
|
|
|
{ |
|
2708
|
34
|
|
|
|
|
88
|
$tag = $self->get_tag('ul', tag_type => TAG_END); |
|
2709
|
34
|
|
|
|
|
45
|
${$prev_ref} .= join('', $self->{__list_nice_indent}, $tag, "\n"); |
|
|
34
|
|
|
|
|
114
|
|
|
2710
|
34
|
|
|
|
|
231
|
pop @{$self->{__list_indent}}; |
|
|
34
|
|
|
|
|
152
|
|
|
2711
|
|
|
|
|
|
|
} |
|
2712
|
|
|
|
|
|
|
elsif ($self->{__list}->[$self->{__listnum} - 1] == $OL) |
|
2713
|
|
|
|
|
|
|
{ |
|
2714
|
24
|
|
|
|
|
65
|
$tag = $self->get_tag('ol', tag_type => TAG_END); |
|
2715
|
24
|
|
|
|
|
37
|
${$prev_ref} .= join('', $self->{__list_nice_indent}, $tag, "\n"); |
|
|
24
|
|
|
|
|
75
|
|
|
2716
|
24
|
|
|
|
|
38
|
pop @{$self->{__list_indent}}; |
|
|
24
|
|
|
|
|
93
|
|
|
2717
|
|
|
|
|
|
|
} |
|
2718
|
|
|
|
|
|
|
elsif ($self->{__list}->[$self->{__listnum} - 1] == $DL) |
|
2719
|
|
|
|
|
|
|
{ |
|
2720
|
6
|
|
|
|
|
20
|
$tag = $self->get_tag('dl', tag_type => TAG_END); |
|
2721
|
6
|
|
|
|
|
10
|
${$prev_ref} .= join('', $self->{__list_nice_indent}, $tag, "\n"); |
|
|
6
|
|
|
|
|
23
|
|
|
2722
|
6
|
|
|
|
|
10
|
pop @{$self->{__list_indent}}; |
|
|
6
|
|
|
|
|
24
|
|
|
2723
|
|
|
|
|
|
|
} |
|
2724
|
|
|
|
|
|
|
else |
|
2725
|
|
|
|
|
|
|
{ |
|
2726
|
0
|
|
|
|
|
0
|
print STDERR "Encountered list of unknown type\n"; |
|
2727
|
|
|
|
|
|
|
} |
|
2728
|
|
|
|
|
|
|
} |
|
2729
|
53
|
|
|
|
|
69
|
${$line_action_ref} |= $END; |
|
|
53
|
|
|
|
|
82
|
|
|
2730
|
53
|
100
|
|
|
|
225
|
$self->{__mode} ^= $LIST if (!$self->{__listnum}); |
|
2731
|
|
|
|
|
|
|
} # endlist |
|
2732
|
|
|
|
|
|
|
|
|
2733
|
|
|
|
|
|
|
=head2 continuelist |
|
2734
|
|
|
|
|
|
|
|
|
2735
|
|
|
|
|
|
|
$self->continuelist(para_lines_ref=>$para_lines_ref, |
|
2736
|
|
|
|
|
|
|
para_action_ref=>$para_action_ref, |
|
2737
|
|
|
|
|
|
|
ind=>0, |
|
2738
|
|
|
|
|
|
|
term=>$term); |
|
2739
|
|
|
|
|
|
|
|
|
2740
|
|
|
|
|
|
|
Continue a list. |
|
2741
|
|
|
|
|
|
|
|
|
2742
|
|
|
|
|
|
|
=cut |
|
2743
|
|
|
|
|
|
|
sub continuelist ($%) |
|
2744
|
|
|
|
|
|
|
{ |
|
2745
|
158
|
|
|
158
|
1
|
185
|
my $self = shift; |
|
2746
|
158
|
|
|
|
|
796
|
my %args = ( |
|
2747
|
|
|
|
|
|
|
para_lines_ref => undef, |
|
2748
|
|
|
|
|
|
|
para_action_ref => undef, |
|
2749
|
|
|
|
|
|
|
ind => 0, |
|
2750
|
|
|
|
|
|
|
term => '', |
|
2751
|
|
|
|
|
|
|
@_ |
|
2752
|
|
|
|
|
|
|
); |
|
2753
|
158
|
|
|
|
|
214
|
my $para_lines_ref = $args{para_lines_ref}; |
|
2754
|
158
|
|
|
|
|
199
|
my $para_action_ref = $args{para_action_ref}; |
|
2755
|
158
|
|
|
|
|
197
|
my $ind = $args{ind}; |
|
2756
|
158
|
|
|
|
|
194
|
my $term = $args{term}; |
|
2757
|
|
|
|
|
|
|
|
|
2758
|
158
|
|
|
|
|
294
|
my $list_indent = $self->{__list_nice_indent}; |
|
2759
|
158
|
|
|
|
|
216
|
my $bullets = $self->{bullets}; |
|
2760
|
158
|
|
|
|
|
204
|
my $num_match = $self->{__number_match}; |
|
2761
|
158
|
|
|
|
|
197
|
my $term_match = $self->{__term_match}; |
|
2762
|
158
|
|
|
|
|
187
|
my $tag = ''; |
|
2763
|
158
|
100
|
66
|
|
|
1158
|
if ( $self->{__list}->[$self->{__listnum} - 1] == $UL |
|
2764
|
|
|
|
|
|
|
&& $para_lines_ref->[$ind] =~ /^\s*[${bullets}]\s*/) |
|
2765
|
|
|
|
|
|
|
{ |
|
2766
|
88
|
|
|
|
|
188
|
$tag = $self->get_tag('li'); |
|
2767
|
88
|
|
|
|
|
718
|
$para_lines_ref->[$ind] =~ s/^\s*[${bullets}]\s*/${list_indent}${tag}/; |
|
2768
|
88
|
|
|
|
|
218
|
$para_action_ref->[$ind] |= $LIST_ITEM; |
|
2769
|
|
|
|
|
|
|
} |
|
2770
|
158
|
100
|
|
|
|
445
|
if ($self->{__list}->[$self->{__listnum} - 1] == $OL) |
|
2771
|
|
|
|
|
|
|
{ |
|
2772
|
61
|
|
|
|
|
135
|
$tag = $self->get_tag('li'); |
|
2773
|
61
|
|
|
|
|
8672
|
$para_lines_ref->[$ind] =~ s/^\s*${num_match}.\s*/${list_indent}${tag}/; |
|
2774
|
61
|
|
|
|
|
2135
|
$para_action_ref->[$ind] |= $LIST_ITEM; |
|
2775
|
|
|
|
|
|
|
} |
|
2776
|
158
|
100
|
66
|
|
|
560
|
if ( $self->{__list}->[$self->{__listnum} - 1] == $DL |
|
2777
|
|
|
|
|
|
|
&& $term) |
|
2778
|
|
|
|
|
|
|
{ |
|
2779
|
9
|
|
|
|
|
29
|
$tag = $self->get_tag('dt'); |
|
2780
|
9
|
|
|
|
|
27
|
my $tag2 = $self->get_tag('dt', tag_type => TAG_END); |
|
2781
|
9
|
|
|
|
|
23
|
$term =~ s/_/ /g; # underscores are now spaces in the term |
|
2782
|
9
|
|
|
|
|
217
|
$para_lines_ref->[$ind] =~ |
|
2783
|
|
|
|
|
|
|
s/^\s*${term_match}.$/${list_indent}${tag}${term}${tag2}/; |
|
2784
|
9
|
|
|
|
|
42
|
$tag = $self->get_tag('dd'); |
|
2785
|
9
|
|
|
|
|
26
|
$para_lines_ref->[$ind] .= ${tag}; |
|
2786
|
9
|
|
|
|
|
21
|
$para_action_ref->[$ind] |= $LIST_ITEM; |
|
2787
|
|
|
|
|
|
|
} |
|
2788
|
158
|
|
|
|
|
488
|
$para_action_ref->[$ind] |= $LIST; |
|
2789
|
|
|
|
|
|
|
} # continuelist |
|
2790
|
|
|
|
|
|
|
|
|
2791
|
|
|
|
|
|
|
=head2 liststuff |
|
2792
|
|
|
|
|
|
|
|
|
2793
|
|
|
|
|
|
|
$self->liststuff(para_lines_ref=>$para_lines_ref, |
|
2794
|
|
|
|
|
|
|
para_action_ref=>$para_action_ref, |
|
2795
|
|
|
|
|
|
|
para_line_indent_ref=>$para_line_indent_ref, |
|
2796
|
|
|
|
|
|
|
ind=>0, |
|
2797
|
|
|
|
|
|
|
prev_ref=>$prev_ref); |
|
2798
|
|
|
|
|
|
|
|
|
2799
|
|
|
|
|
|
|
Process a list (higher-level method). |
|
2800
|
|
|
|
|
|
|
|
|
2801
|
|
|
|
|
|
|
=cut |
|
2802
|
|
|
|
|
|
|
sub liststuff ($%) |
|
2803
|
|
|
|
|
|
|
{ |
|
2804
|
1060
|
|
|
1060
|
1
|
1470
|
my $self = shift; |
|
2805
|
1060
|
|
|
|
|
5527
|
my %args = ( |
|
2806
|
|
|
|
|
|
|
para_lines_ref => undef, |
|
2807
|
|
|
|
|
|
|
para_action_ref => undef, |
|
2808
|
|
|
|
|
|
|
para_line_indent_ref => undef, |
|
2809
|
|
|
|
|
|
|
ind => 0, |
|
2810
|
|
|
|
|
|
|
prev_ref => undef, |
|
2811
|
|
|
|
|
|
|
@_ |
|
2812
|
|
|
|
|
|
|
); |
|
2813
|
1060
|
|
|
|
|
1472
|
my $para_lines_ref = $args{para_lines_ref}; |
|
2814
|
1060
|
|
|
|
|
1286
|
my $para_action_ref = $args{para_action_ref}; |
|
2815
|
1060
|
|
|
|
|
1359
|
my $para_line_indent_ref = $args{para_line_indent_ref}; |
|
2816
|
1060
|
|
|
|
|
1224
|
my $ind = $args{ind}; |
|
2817
|
1060
|
|
|
|
|
1143
|
my $prev_ref = $args{prev_ref}; |
|
2818
|
|
|
|
|
|
|
|
|
2819
|
1060
|
|
|
|
|
1198
|
my $i; |
|
2820
|
|
|
|
|
|
|
|
|
2821
|
1060
|
|
|
|
|
2335
|
my ($prefix, $number, $rawprefix, $term) = |
|
2822
|
|
|
|
|
|
|
$self->listprefix($para_lines_ref->[$ind]); |
|
2823
|
|
|
|
|
|
|
|
|
2824
|
1060
|
100
|
|
|
|
2961
|
if (!$prefix) |
|
2825
|
|
|
|
|
|
|
{ |
|
2826
|
|
|
|
|
|
|
# if the previous line is not blank |
|
2827
|
896
|
100
|
100
|
|
|
4267
|
if ($ind > 0 && $para_lines_ref->[$ind - 1] !~ /^\s*$/) |
|
2828
|
|
|
|
|
|
|
{ |
|
2829
|
|
|
|
|
|
|
# inside a list item |
|
2830
|
592
|
|
|
|
|
1630
|
return; |
|
2831
|
|
|
|
|
|
|
} |
|
2832
|
|
|
|
|
|
|
# This might be a new paragraph within an existing list item; |
|
2833
|
|
|
|
|
|
|
# It will be the first line, and have the same indentation |
|
2834
|
|
|
|
|
|
|
# as the list's indentation. |
|
2835
|
304
|
100
|
100
|
|
|
1615
|
if ( $ind == 0 |
|
|
|
|
100
|
|
|
|
|
|
2836
|
|
|
|
|
|
|
&& $self->{__listnum} |
|
2837
|
|
|
|
|
|
|
&& $para_line_indent_ref->[$ind] == |
|
2838
|
|
|
|
|
|
|
$self->{__list_indent}->[$self->{__listnum} - 1]) |
|
2839
|
|
|
|
|
|
|
{ |
|
2840
|
|
|
|
|
|
|
# start a paragraph |
|
2841
|
12
|
|
|
|
|
35
|
my $tag = $self->get_tag('p'); |
|
2842
|
12
|
|
|
|
|
18
|
${$prev_ref} .= $tag; |
|
|
12
|
|
|
|
|
21
|
|
|
2843
|
12
|
|
|
|
|
21
|
$para_action_ref->[$ind] |= $PAR; |
|
2844
|
12
|
|
|
|
|
36
|
return; |
|
2845
|
|
|
|
|
|
|
} |
|
2846
|
|
|
|
|
|
|
# This ain't no list. We'll want to end all of them. |
|
2847
|
292
|
100
|
|
|
|
673
|
if ($self->{__listnum}) |
|
2848
|
|
|
|
|
|
|
{ |
|
2849
|
16
|
|
|
|
|
81
|
$self->endlist( |
|
2850
|
|
|
|
|
|
|
num_lists => $self->{__listnum}, |
|
2851
|
|
|
|
|
|
|
prev_ref => $prev_ref, |
|
2852
|
|
|
|
|
|
|
line_action_ref => \$para_action_ref->[$ind] |
|
2853
|
|
|
|
|
|
|
); |
|
2854
|
|
|
|
|
|
|
} |
|
2855
|
292
|
|
|
|
|
1142
|
return; |
|
2856
|
|
|
|
|
|
|
} |
|
2857
|
|
|
|
|
|
|
|
|
2858
|
|
|
|
|
|
|
# If numbers with more than one digit grow to the left instead of |
|
2859
|
|
|
|
|
|
|
# to the right, the prefix will shrink and we'll fail to match the |
|
2860
|
|
|
|
|
|
|
# right list. We need to account for this. |
|
2861
|
164
|
|
|
|
|
185
|
my $prefix_alternate; |
|
2862
|
164
|
50
|
|
|
|
458
|
if (length("" . $number) > 1) |
|
2863
|
|
|
|
|
|
|
{ |
|
2864
|
0
|
|
|
|
|
0
|
$prefix_alternate = (" " x (length("" . $number) - 1)) . $prefix; |
|
2865
|
|
|
|
|
|
|
} |
|
2866
|
|
|
|
|
|
|
|
|
2867
|
|
|
|
|
|
|
# Maybe we're going back up to a previous list |
|
2868
|
164
|
|
100
|
|
|
1014
|
for ( |
|
2869
|
|
|
|
|
|
|
$i = $self->{__listnum} - 1; |
|
2870
|
|
|
|
|
|
|
($i >= 0) && ($prefix ne $self->{__listprefix}->[$i]); |
|
2871
|
|
|
|
|
|
|
$i-- |
|
2872
|
|
|
|
|
|
|
) |
|
2873
|
|
|
|
|
|
|
{ |
|
2874
|
73
|
50
|
|
|
|
396
|
if (length("" . $number) > 1) |
|
2875
|
|
|
|
|
|
|
{ |
|
2876
|
0
|
0
|
|
|
|
0
|
last if $prefix_alternate eq $self->{__listprefix}->[$i]; |
|
2877
|
|
|
|
|
|
|
} |
|
2878
|
|
|
|
|
|
|
} |
|
2879
|
|
|
|
|
|
|
|
|
2880
|
164
|
|
|
|
|
183
|
my $islist; |
|
2881
|
|
|
|
|
|
|
|
|
2882
|
|
|
|
|
|
|
# Measure the indent from where the text starts, not where the |
|
2883
|
|
|
|
|
|
|
# prefix starts. This won't screw anything up, and if we don't do |
|
2884
|
|
|
|
|
|
|
# it, the next line might appear to be indented relative to this |
|
2885
|
|
|
|
|
|
|
# line, and get tagged as a new paragraph. |
|
2886
|
164
|
|
|
|
|
309
|
my $bullets = $self->{bullets}; |
|
2887
|
164
|
|
|
|
|
268
|
my $bullets_ordered = $self->{bullets_ordered}; |
|
2888
|
164
|
|
|
|
|
238
|
my $term_match = $self->{__term_match}; |
|
2889
|
164
|
|
|
|
|
4940
|
my ($total_prefix) = |
|
2890
|
|
|
|
|
|
|
$para_lines_ref->[$ind] =~ /^(\s*[${bullets}${bullets_ordered}\w]+.\s*)/; |
|
2891
|
|
|
|
|
|
|
# a DL indent starts from the edge of the term, plus indent_width |
|
2892
|
164
|
100
|
|
|
|
2033
|
if ($term) |
|
2893
|
|
|
|
|
|
|
{ |
|
2894
|
9
|
|
|
|
|
167
|
($total_prefix) = $para_lines_ref->[$ind] =~ /^(\s*)${term_match}.$/; |
|
2895
|
9
|
|
|
|
|
35
|
$total_prefix .= " " x $self->{indent_width}; |
|
2896
|
|
|
|
|
|
|
} |
|
2897
|
|
|
|
|
|
|
|
|
2898
|
|
|
|
|
|
|
# Of course, we only use it if it really turns out to be a list. |
|
2899
|
|
|
|
|
|
|
|
|
2900
|
164
|
|
|
|
|
206
|
$islist = 1; |
|
2901
|
164
|
|
|
|
|
182
|
$i++; |
|
2902
|
164
|
100
|
100
|
|
|
1113
|
if (($i > 0) && ($i != $self->{__listnum})) |
|
|
|
100
|
100
|
|
|
|
|
|
2903
|
|
|
|
|
|
|
{ |
|
2904
|
23
|
|
|
|
|
91
|
$self->endlist( |
|
2905
|
|
|
|
|
|
|
num_lists => $self->{__listnum} - $i, |
|
2906
|
|
|
|
|
|
|
prev_ref => $prev_ref, |
|
2907
|
|
|
|
|
|
|
line_action_ref => \$para_action_ref->[$ind] |
|
2908
|
|
|
|
|
|
|
); |
|
2909
|
23
|
|
|
|
|
35
|
$islist = 0; |
|
2910
|
|
|
|
|
|
|
} |
|
2911
|
|
|
|
|
|
|
elsif (!$self->{__listnum} || ($i != $self->{__listnum})) |
|
2912
|
|
|
|
|
|
|
{ |
|
2913
|
70
|
100
|
100
|
|
|
418
|
if ( |
|
|
|
|
66
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
2914
|
|
|
|
|
|
|
($para_line_indent_ref->[$ind] > 0) |
|
2915
|
|
|
|
|
|
|
|| $ind == 0 |
|
2916
|
|
|
|
|
|
|
|| ($ind > 0 && ($para_lines_ref->[$ind - 1] =~ /^\s*$/)) |
|
2917
|
|
|
|
|
|
|
|| ( $ind > 0 |
|
2918
|
|
|
|
|
|
|
&& $para_action_ref->[$ind - 1] & ($BREAK | $HEADER | $CAPS)) |
|
2919
|
|
|
|
|
|
|
) |
|
2920
|
|
|
|
|
|
|
{ |
|
2921
|
68
|
|
|
|
|
279
|
$islist = $self->startlist( |
|
2922
|
|
|
|
|
|
|
prefix => $prefix, |
|
2923
|
|
|
|
|
|
|
number => $number, |
|
2924
|
|
|
|
|
|
|
rawprefix => $rawprefix, |
|
2925
|
|
|
|
|
|
|
term => $term, |
|
2926
|
|
|
|
|
|
|
para_lines_ref => $para_lines_ref, |
|
2927
|
|
|
|
|
|
|
para_action_ref => $para_action_ref, |
|
2928
|
|
|
|
|
|
|
ind => $ind, |
|
2929
|
|
|
|
|
|
|
prev_ref => $prev_ref, |
|
2930
|
|
|
|
|
|
|
total_prefix => $total_prefix |
|
2931
|
|
|
|
|
|
|
); |
|
2932
|
|
|
|
|
|
|
} |
|
2933
|
|
|
|
|
|
|
else |
|
2934
|
|
|
|
|
|
|
{ |
|
2935
|
|
|
|
|
|
|
|
|
2936
|
|
|
|
|
|
|
# We have something like this: "- foo" which usually |
|
2937
|
|
|
|
|
|
|
# turns out not to be a list. |
|
2938
|
2
|
|
|
|
|
10
|
return; |
|
2939
|
|
|
|
|
|
|
} |
|
2940
|
|
|
|
|
|
|
} |
|
2941
|
|
|
|
|
|
|
|
|
2942
|
|
|
|
|
|
|
$self->continuelist( |
|
2943
|
162
|
100
|
|
|
|
677
|
para_lines_ref => $para_lines_ref, |
|
2944
|
|
|
|
|
|
|
para_action_ref => $para_action_ref, |
|
2945
|
|
|
|
|
|
|
ind => $ind, |
|
2946
|
|
|
|
|
|
|
term => $term |
|
2947
|
|
|
|
|
|
|
) |
|
2948
|
|
|
|
|
|
|
if ($self->{__mode} & $LIST); |
|
2949
|
162
|
100
|
|
|
|
834
|
$para_line_indent_ref->[$ind] = length($total_prefix) if $islist; |
|
2950
|
|
|
|
|
|
|
} # liststuff |
|
2951
|
|
|
|
|
|
|
|
|
2952
|
|
|
|
|
|
|
=head2 get_table_type |
|
2953
|
|
|
|
|
|
|
|
|
2954
|
|
|
|
|
|
|
$table_type = $self->get_table_type(rows_ref=>$rows_ref, |
|
2955
|
|
|
|
|
|
|
para_len=>0); |
|
2956
|
|
|
|
|
|
|
|
|
2957
|
|
|
|
|
|
|
Figure out the table type of this table, if any |
|
2958
|
|
|
|
|
|
|
|
|
2959
|
|
|
|
|
|
|
=cut |
|
2960
|
|
|
|
|
|
|
sub get_table_type ($%) |
|
2961
|
|
|
|
|
|
|
{ |
|
2962
|
123
|
|
|
123
|
1
|
386
|
my $self = shift; |
|
2963
|
123
|
|
|
|
|
460
|
my %args = ( |
|
2964
|
|
|
|
|
|
|
rows_ref => undef, |
|
2965
|
|
|
|
|
|
|
para_len => 0, |
|
2966
|
|
|
|
|
|
|
@_ |
|
2967
|
|
|
|
|
|
|
); |
|
2968
|
123
|
|
|
|
|
161
|
my $table_type = 0; |
|
2969
|
123
|
100
|
66
|
|
|
695
|
if ( $self->{table_type}->{DELIM} |
|
|
|
100
|
66
|
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
2970
|
|
|
|
|
|
|
&& $self->is_delim_table(%args)) |
|
2971
|
|
|
|
|
|
|
{ |
|
2972
|
7
|
|
|
|
|
13
|
$table_type = $TAB_DELIM; |
|
2973
|
|
|
|
|
|
|
} |
|
2974
|
|
|
|
|
|
|
elsif ($self->{table_type}->{ALIGN} |
|
2975
|
|
|
|
|
|
|
&& $self->is_aligned_table(%args)) |
|
2976
|
|
|
|
|
|
|
{ |
|
2977
|
7
|
|
|
|
|
18
|
$table_type = $TAB_ALIGN; |
|
2978
|
|
|
|
|
|
|
} |
|
2979
|
|
|
|
|
|
|
elsif ($self->{table_type}->{PGSQL} |
|
2980
|
|
|
|
|
|
|
&& $self->is_pgsql_table(%args)) |
|
2981
|
|
|
|
|
|
|
{ |
|
2982
|
4
|
|
|
|
|
10
|
$table_type = $TAB_PGSQL; |
|
2983
|
|
|
|
|
|
|
} |
|
2984
|
|
|
|
|
|
|
elsif ($self->{table_type}->{BORDER} |
|
2985
|
|
|
|
|
|
|
&& $self->is_border_table(%args)) |
|
2986
|
|
|
|
|
|
|
{ |
|
2987
|
4
|
|
|
|
|
9
|
$table_type = $TAB_BORDER; |
|
2988
|
|
|
|
|
|
|
} |
|
2989
|
|
|
|
|
|
|
|
|
2990
|
123
|
|
|
|
|
418
|
return $table_type; |
|
2991
|
|
|
|
|
|
|
} |
|
2992
|
|
|
|
|
|
|
|
|
2993
|
|
|
|
|
|
|
=head2 is_aligned_table |
|
2994
|
|
|
|
|
|
|
|
|
2995
|
|
|
|
|
|
|
if ($self->is_aligned_table(rows_ref=>$rows_ref, para_len=>0)) |
|
2996
|
|
|
|
|
|
|
{ |
|
2997
|
|
|
|
|
|
|
... |
|
2998
|
|
|
|
|
|
|
} |
|
2999
|
|
|
|
|
|
|
|
|
3000
|
|
|
|
|
|
|
Check if the given paragraph-array is an aligned table |
|
3001
|
|
|
|
|
|
|
|
|
3002
|
|
|
|
|
|
|
=cut |
|
3003
|
|
|
|
|
|
|
sub is_aligned_table ($%) |
|
3004
|
|
|
|
|
|
|
{ |
|
3005
|
116
|
|
|
116
|
1
|
173
|
my $self = shift; |
|
3006
|
116
|
|
|
|
|
373
|
my %args = ( |
|
3007
|
|
|
|
|
|
|
rows_ref => undef, |
|
3008
|
|
|
|
|
|
|
para_len => 0, |
|
3009
|
|
|
|
|
|
|
@_ |
|
3010
|
|
|
|
|
|
|
); |
|
3011
|
116
|
|
|
|
|
187
|
my $rows_ref = $args{rows_ref}; |
|
3012
|
116
|
|
|
|
|
144
|
my $para_len = $args{para_len}; |
|
3013
|
|
|
|
|
|
|
|
|
3014
|
|
|
|
|
|
|
# TABLES: spot and mark up tables. We combine the lines of the |
|
3015
|
|
|
|
|
|
|
# paragraph using the string bitwise or (|) operator, the result |
|
3016
|
|
|
|
|
|
|
# being in $spaces. A character in $spaces is a space only if |
|
3017
|
|
|
|
|
|
|
# there was a space at that position in every line of the |
|
3018
|
|
|
|
|
|
|
# paragraph. $space can be used to search for contiguous spaces |
|
3019
|
|
|
|
|
|
|
# that occur on all lines of the paragraph. If this results in at |
|
3020
|
|
|
|
|
|
|
# least two columns, the paragraph is identified as a table. |
|
3021
|
|
|
|
|
|
|
|
|
3022
|
|
|
|
|
|
|
# Note that this sub must be called before checking for preformatted |
|
3023
|
|
|
|
|
|
|
# lines because a table may well have whitespace to the left, in |
|
3024
|
|
|
|
|
|
|
# which case it must not be incorrectly recognised as a preformat. |
|
3025
|
116
|
|
|
|
|
148
|
my @rows = @{$rows_ref}; |
|
|
116
|
|
|
|
|
292
|
|
|
3026
|
116
|
|
|
|
|
152
|
my @starts; |
|
3027
|
116
|
|
|
|
|
154
|
my $spaces = ''; |
|
3028
|
116
|
|
|
|
|
137
|
my $max = 0; |
|
3029
|
116
|
|
|
|
|
138
|
my $min = $para_len; |
|
3030
|
116
|
|
|
|
|
179
|
foreach my $row (@rows) |
|
3031
|
|
|
|
|
|
|
{ |
|
3032
|
611
|
|
|
|
|
1117
|
($spaces |= $row) =~ tr/ /\xff/c; |
|
3033
|
611
|
100
|
|
|
|
1100
|
$min = length $row if length $row < $min; |
|
3034
|
611
|
100
|
|
|
|
1201
|
$max = length $row if $max < length $row; |
|
3035
|
|
|
|
|
|
|
} |
|
3036
|
116
|
|
|
|
|
269
|
$spaces = substr $spaces, 0, $min; |
|
3037
|
116
|
100
|
|
|
|
354
|
push(@starts, 0) unless $spaces =~ /^ /; |
|
3038
|
116
|
|
|
|
|
485
|
while ($spaces =~ /((?:^| ) +)(?=[^ ])/g) |
|
3039
|
|
|
|
|
|
|
{ |
|
3040
|
47
|
|
|
|
|
229
|
push @starts, pos($spaces); |
|
3041
|
|
|
|
|
|
|
} |
|
3042
|
|
|
|
|
|
|
|
|
3043
|
116
|
100
|
66
|
|
|
509
|
if (2 <= @rows and 2 <= @starts) |
|
3044
|
|
|
|
|
|
|
{ |
|
3045
|
7
|
|
|
|
|
42
|
return 1; |
|
3046
|
|
|
|
|
|
|
} |
|
3047
|
|
|
|
|
|
|
else |
|
3048
|
|
|
|
|
|
|
{ |
|
3049
|
109
|
|
|
|
|
922
|
return 0; |
|
3050
|
|
|
|
|
|
|
} |
|
3051
|
|
|
|
|
|
|
} |
|
3052
|
|
|
|
|
|
|
|
|
3053
|
|
|
|
|
|
|
=head2 is_pgsql_table |
|
3054
|
|
|
|
|
|
|
|
|
3055
|
|
|
|
|
|
|
if ($self->is_pgsql_table(rows_ref=>$rows_ref, para_len=>0)) |
|
3056
|
|
|
|
|
|
|
{ |
|
3057
|
|
|
|
|
|
|
... |
|
3058
|
|
|
|
|
|
|
} |
|
3059
|
|
|
|
|
|
|
|
|
3060
|
|
|
|
|
|
|
Check if the given paragraph-array is a Postgresql table |
|
3061
|
|
|
|
|
|
|
(the ascii format produced by Postgresql) |
|
3062
|
|
|
|
|
|
|
|
|
3063
|
|
|
|
|
|
|
A PGSQL table can start with an optional table-caption, |
|
3064
|
|
|
|
|
|
|
|
|
3065
|
|
|
|
|
|
|
then it has a row of column headings separated by | |
|
3066
|
|
|
|
|
|
|
then it has a row of ------+----- |
|
3067
|
|
|
|
|
|
|
then it has one or more rows of column values separated by | |
|
3068
|
|
|
|
|
|
|
then it has a row-count (N rows) |
|
3069
|
|
|
|
|
|
|
|
|
3070
|
|
|
|
|
|
|
=cut |
|
3071
|
|
|
|
|
|
|
sub is_pgsql_table ($%) |
|
3072
|
|
|
|
|
|
|
{ |
|
3073
|
109
|
|
|
109
|
1
|
155
|
my $self = shift; |
|
3074
|
109
|
|
|
|
|
343
|
my %args = ( |
|
3075
|
|
|
|
|
|
|
rows_ref => undef, |
|
3076
|
|
|
|
|
|
|
para_len => 0, |
|
3077
|
|
|
|
|
|
|
@_ |
|
3078
|
|
|
|
|
|
|
); |
|
3079
|
109
|
|
|
|
|
154
|
my $rows_ref = $args{rows_ref}; |
|
3080
|
109
|
|
|
|
|
140
|
my $para_len = $args{para_len}; |
|
3081
|
|
|
|
|
|
|
|
|
3082
|
|
|
|
|
|
|
# A PGSQL table must have at least 4 rows (see above). |
|
3083
|
109
|
100
|
|
|
|
132
|
if (@{$rows_ref} < 4) |
|
|
109
|
|
|
|
|
280
|
|
|
3084
|
|
|
|
|
|
|
{ |
|
3085
|
63
|
|
|
|
|
465
|
return 0; |
|
3086
|
|
|
|
|
|
|
} |
|
3087
|
|
|
|
|
|
|
|
|
3088
|
46
|
|
|
|
|
70
|
my @rows = @{$rows_ref}; |
|
|
46
|
|
|
|
|
144
|
|
|
3089
|
46
|
100
|
100
|
|
|
335
|
if ($rows[0] !~ /\|/ && $rows[0] =~ /^\s*\w+/) # possible caption |
|
3090
|
|
|
|
|
|
|
{ |
|
3091
|
29
|
|
|
|
|
49
|
shift @rows; |
|
3092
|
|
|
|
|
|
|
} |
|
3093
|
46
|
100
|
|
|
|
108
|
if (@rows < 4) |
|
3094
|
|
|
|
|
|
|
{ |
|
3095
|
10
|
|
|
|
|
79
|
return 0; |
|
3096
|
|
|
|
|
|
|
} |
|
3097
|
36
|
100
|
|
|
|
116
|
if ($rows[0] !~ /^\s*\w+\s+\|\s+/) # Colname | |
|
3098
|
|
|
|
|
|
|
{ |
|
3099
|
32
|
|
|
|
|
260
|
return 0; |
|
3100
|
|
|
|
|
|
|
} |
|
3101
|
4
|
50
|
|
|
|
28
|
if ($rows[1] !~ /^\s*[-]+[+][-]+/) # ----+---- |
|
3102
|
|
|
|
|
|
|
{ |
|
3103
|
0
|
|
|
|
|
0
|
return 0; |
|
3104
|
|
|
|
|
|
|
} |
|
3105
|
4
|
50
|
|
|
|
36
|
if ($rows[2] !~ /^\s*[^|]*\s+\|\s+/) # value | |
|
3106
|
|
|
|
|
|
|
{ |
|
3107
|
0
|
|
|
|
|
0
|
return 0; |
|
3108
|
|
|
|
|
|
|
} |
|
3109
|
|
|
|
|
|
|
# check the last row for rowcount |
|
3110
|
4
|
50
|
|
|
|
31
|
if ($rows[$#rows] !~ /\(\d+\s+rows\)/) |
|
3111
|
|
|
|
|
|
|
{ |
|
3112
|
0
|
|
|
|
|
0
|
return 0; |
|
3113
|
|
|
|
|
|
|
} |
|
3114
|
|
|
|
|
|
|
|
|
3115
|
4
|
|
|
|
|
28
|
return 1; |
|
3116
|
|
|
|
|
|
|
} |
|
3117
|
|
|
|
|
|
|
|
|
3118
|
|
|
|
|
|
|
=head2 is_border_table |
|
3119
|
|
|
|
|
|
|
|
|
3120
|
|
|
|
|
|
|
if ($self->is_border_table(rows_ref=>$rows_ref, para_len=>0)) |
|
3121
|
|
|
|
|
|
|
{ |
|
3122
|
|
|
|
|
|
|
... |
|
3123
|
|
|
|
|
|
|
} |
|
3124
|
|
|
|
|
|
|
|
|
3125
|
|
|
|
|
|
|
Check if the given paragraph-array is a Border table. |
|
3126
|
|
|
|
|
|
|
|
|
3127
|
|
|
|
|
|
|
A BORDER table can start with an optional table-caption, |
|
3128
|
|
|
|
|
|
|
|
|
3129
|
|
|
|
|
|
|
then it has a row of +------+-----+ |
|
3130
|
|
|
|
|
|
|
then it has a row of column headings separated by | |
|
3131
|
|
|
|
|
|
|
then it has a row of +------+-----+ |
|
3132
|
|
|
|
|
|
|
then it has one or more rows of column values separated by | |
|
3133
|
|
|
|
|
|
|
then it has a row of +------+-----+ |
|
3134
|
|
|
|
|
|
|
|
|
3135
|
|
|
|
|
|
|
=cut |
|
3136
|
|
|
|
|
|
|
sub is_border_table ($%) |
|
3137
|
|
|
|
|
|
|
{ |
|
3138
|
105
|
|
|
105
|
1
|
142
|
my $self = shift; |
|
3139
|
105
|
|
|
|
|
291
|
my %args = ( |
|
3140
|
|
|
|
|
|
|
rows_ref => undef, |
|
3141
|
|
|
|
|
|
|
para_len => 0, |
|
3142
|
|
|
|
|
|
|
@_ |
|
3143
|
|
|
|
|
|
|
); |
|
3144
|
105
|
|
|
|
|
146
|
my $rows_ref = $args{rows_ref}; |
|
3145
|
105
|
|
|
|
|
125
|
my $para_len = $args{para_len}; |
|
3146
|
|
|
|
|
|
|
|
|
3147
|
|
|
|
|
|
|
# A BORDER table must have at least 5 rows (see above) |
|
3148
|
|
|
|
|
|
|
# And note that it could be indented with spaces |
|
3149
|
105
|
100
|
|
|
|
114
|
if (@{$rows_ref} < 5) |
|
|
105
|
|
|
|
|
245
|
|
|
3150
|
|
|
|
|
|
|
{ |
|
3151
|
77
|
|
|
|
|
312
|
return 0; |
|
3152
|
|
|
|
|
|
|
} |
|
3153
|
|
|
|
|
|
|
|
|
3154
|
28
|
|
|
|
|
47
|
my @rows = @{$rows_ref}; |
|
|
28
|
|
|
|
|
87
|
|
|
3155
|
28
|
100
|
66
|
|
|
199
|
if ($rows[0] !~ /\|/ && $rows[0] =~ /^\s*\w+/) # possible caption |
|
3156
|
|
|
|
|
|
|
{ |
|
3157
|
18
|
|
|
|
|
27
|
shift @rows; |
|
3158
|
|
|
|
|
|
|
} |
|
3159
|
28
|
100
|
|
|
|
69
|
if (@rows < 5) |
|
3160
|
|
|
|
|
|
|
{ |
|
3161
|
8
|
|
|
|
|
40
|
return 0; |
|
3162
|
|
|
|
|
|
|
} |
|
3163
|
20
|
100
|
|
|
|
88
|
if ($rows[0] !~ /^\s*[+][-]+[+][-]+[+][-+]*$/) # +----+----+ |
|
3164
|
|
|
|
|
|
|
{ |
|
3165
|
16
|
|
|
|
|
83
|
return 0; |
|
3166
|
|
|
|
|
|
|
} |
|
3167
|
4
|
50
|
|
|
|
29
|
if ($rows[1] !~ /^\s*\|\s*\w+\s+\|\s+.*\|$/) # | Colname | |
|
3168
|
|
|
|
|
|
|
{ |
|
3169
|
0
|
|
|
|
|
0
|
return 0; |
|
3170
|
|
|
|
|
|
|
} |
|
3171
|
4
|
50
|
|
|
|
23
|
if ($rows[2] !~ /^\s*[+][-]+[+][-]+[+][-+]*$/) # +----+----+ |
|
3172
|
|
|
|
|
|
|
{ |
|
3173
|
0
|
|
|
|
|
0
|
return 0; |
|
3174
|
|
|
|
|
|
|
} |
|
3175
|
4
|
50
|
|
|
|
25
|
if ($rows[3] !~ /^\s*\|\s*[^|]*\s+\|\s+.*\|$/) # | value | |
|
3176
|
|
|
|
|
|
|
{ |
|
3177
|
0
|
|
|
|
|
0
|
return 0; |
|
3178
|
|
|
|
|
|
|
} |
|
3179
|
|
|
|
|
|
|
# check the last row for +------+------+ |
|
3180
|
4
|
50
|
|
|
|
22
|
if ($rows[$#rows] !~ /^\s*[+][-]+[+][-]+[+][-+]*$/) # +----+----+ |
|
3181
|
|
|
|
|
|
|
{ |
|
3182
|
0
|
|
|
|
|
0
|
return 0; |
|
3183
|
|
|
|
|
|
|
} |
|
3184
|
|
|
|
|
|
|
|
|
3185
|
4
|
|
|
|
|
25
|
return 1; |
|
3186
|
|
|
|
|
|
|
} # is_border_table |
|
3187
|
|
|
|
|
|
|
|
|
3188
|
|
|
|
|
|
|
=head2 is_delim_table |
|
3189
|
|
|
|
|
|
|
|
|
3190
|
|
|
|
|
|
|
if ($self->is_delim_table(rows_ref=>$rows_ref, para_len=>0)) |
|
3191
|
|
|
|
|
|
|
{ |
|
3192
|
|
|
|
|
|
|
... |
|
3193
|
|
|
|
|
|
|
} |
|
3194
|
|
|
|
|
|
|
|
|
3195
|
|
|
|
|
|
|
Check if the given paragraph-array is a Delimited table. |
|
3196
|
|
|
|
|
|
|
|
|
3197
|
|
|
|
|
|
|
A DELIM table can start with an optional table-caption, |
|
3198
|
|
|
|
|
|
|
then it has at least two rows which start and end and are |
|
3199
|
|
|
|
|
|
|
punctuated by a non-alphanumeric delimiter. |
|
3200
|
|
|
|
|
|
|
|
|
3201
|
|
|
|
|
|
|
| val1 | val2 | |
|
3202
|
|
|
|
|
|
|
| val3 | val4 | |
|
3203
|
|
|
|
|
|
|
|
|
3204
|
|
|
|
|
|
|
=cut |
|
3205
|
|
|
|
|
|
|
sub is_delim_table ($%) |
|
3206
|
|
|
|
|
|
|
{ |
|
3207
|
123
|
|
|
123
|
1
|
216
|
my $self = shift; |
|
3208
|
123
|
|
|
|
|
376
|
my %args = ( |
|
3209
|
|
|
|
|
|
|
rows_ref => undef, |
|
3210
|
|
|
|
|
|
|
para_len => 0, |
|
3211
|
|
|
|
|
|
|
@_ |
|
3212
|
|
|
|
|
|
|
); |
|
3213
|
123
|
|
|
|
|
175
|
my $rows_ref = $args{rows_ref}; |
|
3214
|
123
|
|
|
|
|
162
|
my $para_len = $args{para_len}; |
|
3215
|
|
|
|
|
|
|
|
|
3216
|
|
|
|
|
|
|
# |
|
3217
|
|
|
|
|
|
|
# And note that it could be indented with spaces |
|
3218
|
123
|
50
|
|
|
|
129
|
if (@{$rows_ref} < 2) |
|
|
123
|
|
|
|
|
315
|
|
|
3219
|
|
|
|
|
|
|
{ |
|
3220
|
0
|
|
|
|
|
0
|
return 0; |
|
3221
|
|
|
|
|
|
|
} |
|
3222
|
|
|
|
|
|
|
|
|
3223
|
123
|
|
|
|
|
148
|
my @rows = @{$rows_ref}; |
|
|
123
|
|
|
|
|
315
|
|
|
3224
|
123
|
100
|
66
|
|
|
721
|
if ($rows[0] !~ /[^\w\s]/ && $rows[0] =~ /^\s*\w+/) # possible caption |
|
3225
|
|
|
|
|
|
|
{ |
|
3226
|
44
|
|
|
|
|
77
|
shift @rows; |
|
3227
|
|
|
|
|
|
|
} |
|
3228
|
123
|
100
|
|
|
|
295
|
if (@rows < 2) |
|
3229
|
|
|
|
|
|
|
{ |
|
3230
|
30
|
|
|
|
|
285
|
return 0; |
|
3231
|
|
|
|
|
|
|
} |
|
3232
|
|
|
|
|
|
|
# figure out if the row starts with a possible delimiter |
|
3233
|
93
|
|
|
|
|
151
|
my $delim = ''; |
|
3234
|
93
|
100
|
|
|
|
296
|
if ($rows[0] =~ /^\s*([^[:alnum:]])/) |
|
3235
|
|
|
|
|
|
|
{ |
|
3236
|
55
|
|
|
|
|
176
|
$delim = $1; |
|
3237
|
|
|
|
|
|
|
# have to get rid of ^ and [] and \ |
|
3238
|
55
|
|
|
|
|
111
|
$delim =~ s/\^//g; |
|
3239
|
55
|
|
|
|
|
79
|
$delim =~ s/\[//g; |
|
3240
|
55
|
|
|
|
|
88
|
$delim =~ s/\]//g; |
|
3241
|
55
|
|
|
|
|
87
|
$delim =~ s/\\//g; |
|
3242
|
55
|
100
|
|
|
|
129
|
if (!$delim) # no delimiter after all |
|
3243
|
|
|
|
|
|
|
{ |
|
3244
|
1
|
|
|
|
|
13
|
return 0; |
|
3245
|
|
|
|
|
|
|
} |
|
3246
|
|
|
|
|
|
|
} |
|
3247
|
|
|
|
|
|
|
else |
|
3248
|
|
|
|
|
|
|
{ |
|
3249
|
38
|
|
|
|
|
332
|
return 0; |
|
3250
|
|
|
|
|
|
|
} |
|
3251
|
|
|
|
|
|
|
# There needs to be at least three delimiters in the row |
|
3252
|
54
|
|
|
|
|
920
|
my @all_delims = ($rows[0] =~ /[${delim}]/g); |
|
3253
|
54
|
|
|
|
|
119
|
my $total_num_delims = @all_delims; |
|
3254
|
54
|
100
|
|
|
|
138
|
if ($total_num_delims < 3) |
|
3255
|
|
|
|
|
|
|
{ |
|
3256
|
16
|
|
|
|
|
146
|
return 0; |
|
3257
|
|
|
|
|
|
|
} |
|
3258
|
|
|
|
|
|
|
# All rows must start and end with the delimiter |
|
3259
|
|
|
|
|
|
|
# and have $total_num_delims number of them |
|
3260
|
38
|
|
|
|
|
69
|
foreach my $row (@rows) |
|
3261
|
|
|
|
|
|
|
{ |
|
3262
|
71
|
100
|
|
|
|
736
|
if ($row !~ /^\s*[${delim}]/) |
|
3263
|
|
|
|
|
|
|
{ |
|
3264
|
8
|
|
|
|
|
119
|
return 0; |
|
3265
|
|
|
|
|
|
|
} |
|
3266
|
63
|
100
|
|
|
|
588
|
if ($row !~ /[${delim}]\s*$/) |
|
3267
|
|
|
|
|
|
|
{ |
|
3268
|
23
|
|
|
|
|
303
|
return 0; |
|
3269
|
|
|
|
|
|
|
} |
|
3270
|
40
|
|
|
|
|
427
|
@all_delims = ($row =~ /[${delim}]/g); |
|
3271
|
40
|
50
|
|
|
|
177
|
if (@all_delims != $total_num_delims) |
|
3272
|
|
|
|
|
|
|
{ |
|
3273
|
0
|
|
|
|
|
0
|
return 0; |
|
3274
|
|
|
|
|
|
|
} |
|
3275
|
|
|
|
|
|
|
} |
|
3276
|
|
|
|
|
|
|
|
|
3277
|
7
|
|
|
|
|
45
|
return 1; |
|
3278
|
|
|
|
|
|
|
} # is_delim_table |
|
3279
|
|
|
|
|
|
|
|
|
3280
|
|
|
|
|
|
|
=head2 tablestuff |
|
3281
|
|
|
|
|
|
|
|
|
3282
|
|
|
|
|
|
|
$self->tablestuff(table_type=>0, |
|
3283
|
|
|
|
|
|
|
rows_ref=>$rows_ref, |
|
3284
|
|
|
|
|
|
|
para_len=>0); |
|
3285
|
|
|
|
|
|
|
|
|
3286
|
|
|
|
|
|
|
Process a table. |
|
3287
|
|
|
|
|
|
|
|
|
3288
|
|
|
|
|
|
|
=cut |
|
3289
|
|
|
|
|
|
|
sub tablestuff ($%) |
|
3290
|
|
|
|
|
|
|
{ |
|
3291
|
22
|
|
|
22
|
1
|
37
|
my $self = shift; |
|
3292
|
22
|
|
|
|
|
113
|
my %args = ( |
|
3293
|
|
|
|
|
|
|
table_type => 0, |
|
3294
|
|
|
|
|
|
|
rows_ref => undef, |
|
3295
|
|
|
|
|
|
|
para_len => 0, |
|
3296
|
|
|
|
|
|
|
@_ |
|
3297
|
|
|
|
|
|
|
); |
|
3298
|
22
|
|
|
|
|
140
|
my $table_type = $args{table_type}; |
|
3299
|
22
|
100
|
|
|
|
70
|
if ($table_type eq $TAB_ALIGN) |
|
3300
|
|
|
|
|
|
|
{ |
|
3301
|
7
|
|
|
|
|
38
|
return $self->make_aligned_table(%args); |
|
3302
|
|
|
|
|
|
|
} |
|
3303
|
15
|
100
|
|
|
|
49
|
if ($table_type eq $TAB_PGSQL) |
|
3304
|
|
|
|
|
|
|
{ |
|
3305
|
4
|
|
|
|
|
24
|
return $self->make_pgsql_table(%args); |
|
3306
|
|
|
|
|
|
|
} |
|
3307
|
11
|
100
|
|
|
|
32
|
if ($table_type eq $TAB_BORDER) |
|
3308
|
|
|
|
|
|
|
{ |
|
3309
|
4
|
|
|
|
|
24
|
return $self->make_border_table(%args); |
|
3310
|
|
|
|
|
|
|
} |
|
3311
|
7
|
50
|
|
|
|
29
|
if ($table_type eq $TAB_DELIM) |
|
3312
|
|
|
|
|
|
|
{ |
|
3313
|
7
|
|
|
|
|
31
|
return $self->make_delim_table(%args); |
|
3314
|
|
|
|
|
|
|
} |
|
3315
|
|
|
|
|
|
|
} # tablestuff |
|
3316
|
|
|
|
|
|
|
|
|
3317
|
|
|
|
|
|
|
=head2 make_aligned_table |
|
3318
|
|
|
|
|
|
|
|
|
3319
|
|
|
|
|
|
|
$self->make_aligned_table(rows_ref=>$rows_ref, |
|
3320
|
|
|
|
|
|
|
para_len=>0); |
|
3321
|
|
|
|
|
|
|
|
|
3322
|
|
|
|
|
|
|
Make an Aligned table. |
|
3323
|
|
|
|
|
|
|
|
|
3324
|
|
|
|
|
|
|
=cut |
|
3325
|
|
|
|
|
|
|
sub make_aligned_table ($%) |
|
3326
|
|
|
|
|
|
|
{ |
|
3327
|
7
|
|
|
7
|
1
|
13
|
my $self = shift; |
|
3328
|
7
|
|
|
|
|
32
|
my %args = ( |
|
3329
|
|
|
|
|
|
|
rows_ref => undef, |
|
3330
|
|
|
|
|
|
|
para_len => 0, |
|
3331
|
|
|
|
|
|
|
@_ |
|
3332
|
|
|
|
|
|
|
); |
|
3333
|
7
|
|
|
|
|
16
|
my $rows_ref = $args{rows_ref}; |
|
3334
|
7
|
|
|
|
|
21
|
my $para_len = $args{para_len}; |
|
3335
|
|
|
|
|
|
|
|
|
3336
|
|
|
|
|
|
|
# TABLES: spot and mark up tables. We combine the lines of the |
|
3337
|
|
|
|
|
|
|
# paragraph using the string bitwise or (|) operator, the result |
|
3338
|
|
|
|
|
|
|
# being in $spaces. A character in $spaces is a space only if |
|
3339
|
|
|
|
|
|
|
# there was a space at that position in every line of the |
|
3340
|
|
|
|
|
|
|
# paragraph. $space can be used to search for contiguous spaces |
|
3341
|
|
|
|
|
|
|
# that occur on all lines of the paragraph. If this results in at |
|
3342
|
|
|
|
|
|
|
# least two columns, the paragraph is identified as a table. |
|
3343
|
|
|
|
|
|
|
|
|
3344
|
|
|
|
|
|
|
# Note that this sub must be called before checking for preformatted |
|
3345
|
|
|
|
|
|
|
# lines because a table may well have whitespace to the left, in |
|
3346
|
|
|
|
|
|
|
# which case it must not be incorrectly recognised as a preformat. |
|
3347
|
7
|
|
|
|
|
11
|
my @rows = @{$rows_ref}; |
|
|
7
|
|
|
|
|
25
|
|
|
3348
|
7
|
|
|
|
|
10
|
my @starts; |
|
3349
|
|
|
|
|
|
|
my @ends; |
|
3350
|
0
|
|
|
|
|
0
|
my $spaces; |
|
3351
|
7
|
|
|
|
|
11
|
my $max = 0; |
|
3352
|
7
|
|
|
|
|
11
|
my $min = $para_len; |
|
3353
|
7
|
|
|
|
|
17
|
foreach my $row (@rows) |
|
3354
|
|
|
|
|
|
|
{ |
|
3355
|
33
|
|
|
|
|
67
|
($spaces |= $row) =~ tr/ /\xff/c; |
|
3356
|
33
|
100
|
|
|
|
78
|
$min = length $row if length $row < $min; |
|
3357
|
33
|
100
|
|
|
|
83
|
$max = length $row if $max < length $row; |
|
3358
|
|
|
|
|
|
|
} |
|
3359
|
7
|
|
|
|
|
19
|
$spaces = substr $spaces, 0, $min; |
|
3360
|
7
|
100
|
|
|
|
38
|
push(@starts, 0) unless $spaces =~ /^ /; |
|
3361
|
7
|
|
|
|
|
49
|
while ($spaces =~ /((?:^| ) +)(?=[^ ])/g) |
|
3362
|
|
|
|
|
|
|
{ |
|
3363
|
12
|
|
|
|
|
34
|
push @ends, pos($spaces) - length $1; |
|
3364
|
12
|
|
|
|
|
51
|
push @starts, pos($spaces); |
|
3365
|
|
|
|
|
|
|
} |
|
3366
|
7
|
100
|
|
|
|
32
|
shift(@ends) if $spaces =~ /^ /; |
|
3367
|
7
|
|
|
|
|
15
|
push(@ends, $max); |
|
3368
|
|
|
|
|
|
|
|
|
3369
|
|
|
|
|
|
|
# Two or more rows and two or more columns indicate a table. |
|
3370
|
7
|
50
|
33
|
|
|
49
|
if (2 <= @rows and 2 <= @starts) |
|
3371
|
|
|
|
|
|
|
{ |
|
3372
|
7
|
|
|
|
|
18
|
$self->{__mode} |= $TABLE; |
|
3373
|
|
|
|
|
|
|
|
|
3374
|
|
|
|
|
|
|
# For each column, guess whether it should be left, centre or |
|
3375
|
|
|
|
|
|
|
# right aligned by examining all cells in that column for space |
|
3376
|
|
|
|
|
|
|
# to the left or the right. A simple majority among those cells |
|
3377
|
|
|
|
|
|
|
# that actually have space to one side or another decides (if no |
|
3378
|
|
|
|
|
|
|
# alignment gets a majority, left alignment wins by default). |
|
3379
|
7
|
|
|
|
|
11
|
my @align; |
|
3380
|
7
|
|
|
|
|
14
|
my $cell = ''; |
|
3381
|
7
|
|
|
|
|
27
|
foreach my $col (0 .. $#starts) |
|
3382
|
|
|
|
|
|
|
{ |
|
3383
|
15
|
|
|
|
|
36
|
my @count = (0, 0, 0, 0); |
|
3384
|
15
|
|
|
|
|
25
|
foreach my $row (@rows) |
|
3385
|
|
|
|
|
|
|
{ |
|
3386
|
69
|
|
|
|
|
106
|
my $width = $ends[$col] - $starts[$col]; |
|
3387
|
69
|
|
|
|
|
118
|
$cell = substr $row, $starts[$col], $width; |
|
3388
|
69
|
100
|
100
|
|
|
388
|
++$count[($cell =~ /^ / ? 2 : 0) + |
|
|
|
100
|
|
|
|
|
|
|
3389
|
|
|
|
|
|
|
($cell =~ / $/ || length($cell) < $width ? 1 : 0)]; |
|
3390
|
|
|
|
|
|
|
} |
|
3391
|
15
|
|
|
|
|
29
|
$align[$col] = 0; |
|
3392
|
15
|
|
|
|
|
26
|
my $population = $count[1] + $count[2] + $count[3]; |
|
3393
|
15
|
|
|
|
|
27
|
foreach (1 .. 3) |
|
3394
|
|
|
|
|
|
|
{ |
|
3395
|
24
|
100
|
|
|
|
71
|
if ($count[$_] * 2 > $population) |
|
3396
|
|
|
|
|
|
|
{ |
|
3397
|
12
|
|
|
|
|
16
|
$align[$col] = $_; |
|
3398
|
12
|
|
|
|
|
35
|
last; |
|
3399
|
|
|
|
|
|
|
} |
|
3400
|
|
|
|
|
|
|
} |
|
3401
|
|
|
|
|
|
|
} |
|
3402
|
|
|
|
|
|
|
|
|
3403
|
7
|
|
|
|
|
16
|
foreach my $row (@rows) |
|
3404
|
|
|
|
|
|
|
{ |
|
3405
|
69
|
|
|
|
|
161
|
$row = join '', $self->get_tag('tr'), ( |
|
3406
|
|
|
|
|
|
|
map { |
|
3407
|
33
|
|
|
|
|
79
|
$cell = substr $row, $starts[$_], $ends[$_] - $starts[$_]; |
|
3408
|
69
|
|
|
|
|
118
|
$cell =~ s/^ +//; |
|
3409
|
69
|
|
|
|
|
150
|
$cell =~ s/ +$//; |
|
3410
|
|
|
|
|
|
|
|
|
3411
|
69
|
50
|
|
|
|
154
|
if ($self->{escape_HTML_chars}) |
|
3412
|
|
|
|
|
|
|
{ |
|
3413
|
69
|
|
|
|
|
113
|
$cell = escape($cell); |
|
3414
|
|
|
|
|
|
|
} |
|
3415
|
|
|
|
|
|
|
|
|
3416
|
|
|
|
|
|
|
( |
|
3417
|
69
|
50
|
|
|
|
257
|
$self->get_tag( |
|
|
|
100
|
|
|
|
|
|
|
3418
|
|
|
|
|
|
|
'td', |
|
3419
|
|
|
|
|
|
|
inside_tag => ( |
|
3420
|
|
|
|
|
|
|
$self->{xhtml} ? $xhtml_alignments[$align[$_]] |
|
3421
|
|
|
|
|
|
|
: ( |
|
3422
|
|
|
|
|
|
|
$self->{lower_case_tags} |
|
3423
|
|
|
|
|
|
|
? $lc_alignments[$align[$_]] |
|
3424
|
|
|
|
|
|
|
: $alignments[$align[$_]] |
|
3425
|
|
|
|
|
|
|
) |
|
3426
|
|
|
|
|
|
|
) |
|
3427
|
|
|
|
|
|
|
), |
|
3428
|
|
|
|
|
|
|
$cell, |
|
3429
|
|
|
|
|
|
|
$self->close_tag('td') |
|
3430
|
|
|
|
|
|
|
); |
|
3431
|
|
|
|
|
|
|
} 0 .. $#starts |
|
3432
|
|
|
|
|
|
|
), |
|
3433
|
|
|
|
|
|
|
$self->close_tag('tr'); |
|
3434
|
|
|
|
|
|
|
} |
|
3435
|
|
|
|
|
|
|
|
|
3436
|
|
|
|
|
|
|
# put the around the rows
|
3437
|
7
|
|
|
|
|
16
|
my $tag; |
|
3438
|
7
|
100
|
|
|
|
27
|
if ($self->{xhtml}) |
|
3439
|
|
|
|
|
|
|
{ |
|
3440
|
5
|
|
|
|
|
18
|
$tag = $self->get_tag('table', inside_tag => ' summary=""'); |
|
3441
|
|
|
|
|
|
|
} |
|
3442
|
|
|
|
|
|
|
else |
|
3443
|
|
|
|
|
|
|
{ |
|
3444
|
2
|
|
|
|
|
4
|
$tag = $self->get_tag('table'); |
|
3445
|
|
|
|
|
|
|
} |
|
3446
|
7
|
|
|
|
|
24
|
$rows[0] = join("\n", $tag, $rows[0]); |
|
3447
|
7
|
|
|
|
|
22
|
$tag = $self->close_tag('table', tag_type => TAG_END); |
|
3448
|
7
|
|
|
|
|
23
|
$rows[$#rows] .= "\n${tag}"; |
|
3449
|
7
|
|
|
|
|
13
|
@{$rows_ref} = @rows; |
|
|
7
|
|
|
|
|
29
|
|
|
3450
|
7
|
|
|
|
|
59
|
return 1; |
|
3451
|
|
|
|
|
|
|
} |
|
3452
|
|
|
|
|
|
|
else |
|
3453
|
|
|
|
|
|
|
{ |
|
3454
|
0
|
|
|
|
|
0
|
return 0; |
|
3455
|
|
|
|
|
|
|
} |
|
3456
|
|
|
|
|
|
|
} # make_aligned_table |
|
3457
|
|
|
|
|
|
|
|
|
3458
|
|
|
|
|
|
|
=head2 make_pgsql_table |
|
3459
|
|
|
|
|
|
|
|
|
3460
|
|
|
|
|
|
|
$self->make_pgsql_table(rows_ref=>$rows_ref, |
|
3461
|
|
|
|
|
|
|
para_len=>0); |
|
3462
|
|
|
|
|
|
|
|
|
3463
|
|
|
|
|
|
|
Make a PGSQL table. |
|
3464
|
|
|
|
|
|
|
|
|
3465
|
|
|
|
|
|
|
=cut |
|
3466
|
|
|
|
|
|
|
sub make_pgsql_table ($%) |
|
3467
|
|
|
|
|
|
|
{ |
|
3468
|
4
|
|
|
4
|
1
|
11
|
my $self = shift; |
|
3469
|
4
|
|
|
|
|
23
|
my %args = ( |
|
3470
|
|
|
|
|
|
|
rows_ref => undef, |
|
3471
|
|
|
|
|
|
|
para_len => 0, |
|
3472
|
|
|
|
|
|
|
@_ |
|
3473
|
|
|
|
|
|
|
); |
|
3474
|
4
|
|
|
|
|
9
|
my $rows_ref = $args{rows_ref}; |
|
3475
|
4
|
|
|
|
|
10
|
my $para_len = $args{para_len}; |
|
3476
|
|
|
|
|
|
|
|
|
3477
|
|
|
|
|
|
|
# a PGSQL table can start with an optional table-caption, |
|
3478
|
|
|
|
|
|
|
# then it has a row of column headings separated by | |
|
3479
|
|
|
|
|
|
|
# then it has a row of ------+----- |
|
3480
|
|
|
|
|
|
|
# then it has one or more rows of column values separated by | |
|
3481
|
|
|
|
|
|
|
# then it has a row-count (N rows) |
|
3482
|
|
|
|
|
|
|
# Thus it must have at least 4 rows. |
|
3483
|
4
|
|
|
|
|
7
|
my @rows = @{$rows_ref}; |
|
|
4
|
|
|
|
|
26
|
|
|
3484
|
4
|
|
|
|
|
17
|
my $caption = ''; |
|
3485
|
4
|
100
|
66
|
|
|
33
|
if ($rows[0] !~ /\|/ && $rows[0] =~ /^\s*\w+/) # possible caption |
|
3486
|
|
|
|
|
|
|
{ |
|
3487
|
1
|
|
|
|
|
2
|
$caption = shift @rows; |
|
3488
|
|
|
|
|
|
|
} |
|
3489
|
4
|
|
|
|
|
41
|
my @headings = split(/\s+\|\s+/, shift @rows); |
|
3490
|
|
|
|
|
|
|
# skip the ----+--- line |
|
3491
|
4
|
|
|
|
|
11
|
shift @rows; |
|
3492
|
|
|
|
|
|
|
# grab the N rows line |
|
3493
|
4
|
|
|
|
|
9
|
my $n_rows = pop @rows; |
|
3494
|
|
|
|
|
|
|
|
|
3495
|
|
|
|
|
|
|
# now start making the table |
|
3496
|
4
|
|
|
|
|
14
|
my @tab_lines = (); |
|
3497
|
4
|
|
|
|
|
8
|
my $tag; |
|
3498
|
|
|
|
|
|
|
my $tag2; |
|
3499
|
4
|
100
|
|
|
|
19
|
if ($self->{xhtml}) |
|
3500
|
|
|
|
|
|
|
{ |
|
3501
|
3
|
|
|
|
|
16
|
$tag = $self->get_tag('table', inside_tag => ' border="1" summary=""'); |
|
3502
|
|
|
|
|
|
|
} |
|
3503
|
|
|
|
|
|
|
else |
|
3504
|
|
|
|
|
|
|
{ |
|
3505
|
1
|
|
|
|
|
3
|
$tag = $self->get_tag('table', inside_tag => ' border="1"'); |
|
3506
|
|
|
|
|
|
|
} |
|
3507
|
4
|
|
|
|
|
15
|
push @tab_lines, "$tag\n"; |
|
3508
|
4
|
100
|
|
|
|
16
|
if ($caption) |
|
3509
|
|
|
|
|
|
|
{ |
|
3510
|
1
|
|
|
|
|
4
|
$caption =~ s/^\s+//; |
|
3511
|
1
|
|
|
|
|
6
|
$caption =~ s/\s+$//; |
|
3512
|
1
|
|
|
|
|
4
|
$tag = $self->get_tag('caption'); |
|
3513
|
1
|
|
|
|
|
7
|
$tag2 = $self->close_tag('caption'); |
|
3514
|
1
|
|
|
|
|
5
|
$caption = join('', $tag, $caption, $tag2, "\n"); |
|
3515
|
1
|
|
|
|
|
2
|
push @tab_lines, $caption; |
|
3516
|
|
|
|
|
|
|
} |
|
3517
|
|
|
|
|
|
|
# table header |
|
3518
|
4
|
|
|
|
|
9
|
my $thead = ''; |
|
3519
|
4
|
|
|
|
|
15
|
$tag = $self->get_tag('thead'); |
|
3520
|
4
|
|
|
|
|
8
|
$thead .= $tag; |
|
3521
|
4
|
|
|
|
|
30
|
$tag = $self->get_tag('tr'); |
|
3522
|
4
|
|
|
|
|
10
|
$thead .= $tag; |
|
3523
|
4
|
|
|
|
|
13
|
foreach my $col (@headings) |
|
3524
|
|
|
|
|
|
|
{ |
|
3525
|
18
|
|
|
|
|
46
|
$col =~ s/^\s+//; |
|
3526
|
18
|
|
|
|
|
37
|
$col =~ s/\s+$//; |
|
3527
|
18
|
|
|
|
|
40
|
$tag = $self->get_tag('th'); |
|
3528
|
18
|
|
|
|
|
81
|
$tag2 = $self->close_tag('th'); |
|
3529
|
18
|
|
|
|
|
53
|
$thead .= join('', $tag, $col, $tag2); |
|
3530
|
|
|
|
|
|
|
} |
|
3531
|
4
|
|
|
|
|
13
|
$tag = $self->close_tag('tr'); |
|
3532
|
4
|
|
|
|
|
11
|
$thead .= $tag; |
|
3533
|
4
|
|
|
|
|
13
|
$tag = $self->close_tag('thead'); |
|
3534
|
4
|
|
|
|
|
8
|
$thead .= $tag; |
|
3535
|
4
|
|
|
|
|
12
|
push @tab_lines, "${thead}\n"; |
|
3536
|
4
|
|
|
|
|
12
|
$tag = $self->get_tag('tbody'); |
|
3537
|
4
|
|
|
|
|
12
|
push @tab_lines, "$tag\n"; |
|
3538
|
|
|
|
|
|
|
|
|
3539
|
|
|
|
|
|
|
# each row |
|
3540
|
4
|
|
|
|
|
10
|
foreach my $row (@rows) |
|
3541
|
|
|
|
|
|
|
{ |
|
3542
|
141
|
|
|
|
|
175
|
my $this_row = ''; |
|
3543
|
141
|
|
|
|
|
258
|
$tag = $self->get_tag('tr'); |
|
3544
|
141
|
|
|
|
|
173
|
$this_row .= $tag; |
|
3545
|
141
|
|
|
|
|
837
|
my @cols = split(/\|/, $row); |
|
3546
|
141
|
|
|
|
|
252
|
foreach my $cell (@cols) |
|
3547
|
|
|
|
|
|
|
{ |
|
3548
|
1274
|
|
|
|
|
3646
|
$cell =~ s/^\s+//; |
|
3549
|
1274
|
|
|
|
|
2665
|
$cell =~ s/\s+$//; |
|
3550
|
1274
|
50
|
|
|
|
2590
|
if ($self->{escape_HTML_chars}) |
|
3551
|
|
|
|
|
|
|
{ |
|
3552
|
1274
|
|
|
|
|
2067
|
$cell = escape($cell); |
|
3553
|
|
|
|
|
|
|
} |
|
3554
|
1274
|
100
|
|
|
|
2326
|
if (!$cell) |
|
3555
|
|
|
|
|
|
|
{ |
|
3556
|
422
|
|
|
|
|
486
|
$cell = ' '; |
|
3557
|
|
|
|
|
|
|
} |
|
3558
|
1274
|
|
|
|
|
2193
|
$tag = $self->get_tag('td'); |
|
3559
|
1274
|
|
|
|
|
2281
|
$tag2 = $self->close_tag('td'); |
|
3560
|
1274
|
|
|
|
|
2517
|
$this_row .= join('', $tag, $cell, $tag2); |
|
3561
|
|
|
|
|
|
|
} |
|
3562
|
141
|
|
|
|
|
301
|
$tag = $self->close_tag('tr'); |
|
3563
|
141
|
|
|
|
|
171
|
$this_row .= $tag; |
|
3564
|
141
|
|
|
|
|
685
|
push @tab_lines, "${this_row}\n"; |
|
3565
|
|
|
|
|
|
|
} |
|
3566
|
|
|
|
|
|
|
|
|
3567
|
|
|
|
|
|
|
# end the table |
|
3568
|
4
|
|
|
|
|
16
|
$tag = $self->close_tag('tbody'); |
|
3569
|
4
|
|
|
|
|
15
|
push @tab_lines, "$tag\n"; |
|
3570
|
4
|
|
|
|
|
17
|
$tag = $self->get_tag('table', tag_type => TAG_END); |
|
3571
|
4
|
|
|
|
|
14
|
push @tab_lines, "$tag\n"; |
|
3572
|
|
|
|
|
|
|
|
|
3573
|
|
|
|
|
|
|
# and add the N rows line |
|
3574
|
4
|
|
|
|
|
14
|
$tag = $self->get_tag('p'); |
|
3575
|
4
|
|
|
|
|
14
|
push @tab_lines, "${tag}${n_rows}\n"; |
|
3576
|
4
|
100
|
|
|
|
19
|
if ($self->{xhtml}) |
|
3577
|
|
|
|
|
|
|
{ |
|
3578
|
3
|
|
|
|
|
12
|
$tag = $self->get_tag('p', tag_type => TAG_END); |
|
3579
|
3
|
|
|
|
|
481
|
$tab_lines[$#tab_lines] =~ s/\n/${tag}\n/; |
|
3580
|
|
|
|
|
|
|
} |
|
3581
|
|
|
|
|
|
|
|
|
3582
|
|
|
|
|
|
|
# replace the rows |
|
3583
|
4
|
|
|
|
|
12
|
@{$rows_ref} = @tab_lines; |
|
|
4
|
|
|
|
|
164
|
|
|
3584
|
|
|
|
|
|
|
} # make_pgsql_table |
|
3585
|
|
|
|
|
|
|
|
|
3586
|
|
|
|
|
|
|
=head2 make_border_table |
|
3587
|
|
|
|
|
|
|
|
|
3588
|
|
|
|
|
|
|
$self->make_border_table(rows_ref=>$rows_ref, |
|
3589
|
|
|
|
|
|
|
para_len=>0); |
|
3590
|
|
|
|
|
|
|
|
|
3591
|
|
|
|
|
|
|
Make a BORDER table. |
|
3592
|
|
|
|
|
|
|
|
|
3593
|
|
|
|
|
|
|
=cut |
|
3594
|
|
|
|
|
|
|
sub make_border_table ($%) |
|
3595
|
|
|
|
|
|
|
{ |
|
3596
|
4
|
|
|
4
|
1
|
8
|
my $self = shift; |
|
3597
|
4
|
|
|
|
|
18
|
my %args = ( |
|
3598
|
|
|
|
|
|
|
rows_ref => undef, |
|
3599
|
|
|
|
|
|
|
para_len => 0, |
|
3600
|
|
|
|
|
|
|
@_ |
|
3601
|
|
|
|
|
|
|
); |
|
3602
|
4
|
|
|
|
|
10
|
my $rows_ref = $args{rows_ref}; |
|
3603
|
4
|
|
|
|
|
6
|
my $para_len = $args{para_len}; |
|
3604
|
|
|
|
|
|
|
|
|
3605
|
|
|
|
|
|
|
# a BORDER table can start with an optional table-caption, |
|
3606
|
|
|
|
|
|
|
# then it has a row of +------+-----+ |
|
3607
|
|
|
|
|
|
|
# then it has a row of column headings separated by | |
|
3608
|
|
|
|
|
|
|
# then it has a row of +------+-----+ |
|
3609
|
|
|
|
|
|
|
# then it has one or more rows of column values separated by | |
|
3610
|
|
|
|
|
|
|
# then it has a row of +------+-----+ |
|
3611
|
4
|
|
|
|
|
7
|
my @rows = @{$rows_ref}; |
|
|
4
|
|
|
|
|
17
|
|
|
3612
|
4
|
|
|
|
|
9
|
my $caption = ''; |
|
3613
|
4
|
50
|
33
|
|
|
39
|
if ($rows[0] !~ /\|/ && $rows[0] =~ /^\s*\w+/) # possible caption |
|
3614
|
|
|
|
|
|
|
{ |
|
3615
|
0
|
|
|
|
|
0
|
$caption = shift @rows; |
|
3616
|
|
|
|
|
|
|
} |
|
3617
|
|
|
|
|
|
|
# skip the +----+---+ line |
|
3618
|
4
|
|
|
|
|
8
|
shift @rows; |
|
3619
|
|
|
|
|
|
|
# get the head row and cut off the start and end | |
|
3620
|
4
|
|
|
|
|
36
|
my $head_row = shift @rows; |
|
3621
|
4
|
|
|
|
|
17
|
$head_row =~ s/^\s*\|//; |
|
3622
|
4
|
|
|
|
|
16
|
$head_row =~ s/\|$//; |
|
3623
|
4
|
|
|
|
|
27
|
my @headings = split(/\s+\|\s+/, $head_row); |
|
3624
|
|
|
|
|
|
|
# skip the +----+---+ line |
|
3625
|
4
|
|
|
|
|
8
|
shift @rows; |
|
3626
|
|
|
|
|
|
|
# skip the last +----+---+ line |
|
3627
|
4
|
|
|
|
|
7
|
pop @rows; |
|
3628
|
|
|
|
|
|
|
|
|
3629
|
|
|
|
|
|
|
# now start making the table |
|
3630
|
4
|
|
|
|
|
9
|
my @tab_lines = (); |
|
3631
|
4
|
|
|
|
|
5
|
my $tag; |
|
3632
|
4
|
100
|
|
|
|
15
|
if ($self->{xhtml}) |
|
3633
|
|
|
|
|
|
|
{ |
|
3634
|
3
|
|
|
|
|
16
|
$tag = $self->get_tag('table', inside_tag => ' border="1" summary=""'); |
|
3635
|
|
|
|
|
|
|
} |
|
3636
|
|
|
|
|
|
|
else |
|
3637
|
|
|
|
|
|
|
{ |
|
3638
|
1
|
|
|
|
|
4
|
$tag = $self->get_tag('table', inside_tag => ' border="1"'); |
|
3639
|
|
|
|
|
|
|
} |
|
3640
|
4
|
|
|
|
|
13
|
push @tab_lines, "$tag\n"; |
|
3641
|
4
|
50
|
|
|
|
13
|
if ($caption) |
|
3642
|
|
|
|
|
|
|
{ |
|
3643
|
0
|
|
|
|
|
0
|
$caption =~ s/^\s+//; |
|
3644
|
0
|
|
|
|
|
0
|
$caption =~ s/\s+$//; |
|
3645
|
0
|
|
|
|
|
0
|
$tag = $self->get_tag('caption'); |
|
3646
|
0
|
|
|
|
|
0
|
$caption = $tag . $caption; |
|
3647
|
0
|
|
|
|
|
0
|
$tag = $self->close_tag('caption'); |
|
3648
|
0
|
|
|
|
|
0
|
$caption .= $tag; |
|
3649
|
0
|
|
|
|
|
0
|
push @tab_lines, "$caption\n"; |
|
3650
|
|
|
|
|
|
|
} |
|
3651
|
|
|
|
|
|
|
# table header |
|
3652
|
4
|
|
|
|
|
7
|
my $thead = ''; |
|
3653
|
4
|
|
|
|
|
13
|
$tag = $self->get_tag('thead'); |
|
3654
|
4
|
|
|
|
|
12
|
$thead .= $tag; |
|
3655
|
4
|
|
|
|
|
13
|
$tag = $self->get_tag('tr'); |
|
3656
|
4
|
|
|
|
|
8
|
$thead .= $tag; |
|
3657
|
4
|
|
|
|
|
10
|
foreach my $col (@headings) |
|
3658
|
|
|
|
|
|
|
{ |
|
3659
|
12
|
|
|
|
|
31
|
$col =~ s/^\s+//; |
|
3660
|
12
|
|
|
|
|
31
|
$col =~ s/\s+$//; |
|
3661
|
12
|
|
|
|
|
24
|
$tag = $self->get_tag('th'); |
|
3662
|
12
|
|
|
|
|
16
|
$thead .= $tag; |
|
3663
|
12
|
|
|
|
|
16
|
$thead .= $col; |
|
3664
|
12
|
|
|
|
|
27
|
$tag = $self->close_tag('th'); |
|
3665
|
12
|
|
|
|
|
27
|
$thead .= $tag; |
|
3666
|
|
|
|
|
|
|
} |
|
3667
|
4
|
|
|
|
|
13
|
$tag = $self->close_tag('tr'); |
|
3668
|
4
|
|
|
|
|
8
|
$thead .= $tag; |
|
3669
|
4
|
|
|
|
|
11
|
$tag = $self->close_tag('thead'); |
|
3670
|
4
|
|
|
|
|
6
|
$thead .= $tag; |
|
3671
|
4
|
|
|
|
|
10
|
push @tab_lines, "${thead}\n"; |
|
3672
|
4
|
|
|
|
|
14
|
$tag = $self->get_tag('tbody'); |
|
3673
|
4
|
|
|
|
|
11
|
push @tab_lines, "$tag\n"; |
|
3674
|
|
|
|
|
|
|
|
|
3675
|
|
|
|
|
|
|
# each row |
|
3676
|
4
|
|
|
|
|
10
|
foreach my $row (@rows) |
|
3677
|
|
|
|
|
|
|
{ |
|
3678
|
|
|
|
|
|
|
# cut off the start and end | |
|
3679
|
32
|
|
|
|
|
121
|
$row =~ s/^\s*\|//; |
|
3680
|
32
|
|
|
|
|
88
|
$row =~ s/\|$//; |
|
3681
|
32
|
|
|
|
|
45
|
my $this_row = ''; |
|
3682
|
32
|
|
|
|
|
60
|
$tag = $self->get_tag('tr'); |
|
3683
|
32
|
|
|
|
|
40
|
$this_row .= $tag; |
|
3684
|
32
|
|
|
|
|
99
|
my @cols = split(/\|/, $row); |
|
3685
|
32
|
|
|
|
|
71
|
foreach my $cell (@cols) |
|
3686
|
|
|
|
|
|
|
{ |
|
3687
|
112
|
|
|
|
|
288
|
$cell =~ s/^\s+//; |
|
3688
|
112
|
|
|
|
|
309
|
$cell =~ s/\s+$//; |
|
3689
|
112
|
50
|
|
|
|
230
|
if ($self->{escape_HTML_chars}) |
|
3690
|
|
|
|
|
|
|
{ |
|
3691
|
112
|
|
|
|
|
188
|
$cell = escape($cell); |
|
3692
|
|
|
|
|
|
|
} |
|
3693
|
112
|
50
|
|
|
|
212
|
if (!$cell) |
|
3694
|
|
|
|
|
|
|
{ |
|
3695
|
0
|
|
|
|
|
0
|
$cell = ' '; |
|
3696
|
|
|
|
|
|
|
} |
|
3697
|
112
|
|
|
|
|
203
|
$tag = $self->get_tag('td'); |
|
3698
|
112
|
|
|
|
|
139
|
$this_row .= $tag; |
|
3699
|
112
|
|
|
|
|
124
|
$this_row .= $cell; |
|
3700
|
112
|
|
|
|
|
197
|
$tag = $self->close_tag('td'); |
|
3701
|
112
|
|
|
|
|
189
|
$this_row .= $tag; |
|
3702
|
|
|
|
|
|
|
} |
|
3703
|
32
|
|
|
|
|
60
|
$tag = $self->close_tag('tr'); |
|
3704
|
32
|
|
|
|
|
37
|
$this_row .= $tag; |
|
3705
|
32
|
|
|
|
|
116
|
push @tab_lines, "${this_row}\n"; |
|
3706
|
|
|
|
|
|
|
} |
|
3707
|
|
|
|
|
|
|
|
|
3708
|
|
|
|
|
|
|
# end the table |
|
3709
|
4
|
|
|
|
|
16
|
$tag = $self->close_tag('tbody'); |
|
3710
|
4
|
|
|
|
|
13
|
push @tab_lines, "$tag\n"; |
|
3711
|
4
|
|
|
|
|
14
|
$tag = $self->get_tag('table', tag_type => TAG_END); |
|
3712
|
4
|
|
|
|
|
12
|
push @tab_lines, "$tag\n"; |
|
3713
|
|
|
|
|
|
|
|
|
3714
|
|
|
|
|
|
|
# replace the rows |
|
3715
|
4
|
|
|
|
|
8
|
@{$rows_ref} = @tab_lines; |
|
|
4
|
|
|
|
|
83
|
|
|
3716
|
|
|
|
|
|
|
} # make_border_table |
|
3717
|
|
|
|
|
|
|
|
|
3718
|
|
|
|
|
|
|
=head2 make_delim_table |
|
3719
|
|
|
|
|
|
|
|
|
3720
|
|
|
|
|
|
|
$self->make_delim_table(rows_ref=>$rows_ref, |
|
3721
|
|
|
|
|
|
|
para_len=>0); |
|
3722
|
|
|
|
|
|
|
|
|
3723
|
|
|
|
|
|
|
Make a Delimited table. |
|
3724
|
|
|
|
|
|
|
|
|
3725
|
|
|
|
|
|
|
=cut |
|
3726
|
|
|
|
|
|
|
sub make_delim_table ($%) |
|
3727
|
|
|
|
|
|
|
{ |
|
3728
|
7
|
|
|
7
|
1
|
15
|
my $self = shift; |
|
3729
|
7
|
|
|
|
|
30
|
my %args = ( |
|
3730
|
|
|
|
|
|
|
rows_ref => undef, |
|
3731
|
|
|
|
|
|
|
para_len => 0, |
|
3732
|
|
|
|
|
|
|
@_ |
|
3733
|
|
|
|
|
|
|
); |
|
3734
|
7
|
|
|
|
|
17
|
my $rows_ref = $args{rows_ref}; |
|
3735
|
7
|
|
|
|
|
12
|
my $para_len = $args{para_len}; |
|
3736
|
|
|
|
|
|
|
|
|
3737
|
|
|
|
|
|
|
# a DELIM table can start with an optional table-caption, |
|
3738
|
|
|
|
|
|
|
# then it has at least two rows which start and end and are |
|
3739
|
|
|
|
|
|
|
# punctuated by a non-alphanumeric delimiter. |
|
3740
|
|
|
|
|
|
|
# A DELIM table has no table-header. |
|
3741
|
7
|
|
|
|
|
13
|
my @rows = @{$rows_ref}; |
|
|
7
|
|
|
|
|
20
|
|
|
3742
|
7
|
|
|
|
|
15
|
my $caption = ''; |
|
3743
|
7
|
100
|
100
|
|
|
55
|
if ($rows[0] !~ /\|/ && $rows[0] =~ /^\s*\w+/) # possible caption |
|
3744
|
|
|
|
|
|
|
{ |
|
3745
|
1
|
|
|
|
|
3
|
$caption = shift @rows; |
|
3746
|
|
|
|
|
|
|
} |
|
3747
|
|
|
|
|
|
|
# figure out the delimiter |
|
3748
|
7
|
|
|
|
|
12
|
my $delim = ''; |
|
3749
|
7
|
50
|
|
|
|
35
|
if ($rows[0] =~ /^\s*([^[:alnum:]])/) |
|
3750
|
|
|
|
|
|
|
{ |
|
3751
|
7
|
|
|
|
|
25
|
$delim = $1; |
|
3752
|
|
|
|
|
|
|
} |
|
3753
|
|
|
|
|
|
|
else |
|
3754
|
|
|
|
|
|
|
{ |
|
3755
|
0
|
|
|
|
|
0
|
return 0; |
|
3756
|
|
|
|
|
|
|
} |
|
3757
|
|
|
|
|
|
|
|
|
3758
|
|
|
|
|
|
|
# now start making the table |
|
3759
|
7
|
|
|
|
|
16
|
my @tab_lines = (); |
|
3760
|
7
|
|
|
|
|
10
|
my $tag; |
|
3761
|
7
|
100
|
|
|
|
22
|
if ($self->{xhtml}) |
|
3762
|
|
|
|
|
|
|
{ |
|
3763
|
5
|
|
|
|
|
16
|
$tag = $self->get_tag('table', inside_tag => ' border="1" summary=""'); |
|
3764
|
|
|
|
|
|
|
} |
|
3765
|
|
|
|
|
|
|
else |
|
3766
|
|
|
|
|
|
|
{ |
|
3767
|
2
|
|
|
|
|
5
|
$tag = $self->get_tag('table', inside_tag => ' border="1"'); |
|
3768
|
|
|
|
|
|
|
} |
|
3769
|
7
|
|
|
|
|
19
|
push @tab_lines, "$tag\n"; |
|
3770
|
7
|
100
|
|
|
|
20
|
if ($caption) |
|
3771
|
|
|
|
|
|
|
{ |
|
3772
|
1
|
|
|
|
|
4
|
$caption =~ s/^\s+//; |
|
3773
|
1
|
|
|
|
|
6
|
$caption =~ s/\s+$//; |
|
3774
|
1
|
|
|
|
|
5
|
$tag = $self->get_tag('caption'); |
|
3775
|
1
|
|
|
|
|
3
|
$caption = $tag . $caption; |
|
3776
|
1
|
|
|
|
|
4
|
$tag = $self->close_tag('caption'); |
|
3777
|
1
|
|
|
|
|
3
|
$caption .= $tag; |
|
3778
|
1
|
|
|
|
|
3
|
push @tab_lines, "$caption\n"; |
|
3779
|
|
|
|
|
|
|
} |
|
3780
|
|
|
|
|
|
|
|
|
3781
|
|
|
|
|
|
|
# each row |
|
3782
|
7
|
|
|
|
|
17
|
foreach my $row (@rows) |
|
3783
|
|
|
|
|
|
|
{ |
|
3784
|
|
|
|
|
|
|
# cut off the start and end delimiter |
|
3785
|
30
|
|
|
|
|
245
|
$row =~ s/^\s*[${delim}]//; |
|
3786
|
30
|
|
|
|
|
153
|
$row =~ s/[${delim}]$//; |
|
3787
|
30
|
|
|
|
|
46
|
my $this_row = ''; |
|
3788
|
30
|
|
|
|
|
63
|
$tag = $self->get_tag('tr'); |
|
3789
|
30
|
|
|
|
|
39
|
$this_row .= $tag; |
|
3790
|
30
|
|
|
|
|
162
|
my @cols = split(/[${delim}]/, $row); |
|
3791
|
30
|
|
|
|
|
55
|
foreach my $cell (@cols) |
|
3792
|
|
|
|
|
|
|
{ |
|
3793
|
95
|
|
|
|
|
221
|
$cell =~ s/^\s+//; |
|
3794
|
95
|
|
|
|
|
243
|
$cell =~ s/\s+$//; |
|
3795
|
95
|
50
|
|
|
|
218
|
if ($self->{escape_HTML_chars}) |
|
3796
|
|
|
|
|
|
|
{ |
|
3797
|
95
|
|
|
|
|
155
|
$cell = escape($cell); |
|
3798
|
|
|
|
|
|
|
} |
|
3799
|
95
|
100
|
|
|
|
190
|
if (!$cell) |
|
3800
|
|
|
|
|
|
|
{ |
|
3801
|
1
|
|
|
|
|
3
|
$cell = ' '; |
|
3802
|
|
|
|
|
|
|
} |
|
3803
|
95
|
|
|
|
|
180
|
$tag = $self->get_tag('td'); |
|
3804
|
95
|
|
|
|
|
174
|
$this_row .= $tag; |
|
3805
|
95
|
|
|
|
|
108
|
$this_row .= $cell; |
|
3806
|
95
|
|
|
|
|
160
|
$tag = $self->close_tag('td'); |
|
3807
|
95
|
|
|
|
|
282
|
$this_row .= $tag; |
|
3808
|
|
|
|
|
|
|
} |
|
3809
|
30
|
|
|
|
|
65
|
$tag = $self->close_tag('tr'); |
|
3810
|
30
|
|
|
|
|
41
|
$this_row .= $tag; |
|
3811
|
30
|
|
|
|
|
107
|
push @tab_lines, "${this_row}\n"; |
|
3812
|
|
|
|
|
|
|
} |
|
3813
|
|
|
|
|
|
|
|
|
3814
|
|
|
|
|
|
|
# end the table |
|
3815
|
7
|
|
|
|
|
21
|
$tag = $self->get_tag('table', tag_type => TAG_END); |
|
3816
|
7
|
|
|
|
|
18
|
push @tab_lines, "$tag\n"; |
|
3817
|
|
|
|
|
|
|
|
|
3818
|
|
|
|
|
|
|
# replace the rows |
|
3819
|
7
|
|
|
|
|
11
|
@{$rows_ref} = @tab_lines; |
|
|
7
|
|
|
|
|
71
|
|
|
3820
|
|
|
|
|
|
|
} # make_delim_table |
|
3821
|
|
|
|
|
|
|
|
|
3822
|
|
|
|
|
|
|
=head2 is_preformatted |
|
3823
|
|
|
|
|
|
|
|
|
3824
|
|
|
|
|
|
|
if ($self->is_preformatted($line)) |
|
3825
|
|
|
|
|
|
|
{ |
|
3826
|
|
|
|
|
|
|
... |
|
3827
|
|
|
|
|
|
|
} |
|
3828
|
|
|
|
|
|
|
|
|
3829
|
|
|
|
|
|
|
Returns true if the passed string is considered to be preformatted. |
|
3830
|
|
|
|
|
|
|
|
|
3831
|
|
|
|
|
|
|
=cut |
|
3832
|
|
|
|
|
|
|
sub is_preformatted ($$) |
|
3833
|
|
|
|
|
|
|
{ |
|
3834
|
921
|
|
|
921
|
1
|
1079
|
my $self = shift; |
|
3835
|
921
|
|
|
|
|
1328
|
my $line = shift; |
|
3836
|
|
|
|
|
|
|
|
|
3837
|
921
|
|
|
|
|
1288
|
my $pre_white_min = $self->{preformat_whitespace_min}; |
|
3838
|
921
|
|
66
|
|
|
5934
|
my $result = ( |
|
3839
|
|
|
|
|
|
|
($line =~ /\s{$pre_white_min,}\S+/o) # whitespaces |
|
3840
|
|
|
|
|
|
|
|| ($line =~ /\.{$pre_white_min,}\S+/o) |
|
3841
|
|
|
|
|
|
|
); # dots |
|
3842
|
921
|
|
|
|
|
4537
|
return $result; |
|
3843
|
|
|
|
|
|
|
} |
|
3844
|
|
|
|
|
|
|
|
|
3845
|
|
|
|
|
|
|
=head2 split_end_explicit_preformat |
|
3846
|
|
|
|
|
|
|
|
|
3847
|
|
|
|
|
|
|
$front = $self->split_end_explicit_preformat(para_ref=>$para_ref); |
|
3848
|
|
|
|
|
|
|
|
|
3849
|
|
|
|
|
|
|
Modifies the given string, and returns the front preformatted part. |
|
3850
|
|
|
|
|
|
|
|
|
3851
|
|
|
|
|
|
|
=cut |
|
3852
|
|
|
|
|
|
|
sub split_end_explicit_preformat ($%) |
|
3853
|
|
|
|
|
|
|
{ |
|
3854
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
|
3855
|
0
|
|
|
|
|
0
|
my %args = ( |
|
3856
|
|
|
|
|
|
|
para_ref => undef, |
|
3857
|
|
|
|
|
|
|
@_ |
|
3858
|
|
|
|
|
|
|
); |
|
3859
|
0
|
|
|
|
|
0
|
my $para_ref = $args{para_ref}; |
|
3860
|
|
|
|
|
|
|
|
|
3861
|
0
|
|
|
|
|
0
|
my $tag = ''; |
|
3862
|
0
|
|
|
|
|
0
|
my $pre_str = ''; |
|
3863
|
0
|
|
|
|
|
0
|
my $post_str = ''; |
|
3864
|
0
|
0
|
|
|
|
0
|
if ($self->{__mode} & $PRE_EXPLICIT) |
|
3865
|
|
|
|
|
|
|
{ |
|
3866
|
0
|
|
|
|
|
0
|
my $pe_mark = $self->{preformat_end_marker}; |
|
3867
|
0
|
0
|
|
|
|
0
|
if (${para_ref} =~ /$pe_mark/io) |
|
3868
|
|
|
|
|
|
|
{ |
|
3869
|
0
|
|
|
|
|
0
|
($pre_str, $post_str) = split(/$pe_mark/, ${$para_ref}, 2); |
|
|
0
|
|
|
|
|
0
|
|
|
3870
|
0
|
0
|
|
|
|
0
|
if ($self->{escape_HTML_chars}) |
|
3871
|
|
|
|
|
|
|
{ |
|
3872
|
0
|
|
|
|
|
0
|
$pre_str = escape($pre_str); |
|
3873
|
|
|
|
|
|
|
} |
|
3874
|
0
|
|
|
|
|
0
|
$tag = $self->close_tag('pre'); |
|
3875
|
0
|
|
|
|
|
0
|
$pre_str .= "${tag}\n"; |
|
3876
|
0
|
|
|
|
|
0
|
$self->{__mode} ^= (($PRE | $PRE_EXPLICIT) & $self->{__mode}); |
|
3877
|
|
|
|
|
|
|
} |
|
3878
|
|
|
|
|
|
|
else # no end -- the whole thing is preformatted |
|
3879
|
|
|
|
|
|
|
{ |
|
3880
|
0
|
|
|
|
|
0
|
$pre_str = ${$para_ref}; |
|
|
0
|
|
|
|
|
0
|
|
|
3881
|
0
|
0
|
|
|
|
0
|
if ($self->{escape_HTML_chars}) |
|
3882
|
|
|
|
|
|
|
{ |
|
3883
|
0
|
|
|
|
|
0
|
$pre_str = escape($pre_str); |
|
3884
|
|
|
|
|
|
|
} |
|
3885
|
0
|
|
|
|
|
0
|
${$para_ref} = ''; |
|
|
0
|
|
|
|
|
0
|
|
|
3886
|
|
|
|
|
|
|
} |
|
3887
|
|
|
|
|
|
|
} |
|
3888
|
0
|
|
|
|
|
0
|
return $pre_str; |
|
3889
|
|
|
|
|
|
|
} # split_end_explicit_preformat |
|
3890
|
|
|
|
|
|
|
|
|
3891
|
|
|
|
|
|
|
=head2 endpreformat |
|
3892
|
|
|
|
|
|
|
|
|
3893
|
|
|
|
|
|
|
$self->endpreformat(para_lines_ref=>$para_lines_ref, |
|
3894
|
|
|
|
|
|
|
para_action_ref=>$para_action_ref, |
|
3895
|
|
|
|
|
|
|
ind=>0, |
|
3896
|
|
|
|
|
|
|
prev_ref=>$prev_ref); |
|
3897
|
|
|
|
|
|
|
|
|
3898
|
|
|
|
|
|
|
End a preformatted section. |
|
3899
|
|
|
|
|
|
|
|
|
3900
|
|
|
|
|
|
|
=cut |
|
3901
|
|
|
|
|
|
|
sub endpreformat ($%) |
|
3902
|
|
|
|
|
|
|
{ |
|
3903
|
85
|
|
|
85
|
1
|
104
|
my $self = shift; |
|
3904
|
85
|
|
|
|
|
387
|
my %args = ( |
|
3905
|
|
|
|
|
|
|
para_lines_ref => undef, |
|
3906
|
|
|
|
|
|
|
para_action_ref => undef, |
|
3907
|
|
|
|
|
|
|
ind => 0, |
|
3908
|
|
|
|
|
|
|
prev_ref => undef, |
|
3909
|
|
|
|
|
|
|
@_ |
|
3910
|
|
|
|
|
|
|
); |
|
3911
|
85
|
|
|
|
|
130
|
my $para_lines_ref = $args{para_lines_ref}; |
|
3912
|
85
|
|
|
|
|
105
|
my $para_action_ref = $args{para_action_ref}; |
|
3913
|
85
|
|
|
|
|
100
|
my $ind = $args{ind}; |
|
3914
|
85
|
|
|
|
|
98
|
my $prev_ref = $args{prev_ref}; |
|
3915
|
|
|
|
|
|
|
|
|
3916
|
85
|
|
|
|
|
99
|
my $tag = ''; |
|
3917
|
85
|
100
|
|
|
|
198
|
if ($self->{__mode} & $PRE_EXPLICIT) |
|
3918
|
|
|
|
|
|
|
{ |
|
3919
|
5
|
|
|
|
|
7
|
my $pe_mark = $self->{preformat_end_marker}; |
|
3920
|
5
|
100
|
|
|
|
57
|
if ($para_lines_ref->[$ind] =~ /$pe_mark/io) |
|
3921
|
|
|
|
|
|
|
{ |
|
3922
|
1
|
50
|
|
|
|
4
|
if ($ind == 0) |
|
3923
|
|
|
|
|
|
|
{ |
|
3924
|
0
|
|
|
|
|
0
|
$tag = $self->close_tag('pre'); |
|
3925
|
0
|
|
|
|
|
0
|
$para_lines_ref->[$ind] = "${tag}\n"; |
|
3926
|
|
|
|
|
|
|
} |
|
3927
|
|
|
|
|
|
|
else |
|
3928
|
|
|
|
|
|
|
{ |
|
3929
|
1
|
|
|
|
|
5
|
$tag = $self->close_tag('pre'); |
|
3930
|
1
|
|
|
|
|
4
|
$para_lines_ref->[$ind - 1] .= "${tag}\n"; |
|
3931
|
1
|
|
|
|
|
2
|
$para_lines_ref->[$ind] = ""; |
|
3932
|
|
|
|
|
|
|
} |
|
3933
|
1
|
|
|
|
|
4
|
$self->{__mode} ^= (($PRE | $PRE_EXPLICIT) & $self->{__mode}); |
|
3934
|
1
|
|
|
|
|
2
|
$para_action_ref->[$ind] |= $END; |
|
3935
|
|
|
|
|
|
|
} |
|
3936
|
5
|
|
|
|
|
16
|
return; |
|
3937
|
|
|
|
|
|
|
} |
|
3938
|
|
|
|
|
|
|
|
|
3939
|
80
|
50
|
33
|
|
|
175
|
if ( |
|
|
|
|
66
|
|
|
|
|
|
3940
|
|
|
|
|
|
|
!$self->is_preformatted($para_lines_ref->[$ind]) |
|
3941
|
|
|
|
|
|
|
&& ( |
|
3942
|
|
|
|
|
|
|
$self->{endpreformat_trigger_lines} == 1 |
|
3943
|
|
|
|
|
|
|
|| ($ind + 1 < @{$para_lines_ref} |
|
3944
|
|
|
|
|
|
|
&& !$self->is_preformatted($para_lines_ref->[$ind + 1])) |
|
3945
|
|
|
|
|
|
|
|| $ind + 1 >= @{$para_lines_ref} # last line of para |
|
3946
|
|
|
|
|
|
|
) |
|
3947
|
|
|
|
|
|
|
) |
|
3948
|
|
|
|
|
|
|
{ |
|
3949
|
4
|
50
|
|
|
|
23
|
if ($ind == 0) |
|
3950
|
|
|
|
|
|
|
{ |
|
3951
|
0
|
|
|
|
|
0
|
$tag = $self->close_tag('pre'); |
|
3952
|
0
|
|
|
|
|
0
|
${$prev_ref} = "${tag}\n"; |
|
|
0
|
|
|
|
|
0
|
|
|
3953
|
|
|
|
|
|
|
} |
|
3954
|
|
|
|
|
|
|
else |
|
3955
|
|
|
|
|
|
|
{ |
|
3956
|
4
|
|
|
|
|
11
|
$tag = $self->close_tag('pre'); |
|
3957
|
4
|
|
|
|
|
12
|
$para_lines_ref->[$ind - 1] .= "${tag}\n"; |
|
3958
|
|
|
|
|
|
|
} |
|
3959
|
4
|
|
|
|
|
9
|
$self->{__mode} ^= ($PRE & $self->{__mode}); |
|
3960
|
4
|
|
|
|
|
15
|
$para_action_ref->[$ind] |= $END; |
|
3961
|
|
|
|
|
|
|
} |
|
3962
|
|
|
|
|
|
|
} # endpreformat |
|
3963
|
|
|
|
|
|
|
|
|
3964
|
|
|
|
|
|
|
=head2 preformat |
|
3965
|
|
|
|
|
|
|
|
|
3966
|
|
|
|
|
|
|
$self->preformat(mode_ref=>$mode_ref, |
|
3967
|
|
|
|
|
|
|
line_ref=>$line_ref, |
|
3968
|
|
|
|
|
|
|
line_action_ref=>$line_action_ref, |
|
3969
|
|
|
|
|
|
|
prev_ref=>$prev_ref, |
|
3970
|
|
|
|
|
|
|
next_ref=>$next_ref, |
|
3971
|
|
|
|
|
|
|
prev_action_ref); |
|
3972
|
|
|
|
|
|
|
|
|
3973
|
|
|
|
|
|
|
Detect and process a preformatted section. |
|
3974
|
|
|
|
|
|
|
|
|
3975
|
|
|
|
|
|
|
=cut |
|
3976
|
|
|
|
|
|
|
sub preformat ($%) |
|
3977
|
|
|
|
|
|
|
{ |
|
3978
|
823
|
|
|
823
|
1
|
1209
|
my $self = shift; |
|
3979
|
823
|
|
|
|
|
4989
|
my %args = ( |
|
3980
|
|
|
|
|
|
|
mode_ref => undef, |
|
3981
|
|
|
|
|
|
|
line_ref => undef, |
|
3982
|
|
|
|
|
|
|
line_action_ref => undef, |
|
3983
|
|
|
|
|
|
|
prev_ref => undef, |
|
3984
|
|
|
|
|
|
|
next_ref => undef, |
|
3985
|
|
|
|
|
|
|
prev_action_ref => undef, |
|
3986
|
|
|
|
|
|
|
@_ |
|
3987
|
|
|
|
|
|
|
); |
|
3988
|
823
|
|
|
|
|
1196
|
my $mode_ref = $args{mode_ref}; |
|
3989
|
823
|
|
|
|
|
1147
|
my $line_ref = $args{line_ref}; |
|
3990
|
823
|
|
|
|
|
959
|
my $line_action_ref = $args{line_action_ref}; |
|
3991
|
823
|
|
|
|
|
882
|
my $prev_ref = $args{prev_ref}; |
|
3992
|
823
|
|
|
|
|
982
|
my $next_ref = $args{next_ref}; |
|
3993
|
823
|
|
|
|
|
949
|
my $prev_action_ref = $args{prev_action_ref}; |
|
3994
|
|
|
|
|
|
|
|
|
3995
|
823
|
|
|
|
|
1013
|
my $tag = ''; |
|
3996
|
823
|
100
|
|
|
|
1793
|
if ($self->{use_preformat_marker}) |
|
3997
|
|
|
|
|
|
|
{ |
|
3998
|
6
|
|
|
|
|
11
|
my $pstart = $self->{preformat_start_marker}; |
|
3999
|
6
|
100
|
|
|
|
8
|
if (${$line_ref} =~ /$pstart/io) |
|
|
6
|
|
|
|
|
81
|
|
|
4000
|
|
|
|
|
|
|
{ |
|
4001
|
1
|
50
|
|
|
|
3
|
if (${$prev_ref} =~ s/ $//) |
|
|
1
|
|
|
|
|
5
|
|
|
4002
|
|
|
|
|
|
|
{ |
|
4003
|
0
|
|
|
|
|
0
|
pop @{$self->{__tags}}; |
|
|
0
|
|
|
|
|
0
|
|
|
4004
|
|
|
|
|
|
|
} |
|
4005
|
|
|
|
|
|
|
$tag = |
|
4006
|
1
|
|
|
|
|
4
|
$self->get_tag('pre', inside_tag => " class='quote_explicit'"); |
|
4007
|
1
|
|
|
|
|
3
|
${$line_ref} = "${tag}\n"; |
|
|
1
|
|
|
|
|
3
|
|
|
4008
|
1
|
|
|
|
|
9
|
${$mode_ref} |= $PRE | $PRE_EXPLICIT; |
|
|
1
|
|
|
|
|
3
|
|
|
4009
|
1
|
|
|
|
|
2
|
${$line_action_ref} |= $PRE; |
|
|
1
|
|
|
|
|
2
|
|
|
4010
|
1
|
|
|
|
|
3
|
return; |
|
4011
|
|
|
|
|
|
|
} |
|
4012
|
|
|
|
|
|
|
} |
|
4013
|
|
|
|
|
|
|
|
|
4014
|
822
|
100
|
100
|
|
|
881
|
if ( |
|
|
|
|
33
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
4015
|
|
|
|
|
|
|
!(${$line_action_ref} & $MAILQUOTE) |
|
4016
|
|
|
|
|
|
|
&& !(${$prev_action_ref} & $MAILQUOTE) |
|
4017
|
|
|
|
|
|
|
&& ( |
|
4018
|
|
|
|
|
|
|
$self->{preformat_trigger_lines} == 0 |
|
4019
|
|
|
|
|
|
|
|| ( |
|
4020
|
|
|
|
|
|
|
$self->is_preformatted(${$line_ref}) |
|
4021
|
|
|
|
|
|
|
&& ( |
|
4022
|
|
|
|
|
|
|
$self->{preformat_trigger_lines} == 1 |
|
4023
|
|
|
|
|
|
|
|| (defined $next_ref |
|
4024
|
|
|
|
|
|
|
&& $self->is_preformatted(${$next_ref})) |
|
4025
|
|
|
|
|
|
|
) |
|
4026
|
|
|
|
|
|
|
) |
|
4027
|
|
|
|
|
|
|
) |
|
4028
|
|
|
|
|
|
|
) |
|
4029
|
|
|
|
|
|
|
{ |
|
4030
|
20
|
50
|
|
|
|
33
|
if (${$prev_ref} =~ s/ $//) |
|
|
20
|
|
|
|
|
69
|
|
|
4031
|
|
|
|
|
|
|
{ |
|
4032
|
0
|
|
|
|
|
0
|
pop @{$self->{__tags}}; |
|
|
0
|
|
|
|
|
0
|
|
|
4033
|
|
|
|
|
|
|
} |
|
4034
|
20
|
|
|
|
|
256
|
$tag = $self->get_tag('pre'); |
|
4035
|
20
|
|
|
|
|
200
|
${$line_ref} =~ s/^/${tag}\n/; |
|
|
20
|
|
|
|
|
125
|
|
|
4036
|
20
|
|
|
|
|
34
|
${$mode_ref} |= $PRE; |
|
|
20
|
|
|
|
|
29
|
|
|
4037
|
20
|
|
|
|
|
36
|
${$line_action_ref} |= $PRE; |
|
|
20
|
|
|
|
|
60
|
|
|
4038
|
|
|
|
|
|
|
} |
|
4039
|
|
|
|
|
|
|
} # preformat |
|
4040
|
|
|
|
|
|
|
|
|
4041
|
|
|
|
|
|
|
=head2 make_new_anchor |
|
4042
|
|
|
|
|
|
|
|
|
4043
|
|
|
|
|
|
|
$anchor = $self->make_new_anchor($heading_level); |
|
4044
|
|
|
|
|
|
|
|
|
4045
|
|
|
|
|
|
|
Make a new anchor. |
|
4046
|
|
|
|
|
|
|
|
|
4047
|
|
|
|
|
|
|
=cut |
|
4048
|
|
|
|
|
|
|
sub make_new_anchor ($$) |
|
4049
|
|
|
|
|
|
|
{ |
|
4050
|
73
|
|
|
73
|
1
|
131
|
my $self = shift; |
|
4051
|
73
|
|
|
|
|
105
|
my $heading_level = shift; |
|
4052
|
|
|
|
|
|
|
|
|
4053
|
73
|
|
|
|
|
95
|
my ($anchor, $i); |
|
4054
|
|
|
|
|
|
|
|
|
4055
|
73
|
100
|
|
|
|
248
|
return sprintf("%d", $self->{__non_header_anchor}++) if (!$heading_level); |
|
4056
|
|
|
|
|
|
|
|
|
4057
|
62
|
|
|
|
|
114
|
$anchor = "section"; |
|
4058
|
62
|
|
|
|
|
154
|
$self->{__heading_count}->[$heading_level - 1]++; |
|
4059
|
|
|
|
|
|
|
|
|
4060
|
|
|
|
|
|
|
# Reset lower order counters |
|
4061
|
62
|
|
|
|
|
71
|
for ($i = @{$self->{__heading_count}}; $i > $heading_level; $i--) |
|
|
62
|
|
|
|
|
214
|
|
|
4062
|
|
|
|
|
|
|
{ |
|
4063
|
14
|
|
|
|
|
41
|
$self->{__heading_count}->[$i - 1] = 0; |
|
4064
|
|
|
|
|
|
|
} |
|
4065
|
|
|
|
|
|
|
|
|
4066
|
62
|
|
|
|
|
157
|
for ($i = 0; $i < $heading_level; $i++) |
|
4067
|
|
|
|
|
|
|
{ |
|
4068
|
151
|
100
|
|
|
|
352
|
$self->{__heading_count}->[$i] = 1 |
|
4069
|
|
|
|
|
|
|
if !$self->{__heading_count}->[$i]; # In case they skip any |
|
4070
|
151
|
|
|
|
|
685
|
$anchor .= sprintf("_%d", $self->{__heading_count}->[$i]); |
|
4071
|
|
|
|
|
|
|
} |
|
4072
|
62
|
|
|
|
|
81
|
chomp($anchor); |
|
4073
|
62
|
|
|
|
|
164
|
$anchor; |
|
4074
|
|
|
|
|
|
|
} # make_new_anchor |
|
4075
|
|
|
|
|
|
|
|
|
4076
|
|
|
|
|
|
|
=head2 anchor_mail |
|
4077
|
|
|
|
|
|
|
|
|
4078
|
|
|
|
|
|
|
$self->anchor_mail($line_ref); |
|
4079
|
|
|
|
|
|
|
|
|
4080
|
|
|
|
|
|
|
Make an anchor for a mail section. |
|
4081
|
|
|
|
|
|
|
|
|
4082
|
|
|
|
|
|
|
=cut |
|
4083
|
|
|
|
|
|
|
sub anchor_mail ($$) |
|
4084
|
|
|
|
|
|
|
{ |
|
4085
|
11
|
|
|
11
|
1
|
20
|
my $self = shift; |
|
4086
|
11
|
|
|
|
|
20
|
my $line_ref = shift; |
|
4087
|
|
|
|
|
|
|
|
|
4088
|
11
|
50
|
|
|
|
39
|
if ($self->{make_anchors}) |
|
4089
|
|
|
|
|
|
|
{ |
|
4090
|
11
|
|
|
|
|
39
|
my ($anchor) = $self->make_new_anchor(0); |
|
4091
|
11
|
100
|
|
|
|
32
|
if ($self->{lower_case_tags}) |
|
4092
|
|
|
|
|
|
|
{ |
|
4093
|
10
|
|
|
|
|
13
|
${$line_ref} =~ s/([^ ]*)/$1<\/a>/; |
|
|
10
|
|
|
|
|
192
|
|
|
4094
|
|
|
|
|
|
|
} |
|
4095
|
|
|
|
|
|
|
else |
|
4096
|
|
|
|
|
|
|
{ |
|
4097
|
1
|
|
|
|
|
2
|
${$line_ref} =~ s/([^ ]*)/$1<\/A>/; |
|
|
1
|
|
|
|
|
8
|
|
|
4098
|
|
|
|
|
|
|
} |
|
4099
|
|
|
|
|
|
|
} |
|
4100
|
|
|
|
|
|
|
} # anchor_mail |
|
4101
|
|
|
|
|
|
|
|
|
4102
|
|
|
|
|
|
|
=head2 anchor_heading |
|
4103
|
|
|
|
|
|
|
|
|
4104
|
|
|
|
|
|
|
$self->anchor_heading($heading_level, $line_ref); |
|
4105
|
|
|
|
|
|
|
|
|
4106
|
|
|
|
|
|
|
Make an anchor for a heading. |
|
4107
|
|
|
|
|
|
|
|
|
4108
|
|
|
|
|
|
|
=cut |
|
4109
|
|
|
|
|
|
|
sub anchor_heading ($$$) |
|
4110
|
|
|
|
|
|
|
{ |
|
4111
|
63
|
|
|
63
|
1
|
107
|
my $self = shift; |
|
4112
|
63
|
|
|
|
|
89
|
my $level = shift; |
|
4113
|
63
|
|
|
|
|
82
|
my $line_ref = shift; |
|
4114
|
|
|
|
|
|
|
|
|
4115
|
63
|
50
|
|
|
|
150
|
if ($DictDebug & 8) |
|
4116
|
|
|
|
|
|
|
{ |
|
4117
|
0
|
|
|
|
|
0
|
print STDERR "anchor_heading: ", ${$line_ref}, "\n"; |
|
|
0
|
|
|
|
|
0
|
|
|
4118
|
|
|
|
|
|
|
} |
|
4119
|
63
|
100
|
|
|
|
168
|
if ($self->{make_anchors}) |
|
4120
|
|
|
|
|
|
|
{ |
|
4121
|
62
|
|
|
|
|
198
|
my ($anchor) = $self->make_new_anchor($level); |
|
4122
|
62
|
100
|
|
|
|
157
|
if ($self->{lower_case_tags}) |
|
4123
|
|
|
|
|
|
|
{ |
|
4124
|
47
|
|
|
|
|
58
|
${$line_ref} =~ s/()(.*)(<\/h.>)/$1$2<\/a>$3/; |
|
|
47
|
|
|
|
|
520
|
|
|
4125
|
|
|
|
|
|
|
} |
|
4126
|
|
|
|
|
|
|
else |
|
4127
|
|
|
|
|
|
|
{ |
|
4128
|
15
|
|
|
|
|
17
|
${$line_ref} =~ s/()(.*)(<\/H.>)/$1$2<\/A>$3/; |
|
|
15
|
|
|
|
|
138
|
|
|
4129
|
|
|
|
|
|
|
} |
|
4130
|
|
|
|
|
|
|
} |
|
4131
|
63
|
50
|
|
|
|
263
|
if ($DictDebug & 8) |
|
4132
|
|
|
|
|
|
|
{ |
|
4133
|
0
|
|
|
|
|
0
|
print STDERR "anchor_heading(after): ", ${$line_ref}, "\n"; |
|
|
0
|
|
|
|
|
0
|
|
|
4134
|
|
|
|
|
|
|
} |
|
4135
|
|
|
|
|
|
|
} # anchor_heading |
|
4136
|
|
|
|
|
|
|
|
|
4137
|
|
|
|
|
|
|
=head2 heading_level |
|
4138
|
|
|
|
|
|
|
|
|
4139
|
|
|
|
|
|
|
$self->heading_level($style); |
|
4140
|
|
|
|
|
|
|
|
|
4141
|
|
|
|
|
|
|
Add a new heading style if this is a new heading style. |
|
4142
|
|
|
|
|
|
|
|
|
4143
|
|
|
|
|
|
|
=cut |
|
4144
|
|
|
|
|
|
|
sub heading_level ($$) |
|
4145
|
|
|
|
|
|
|
{ |
|
4146
|
63
|
|
|
63
|
1
|
108
|
my $self = shift; |
|
4147
|
|
|
|
|
|
|
|
|
4148
|
63
|
|
|
|
|
98
|
my ($style) = @_; |
|
4149
|
63
|
100
|
|
|
|
278
|
$self->{__heading_styles}->{$style} = ++$self->{__num_heading_styles} |
|
4150
|
|
|
|
|
|
|
if !$self->{__heading_styles}->{$style}; |
|
4151
|
63
|
|
|
|
|
179
|
$self->{__heading_styles}->{$style}; |
|
4152
|
|
|
|
|
|
|
} # heading_level |
|
4153
|
|
|
|
|
|
|
|
|
4154
|
|
|
|
|
|
|
=head2 is_ul_list_line |
|
4155
|
|
|
|
|
|
|
|
|
4156
|
|
|
|
|
|
|
if ($self->is_ul_list_line($line)) |
|
4157
|
|
|
|
|
|
|
{ |
|
4158
|
|
|
|
|
|
|
... |
|
4159
|
|
|
|
|
|
|
} |
|
4160
|
|
|
|
|
|
|
|
|
4161
|
|
|
|
|
|
|
Tests if this line starts a UL list item. |
|
4162
|
|
|
|
|
|
|
|
|
4163
|
|
|
|
|
|
|
=cut |
|
4164
|
|
|
|
|
|
|
sub is_ul_list_line ($%) |
|
4165
|
|
|
|
|
|
|
{ |
|
4166
|
267
|
|
|
267
|
1
|
373
|
my $self = shift; |
|
4167
|
267
|
|
|
|
|
943
|
my %args = ( |
|
4168
|
|
|
|
|
|
|
line => undef, |
|
4169
|
|
|
|
|
|
|
@_ |
|
4170
|
|
|
|
|
|
|
); |
|
4171
|
267
|
|
|
|
|
426
|
my $line = $args{line}; |
|
4172
|
|
|
|
|
|
|
|
|
4173
|
267
|
|
|
|
|
781
|
my ($prefix, $number, $rawprefix, $term) = $self->listprefix($line); |
|
4174
|
267
|
100
|
100
|
|
|
962
|
if ($prefix && !$number) |
|
4175
|
|
|
|
|
|
|
{ |
|
4176
|
32
|
|
|
|
|
197
|
return 1; |
|
4177
|
|
|
|
|
|
|
} |
|
4178
|
235
|
|
|
|
|
2681
|
return 0; |
|
4179
|
|
|
|
|
|
|
} |
|
4180
|
|
|
|
|
|
|
|
|
4181
|
|
|
|
|
|
|
=head2 is_heading |
|
4182
|
|
|
|
|
|
|
|
|
4183
|
|
|
|
|
|
|
if ($self->is_heading(line_ref=>$line_ref, next_ref=>$next_ref)) |
|
4184
|
|
|
|
|
|
|
{ |
|
4185
|
|
|
|
|
|
|
... |
|
4186
|
|
|
|
|
|
|
} |
|
4187
|
|
|
|
|
|
|
|
|
4188
|
|
|
|
|
|
|
Tests if this line is a heading. Needs to take account of the |
|
4189
|
|
|
|
|
|
|
next line, because a standard heading is defined by "underlining" |
|
4190
|
|
|
|
|
|
|
the text of the heading. |
|
4191
|
|
|
|
|
|
|
|
|
4192
|
|
|
|
|
|
|
=cut |
|
4193
|
|
|
|
|
|
|
sub is_heading ($%) |
|
4194
|
|
|
|
|
|
|
{ |
|
4195
|
269
|
|
|
269
|
1
|
375
|
my $self = shift; |
|
4196
|
269
|
|
|
|
|
1108
|
my %args = ( |
|
4197
|
|
|
|
|
|
|
line_ref => undef, |
|
4198
|
|
|
|
|
|
|
next_ref => undef, |
|
4199
|
|
|
|
|
|
|
@_ |
|
4200
|
|
|
|
|
|
|
); |
|
4201
|
269
|
|
|
|
|
444
|
my $line_ref = $args{line_ref}; |
|
4202
|
269
|
|
|
|
|
384
|
my $next_ref = $args{next_ref}; |
|
4203
|
|
|
|
|
|
|
|
|
4204
|
269
|
100
|
100
|
|
|
332
|
if ( ${$line_ref} !~ /^\s*$/ |
|
|
|
|
66
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
4205
|
235
|
|
|
|
|
1232
|
&& !$self->is_ul_list_line(line => ${$line_ref}) |
|
4206
|
|
|
|
|
|
|
&& defined $next_ref |
|
4207
|
|
|
|
|
|
|
&& ${$next_ref} =~ /^\s*[-=*.~+]+\s*$/) |
|
4208
|
|
|
|
|
|
|
{ |
|
4209
|
54
|
|
|
|
|
80
|
my ($hoffset, $heading) = ${$line_ref} =~ /^(\s*)(.+)$/; |
|
|
54
|
|
|
|
|
308
|
|
|
4210
|
54
|
50
|
|
|
|
148
|
$hoffset = "" unless defined($hoffset); |
|
4211
|
54
|
50
|
|
|
|
129
|
$heading = "" unless defined($heading); |
|
4212
|
|
|
|
|
|
|
# Unescape chars so we get an accurate length |
|
4213
|
54
|
|
|
|
|
123
|
$heading =~ s/&[^;]+;/X/g; |
|
4214
|
54
|
|
|
|
|
76
|
my ($uoffset, $underline) = ${$next_ref} =~ /^(\s*)(\S+)\s*$/; |
|
|
54
|
|
|
|
|
288
|
|
|
4215
|
54
|
50
|
|
|
|
139
|
$uoffset = "" unless defined($uoffset); |
|
4216
|
54
|
50
|
|
|
|
133
|
$underline = "" unless defined($underline); |
|
4217
|
54
|
|
|
|
|
77
|
my ($lendiff, $offsetdiff); |
|
4218
|
54
|
|
|
|
|
99
|
$lendiff = length($heading) - length($underline); |
|
4219
|
54
|
100
|
|
|
|
133
|
$lendiff *= -1 if $lendiff < 0; |
|
4220
|
|
|
|
|
|
|
|
|
4221
|
54
|
|
|
|
|
87
|
$offsetdiff = length($hoffset) - length($uoffset); |
|
4222
|
54
|
50
|
|
|
|
129
|
$offsetdiff *= -1 if $offsetdiff < 0; |
|
4223
|
54
|
50
|
33
|
|
|
206
|
if ( ($lendiff <= $self->{underline_length_tolerance}) |
|
4224
|
|
|
|
|
|
|
|| ($offsetdiff <= $self->{underline_offset_tolerance})) |
|
4225
|
|
|
|
|
|
|
{ |
|
4226
|
54
|
|
|
|
|
188
|
return 1; |
|
4227
|
|
|
|
|
|
|
} |
|
4228
|
|
|
|
|
|
|
} |
|
4229
|
|
|
|
|
|
|
|
|
4230
|
215
|
|
|
|
|
605
|
return 0; |
|
4231
|
|
|
|
|
|
|
|
|
4232
|
|
|
|
|
|
|
} # is_heading |
|
4233
|
|
|
|
|
|
|
|
|
4234
|
|
|
|
|
|
|
=head2 heading |
|
4235
|
|
|
|
|
|
|
|
|
4236
|
|
|
|
|
|
|
$self->heading(line_ref=>$line_ref, |
|
4237
|
|
|
|
|
|
|
next_ref=>$next_ref); |
|
4238
|
|
|
|
|
|
|
|
|
4239
|
|
|
|
|
|
|
Make a heading. |
|
4240
|
|
|
|
|
|
|
Assumes is_heading is true. |
|
4241
|
|
|
|
|
|
|
|
|
4242
|
|
|
|
|
|
|
=cut |
|
4243
|
|
|
|
|
|
|
sub heading ($%) |
|
4244
|
|
|
|
|
|
|
{ |
|
4245
|
54
|
|
|
54
|
1
|
92
|
my $self = shift; |
|
4246
|
54
|
|
|
|
|
205
|
my %args = ( |
|
4247
|
|
|
|
|
|
|
line_ref => undef, |
|
4248
|
|
|
|
|
|
|
next_ref => undef, |
|
4249
|
|
|
|
|
|
|
@_ |
|
4250
|
|
|
|
|
|
|
); |
|
4251
|
54
|
|
|
|
|
101
|
my $line_ref = $args{line_ref}; |
|
4252
|
54
|
|
|
|
|
79
|
my $next_ref = $args{next_ref}; |
|
4253
|
|
|
|
|
|
|
|
|
4254
|
54
|
|
|
|
|
87
|
my ($hoffset, $heading) = ${$line_ref} =~ /^(\s*)(.+)$/; |
|
|
54
|
|
|
|
|
427
|
|
|
4255
|
54
|
50
|
|
|
|
143
|
$hoffset = "" unless defined($hoffset); |
|
4256
|
54
|
50
|
|
|
|
118
|
$heading = "" unless defined($heading); |
|
4257
|
54
|
|
|
|
|
175
|
$heading =~ s/&[^;]+;/X/g; # Unescape chars so we get an accurate length |
|
4258
|
54
|
|
|
|
|
63
|
my ($uoffset, $underline) = ${$next_ref} =~ /^(\s*)(\S+)\s*$/; |
|
|
54
|
|
|
|
|
293
|
|
|
4259
|
54
|
50
|
|
|
|
134
|
$uoffset = "" unless defined($uoffset); |
|
4260
|
54
|
50
|
|
|
|
131
|
$underline = "" unless defined($underline); |
|
4261
|
|
|
|
|
|
|
|
|
4262
|
54
|
|
|
|
|
122
|
$underline = substr($underline, 0, 1); |
|
4263
|
|
|
|
|
|
|
|
|
4264
|
|
|
|
|
|
|
# Call it a different style if the heading is in all caps. |
|
4265
|
54
|
100
|
|
|
|
74
|
$underline .= "C" if $self->iscaps(${$line_ref}); |
|
|
54
|
|
|
|
|
165
|
|
|
4266
|
54
|
|
|
|
|
88
|
${$next_ref} = " "; # Eat the underline |
|
|
54
|
|
|
|
|
101
|
|
|
4267
|
54
|
|
|
|
|
185
|
$self->{__heading_level} = $self->heading_level($underline); |
|
4268
|
54
|
50
|
|
|
|
147
|
if ($self->{escape_HTML_chars}) |
|
4269
|
|
|
|
|
|
|
{ |
|
4270
|
54
|
|
|
|
|
79
|
${$line_ref} = escape(${$line_ref}); |
|
|
54
|
|
|
|
|
89
|
|
|
|
54
|
|
|
|
|
147
|
|
|
4271
|
|
|
|
|
|
|
} |
|
4272
|
54
|
|
|
|
|
274
|
$self->tagline("H" . $self->{__heading_level}, $line_ref); |
|
4273
|
54
|
|
|
|
|
243
|
$self->anchor_heading($self->{__heading_level}, $line_ref); |
|
4274
|
|
|
|
|
|
|
} # heading |
|
4275
|
|
|
|
|
|
|
|
|
4276
|
|
|
|
|
|
|
=head2 is_custom_heading |
|
4277
|
|
|
|
|
|
|
|
|
4278
|
|
|
|
|
|
|
if ($self->is_custom_heading($line)) |
|
4279
|
|
|
|
|
|
|
{ |
|
4280
|
|
|
|
|
|
|
... |
|
4281
|
|
|
|
|
|
|
} |
|
4282
|
|
|
|
|
|
|
|
|
4283
|
|
|
|
|
|
|
Check if the given line matches a custom heading. |
|
4284
|
|
|
|
|
|
|
|
|
4285
|
|
|
|
|
|
|
=cut |
|
4286
|
|
|
|
|
|
|
sub is_custom_heading ($%) |
|
4287
|
|
|
|
|
|
|
{ |
|
4288
|
155
|
|
|
155
|
1
|
193
|
my $self = shift; |
|
4289
|
155
|
|
|
|
|
426
|
my %args = ( |
|
4290
|
|
|
|
|
|
|
line => undef, |
|
4291
|
|
|
|
|
|
|
@_ |
|
4292
|
|
|
|
|
|
|
); |
|
4293
|
155
|
|
|
|
|
215
|
my $line = $args{line}; |
|
4294
|
|
|
|
|
|
|
|
|
4295
|
155
|
|
|
|
|
171
|
foreach my $reg (@{$self->{custom_heading_regexp}}) |
|
|
155
|
|
|
|
|
323
|
|
|
4296
|
|
|
|
|
|
|
{ |
|
4297
|
161
|
100
|
|
|
|
1064
|
return 1 if ($line =~ /$reg/); |
|
4298
|
|
|
|
|
|
|
} |
|
4299
|
146
|
|
|
|
|
382
|
return 0; |
|
4300
|
|
|
|
|
|
|
} # is_custom_heading |
|
4301
|
|
|
|
|
|
|
|
|
4302
|
|
|
|
|
|
|
=head2 custom_heading |
|
4303
|
|
|
|
|
|
|
|
|
4304
|
|
|
|
|
|
|
$self->custom_heading(line_ref=>$line_ref); |
|
4305
|
|
|
|
|
|
|
|
|
4306
|
|
|
|
|
|
|
Make a custom heading. Assumes is_custom_heading is true. |
|
4307
|
|
|
|
|
|
|
|
|
4308
|
|
|
|
|
|
|
=cut |
|
4309
|
|
|
|
|
|
|
sub custom_heading ($%) |
|
4310
|
|
|
|
|
|
|
{ |
|
4311
|
9
|
|
|
9
|
1
|
88
|
my $self = shift; |
|
4312
|
9
|
|
|
|
|
28
|
my %args = ( |
|
4313
|
|
|
|
|
|
|
line_ref => undef, |
|
4314
|
|
|
|
|
|
|
@_ |
|
4315
|
|
|
|
|
|
|
); |
|
4316
|
9
|
|
|
|
|
15
|
my $line_ref = $args{line_ref}; |
|
4317
|
|
|
|
|
|
|
|
|
4318
|
9
|
|
|
|
|
12
|
my $level; |
|
4319
|
9
|
|
|
|
|
11
|
my $i = 0; |
|
4320
|
9
|
|
|
|
|
13
|
foreach my $reg (@{$self->{custom_heading_regexp}}) |
|
|
9
|
|
|
|
|
22
|
|
|
4321
|
|
|
|
|
|
|
{ |
|
4322
|
11
|
100
|
|
|
|
12
|
if (${$line_ref} =~ /$reg/) |
|
|
11
|
|
|
|
|
232
|
|
|
4323
|
|
|
|
|
|
|
{ |
|
4324
|
9
|
50
|
|
|
|
26
|
if ($self->{explicit_headings}) |
|
4325
|
|
|
|
|
|
|
{ |
|
4326
|
0
|
|
|
|
|
0
|
$level = $i + 1; |
|
4327
|
|
|
|
|
|
|
} |
|
4328
|
|
|
|
|
|
|
else |
|
4329
|
|
|
|
|
|
|
{ |
|
4330
|
9
|
|
|
|
|
42
|
$level = $self->heading_level("Cust" . $i); |
|
4331
|
|
|
|
|
|
|
} |
|
4332
|
9
|
50
|
|
|
|
32
|
if ($self->{escape_HTML_chars}) |
|
4333
|
|
|
|
|
|
|
{ |
|
4334
|
9
|
|
|
|
|
12
|
${$line_ref} = escape(${$line_ref}); |
|
|
9
|
|
|
|
|
17
|
|
|
|
9
|
|
|
|
|
25
|
|
|
4335
|
|
|
|
|
|
|
} |
|
4336
|
9
|
|
|
|
|
40
|
$self->tagline("H" . $level, $line_ref); |
|
4337
|
9
|
|
|
|
|
32
|
$self->anchor_heading($level, $line_ref); |
|
4338
|
9
|
|
|
|
|
30
|
last; |
|
4339
|
|
|
|
|
|
|
} |
|
4340
|
2
|
|
|
|
|
5
|
$i++; |
|
4341
|
|
|
|
|
|
|
} |
|
4342
|
|
|
|
|
|
|
} # custom_heading |
|
4343
|
|
|
|
|
|
|
|
|
4344
|
|
|
|
|
|
|
=head2 unhyphenate_para |
|
4345
|
|
|
|
|
|
|
|
|
4346
|
|
|
|
|
|
|
$self->unhyphenate_para($para_ref); |
|
4347
|
|
|
|
|
|
|
|
|
4348
|
|
|
|
|
|
|
Join up hyphenated words that are split across lines. |
|
4349
|
|
|
|
|
|
|
|
|
4350
|
|
|
|
|
|
|
=cut |
|
4351
|
|
|
|
|
|
|
sub unhyphenate_para ($$) |
|
4352
|
|
|
|
|
|
|
{ |
|
4353
|
8
|
|
|
8
|
1
|
15
|
my $self = shift; |
|
4354
|
8
|
|
|
|
|
15
|
my $para_ref = shift; |
|
4355
|
|
|
|
|
|
|
|
|
4356
|
|
|
|
|
|
|
# Treating this whole paragraph as one string, look for |
|
4357
|
|
|
|
|
|
|
# 1 - whitespace |
|
4358
|
|
|
|
|
|
|
# 2 - a word (ending in a hyphen, followed by a newline) |
|
4359
|
|
|
|
|
|
|
# 3 - whitespace (starting on the next line) |
|
4360
|
|
|
|
|
|
|
# 4 - a word with its punctuation |
|
4361
|
|
|
|
|
|
|
# Substitute this with |
|
4362
|
|
|
|
|
|
|
# 1-whitespace 2-word 4-word newline 3-whitespace |
|
4363
|
|
|
|
|
|
|
# We preserve the 3-whitespace because we don't want to mess up |
|
4364
|
|
|
|
|
|
|
# our existing indentation. |
|
4365
|
8
|
|
|
|
|
12
|
${$para_ref} =~ |
|
|
8
|
|
|
|
|
407
|
|
|
4366
|
|
|
|
|
|
|
/(\s*)([^\W\d_]*)\-\n(\s*)([^\W\d_]+[\)\}\]\.,:;\'\"\>]*\s*)/s; |
|
4367
|
8
|
|
|
|
|
16
|
${$para_ref} =~ |
|
|
8
|
|
|
|
|
534
|
|
|
4368
|
|
|
|
|
|
|
s/(\s*)([^\W\d_]*)\-\n(\s*)([^\W\d_]+[\)\}\]\.,:;\'\"\>]*\s*)/$1$2$4\n$3/gs; |
|
4369
|
|
|
|
|
|
|
} # unhyphenate_para |
|
4370
|
|
|
|
|
|
|
|
|
4371
|
|
|
|
|
|
|
=head2 tagline |
|
4372
|
|
|
|
|
|
|
|
|
4373
|
|
|
|
|
|
|
$self->tagline($tag, $line_ref); |
|
4374
|
|
|
|
|
|
|
|
|
4375
|
|
|
|
|
|
|
Put the given tag around the given line. |
|
4376
|
|
|
|
|
|
|
|
|
4377
|
|
|
|
|
|
|
=cut |
|
4378
|
|
|
|
|
|
|
sub tagline ($$$) |
|
4379
|
|
|
|
|
|
|
{ |
|
4380
|
66
|
|
|
66
|
1
|
101
|
my $self = shift; |
|
4381
|
66
|
|
|
|
|
160
|
my $tag = shift; |
|
4382
|
66
|
|
|
|
|
84
|
my $line_ref = shift; |
|
4383
|
|
|
|
|
|
|
|
|
4384
|
66
|
|
|
|
|
82
|
chomp ${$line_ref}; # Drop newline |
|
|
66
|
|
|
|
|
158
|
|
|
4385
|
66
|
|
|
|
|
193
|
my $tag1 = $self->get_tag($tag); |
|
4386
|
66
|
|
|
|
|
179
|
my $tag2 = $self->close_tag($tag); |
|
4387
|
66
|
|
|
|
|
107
|
${$line_ref} =~ s/^\s*(.*)$/${tag1}$1${tag2}\n/; |
|
|
66
|
|
|
|
|
649
|
|
|
4388
|
|
|
|
|
|
|
} # tagline |
|
4389
|
|
|
|
|
|
|
|
|
4390
|
|
|
|
|
|
|
=head2 iscaps |
|
4391
|
|
|
|
|
|
|
|
|
4392
|
|
|
|
|
|
|
if ($self->iscaps($line)) |
|
4393
|
|
|
|
|
|
|
{ |
|
4394
|
|
|
|
|
|
|
... |
|
4395
|
|
|
|
|
|
|
} |
|
4396
|
|
|
|
|
|
|
|
|
4397
|
|
|
|
|
|
|
Check if a line is all capitals. |
|
4398
|
|
|
|
|
|
|
|
|
4399
|
|
|
|
|
|
|
=cut |
|
4400
|
|
|
|
|
|
|
sub iscaps |
|
4401
|
|
|
|
|
|
|
{ |
|
4402
|
1117
|
|
|
1117
|
1
|
1239
|
my $self = shift; |
|
4403
|
1117
|
|
|
|
|
2467
|
local ($_) = @_; |
|
4404
|
|
|
|
|
|
|
|
|
4405
|
1117
|
|
|
|
|
1944
|
my $min_caps_len = $self->{min_caps_length}; |
|
4406
|
|
|
|
|
|
|
|
|
4407
|
1117
|
|
|
|
|
11693
|
/^[^[:lower:]<]*[[:upper:]]{$min_caps_len,}[^[:lower:]<]*$/; |
|
4408
|
|
|
|
|
|
|
} # iscaps |
|
4409
|
|
|
|
|
|
|
|
|
4410
|
|
|
|
|
|
|
=head2 caps |
|
4411
|
|
|
|
|
|
|
|
|
4412
|
|
|
|
|
|
|
$self->caps(line_ref=>$line_ref, |
|
4413
|
|
|
|
|
|
|
line_action_ref=>$line_action_ref); |
|
4414
|
|
|
|
|
|
|
|
|
4415
|
|
|
|
|
|
|
Detect and deal with an all-caps line. |
|
4416
|
|
|
|
|
|
|
|
|
4417
|
|
|
|
|
|
|
=cut |
|
4418
|
|
|
|
|
|
|
sub caps |
|
4419
|
|
|
|
|
|
|
{ |
|
4420
|
1065
|
|
|
1065
|
1
|
1270
|
my $self = shift; |
|
4421
|
1065
|
|
|
|
|
3348
|
my %args = ( |
|
4422
|
|
|
|
|
|
|
line_ref => undef, |
|
4423
|
|
|
|
|
|
|
line_action_ref => undef, |
|
4424
|
|
|
|
|
|
|
@_ |
|
4425
|
|
|
|
|
|
|
); |
|
4426
|
1065
|
|
|
|
|
1370
|
my $line_ref = $args{line_ref}; |
|
4427
|
1065
|
|
|
|
|
1264
|
my $line_action_ref = $args{line_action_ref}; |
|
4428
|
|
|
|
|
|
|
|
|
4429
|
1065
|
100
|
100
|
|
|
2568
|
if ( $self->{caps_tag} |
|
|
1063
|
|
|
|
|
2354
|
|
|
4430
|
|
|
|
|
|
|
&& $self->iscaps(${$line_ref})) |
|
4431
|
|
|
|
|
|
|
{ |
|
4432
|
3
|
|
|
|
|
17
|
$self->tagline($self->{caps_tag}, $line_ref); |
|
4433
|
3
|
|
|
|
|
6
|
${$line_action_ref} |= $CAPS; |
|
|
3
|
|
|
|
|
10
|
|
|
4434
|
|
|
|
|
|
|
} |
|
4435
|
|
|
|
|
|
|
} # caps |
|
4436
|
|
|
|
|
|
|
|
|
4437
|
|
|
|
|
|
|
=head2 do_delim |
|
4438
|
|
|
|
|
|
|
|
|
4439
|
|
|
|
|
|
|
$self->do_delim(line_ref=>$line_ref, |
|
4440
|
|
|
|
|
|
|
line_action_ref=>$line_action_ref, |
|
4441
|
|
|
|
|
|
|
delim=>'*', |
|
4442
|
|
|
|
|
|
|
tag=>'STRONG'); |
|
4443
|
|
|
|
|
|
|
|
|
4444
|
|
|
|
|
|
|
Deal with a line which has words delimited by the given delimiter; |
|
4445
|
|
|
|
|
|
|
this is used to deal with italics, bold and underline formatting. |
|
4446
|
|
|
|
|
|
|
|
|
4447
|
|
|
|
|
|
|
=cut |
|
4448
|
|
|
|
|
|
|
sub do_delim |
|
4449
|
|
|
|
|
|
|
{ |
|
4450
|
1318
|
|
|
1318
|
1
|
1811
|
my $self = shift; |
|
4451
|
1318
|
|
|
|
|
6361
|
my %args = ( |
|
4452
|
|
|
|
|
|
|
line_ref => undef, |
|
4453
|
|
|
|
|
|
|
line_action_ref => undef, |
|
4454
|
|
|
|
|
|
|
delim => '*', |
|
4455
|
|
|
|
|
|
|
tag => 'STRONG', |
|
4456
|
|
|
|
|
|
|
@_ |
|
4457
|
|
|
|
|
|
|
); |
|
4458
|
1318
|
|
|
|
|
3039
|
my $line_ref = $args{line_ref}; |
|
4459
|
1318
|
|
|
|
|
1550
|
my $line_action_ref = $args{line_action_ref}; |
|
4460
|
1318
|
|
|
|
|
1594
|
my $delim = $args{delim}; |
|
4461
|
1318
|
|
|
|
|
1629
|
my $tag = $args{tag}; |
|
4462
|
|
|
|
|
|
|
|
|
4463
|
1318
|
100
|
|
|
|
4238
|
if ($delim eq '#') |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
4464
|
|
|
|
|
|
|
{ |
|
4465
|
438
|
50
|
|
|
|
487
|
if (${$line_ref} =~ m/\B#([[:alpha:]])#\B/s) |
|
|
438
|
|
|
|
|
1470
|
|
|
4466
|
|
|
|
|
|
|
{ |
|
4467
|
0
|
|
|
|
|
0
|
${$line_ref} =~ s/\B#([[:alpha:]])#\B/<${tag}>$1<\/${tag}>/gs; |
|
|
0
|
|
|
|
|
0
|
|
|
4468
|
|
|
|
|
|
|
} |
|
4469
|
|
|
|
|
|
|
# special treatment of # for the #num case and the #link case |
|
4470
|
438
|
100
|
|
|
|
536
|
if (${$line_ref} !~ m/<[aA]/) |
|
|
438
|
|
|
|
|
2407
|
|
|
4471
|
|
|
|
|
|
|
{ |
|
4472
|
342
|
|
|
|
|
498
|
${$line_ref} =~ |
|
|
342
|
|
|
|
|
1635
|
|
|
4473
|
|
|
|
|
|
|
s/#([^\d#](?![^#]*(?:|| | ))[^#]*[^# \t\n])#/<${tag}>$1<\/${tag}>/gs; |
|
4474
|
|
|
|
|
|
|
} |
|
4475
|
|
|
|
|
|
|
else |
|
4476
|
|
|
|
|
|
|
{ |
|
4477
|
96
|
|
|
|
|
151
|
my $line_with_links = ''; |
|
4478
|
96
|
|
|
|
|
139
|
my $linkme = ''; |
|
4479
|
96
|
|
|
|
|
102
|
my $unmatched = ${$line_ref}; |
|
|
96
|
|
|
|
|
151
|
|
|
4480
|
96
|
|
|
|
|
314
|
while ($unmatched =~ |
|
4481
|
|
|
|
|
|
|
m/#([^\d#](?![^#]*(?:|| | ))[^#]*[^# \t\n])#/s) |
|
4482
|
|
|
|
|
|
|
{ |
|
4483
|
2
|
|
|
|
|
9
|
$line_with_links .= $`; |
|
4484
|
2
|
|
|
|
|
7
|
$linkme = $&; |
|
4485
|
2
|
|
|
|
|
7
|
$unmatched = $'; |
|
4486
|
2
|
100
|
|
|
|
8
|
if (!$self->in_link_context($linkme, $line_with_links)) |
|
4487
|
|
|
|
|
|
|
{ |
|
4488
|
1
|
|
|
|
|
11
|
$linkme =~ |
|
4489
|
|
|
|
|
|
|
s/#([^\d#](?![^#]*(?:|| | ))[^#]*[^# \t\n])#/<${tag}>$1<\/${tag}>/gs; |
|
4490
|
|
|
|
|
|
|
} |
|
4491
|
2
|
|
|
|
|
10
|
$line_with_links .= $linkme; |
|
4492
|
|
|
|
|
|
|
} |
|
4493
|
96
|
|
|
|
|
252
|
${$line_ref} = $line_with_links . $unmatched; |
|
|
96
|
|
|
|
|
368
|
|
|
4494
|
|
|
|
|
|
|
} |
|
4495
|
|
|
|
|
|
|
} |
|
4496
|
|
|
|
|
|
|
elsif ($delim eq '^') |
|
4497
|
|
|
|
|
|
|
{ |
|
4498
|
1
|
|
|
|
|
2
|
${$line_ref} =~ |
|
|
1
|
|
|
|
|
16
|
|
|
4499
|
|
|
|
|
|
|
s/\^((?![^^]*(?:|| | ))(\w|["'<>])[^^]*)\^/<${tag}>$1<\/${tag}>/gs; |
|
4500
|
1
|
|
|
|
|
3
|
${$line_ref} =~ s/\B\^([[:alpha:]])\^\B/<${tag}>$1<\/${tag}>/gs; |
|
|
1
|
|
|
|
|
5
|
|
|
4501
|
|
|
|
|
|
|
} |
|
4502
|
|
|
|
|
|
|
elsif ($delim eq '_') |
|
4503
|
|
|
|
|
|
|
{ |
|
4504
|
440
|
50
|
|
|
|
529
|
if (${$line_ref} =~ m/\B_([[:alpha:]])_\B/s) |
|
|
440
|
|
|
|
|
1530
|
|
|
4505
|
|
|
|
|
|
|
{ |
|
4506
|
0
|
|
|
|
|
0
|
${$line_ref} =~ s/\B_([[:alpha:]])_\B/<${tag}>$1<\/${tag}>/gs; |
|
|
0
|
|
|
|
|
0
|
|
|
4507
|
0
|
|
|
|
|
0
|
${$line_ref} =~ |
|
|
0
|
|
|
|
|
0
|
|
|
4508
|
|
|
|
|
|
|
s#(?])_#<${tag}>$1${tag}>#gs; |
|
4509
|
|
|
|
|
|
|
} |
|
4510
|
|
|
|
|
|
|
else |
|
4511
|
|
|
|
|
|
|
{ |
|
4512
|
|
|
|
|
|
|
# make sure we don't wallop links that have underscores |
|
4513
|
|
|
|
|
|
|
# need to make sure that _ delimiters are not mistaken for |
|
4514
|
|
|
|
|
|
|
# a_variable_name |
|
4515
|
440
|
|
|
|
|
627
|
my $line_with_links = ''; |
|
4516
|
440
|
|
|
|
|
571
|
my $linkme = ''; |
|
4517
|
440
|
|
|
|
|
441
|
my $unmatched = ${$line_ref}; |
|
|
440
|
|
|
|
|
733
|
|
|
4518
|
440
|
|
|
|
|
1551
|
while ($unmatched =~ |
|
4519
|
|
|
|
|
|
|
m#(?])_#s) |
|
4520
|
|
|
|
|
|
|
{ |
|
4521
|
15
|
|
|
|
|
40
|
$line_with_links .= $`; |
|
4522
|
15
|
|
|
|
|
29
|
$linkme = $&; |
|
4523
|
15
|
|
|
|
|
42
|
$unmatched = $'; |
|
4524
|
15
|
100
|
|
|
|
43
|
if (!$self->in_link_context($linkme, $line_with_links)) |
|
4525
|
|
|
|
|
|
|
{ |
|
4526
|
11
|
|
|
|
|
90
|
$linkme =~ |
|
4527
|
|
|
|
|
|
|
s#(?])_#<${tag}>$1${tag}>#gs; |
|
4528
|
|
|
|
|
|
|
} |
|
4529
|
15
|
|
|
|
|
89
|
$line_with_links .= $linkme; |
|
4530
|
|
|
|
|
|
|
} |
|
4531
|
440
|
|
|
|
|
750
|
${$line_ref} = $line_with_links . $unmatched; |
|
|
440
|
|
|
|
|
1951
|
|
|
4532
|
|
|
|
|
|
|
} |
|
4533
|
|
|
|
|
|
|
} |
|
4534
|
|
|
|
|
|
|
elsif (length($delim) eq 1) # one-character, general |
|
4535
|
|
|
|
|
|
|
{ |
|
4536
|
438
|
50
|
|
|
|
541
|
if (${$line_ref} =~ m/\B[${delim}]([[:alpha:]])[${delim}]\B/s) |
|
|
438
|
|
|
|
|
2813
|
|
|
4537
|
|
|
|
|
|
|
{ |
|
4538
|
0
|
|
|
|
|
0
|
${$line_ref} =~ s/\B[${delim}]([[:alpha:]])[${delim}]\B/<${tag}>$1<\/${tag}>/gs; |
|
|
0
|
|
|
|
|
0
|
|
|
4539
|
|
|
|
|
|
|
} |
|
4540
|
438
|
|
|
|
|
546
|
${$line_ref} =~ |
|
|
438
|
|
|
|
|
3751
|
|
|
4541
|
|
|
|
|
|
|
s#(?])[${delim}]#<${tag}>$1${tag}>#gs; |
|
4542
|
|
|
|
|
|
|
} |
|
4543
|
|
|
|
|
|
|
else |
|
4544
|
|
|
|
|
|
|
{ |
|
4545
|
1
|
|
|
|
|
3
|
${$line_ref} =~ |
|
|
1
|
|
|
|
|
88
|
|
|
4546
|
|
|
|
|
|
|
s/(?$1<\/${tag}>/gs; |
|
4547
|
1
|
|
|
|
|
2
|
${$line_ref} =~ s/${delim}]([[:alpha:]])${delim}/<${tag}>$1<\/${tag}>/gs; |
|
|
1
|
|
|
|
|
23
|
|
|
4548
|
|
|
|
|
|
|
} |
|
4549
|
|
|
|
|
|
|
} # do_delim |
|
4550
|
|
|
|
|
|
|
|
|
4551
|
|
|
|
|
|
|
=head2 glob2regexp |
|
4552
|
|
|
|
|
|
|
|
|
4553
|
|
|
|
|
|
|
$regexp = glob2regexp($glob); |
|
4554
|
|
|
|
|
|
|
|
|
4555
|
|
|
|
|
|
|
Convert very simple globs to regexps |
|
4556
|
|
|
|
|
|
|
|
|
4557
|
|
|
|
|
|
|
=cut |
|
4558
|
|
|
|
|
|
|
sub glob2regexp |
|
4559
|
|
|
|
|
|
|
{ |
|
4560
|
0
|
|
|
0
|
1
|
0
|
my ($glob) = @_; |
|
4561
|
|
|
|
|
|
|
|
|
4562
|
|
|
|
|
|
|
# Escape funky chars |
|
4563
|
0
|
|
|
|
|
0
|
$glob =~ s/[^\w\[\]\*\?\|\\]/\\$&/g; |
|
4564
|
0
|
|
|
|
|
0
|
my ($regexp, $i, $len, $escaped) = ("", 0, length($glob), 0); |
|
4565
|
|
|
|
|
|
|
|
|
4566
|
0
|
|
|
|
|
0
|
for (; $i < $len; $i++) |
|
4567
|
|
|
|
|
|
|
{ |
|
4568
|
0
|
|
|
|
|
0
|
my $char = substr($glob, $i, 1); |
|
4569
|
0
|
0
|
|
|
|
0
|
if ($escaped) |
|
4570
|
|
|
|
|
|
|
{ |
|
4571
|
0
|
|
|
|
|
0
|
$escaped = 0; |
|
4572
|
0
|
|
|
|
|
0
|
$regexp .= $char; |
|
4573
|
0
|
|
|
|
|
0
|
next; |
|
4574
|
|
|
|
|
|
|
} |
|
4575
|
0
|
0
|
|
|
|
0
|
if ($char eq "\\") |
|
4576
|
|
|
|
|
|
|
{ |
|
4577
|
0
|
|
|
|
|
0
|
$escaped = 1; |
|
4578
|
0
|
|
|
|
|
0
|
next; |
|
4579
|
0
|
|
|
|
|
0
|
$regexp .= $char; |
|
4580
|
|
|
|
|
|
|
} |
|
4581
|
0
|
0
|
|
|
|
0
|
if ($char eq "?") |
|
4582
|
|
|
|
|
|
|
{ |
|
4583
|
0
|
|
|
|
|
0
|
$regexp .= "."; |
|
4584
|
0
|
|
|
|
|
0
|
next; |
|
4585
|
|
|
|
|
|
|
} |
|
4586
|
0
|
0
|
|
|
|
0
|
if ($char eq "*") |
|
4587
|
|
|
|
|
|
|
{ |
|
4588
|
0
|
|
|
|
|
0
|
$regexp .= ".*"; |
|
4589
|
0
|
|
|
|
|
0
|
next; |
|
4590
|
|
|
|
|
|
|
} |
|
4591
|
0
|
|
|
|
|
0
|
$regexp .= $char; # Normal character |
|
4592
|
|
|
|
|
|
|
} |
|
4593
|
0
|
|
|
|
|
0
|
join('', "\\b", $regexp, "\\b"); |
|
4594
|
|
|
|
|
|
|
} # glob2regexp |
|
4595
|
|
|
|
|
|
|
|
|
4596
|
|
|
|
|
|
|
=head2 add_regexp_to_links_table |
|
4597
|
|
|
|
|
|
|
|
|
4598
|
|
|
|
|
|
|
$self->add_regexp_to_links_table(label=>$label, |
|
4599
|
|
|
|
|
|
|
pattern=>$pattern, |
|
4600
|
|
|
|
|
|
|
url=>$url, |
|
4601
|
|
|
|
|
|
|
switches=>$switches); |
|
4602
|
|
|
|
|
|
|
|
|
4603
|
|
|
|
|
|
|
Add the given regexp "link definition" to the links table. |
|
4604
|
|
|
|
|
|
|
|
|
4605
|
|
|
|
|
|
|
=cut |
|
4606
|
|
|
|
|
|
|
sub add_regexp_to_links_table ($%) |
|
4607
|
|
|
|
|
|
|
{ |
|
4608
|
1352
|
|
|
1352
|
1
|
1606
|
my $self = shift; |
|
4609
|
1352
|
|
|
|
|
6856
|
my %args = ( |
|
4610
|
|
|
|
|
|
|
label => undef, |
|
4611
|
|
|
|
|
|
|
pattern => undef, |
|
4612
|
|
|
|
|
|
|
url => undef, |
|
4613
|
|
|
|
|
|
|
switches => undef, |
|
4614
|
|
|
|
|
|
|
@_ |
|
4615
|
|
|
|
|
|
|
); |
|
4616
|
1352
|
|
|
|
|
5324
|
my $label = $args{label}; |
|
4617
|
1352
|
|
|
|
|
1610
|
my $pattern = $args{pattern}; |
|
4618
|
1352
|
|
|
|
|
1571
|
my $URL = $args{url}; |
|
4619
|
1352
|
|
|
|
|
1485
|
my $switches = $args{switches}; |
|
4620
|
|
|
|
|
|
|
|
|
4621
|
|
|
|
|
|
|
# No sense adding a second one if it's already in there. |
|
4622
|
|
|
|
|
|
|
# It would never get used. |
|
4623
|
1352
|
50
|
|
|
|
7121
|
if (!$self->{__links_table}->{$label}) |
|
4624
|
|
|
|
|
|
|
{ |
|
4625
|
|
|
|
|
|
|
|
|
4626
|
|
|
|
|
|
|
# Keep track of the order they were added so we can |
|
4627
|
|
|
|
|
|
|
# look for matches in the same order |
|
4628
|
1352
|
|
|
|
|
1286
|
push(@{$self->{__links_table_order}}, ($label)); |
|
|
1352
|
|
|
|
|
4895
|
|
|
4629
|
|
|
|
|
|
|
|
|
4630
|
1352
|
|
|
|
|
3757
|
$self->{__links_table_patterns}->{$label} = $pattern; |
|
4631
|
1352
|
|
|
|
|
2700
|
$self->{__links_table}->{$label} = $URL; # Put it in The Table |
|
4632
|
1352
|
|
|
|
|
2458
|
$self->{__links_switch_table}->{$label} = $switches; |
|
4633
|
1352
|
|
|
|
|
1577
|
my $ind = @{$self->{__links_table_order}} - 1; |
|
|
1352
|
|
|
|
|
2283
|
|
|
4634
|
1352
|
50
|
|
|
|
13506
|
print STDERR " (", $ind, |
|
4635
|
|
|
|
|
|
|
")\tLABEL: $label \tPATTERN: $pattern\n\tVALUE: $URL\n\tSWITCHES: $switches\n\n" |
|
4636
|
|
|
|
|
|
|
if ($DictDebug & 1); |
|
4637
|
|
|
|
|
|
|
} |
|
4638
|
|
|
|
|
|
|
else |
|
4639
|
|
|
|
|
|
|
{ |
|
4640
|
0
|
0
|
|
|
|
0
|
if ($DictDebug & 1) |
|
4641
|
|
|
|
|
|
|
{ |
|
4642
|
0
|
|
|
|
|
0
|
print STDERR " Skipping entry. Key already in table.\n"; |
|
4643
|
0
|
|
|
|
|
0
|
print STDERR "\tLABEL: $label \tPATTERN: $pattern\n\tVALUE: $URL\n\n"; |
|
4644
|
|
|
|
|
|
|
} |
|
4645
|
|
|
|
|
|
|
} |
|
4646
|
|
|
|
|
|
|
} # add_regexp_to_links_table |
|
4647
|
|
|
|
|
|
|
|
|
4648
|
|
|
|
|
|
|
=head2 add_literal_to_links_table |
|
4649
|
|
|
|
|
|
|
|
|
4650
|
|
|
|
|
|
|
$self->add_literal_to_links_table(label=>$label, |
|
4651
|
|
|
|
|
|
|
pattern=>$pattern, |
|
4652
|
|
|
|
|
|
|
url=>$url, |
|
4653
|
|
|
|
|
|
|
switches=>$switches); |
|
4654
|
|
|
|
|
|
|
|
|
4655
|
|
|
|
|
|
|
Add the given literal "link definition" to the links table. |
|
4656
|
|
|
|
|
|
|
|
|
4657
|
|
|
|
|
|
|
=cut |
|
4658
|
|
|
|
|
|
|
sub add_literal_to_links_table ($%) |
|
4659
|
|
|
|
|
|
|
{ |
|
4660
|
156
|
|
|
156
|
1
|
191
|
my $self = shift; |
|
4661
|
156
|
|
|
|
|
676
|
my %args = ( |
|
4662
|
|
|
|
|
|
|
label => undef, |
|
4663
|
|
|
|
|
|
|
pattern => undef, |
|
4664
|
|
|
|
|
|
|
url => undef, |
|
4665
|
|
|
|
|
|
|
switches => undef, |
|
4666
|
|
|
|
|
|
|
@_ |
|
4667
|
|
|
|
|
|
|
); |
|
4668
|
156
|
|
|
|
|
224
|
my $label = $args{label}; |
|
4669
|
156
|
|
|
|
|
197
|
my $pattern = $args{pattern}; |
|
4670
|
156
|
|
|
|
|
185
|
my $URL = $args{url}; |
|
4671
|
156
|
|
|
|
|
192
|
my $switches = $args{switches}; |
|
4672
|
|
|
|
|
|
|
|
|
4673
|
156
|
|
|
|
|
790
|
$pattern =~ s/(\W)/\\$1/g; # Escape non-alphanumeric chars |
|
4674
|
156
|
|
|
|
|
320
|
$pattern = "\\b$pattern\\b"; # Make a regexp out of it |
|
4675
|
156
|
|
|
|
|
335
|
$self->add_regexp_to_links_table(label=>$label, pattern=>$pattern, url=>$URL, switches=>$switches); |
|
4676
|
|
|
|
|
|
|
} # add_literal_to_links_table |
|
4677
|
|
|
|
|
|
|
|
|
4678
|
|
|
|
|
|
|
=head2 add_glob_to_links_table |
|
4679
|
|
|
|
|
|
|
|
|
4680
|
|
|
|
|
|
|
$self->add_glob_to_links_table(label=>$label, |
|
4681
|
|
|
|
|
|
|
pattern=>$pattern, |
|
4682
|
|
|
|
|
|
|
url=>$url, |
|
4683
|
|
|
|
|
|
|
switches=>$switches); |
|
4684
|
|
|
|
|
|
|
|
|
4685
|
|
|
|
|
|
|
Add the given glob "link definition" to the links table. |
|
4686
|
|
|
|
|
|
|
|
|
4687
|
|
|
|
|
|
|
=cut |
|
4688
|
|
|
|
|
|
|
sub add_glob_to_links_table ($%) |
|
4689
|
|
|
|
|
|
|
{ |
|
4690
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
|
4691
|
0
|
|
|
|
|
0
|
my %args = ( |
|
4692
|
|
|
|
|
|
|
label => undef, |
|
4693
|
|
|
|
|
|
|
pattern => undef, |
|
4694
|
|
|
|
|
|
|
url => undef, |
|
4695
|
|
|
|
|
|
|
switches => undef, |
|
4696
|
|
|
|
|
|
|
@_ |
|
4697
|
|
|
|
|
|
|
); |
|
4698
|
0
|
|
|
|
|
0
|
my $label = $args{label}; |
|
4699
|
0
|
|
|
|
|
0
|
my $pattern = $args{pattern}; |
|
4700
|
0
|
|
|
|
|
0
|
my $URL = $args{url}; |
|
4701
|
0
|
|
|
|
|
0
|
my $switches = $args{switches}; |
|
4702
|
|
|
|
|
|
|
|
|
4703
|
0
|
|
|
|
|
0
|
$self->add_regexp_to_links_table(pattern=>glob2regexp($pattern), |
|
4704
|
|
|
|
|
|
|
label=>$label, |
|
4705
|
|
|
|
|
|
|
url=>$URL, switches=>$switches); |
|
4706
|
|
|
|
|
|
|
} # add_glob_to_links_table |
|
4707
|
|
|
|
|
|
|
|
|
4708
|
|
|
|
|
|
|
=head2 parse_dict |
|
4709
|
|
|
|
|
|
|
|
|
4710
|
|
|
|
|
|
|
$self->parse_dict($dictfile, $dict); |
|
4711
|
|
|
|
|
|
|
|
|
4712
|
|
|
|
|
|
|
Parse the dictionary file. |
|
4713
|
|
|
|
|
|
|
(see also load_dictionary_links, for things that were stripped) |
|
4714
|
|
|
|
|
|
|
|
|
4715
|
|
|
|
|
|
|
=cut |
|
4716
|
|
|
|
|
|
|
sub parse_dict ($$$) |
|
4717
|
|
|
|
|
|
|
{ |
|
4718
|
26
|
|
|
26
|
1
|
96
|
my $self = shift; |
|
4719
|
|
|
|
|
|
|
|
|
4720
|
26
|
|
|
|
|
183
|
my ($dictfile, $dict) = @_; |
|
4721
|
|
|
|
|
|
|
|
|
4722
|
26
|
50
|
|
|
|
108
|
print STDERR "Parsing dictionary file $dictfile\n" |
|
4723
|
|
|
|
|
|
|
if ($DictDebug & 1); |
|
4724
|
|
|
|
|
|
|
|
|
4725
|
26
|
50
|
|
|
|
628
|
if ($dict =~ /->\s*->/) |
|
4726
|
|
|
|
|
|
|
{ |
|
4727
|
0
|
|
|
|
|
0
|
my $message = "Two consecutive '->'s found in $dictfile\n"; |
|
4728
|
0
|
|
|
|
|
0
|
my $near; |
|
4729
|
|
|
|
|
|
|
|
|
4730
|
|
|
|
|
|
|
# Print out any useful context so they can find it. |
|
4731
|
0
|
|
|
|
|
0
|
($near) = $dict =~ /([\S ]*\s*->\s*->\s*\S*)/; |
|
4732
|
0
|
0
|
|
|
|
0
|
$message .= "\n$near\n" if $near =~ /\S/; |
|
4733
|
0
|
|
|
|
|
0
|
die $message; |
|
4734
|
|
|
|
|
|
|
} |
|
4735
|
|
|
|
|
|
|
|
|
4736
|
26
|
|
|
|
|
56
|
my ($key, $URL, $switches, $options); |
|
4737
|
26
|
|
|
|
|
361
|
while ($dict =~ /\s*(.+)\s+\-+([iehos]+\-+)?\>\s*(.*\S+)\s*\n/ig) |
|
4738
|
|
|
|
|
|
|
{ |
|
4739
|
1352
|
|
|
|
|
2798
|
$key = $1; |
|
4740
|
1352
|
|
|
|
|
2025
|
$options = $2; |
|
4741
|
1352
|
100
|
|
|
|
2470
|
$options = "" unless defined($options); |
|
4742
|
1352
|
|
|
|
|
2226
|
$URL = $3; |
|
4743
|
1352
|
|
|
|
|
1402
|
$switches = 0; |
|
4744
|
|
|
|
|
|
|
# Case insensitivity |
|
4745
|
1352
|
100
|
|
|
|
2697
|
$switches += $LINK_NOCASE if $options =~ /i/i; |
|
4746
|
|
|
|
|
|
|
# Evaluate as Perl code |
|
4747
|
1352
|
50
|
|
|
|
2894
|
$switches += $LINK_EVAL if $options =~ /e/i; |
|
4748
|
|
|
|
|
|
|
# provides HTML, not just URL |
|
4749
|
1352
|
100
|
|
|
|
3351
|
$switches += $LINK_HTML if $options =~ /h/i; |
|
4750
|
|
|
|
|
|
|
# Only do this link once |
|
4751
|
1352
|
100
|
|
|
|
2506
|
$switches += $LINK_ONCE if $options =~ /o/i; |
|
4752
|
|
|
|
|
|
|
# Only do this link once per section |
|
4753
|
1352
|
50
|
|
|
|
2431
|
$switches += $LINK_SECT_ONCE if $options =~ /s/i; |
|
4754
|
|
|
|
|
|
|
|
|
4755
|
1352
|
|
|
|
|
10303
|
$key =~ s/\s*$//; # Chop trailing whitespace |
|
4756
|
|
|
|
|
|
|
|
|
4757
|
1352
|
100
|
|
|
|
4635
|
if ($key =~ m|^/|) # Regexp |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
4758
|
|
|
|
|
|
|
{ |
|
4759
|
104
|
|
|
|
|
228
|
$key = substr($key, 1); |
|
4760
|
104
|
|
|
|
|
304
|
$key =~ s|/$||; # Allow them to forget the closing / |
|
4761
|
104
|
|
|
|
|
306
|
$self->add_regexp_to_links_table(pattern=>$key, label=>$key, url=>$URL, switches=>$switches); |
|
4762
|
|
|
|
|
|
|
} |
|
4763
|
|
|
|
|
|
|
elsif ($key =~ /^\|/) # alternate regexp format |
|
4764
|
|
|
|
|
|
|
{ |
|
4765
|
1092
|
|
|
|
|
2179
|
$key = substr($key, 1); |
|
4766
|
1092
|
|
|
|
|
2814
|
$key =~ s/\|$//; # Allow them to forget the closing | |
|
4767
|
1092
|
|
|
|
|
2831
|
$key =~ s|/|\\/|g; # Escape all slashes |
|
4768
|
1092
|
|
|
|
|
2485
|
$self->add_regexp_to_links_table(pattern=>$key, label=>$key, url=>$URL, switches=>$switches); |
|
4769
|
|
|
|
|
|
|
} |
|
4770
|
|
|
|
|
|
|
elsif ($key =~ /\"/) |
|
4771
|
|
|
|
|
|
|
{ |
|
4772
|
156
|
|
|
|
|
266
|
$key = substr($key, 1); |
|
4773
|
156
|
|
|
|
|
453
|
$key =~ s/\"$//; # Allow them to forget the closing " |
|
4774
|
156
|
|
|
|
|
406
|
$self->add_literal_to_links_table(pattern=>$key, label=>$key, url=>$URL, switches=>$switches); |
|
4775
|
|
|
|
|
|
|
} |
|
4776
|
|
|
|
|
|
|
else |
|
4777
|
|
|
|
|
|
|
{ |
|
4778
|
0
|
|
|
|
|
0
|
$self->add_glob_to_links_table(pattern=>$key, label=>$key, url=>$URL, switches=>$switches); |
|
4779
|
|
|
|
|
|
|
} |
|
4780
|
|
|
|
|
|
|
} |
|
4781
|
|
|
|
|
|
|
|
|
4782
|
|
|
|
|
|
|
} # parse_dict |
|
4783
|
|
|
|
|
|
|
|
|
4784
|
|
|
|
|
|
|
=head2 setup_dict_checking |
|
4785
|
|
|
|
|
|
|
|
|
4786
|
|
|
|
|
|
|
$self->setup_dict_checking(); |
|
4787
|
|
|
|
|
|
|
|
|
4788
|
|
|
|
|
|
|
Set up the dictionary checking. |
|
4789
|
|
|
|
|
|
|
|
|
4790
|
|
|
|
|
|
|
=cut |
|
4791
|
|
|
|
|
|
|
sub setup_dict_checking ($) |
|
4792
|
|
|
|
|
|
|
{ |
|
4793
|
26
|
|
|
26
|
1
|
48
|
my $self = shift; |
|
4794
|
|
|
|
|
|
|
|
|
4795
|
|
|
|
|
|
|
# now create the replace funcs and precomile the regexes |
|
4796
|
26
|
|
|
|
|
42
|
my ($URL, $switches, $pattern, $options, $tag1, $tag2); |
|
4797
|
0
|
|
|
|
|
0
|
my ($href, $r_sw); |
|
4798
|
0
|
|
|
|
|
0
|
my @subs; |
|
4799
|
26
|
|
|
|
|
56
|
my $i = 0; |
|
4800
|
26
|
|
|
|
|
39
|
foreach my $label (@{$self->{__links_table_order}}) |
|
|
26
|
|
|
|
|
77
|
|
|
4801
|
|
|
|
|
|
|
{ |
|
4802
|
1352
|
|
|
|
|
3038
|
$switches = $self->{__links_switch_table}->{$label}; |
|
4803
|
1352
|
|
|
|
|
2518
|
$pattern = $self->{__links_table_patterns}->{$label}; |
|
4804
|
|
|
|
|
|
|
|
|
4805
|
1352
|
|
|
|
|
2451
|
$href = $self->{__links_table}->{$label}; |
|
4806
|
|
|
|
|
|
|
|
|
4807
|
1352
|
100
|
|
|
|
2445
|
if (!($switches & $LINK_HTML)) |
|
4808
|
|
|
|
|
|
|
{ |
|
4809
|
572
|
|
|
|
|
1356
|
$href =~ s#/#\\/#g; |
|
4810
|
572
|
100
|
|
|
|
1654
|
$href = ( |
|
4811
|
|
|
|
|
|
|
$self->{lower_case_tags} |
|
4812
|
|
|
|
|
|
|
? join('', '$&<\\/a>') |
|
4813
|
|
|
|
|
|
|
: join('', '$&<\\/A>') |
|
4814
|
|
|
|
|
|
|
); |
|
4815
|
|
|
|
|
|
|
} |
|
4816
|
|
|
|
|
|
|
else |
|
4817
|
|
|
|
|
|
|
{ |
|
4818
|
|
|
|
|
|
|
# change the uppercase tags to lower case |
|
4819
|
780
|
100
|
|
|
|
1614
|
if ($self->{lower_case_tags}) |
|
4820
|
|
|
|
|
|
|
{ |
|
4821
|
750
|
|
|
|
|
5118
|
$href =~ s#()([A-Z]*)(>)#${1}\L${2}${3}#g; |
|
4822
|
750
|
|
|
|
|
2297
|
$href =~ s/(<)([A-Z]*)(>)/${1}\L${2}${3}/g; |
|
4823
|
|
|
|
|
|
|
# and the anchors |
|
4824
|
750
|
|
|
|
|
4418
|
$href =~ s/(<)(A\s*HREF)([^>]*>)/$1\L$2$3/g; |
|
4825
|
|
|
|
|
|
|
} |
|
4826
|
780
|
|
|
|
|
2492
|
$href =~ s#/#\\/#g; |
|
4827
|
|
|
|
|
|
|
} |
|
4828
|
|
|
|
|
|
|
|
|
4829
|
1352
|
|
|
|
|
2117
|
$r_sw = "s"; # Options for replacing |
|
4830
|
1352
|
100
|
|
|
|
2592
|
$r_sw .= "i" if ($switches & $LINK_NOCASE); |
|
4831
|
1352
|
50
|
|
|
|
2183
|
$r_sw .= "e" if ($switches & $LINK_EVAL); |
|
4832
|
|
|
|
|
|
|
|
|
4833
|
|
|
|
|
|
|
# Generate code for replacements. |
|
4834
|
|
|
|
|
|
|
# Create an anonymous subroutine for each replacement, |
|
4835
|
|
|
|
|
|
|
# and store its reference in an array. |
|
4836
|
|
|
|
|
|
|
# We need to do an "eval" to create these because we need to |
|
4837
|
|
|
|
|
|
|
# be able to treat the *contents* of the $href variable |
|
4838
|
|
|
|
|
|
|
# as if it were perl code, because sometimes the $href |
|
4839
|
|
|
|
|
|
|
# contains things which need to be evaluated, such as $& or $1, |
|
4840
|
|
|
|
|
|
|
# not just those cases where we have a "e" switch. |
|
4841
|
1352
|
|
|
|
|
3670
|
my $code = <
|
|
4842
|
|
|
|
|
|
|
\$self->{__repl_code}->[$i] = |
|
4843
|
|
|
|
|
|
|
sub { |
|
4844
|
|
|
|
|
|
|
my \$al = shift; |
|
4845
|
|
|
|
|
|
|
\$al =~ s/$pattern/$href/$r_sw; |
|
4846
|
|
|
|
|
|
|
return \$al; |
|
4847
|
|
|
|
|
|
|
}; |
|
4848
|
|
|
|
|
|
|
EOT |
|
4849
|
1352
|
50
|
|
|
|
2572
|
print STDERR $code if ($DictDebug & 2); |
|
4850
|
1352
|
|
|
|
|
1789
|
push @subs, $code; |
|
4851
|
|
|
|
|
|
|
|
|
4852
|
|
|
|
|
|
|
# compile searching pattern |
|
4853
|
1352
|
100
|
|
|
|
2193
|
if ($switches & $LINK_NOCASE) # i |
|
4854
|
|
|
|
|
|
|
{ |
|
4855
|
26
|
|
|
|
|
184
|
$self->{__search_patterns}->[$i] = qr/$pattern/si; |
|
4856
|
|
|
|
|
|
|
} |
|
4857
|
|
|
|
|
|
|
else |
|
4858
|
|
|
|
|
|
|
{ |
|
4859
|
1326
|
|
|
|
|
98623
|
$self->{__search_patterns}->[$i] = qr/$pattern/s; |
|
4860
|
|
|
|
|
|
|
} |
|
4861
|
1352
|
|
|
|
|
2984
|
$i++; |
|
4862
|
|
|
|
|
|
|
} |
|
4863
|
|
|
|
|
|
|
# now eval the replacements code string |
|
4864
|
26
|
|
|
|
|
643
|
my $codes = join('', @subs); |
|
4865
|
26
|
|
|
|
|
188503
|
eval "$codes"; |
|
4866
|
|
|
|
|
|
|
} # setup_dict_checking |
|
4867
|
|
|
|
|
|
|
|
|
4868
|
|
|
|
|
|
|
=head2 in_link_context |
|
4869
|
|
|
|
|
|
|
|
|
4870
|
|
|
|
|
|
|
if ($self->in_link_context($match, $before)) |
|
4871
|
|
|
|
|
|
|
{ |
|
4872
|
|
|
|
|
|
|
... |
|
4873
|
|
|
|
|
|
|
} |
|
4874
|
|
|
|
|
|
|
|
|
4875
|
|
|
|
|
|
|
Check if we are inside a link (); certain kinds of substitution are |
|
4876
|
|
|
|
|
|
|
not allowed here. |
|
4877
|
|
|
|
|
|
|
|
|
4878
|
|
|
|
|
|
|
=cut |
|
4879
|
|
|
|
|
|
|
sub in_link_context ($$$) |
|
4880
|
|
|
|
|
|
|
{ |
|
4881
|
124
|
|
|
124
|
1
|
171
|
my $self = shift; |
|
4882
|
124
|
|
|
|
|
217
|
my ($match, $before) = @_; |
|
4883
|
124
|
100
|
|
|
|
369
|
return 1 if $match =~ m@?A>@i; # No links allowed inside match |
|
4884
|
|
|
|
|
|
|
|
|
4885
|
112
|
|
|
|
|
124
|
my ($final_open, $final_close); |
|
4886
|
112
|
100
|
|
|
|
260
|
if ($self->{lower_case_tags}) |
|
4887
|
|
|
|
|
|
|
{ |
|
4888
|
6
|
|
|
6
|
|
6424
|
$final_open = rindex($before, "
|
|
|
6
|
|
|
|
|
2838
|
|
|
|
6
|
|
|
|
|
20496
|
|
|
|
92
|
|
|
|
|
472
|
|
|
4889
|
92
|
|
|
|
|
257
|
$final_close = rindex($before, "") - $[; |
|
4890
|
|
|
|
|
|
|
} |
|
4891
|
|
|
|
|
|
|
else |
|
4892
|
|
|
|
|
|
|
{ |
|
4893
|
20
|
|
|
|
|
72
|
$final_open = rindex($before, "
|
|
4894
|
20
|
|
|
|
|
44
|
$final_close = rindex($before, "") - $[; |
|
4895
|
|
|
|
|
|
|
} |
|
4896
|
|
|
|
|
|
|
|
|
4897
|
112
|
100
|
100
|
|
|
601
|
return 1 if ($final_open >= 0) # Link opened |
|
|
|
|
66
|
|
|
|
|
|
4898
|
|
|
|
|
|
|
&& ( |
|
4899
|
|
|
|
|
|
|
($final_close < 0) # and not closed or |
|
4900
|
|
|
|
|
|
|
|| ($final_open > $final_close) |
|
4901
|
|
|
|
|
|
|
); # one opened after last close |
|
4902
|
|
|
|
|
|
|
|
|
4903
|
|
|
|
|
|
|
# Now check to see if we're inside a tag, matching a tag name, |
|
4904
|
|
|
|
|
|
|
# or attribute name or value |
|
4905
|
83
|
|
|
|
|
214
|
$final_open = rindex($before, "<") - $[; |
|
4906
|
83
|
|
|
|
|
207
|
$final_close = rindex($before, ">") - $[; |
|
4907
|
83
|
100
|
33
|
|
|
638
|
($final_open >= 0) # Tag opened |
|
4908
|
|
|
|
|
|
|
&& ( |
|
4909
|
|
|
|
|
|
|
($final_close < 0) # and not closed or |
|
4910
|
|
|
|
|
|
|
|| ($final_open > $final_close) |
|
4911
|
|
|
|
|
|
|
); # one opened after last close |
|
4912
|
|
|
|
|
|
|
} # in_link_context |
|
4913
|
|
|
|
|
|
|
|
|
4914
|
|
|
|
|
|
|
=head2 apply_links |
|
4915
|
|
|
|
|
|
|
|
|
4916
|
|
|
|
|
|
|
$self->apply_links(para_ref=>$para_ref, |
|
4917
|
|
|
|
|
|
|
para_action_ref=>$para_action_ref); |
|
4918
|
|
|
|
|
|
|
|
|
4919
|
|
|
|
|
|
|
Apply links and formatting to this paragraph. |
|
4920
|
|
|
|
|
|
|
|
|
4921
|
|
|
|
|
|
|
=cut |
|
4922
|
|
|
|
|
|
|
sub apply_links ($%) |
|
4923
|
|
|
|
|
|
|
{ |
|
4924
|
440
|
|
|
440
|
1
|
581
|
my $self = shift; |
|
4925
|
440
|
|
|
|
|
1608
|
my %args = ( |
|
4926
|
|
|
|
|
|
|
para_ref => undef, |
|
4927
|
|
|
|
|
|
|
para_action_ref => undef, |
|
4928
|
|
|
|
|
|
|
@_ |
|
4929
|
|
|
|
|
|
|
); |
|
4930
|
440
|
|
|
|
|
737
|
my $para_ref = $args{para_ref}; |
|
4931
|
440
|
|
|
|
|
592
|
my $para_action_ref = $args{para_action_ref}; |
|
4932
|
|
|
|
|
|
|
|
|
4933
|
440
|
50
|
33
|
|
|
1144
|
if ($self->{make_links} |
|
|
440
|
|
|
|
|
1627
|
|
|
4934
|
|
|
|
|
|
|
&& @{$self->{__links_table_order}}) |
|
4935
|
|
|
|
|
|
|
{ |
|
4936
|
440
|
|
|
|
|
1309
|
$self->check_dictionary_links( |
|
4937
|
|
|
|
|
|
|
line_ref => $para_ref, |
|
4938
|
|
|
|
|
|
|
line_action_ref => $para_action_ref |
|
4939
|
|
|
|
|
|
|
); |
|
4940
|
|
|
|
|
|
|
} |
|
4941
|
440
|
100
|
|
|
|
1300
|
if ($self->{bold_delimiter}) |
|
4942
|
|
|
|
|
|
|
{ |
|
4943
|
439
|
100
|
|
|
|
1090
|
my $tag = ($self->{lower_case_tags} ? 'strong' : 'STRONG'); |
|
4944
|
439
|
|
|
|
|
1807
|
$self->do_delim( |
|
4945
|
|
|
|
|
|
|
line_ref => $para_ref, |
|
4946
|
|
|
|
|
|
|
line_action_ref => $para_action_ref, |
|
4947
|
|
|
|
|
|
|
delim => $self->{bold_delimiter}, |
|
4948
|
|
|
|
|
|
|
tag => $tag |
|
4949
|
|
|
|
|
|
|
); |
|
4950
|
|
|
|
|
|
|
} |
|
4951
|
440
|
100
|
|
|
|
1325
|
if ($self->{italic_delimiter}) |
|
4952
|
|
|
|
|
|
|
{ |
|
4953
|
439
|
100
|
|
|
|
989
|
my $tag = ($self->{lower_case_tags} ? 'em' : 'EM'); |
|
4954
|
439
|
|
|
|
|
1276
|
$self->do_delim( |
|
4955
|
|
|
|
|
|
|
line_ref => $para_ref, |
|
4956
|
|
|
|
|
|
|
line_action_ref => $para_action_ref, |
|
4957
|
|
|
|
|
|
|
delim => $self->{italic_delimiter}, |
|
4958
|
|
|
|
|
|
|
tag => $tag |
|
4959
|
|
|
|
|
|
|
); |
|
4960
|
|
|
|
|
|
|
} |
|
4961
|
440
|
50
|
|
|
|
1315
|
if ($self->{underline_delimiter}) |
|
4962
|
|
|
|
|
|
|
{ |
|
4963
|
440
|
100
|
|
|
|
1019
|
my $tag = ($self->{lower_case_tags} ? 'u' : 'U'); |
|
4964
|
440
|
|
|
|
|
1155
|
$self->do_delim( |
|
4965
|
|
|
|
|
|
|
line_ref => $para_ref, |
|
4966
|
|
|
|
|
|
|
line_action_ref => $para_action_ref, |
|
4967
|
|
|
|
|
|
|
delim => $self->{underline_delimiter}, |
|
4968
|
|
|
|
|
|
|
tag => $tag |
|
4969
|
|
|
|
|
|
|
); |
|
4970
|
|
|
|
|
|
|
} |
|
4971
|
|
|
|
|
|
|
|
|
4972
|
|
|
|
|
|
|
} # apply_links |
|
4973
|
|
|
|
|
|
|
|
|
4974
|
|
|
|
|
|
|
=head2 check_dictionary_links |
|
4975
|
|
|
|
|
|
|
|
|
4976
|
|
|
|
|
|
|
$self->check_dictionary_links(line_ref=>$line_ref, |
|
4977
|
|
|
|
|
|
|
line_action_ref=>$line_action_ref); |
|
4978
|
|
|
|
|
|
|
|
|
4979
|
|
|
|
|
|
|
Check (and alter if need be) the bits in this line matching |
|
4980
|
|
|
|
|
|
|
the patterns in the link dictionary. |
|
4981
|
|
|
|
|
|
|
|
|
4982
|
|
|
|
|
|
|
=cut |
|
4983
|
|
|
|
|
|
|
sub check_dictionary_links ($%) |
|
4984
|
|
|
|
|
|
|
{ |
|
4985
|
440
|
|
|
440
|
1
|
604
|
my $self = shift; |
|
4986
|
440
|
|
|
|
|
1477
|
my %args = ( |
|
4987
|
|
|
|
|
|
|
line_ref => undef, |
|
4988
|
|
|
|
|
|
|
line_action_ref => undef, |
|
4989
|
|
|
|
|
|
|
@_ |
|
4990
|
|
|
|
|
|
|
); |
|
4991
|
440
|
|
|
|
|
667
|
my $line_ref = $args{line_ref}; |
|
4992
|
440
|
|
|
|
|
489
|
my $line_action_ref = $args{line_action_ref}; |
|
4993
|
|
|
|
|
|
|
|
|
4994
|
440
|
|
|
|
|
486
|
my ($switches, $pattern, $options, $repl_func); |
|
4995
|
0
|
|
|
|
|
0
|
my ($linkme, $line_with_links); |
|
4996
|
|
|
|
|
|
|
|
|
4997
|
|
|
|
|
|
|
# for each pattern, check and alter the line |
|
4998
|
440
|
|
|
|
|
537
|
my $i = 0; |
|
4999
|
440
|
|
|
|
|
569
|
foreach my $label (@{$self->{__links_table_order}}) |
|
|
440
|
|
|
|
|
1108
|
|
|
5000
|
|
|
|
|
|
|
{ |
|
5001
|
22880
|
|
|
|
|
43330
|
$switches = $self->{__links_switch_table}->{$label}; |
|
5002
|
22880
|
|
|
|
|
45146
|
$pattern = $self->{__links_table_patterns}->{$label}; |
|
5003
|
|
|
|
|
|
|
|
|
5004
|
|
|
|
|
|
|
# check the pattern |
|
5005
|
22880
|
100
|
|
|
|
45573
|
if ($switches & $LINK_ONCE) # Do link only once |
|
|
|
50
|
|
|
|
|
|
|
5006
|
|
|
|
|
|
|
{ |
|
5007
|
2640
|
|
|
|
|
2959
|
$line_with_links = ''; |
|
5008
|
2640
|
100
|
100
|
|
|
6565
|
if (!$self->{__done_with_link}->[$i] |
|
|
1970
|
|
|
|
|
10766
|
|
|
5009
|
|
|
|
|
|
|
&& ${$line_ref} =~ $self->{__search_patterns}->[$i]) |
|
5010
|
|
|
|
|
|
|
{ |
|
5011
|
11
|
|
|
|
|
31
|
$self->{__done_with_link}->[$i] = 1; |
|
5012
|
11
|
|
|
|
|
36
|
$line_with_links .= $`; |
|
5013
|
11
|
|
|
|
|
26
|
$linkme = $&; |
|
5014
|
|
|
|
|
|
|
|
|
5015
|
11
|
|
|
|
|
17
|
${$line_ref} = $'; |
|
|
11
|
|
|
|
|
28
|
|
|
5016
|
11
|
50
|
|
|
|
44
|
if (!$self->in_link_context($linkme, $line_with_links)) |
|
5017
|
|
|
|
|
|
|
{ |
|
5018
|
11
|
50
|
|
|
|
33
|
print STDERR "Link rule $i matches $linkme\n" |
|
5019
|
|
|
|
|
|
|
if ($DictDebug & 4); |
|
5020
|
|
|
|
|
|
|
|
|
5021
|
|
|
|
|
|
|
# call the special subroutine already created to do |
|
5022
|
|
|
|
|
|
|
# this replacement |
|
5023
|
11
|
|
|
|
|
30
|
$repl_func = $self->{__repl_code}->[$i]; |
|
5024
|
11
|
|
|
|
|
402
|
$linkme = &$repl_func($linkme); |
|
5025
|
|
|
|
|
|
|
} |
|
5026
|
11
|
|
|
|
|
28
|
$line_with_links .= $linkme; |
|
5027
|
|
|
|
|
|
|
} |
|
5028
|
2640
|
|
|
|
|
3281
|
${$line_ref} = $line_with_links . ${$line_ref}; |
|
|
2640
|
|
|
|
|
3495
|
|
|
|
2640
|
|
|
|
|
4232
|
|
|
5029
|
|
|
|
|
|
|
} |
|
5030
|
|
|
|
|
|
|
elsif ($switches & $LINK_SECT_ONCE) # Do link only once per section |
|
5031
|
|
|
|
|
|
|
{ |
|
5032
|
0
|
|
|
|
|
0
|
$line_with_links = ''; |
|
5033
|
0
|
0
|
0
|
|
|
0
|
if (!$self->{__done_with_sect_link}->[$i] |
|
|
0
|
|
|
|
|
0
|
|
|
5034
|
|
|
|
|
|
|
&& ${$line_ref} =~ $self->{__search_patterns}->[$i]) |
|
5035
|
|
|
|
|
|
|
{ |
|
5036
|
0
|
|
|
|
|
0
|
$self->{__done_with_sect_link}->[$i] = 1; |
|
5037
|
0
|
|
|
|
|
0
|
$line_with_links .= $`; |
|
5038
|
0
|
|
|
|
|
0
|
$linkme = $&; |
|
5039
|
|
|
|
|
|
|
|
|
5040
|
0
|
|
|
|
|
0
|
${$line_ref} = $'; |
|
|
0
|
|
|
|
|
0
|
|
|
5041
|
0
|
0
|
|
|
|
0
|
if (!$self->in_link_context($linkme, $line_with_links)) |
|
5042
|
|
|
|
|
|
|
{ |
|
5043
|
0
|
0
|
|
|
|
0
|
print STDERR "Link rule $i matches $linkme\n" |
|
5044
|
|
|
|
|
|
|
if ($DictDebug & 4); |
|
5045
|
|
|
|
|
|
|
|
|
5046
|
|
|
|
|
|
|
# call the special subroutine already created to do |
|
5047
|
|
|
|
|
|
|
# this replacement |
|
5048
|
0
|
|
|
|
|
0
|
$repl_func = $self->{__repl_code}->[$i]; |
|
5049
|
0
|
|
|
|
|
0
|
$linkme = &$repl_func($linkme); |
|
5050
|
|
|
|
|
|
|
} |
|
5051
|
0
|
|
|
|
|
0
|
$line_with_links .= $linkme; |
|
5052
|
|
|
|
|
|
|
} |
|
5053
|
0
|
|
|
|
|
0
|
${$line_ref} = $line_with_links . ${$line_ref}; |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
5054
|
|
|
|
|
|
|
} |
|
5055
|
|
|
|
|
|
|
else |
|
5056
|
|
|
|
|
|
|
{ |
|
5057
|
20240
|
|
|
|
|
22075
|
$line_with_links = ''; |
|
5058
|
20240
|
|
|
|
|
20136
|
while (${$line_ref} =~ $self->{__search_patterns}->[$i]) |
|
|
20336
|
|
|
|
|
124473
|
|
|
5059
|
|
|
|
|
|
|
{ |
|
5060
|
96
|
|
|
|
|
244
|
$line_with_links .= $`; |
|
5061
|
96
|
|
|
|
|
193
|
$linkme = $&; |
|
5062
|
|
|
|
|
|
|
|
|
5063
|
96
|
|
|
|
|
111
|
${$line_ref} = $'; |
|
|
96
|
|
|
|
|
214
|
|
|
5064
|
96
|
100
|
|
|
|
263
|
if (!$self->in_link_context($linkme, $line_with_links)) |
|
5065
|
|
|
|
|
|
|
{ |
|
5066
|
60
|
50
|
|
|
|
141
|
print STDERR "Link rule $i matches $linkme\n" |
|
5067
|
|
|
|
|
|
|
if ($DictDebug & 4); |
|
5068
|
|
|
|
|
|
|
|
|
5069
|
|
|
|
|
|
|
# call the special subroutine already created to do |
|
5070
|
|
|
|
|
|
|
# this replacement |
|
5071
|
60
|
|
|
|
|
113
|
$repl_func = $self->{__repl_code}->[$i]; |
|
5072
|
60
|
|
|
|
|
1898
|
$linkme = &$repl_func($linkme); |
|
5073
|
|
|
|
|
|
|
} |
|
5074
|
96
|
|
|
|
|
183
|
$line_with_links .= $linkme; |
|
5075
|
|
|
|
|
|
|
} |
|
5076
|
20240
|
|
|
|
|
23834
|
${$line_ref} = $line_with_links . ${$line_ref}; |
|
|
20240
|
|
|
|
|
27410
|
|
|
|
20240
|
|
|
|
|
40542
|
|
|
5077
|
|
|
|
|
|
|
} |
|
5078
|
22880
|
|
|
|
|
32318
|
$i++; |
|
5079
|
|
|
|
|
|
|
} |
|
5080
|
440
|
|
|
|
|
630
|
${$line_action_ref} |= $LINK; |
|
|
440
|
|
|
|
|
1490
|
|
|
5081
|
|
|
|
|
|
|
} # check_dictionary_links |
|
5082
|
|
|
|
|
|
|
|
|
5083
|
|
|
|
|
|
|
=head2 load_dictionary_links |
|
5084
|
|
|
|
|
|
|
|
|
5085
|
|
|
|
|
|
|
$self->load_dictionary_links(); |
|
5086
|
|
|
|
|
|
|
|
|
5087
|
|
|
|
|
|
|
Load the dictionary links. |
|
5088
|
|
|
|
|
|
|
|
|
5089
|
|
|
|
|
|
|
=cut |
|
5090
|
|
|
|
|
|
|
sub load_dictionary_links ($) |
|
5091
|
|
|
|
|
|
|
{ |
|
5092
|
26
|
|
|
26
|
1
|
45
|
my $self = shift; |
|
5093
|
|
|
|
|
|
|
|
|
5094
|
26
|
|
|
|
|
52
|
@{$self->{__links_table_order}} = (); |
|
|
26
|
|
|
|
|
67
|
|
|
5095
|
26
|
|
|
|
|
42
|
%{$self->{__links_table}} = (); |
|
|
26
|
|
|
|
|
62
|
|
|
5096
|
|
|
|
|
|
|
|
|
5097
|
26
|
|
|
|
|
76
|
my $dict; |
|
5098
|
26
|
|
|
|
|
45
|
foreach $dict (@{$self->{links_dictionaries}}) |
|
|
26
|
|
|
|
|
86
|
|
|
5099
|
|
|
|
|
|
|
{ |
|
5100
|
0
|
0
|
|
|
|
0
|
next unless $dict; |
|
5101
|
0
|
0
|
|
|
|
0
|
open(DICT, "$dict") || die "Can't open Dictionary file $dict\n"; |
|
5102
|
|
|
|
|
|
|
|
|
5103
|
0
|
|
|
|
|
0
|
my @lines = (); |
|
5104
|
0
|
|
|
|
|
0
|
while () |
|
5105
|
|
|
|
|
|
|
{ |
|
5106
|
|
|
|
|
|
|
# skip lines that start with '#' |
|
5107
|
0
|
0
|
|
|
|
0
|
next if /^\#/; |
|
5108
|
|
|
|
|
|
|
# skip lines that end with unescaped ':' |
|
5109
|
0
|
0
|
|
|
|
0
|
next if /^.*[^\\]:\s*$/; |
|
5110
|
0
|
|
|
|
|
0
|
push @lines, $_; |
|
5111
|
|
|
|
|
|
|
} |
|
5112
|
0
|
|
|
|
|
0
|
close(DICT); |
|
5113
|
0
|
|
|
|
|
0
|
my $contents = join('', @lines); |
|
5114
|
0
|
|
|
|
|
0
|
$self->parse_dict($dict, $contents); |
|
5115
|
|
|
|
|
|
|
} |
|
5116
|
|
|
|
|
|
|
# last of all, do the system dictionary, already read in from DATA |
|
5117
|
26
|
50
|
|
|
|
89
|
if ($self->{__global_links_data}) |
|
5118
|
|
|
|
|
|
|
{ |
|
5119
|
26
|
|
|
|
|
125
|
$self->parse_dict("DATA", $self->{__global_links_data}); |
|
5120
|
|
|
|
|
|
|
} |
|
5121
|
|
|
|
|
|
|
|
|
5122
|
26
|
|
|
|
|
93
|
$self->setup_dict_checking(); |
|
5123
|
|
|
|
|
|
|
} # load_dictionary_links |
|
5124
|
|
|
|
|
|
|
|
|
5125
|
|
|
|
|
|
|
=head2 do_file_start |
|
5126
|
|
|
|
|
|
|
|
|
5127
|
|
|
|
|
|
|
$self->do_file_start($outhandle, $para); |
|
5128
|
|
|
|
|
|
|
|
|
5129
|
|
|
|
|
|
|
Extra stuff needed for the beginning: |
|
5130
|
|
|
|
|
|
|
HTML headers, and prepending a file if desired. |
|
5131
|
|
|
|
|
|
|
|
|
5132
|
|
|
|
|
|
|
=cut |
|
5133
|
|
|
|
|
|
|
sub do_file_start ($$$) |
|
5134
|
|
|
|
|
|
|
{ |
|
5135
|
34
|
|
|
34
|
1
|
94
|
my $self = shift; |
|
5136
|
34
|
|
|
|
|
58
|
my $outhandle = shift; |
|
5137
|
34
|
|
|
|
|
125
|
my $para = shift; |
|
5138
|
|
|
|
|
|
|
|
|
5139
|
34
|
100
|
|
|
|
159
|
if (!$self->{extract}) |
|
5140
|
|
|
|
|
|
|
{ |
|
5141
|
13
|
|
|
|
|
269
|
my @para_lines = split(/\n/, $para); |
|
5142
|
13
|
|
|
|
|
55
|
my $first_line = $para_lines[0]; |
|
5143
|
|
|
|
|
|
|
|
|
5144
|
13
|
50
|
|
|
|
68
|
if ($self->{doctype}) |
|
5145
|
|
|
|
|
|
|
{ |
|
5146
|
13
|
100
|
|
|
|
57
|
if ($self->{xhtml}) |
|
5147
|
|
|
|
|
|
|
{ |
|
5148
|
12
|
|
|
|
|
76
|
print $outhandle |
|
5149
|
|
|
|
|
|
|
'', |
|
5153
|
|
|
|
|
|
|
"\n"; |
|
5154
|
12
|
|
|
|
|
70
|
print $outhandle $self->get_tag('html', |
|
5155
|
|
|
|
|
|
|
inside_tag => ' xmlns="http://www.w3.org/1999/xhtml"'), "\n"; |
|
5156
|
|
|
|
|
|
|
} |
|
5157
|
|
|
|
|
|
|
else |
|
5158
|
|
|
|
|
|
|
{ |
|
5159
|
1
|
|
|
|
|
5
|
print $outhandle '
5160
|
|
|
|
|
|
|
"\">\n"; |
|
5161
|
1
|
|
|
|
|
6
|
print $outhandle $self->get_tag('html'), "\n"; |
|
5162
|
|
|
|
|
|
|
} |
|
5163
|
|
|
|
|
|
|
} |
|
5164
|
13
|
|
|
|
|
74
|
print $outhandle $self->get_tag('head'), "\n"; |
|
5165
|
|
|
|
|
|
|
|
|
5166
|
|
|
|
|
|
|
# if --titlefirst is set and --title isn't, use the first line |
|
5167
|
|
|
|
|
|
|
# as the title. |
|
5168
|
13
|
100
|
66
|
|
|
84
|
if ($self->{titlefirst} && !$self->{title}) |
|
5169
|
|
|
|
|
|
|
{ |
|
5170
|
2
|
|
|
|
|
11
|
my ($tit) = $first_line =~ /^ *(.*)/; # grab first line |
|
5171
|
2
|
|
|
|
|
21
|
$tit =~ s/ *$//; # strip trailing whitespace |
|
5172
|
2
|
50
|
|
|
|
14
|
$tit = escape($tit) if $self->{escape_HTML_chars}; |
|
5173
|
2
|
|
|
|
|
5
|
$self->{'title'} = $tit; |
|
5174
|
|
|
|
|
|
|
} |
|
5175
|
13
|
100
|
|
|
|
56
|
if (!$self->{title}) |
|
5176
|
|
|
|
|
|
|
{ |
|
5177
|
11
|
|
|
|
|
25
|
$self->{'title'} = ""; |
|
5178
|
|
|
|
|
|
|
} |
|
5179
|
13
|
|
|
|
|
43
|
print $outhandle $self->get_tag('title'), $self->{title}, |
|
5180
|
|
|
|
|
|
|
$self->close_tag('title'), "\n"; |
|
5181
|
|
|
|
|
|
|
|
|
5182
|
13
|
50
|
|
|
|
56
|
if ($self->{append_head}) |
|
5183
|
|
|
|
|
|
|
{ |
|
5184
|
0
|
0
|
|
|
|
0
|
open(APPEND, $self->{append_head}) |
|
5185
|
|
|
|
|
|
|
|| die "Failed to open ", $self->{append_head}, "\n"; |
|
5186
|
0
|
|
|
|
|
0
|
while () |
|
5187
|
|
|
|
|
|
|
{ |
|
5188
|
0
|
|
|
|
|
0
|
print $outhandle $_; |
|
5189
|
|
|
|
|
|
|
} |
|
5190
|
0
|
|
|
|
|
0
|
close(APPEND); |
|
5191
|
|
|
|
|
|
|
} |
|
5192
|
|
|
|
|
|
|
|
|
5193
|
13
|
100
|
|
|
|
40
|
if ($self->{lower_case_tags}) |
|
5194
|
|
|
|
|
|
|
{ |
|
5195
|
12
|
|
|
|
|
75
|
print $outhandle $self->get_tag( |
|
5196
|
|
|
|
|
|
|
'meta', |
|
5197
|
|
|
|
|
|
|
tag_type => TAG_EMPTY, |
|
5198
|
|
|
|
|
|
|
inside_tag => " name=\"generator\" content=\"$PROG v$HTML::TextToHTML::VERSION\"" |
|
5199
|
|
|
|
|
|
|
), |
|
5200
|
|
|
|
|
|
|
"\n"; |
|
5201
|
|
|
|
|
|
|
} |
|
5202
|
|
|
|
|
|
|
else |
|
5203
|
|
|
|
|
|
|
{ |
|
5204
|
1
|
|
|
|
|
5
|
print $outhandle $self->get_tag( |
|
5205
|
|
|
|
|
|
|
'meta', |
|
5206
|
|
|
|
|
|
|
tag_type => TAG_EMPTY, |
|
5207
|
|
|
|
|
|
|
inside_tag => " NAME=\"generator\" CONTENT=\"$PROG v$HTML::TextToHTML::VERSION\"" |
|
5208
|
|
|
|
|
|
|
), |
|
5209
|
|
|
|
|
|
|
"\n"; |
|
5210
|
|
|
|
|
|
|
} |
|
5211
|
13
|
50
|
|
|
|
58
|
if ($self->{style_url}) |
|
5212
|
|
|
|
|
|
|
{ |
|
5213
|
0
|
|
|
|
|
0
|
my $style_url = $self->{style_url}; |
|
5214
|
0
|
0
|
|
|
|
0
|
if ($self->{lower_case_tags}) |
|
5215
|
|
|
|
|
|
|
{ |
|
5216
|
0
|
|
|
|
|
0
|
print $outhandle $self->get_tag( |
|
5217
|
|
|
|
|
|
|
'link', |
|
5218
|
|
|
|
|
|
|
tag_type => TAG_EMPTY, |
|
5219
|
|
|
|
|
|
|
inside_tag => |
|
5220
|
|
|
|
|
|
|
" rel=\"stylesheet\" type=\"text/css\" href=\"$style_url\"" |
|
5221
|
|
|
|
|
|
|
), |
|
5222
|
|
|
|
|
|
|
"\n"; |
|
5223
|
|
|
|
|
|
|
} |
|
5224
|
|
|
|
|
|
|
else |
|
5225
|
|
|
|
|
|
|
{ |
|
5226
|
0
|
|
|
|
|
0
|
print $outhandle $self->get_tag( |
|
5227
|
|
|
|
|
|
|
'link', |
|
5228
|
|
|
|
|
|
|
tag_type => TAG_EMPTY, |
|
5229
|
|
|
|
|
|
|
inside_tag => |
|
5230
|
|
|
|
|
|
|
" REL=\"stylesheet\" TYPE=\"text/css\" HREF=\"$style_url\"" |
|
5231
|
|
|
|
|
|
|
), |
|
5232
|
|
|
|
|
|
|
"\n"; |
|
5233
|
|
|
|
|
|
|
} |
|
5234
|
|
|
|
|
|
|
} |
|
5235
|
13
|
|
|
|
|
45
|
print $outhandle $self->close_tag('head'), "\n"; |
|
5236
|
13
|
50
|
|
|
|
55
|
if ($self->{body_deco}) |
|
5237
|
|
|
|
|
|
|
{ |
|
5238
|
0
|
|
|
|
|
0
|
print $outhandle $self->get_tag('body', |
|
5239
|
|
|
|
|
|
|
inside_tag => $self->{body_deco}), "\n"; |
|
5240
|
|
|
|
|
|
|
} |
|
5241
|
|
|
|
|
|
|
else |
|
5242
|
|
|
|
|
|
|
{ |
|
5243
|
13
|
|
|
|
|
41
|
print $outhandle $self->get_tag('body'), "\n"; |
|
5244
|
|
|
|
|
|
|
} |
|
5245
|
|
|
|
|
|
|
} |
|
5246
|
|
|
|
|
|
|
|
|
5247
|
34
|
50
|
|
|
|
167
|
if ($self->{prepend_file}) |
|
5248
|
|
|
|
|
|
|
{ |
|
5249
|
0
|
0
|
|
|
|
0
|
if (-r $self->{prepend_file}) |
|
5250
|
|
|
|
|
|
|
{ |
|
5251
|
0
|
|
|
|
|
0
|
open(PREPEND, $self->{prepend_file}); |
|
5252
|
0
|
|
|
|
|
0
|
while () |
|
5253
|
|
|
|
|
|
|
{ |
|
5254
|
0
|
|
|
|
|
0
|
print $outhandle $_; |
|
5255
|
|
|
|
|
|
|
} |
|
5256
|
0
|
|
|
|
|
0
|
close(PREPEND); |
|
5257
|
|
|
|
|
|
|
} |
|
5258
|
|
|
|
|
|
|
else |
|
5259
|
|
|
|
|
|
|
{ |
|
5260
|
0
|
|
|
|
|
0
|
print STDERR "Can't find or read file ", $self->{prepend_file}, |
|
5261
|
|
|
|
|
|
|
" to prepend.\n"; |
|
5262
|
|
|
|
|
|
|
} |
|
5263
|
|
|
|
|
|
|
} |
|
5264
|
|
|
|
|
|
|
} # do_file_start |
|
5265
|
|
|
|
|
|
|
|
|
5266
|
|
|
|
|
|
|
=head2 do_init_call |
|
5267
|
|
|
|
|
|
|
|
|
5268
|
|
|
|
|
|
|
$self->do_init_call(); |
|
5269
|
|
|
|
|
|
|
|
|
5270
|
|
|
|
|
|
|
Certain things, like reading link dictionaries, need to be done only |
|
5271
|
|
|
|
|
|
|
once. |
|
5272
|
|
|
|
|
|
|
|
|
5273
|
|
|
|
|
|
|
=cut |
|
5274
|
|
|
|
|
|
|
sub do_init_call ($) |
|
5275
|
|
|
|
|
|
|
{ |
|
5276
|
478
|
|
|
478
|
1
|
777
|
my $self = shift; |
|
5277
|
|
|
|
|
|
|
|
|
5278
|
478
|
100
|
|
|
|
1768
|
if (!$self->{__call_init_done}) |
|
5279
|
|
|
|
|
|
|
{ |
|
5280
|
26
|
50
|
33
|
|
|
403
|
push(@{$self->{links_dictionaries}}, ($self->{default_link_dict})) |
|
|
0
|
|
|
|
|
0
|
|
|
5281
|
|
|
|
|
|
|
if ($self->{make_links} && (-f $self->{default_link_dict})); |
|
5282
|
26
|
50
|
|
|
|
81
|
if ($self->{links_dictionaries}) |
|
5283
|
|
|
|
|
|
|
{ |
|
5284
|
|
|
|
|
|
|
# only put into the links dictionaries files which are readable |
|
5285
|
26
|
|
|
|
|
47
|
my @dict_files = @{$self->{links_dictionaries}}; |
|
|
26
|
|
|
|
|
76
|
|
|
5286
|
26
|
|
|
|
|
101
|
$self->args(links_dictionaries => []); |
|
5287
|
|
|
|
|
|
|
|
|
5288
|
26
|
|
|
|
|
86
|
foreach my $ld (@dict_files) |
|
5289
|
|
|
|
|
|
|
{ |
|
5290
|
0
|
0
|
|
|
|
0
|
if (-r $ld) |
|
5291
|
|
|
|
|
|
|
{ |
|
5292
|
0
|
|
|
|
|
0
|
$self->{'make_links'} = 1; |
|
5293
|
0
|
|
|
|
|
0
|
$self->args(['--links_dictionaries', $ld]); |
|
5294
|
|
|
|
|
|
|
} |
|
5295
|
|
|
|
|
|
|
else |
|
5296
|
|
|
|
|
|
|
{ |
|
5297
|
0
|
|
|
|
|
0
|
print STDERR "Can't find or read link-file $ld\n"; |
|
5298
|
|
|
|
|
|
|
} |
|
5299
|
|
|
|
|
|
|
} |
|
5300
|
|
|
|
|
|
|
} |
|
5301
|
26
|
50
|
|
|
|
82
|
if ($self->{make_links}) |
|
5302
|
|
|
|
|
|
|
{ |
|
5303
|
26
|
|
|
|
|
105
|
$self->load_dictionary_links(); |
|
5304
|
|
|
|
|
|
|
} |
|
5305
|
|
|
|
|
|
|
|
|
5306
|
|
|
|
|
|
|
# various initializations |
|
5307
|
26
|
|
|
|
|
100
|
$self->{__non_header_anchor} = 0; |
|
5308
|
26
|
|
|
|
|
96
|
$self->{__mode} = 0; |
|
5309
|
26
|
|
|
|
|
75
|
$self->{__listnum} = 0; |
|
5310
|
26
|
|
|
|
|
75
|
$self->{__list_nice_indent} = ''; |
|
5311
|
26
|
|
|
|
|
93
|
$self->{__list_indent} = []; |
|
5312
|
26
|
|
|
|
|
94
|
$self->{__tags} = []; |
|
5313
|
|
|
|
|
|
|
|
|
5314
|
26
|
|
|
|
|
91
|
$self->{__call_init_done} = 1; |
|
5315
|
|
|
|
|
|
|
} |
|
5316
|
|
|
|
|
|
|
} # do_init_call |
|
5317
|
|
|
|
|
|
|
|
|
5318
|
|
|
|
|
|
|
=head1 FILE FORMATS |
|
5319
|
|
|
|
|
|
|
|
|
5320
|
|
|
|
|
|
|
There are two files which are used which can affect the outcome of the |
|
5321
|
|
|
|
|
|
|
conversion. One is the link dictionary, which contains patterns (of how |
|
5322
|
|
|
|
|
|
|
to recognise http links and other things) and how to convert them. The |
|
5323
|
|
|
|
|
|
|
other is, naturally, the format of the input file itself. |
|
5324
|
|
|
|
|
|
|
|
|
5325
|
|
|
|
|
|
|
=head2 Link Dictionary |
|
5326
|
|
|
|
|
|
|
|
|
5327
|
|
|
|
|
|
|
A link dictionary file contains patterns to match, and what to convert |
|
5328
|
|
|
|
|
|
|
them to. It is called a "link" dictionary because it was intended to be |
|
5329
|
|
|
|
|
|
|
something which defined what a href link was, but it can be used for |
|
5330
|
|
|
|
|
|
|
more than that. However, if you wish to define your own links, it is |
|
5331
|
|
|
|
|
|
|
strongly advised to read up on regular expressions (regexes) because |
|
5332
|
|
|
|
|
|
|
this relies heavily on them. |
|
5333
|
|
|
|
|
|
|
|
|
5334
|
|
|
|
|
|
|
The file consists of comments (which are lines starting with #) |
|
5335
|
|
|
|
|
|
|
and blank lines, and link entries. |
|
5336
|
|
|
|
|
|
|
Each entry consists of a regular expression, a -> separator (with |
|
5337
|
|
|
|
|
|
|
optional flags), and a link "result". |
|
5338
|
|
|
|
|
|
|
|
|
5339
|
|
|
|
|
|
|
In the simplest case, with no flags, the regular expression |
|
5340
|
|
|
|
|
|
|
defines the pattern to look for, and the result says what part |
|
5341
|
|
|
|
|
|
|
of the regular expression is the actual link, and the link which |
|
5342
|
|
|
|
|
|
|
is generated has the href as the link, and the whole matched pattern |
|
5343
|
|
|
|
|
|
|
as the visible part of the link. The first character of the regular |
|
5344
|
|
|
|
|
|
|
expression is taken to be the separator for the regex, so one |
|
5345
|
|
|
|
|
|
|
could either use the traditional / separator, or something else |
|
5346
|
|
|
|
|
|
|
such as | (which can be helpful with URLs which are full of / characters). |
|
5347
|
|
|
|
|
|
|
|
|
5348
|
|
|
|
|
|
|
So, for example, an ftp URL might be defined as: |
|
5349
|
|
|
|
|
|
|
|
|
5350
|
|
|
|
|
|
|
|ftp:[\w/\.:+\-]+| -> $& |
|
5351
|
|
|
|
|
|
|
|
|
5352
|
|
|
|
|
|
|
This takes the whole pattern as the href, and the resultant link |
|
5353
|
|
|
|
|
|
|
has the same thing in the href as in the contents of the anchor. |
|
5354
|
|
|
|
|
|
|
|
|
5355
|
|
|
|
|
|
|
But sometimes the href isn't the whole pattern. |
|
5356
|
|
|
|
|
|
|
|
|
5357
|
|
|
|
|
|
|
/<URL:\s*(\S+?)\s*>/ --> $1 |
|
5358
|
|
|
|
|
|
|
|
|
5359
|
|
|
|
|
|
|
With the above regex, a () grouping marks the first subexpression, |
|
5360
|
|
|
|
|
|
|
which is represented as $1 (rather than $& the whole expression). |
|
5361
|
|
|
|
|
|
|
This entry matches a URL which was marked explicity as a URL |
|
5362
|
|
|
|
|
|
|
with the pattern (note the < is shown as the |
|
5363
|
|
|
|
|
|
|
entity, not the actual character. This is because by the |
|
5364
|
|
|
|
|
|
|
time the links dictionary is checked, all such things have |
|
5365
|
|
|
|
|
|
|
already been converted to their HTML entity forms, unless, of course, |
|
5366
|
|
|
|
|
|
|
the escape_HTML_chars option was turned off) |
|
5367
|
|
|
|
|
|
|
This would give us a link in the form |
|
5368
|
|
|
|
|
|
|
<URL:foo> |
|
5369
|
|
|
|
|
|
|
|
|
5370
|
|
|
|
|
|
|
B |
|
5371
|
|
|
|
|
|
|
|
|
5372
|
|
|
|
|
|
|
However, if we want more control over the way the link is constructed, |
|
5373
|
|
|
|
|
|
|
we can construct it ourself. If one gives the h flag, then the |
|
5374
|
|
|
|
|
|
|
"result" part of the entry is taken not to contain the href part of |
|
5375
|
|
|
|
|
|
|
the link, but the whole link. |
|
5376
|
|
|
|
|
|
|
|
|
5377
|
|
|
|
|
|
|
For example, the entry: |
|
5378
|
|
|
|
|
|
|
|
|
5379
|
|
|
|
|
|
|
/<URL:\s*(\S+?)\s*>/ -h-> $1 |
|
5380
|
|
|
|
|
|
|
|
|
5381
|
|
|
|
|
|
|
will take and give us foo |
|
5382
|
|
|
|
|
|
|
|
|
5383
|
|
|
|
|
|
|
However, this is a very powerful mechanism, because it |
|
5384
|
|
|
|
|
|
|
can be used to construct custom tags which aren't links at all. |
|
5385
|
|
|
|
|
|
|
For example, to flag *italicised words* the following |
|
5386
|
|
|
|
|
|
|
entry will surround the words with EM tags. |
|
5387
|
|
|
|
|
|
|
|
|
5388
|
|
|
|
|
|
|
/\B\*([a-z][a-z -]*[a-z])\*\B/ -hi-> $1 |
|
5389
|
|
|
|
|
|
|
|
|
5390
|
|
|
|
|
|
|
B |
|
5391
|
|
|
|
|
|
|
|
|
5392
|
|
|
|
|
|
|
This turns on ignore case in the pattern matching. |
|
5393
|
|
|
|
|
|
|
|
|
5394
|
|
|
|
|
|
|
B |
|
5395
|
|
|
|
|
|
|
|
|
5396
|
|
|
|
|
|
|
This turns on execute in the pattern substitution. This really |
|
5397
|
|
|
|
|
|
|
only makes sense if h is turned on too. In that case, the "result" |
|
5398
|
|
|
|
|
|
|
part of the entry is taken as perl code to be executed, and the |
|
5399
|
|
|
|
|
|
|
result of that code is what replaces the pattern. |
|
5400
|
|
|
|
|
|
|
|
|
5401
|
|
|
|
|
|
|
B |
|
5402
|
|
|
|
|
|
|
|
|
5403
|
|
|
|
|
|
|
This marks the entry as a once-only link. This will convert the |
|
5404
|
|
|
|
|
|
|
first instance of a matching pattern, and ignore any others |
|
5405
|
|
|
|
|
|
|
further on. |
|
5406
|
|
|
|
|
|
|
|
|
5407
|
|
|
|
|
|
|
For example, the following pattern will take the first mention |
|
5408
|
|
|
|
|
|
|
of HTML::TextToHTML and convert it to a link to the module's home page. |
|
5409
|
|
|
|
|
|
|
|
|
5410
|
|
|
|
|
|
|
"HTML::TextToHTML" -io-> http://www.katspace.com/tools/text_to_html/ |
|
5411
|
|
|
|
|
|
|
|
|
5412
|
|
|
|
|
|
|
=head2 Input File Format |
|
5413
|
|
|
|
|
|
|
|
|
5414
|
|
|
|
|
|
|
For the most part, this module tries to use intuitive conventions for |
|
5415
|
|
|
|
|
|
|
determining the structure of the text input. Unordered lists are |
|
5416
|
|
|
|
|
|
|
marked by bullets; ordered lists are marked by numbers or letters; |
|
5417
|
|
|
|
|
|
|
in either case, an increase in indentation marks a sub-list contained |
|
5418
|
|
|
|
|
|
|
in the outer list. |
|
5419
|
|
|
|
|
|
|
|
|
5420
|
|
|
|
|
|
|
Headers (apart from custom headers) are distinguished by "underlines" |
|
5421
|
|
|
|
|
|
|
underneath them; headers in all-capitals are distinguished from |
|
5422
|
|
|
|
|
|
|
those in mixed case. All headers, both normal and custom headers, |
|
5423
|
|
|
|
|
|
|
are expected to start at the first line in a "paragraph". |
|
5424
|
|
|
|
|
|
|
|
|
5425
|
|
|
|
|
|
|
In other words, the following is a header: |
|
5426
|
|
|
|
|
|
|
|
|
5427
|
|
|
|
|
|
|
I am Head Man |
|
5428
|
|
|
|
|
|
|
------------- |
|
5429
|
|
|
|
|
|
|
|
|
5430
|
|
|
|
|
|
|
But the following does not have a header: |
|
5431
|
|
|
|
|
|
|
|
|
5432
|
|
|
|
|
|
|
I am not a head Man, man |
|
5433
|
|
|
|
|
|
|
I am Head Man |
|
5434
|
|
|
|
|
|
|
------------- |
|
5435
|
|
|
|
|
|
|
|
|
5436
|
|
|
|
|
|
|
Tables require a more rigid convention. A table must be marked as a |
|
5437
|
|
|
|
|
|
|
separate paragraph, that is, it must be surrounded by blank lines. |
|
5438
|
|
|
|
|
|
|
Tables come in different types. For a table to be parsed, its |
|
5439
|
|
|
|
|
|
|
--table_type option must be on, and the --make_tables option must be true. |
|
5440
|
|
|
|
|
|
|
|
|
5441
|
|
|
|
|
|
|
B |
|
5442
|
|
|
|
|
|
|
|
|
5443
|
|
|
|
|
|
|
Columns must be separated by two or more spaces (this prevents |
|
5444
|
|
|
|
|
|
|
accidental incorrect recognition of a paragraph where interword spaces |
|
5445
|
|
|
|
|
|
|
happen to line up). If there are two or more rows in a paragraph and |
|
5446
|
|
|
|
|
|
|
all rows share the same set of (two or more) columns, the paragraph is |
|
5447
|
|
|
|
|
|
|
assumed to be a table. For example |
|
5448
|
|
|
|
|
|
|
|
|
5449
|
|
|
|
|
|
|
-e File exists. |
|
5450
|
|
|
|
|
|
|
-z File has zero size. |
|
5451
|
|
|
|
|
|
|
-s File has nonzero size (returns size). |
|
5452
|
|
|
|
|
|
|
|
|
5453
|
|
|
|
|
|
|
becomes |
|
5454
|
|
|
|
|
|
|
|
|
5455
|
|
|
|
|
|
|
|
5456
|
|
|
|
|
|
|
| | -e | File exists. |
|
5457
|
|
|
|
|
|
|
| | -z | File has zero size. |
|
5458
|
|
|
|
|
|
|
| | -s | File has nonzero size (returns size). |
|
5459
|
|
|
|
|
|
|
| |
|
5460
|
|
|
|
|
|
|
|
|
5461
|
|
|
|
|
|
|
This guesses for each column whether it is intended to be left, |
|
5462
|
|
|
|
|
|
|
centre or right aligned. |
|
5463
|
|
|
|
|
|
|
|
|
5464
|
|
|
|
|
|
|
B |
|
5465
|
|
|
|
|
|
|
|
|
5466
|
|
|
|
|
|
|
This table type has nice borders around it, and will be rendered |
|
5467
|
|
|
|
|
|
|
with a border, like so: |
|
5468
|
|
|
|
|
|
|
|
|
5469
|
|
|
|
|
|
|
+---------+---------+ |
|
5470
|
|
|
|
|
|
|
| Column1 | Column2 | |
|
5471
|
|
|
|
|
|
|
+---------+---------+ |
|
5472
|
|
|
|
|
|
|
| val1 | val2 | |
|
5473
|
|
|
|
|
|
|
| val3 | val3 | |
|
5474
|
|
|
|
|
|
|
+---------+---------+ |
|
5475
|
|
|
|
|
|
|
|
|
5476
|
|
|
|
|
|
|
The above becomes |
|
5477
|
|
|
|
|
|
|
|
|
5478
|
|
|
|
|
|
|
|
|
5485
|
|
|
|
|
|
|
|
|
5486
|
|
|
|
|
|
|
It can also have an optional caption at the start. |
|
5487
|
|
|
|
|
|
|
|
|
5488
|
|
|
|
|
|
|
My Caption |
|
5489
|
|
|
|
|
|
|
+---------+---------+ |
|
5490
|
|
|
|
|
|
|
| Column1 | Column2 | |
|
5491
|
|
|
|
|
|
|
+---------+---------+ |
|
5492
|
|
|
|
|
|
|
| val1 | val2 | |
|
5493
|
|
|
|
|
|
|
| val3 | val3 | |
|
5494
|
|
|
|
|
|
|
+---------+---------+ |
|
5495
|
|
|
|
|
|
|
|
|
5496
|
|
|
|
|
|
|
B |
|
5497
|
|
|
|
|
|
|
|
|
5498
|
|
|
|
|
|
|
This format of table is what one gets from the output of a Postgresql |
|
5499
|
|
|
|
|
|
|
query. |
|
5500
|
|
|
|
|
|
|
|
|
5501
|
|
|
|
|
|
|
Column1 | Column2 |
|
5502
|
|
|
|
|
|
|
---------+--------- |
|
5503
|
|
|
|
|
|
|
val1 | val2 |
|
5504
|
|
|
|
|
|
|
val3 | val3 |
|
5505
|
|
|
|
|
|
|
(2 rows) |
|
5506
|
|
|
|
|
|
|
|
|
5507
|
|
|
|
|
|
|
This can also have an optional caption at the start. |
|
5508
|
|
|
|
|
|
|
This table is also rendered with a border and table-headers like |
|
5509
|
|
|
|
|
|
|
the BORDER type. |
|
5510
|
|
|
|
|
|
|
|
|
5511
|
|
|
|
|
|
|
B |
|
5512
|
|
|
|
|
|
|
|
|
5513
|
|
|
|
|
|
|
This table type is delimited by non-alphanumeric characters, and has to |
|
5514
|
|
|
|
|
|
|
have at least two rows and two columns before it's recognised as a table. |
|
5515
|
|
|
|
|
|
|
|
|
5516
|
|
|
|
|
|
|
This one is delimited by the '| character: |
|
5517
|
|
|
|
|
|
|
|
|
5518
|
|
|
|
|
|
|
| val1 | val2 | |
|
5519
|
|
|
|
|
|
|
| val3 | val3 | |
|
5520
|
|
|
|
|
|
|
|
|
5521
|
|
|
|
|
|
|
But one can use almost any suitable character such as : # $ % + and so on. |
|
5522
|
|
|
|
|
|
|
This is clever enough to figure out what you are using as the delimiter |
|
5523
|
|
|
|
|
|
|
if you have your data set up like a table. Note that the line has to |
|
5524
|
|
|
|
|
|
|
both begin and end with the delimiter, as well as using it to separate |
|
5525
|
|
|
|
|
|
|
values. |
|
5526
|
|
|
|
|
|
|
|
|
5527
|
|
|
|
|
|
|
This can also have an optional caption at the start. |
|
5528
|
|
|
|
|
|
|
|
|
5529
|
|
|
|
|
|
|
=head1 EXAMPLES |
|
5530
|
|
|
|
|
|
|
|
|
5531
|
|
|
|
|
|
|
use HTML::TextToHTML; |
|
5532
|
|
|
|
|
|
|
|
|
5533
|
|
|
|
|
|
|
=head2 Create a new object |
|
5534
|
|
|
|
|
|
|
|
|
5535
|
|
|
|
|
|
|
my $conv = new HTML::TextToHTML(); |
|
5536
|
|
|
|
|
|
|
|
|
5537
|
|
|
|
|
|
|
my $conv = new HTML::TextToHTML(title=>"Wonderful Things", |
|
5538
|
|
|
|
|
|
|
default_link_dict=>$my_link_file, |
|
5539
|
|
|
|
|
|
|
); |
|
5540
|
|
|
|
|
|
|
|
|
5541
|
|
|
|
|
|
|
=head2 Add further arguments |
|
5542
|
|
|
|
|
|
|
|
|
5543
|
|
|
|
|
|
|
$conv->args(short_line_length=>60, |
|
5544
|
|
|
|
|
|
|
preformat_trigger_lines=>4, |
|
5545
|
|
|
|
|
|
|
caps_tag=>"strong", |
|
5546
|
|
|
|
|
|
|
); |
|
5547
|
|
|
|
|
|
|
|
|
5548
|
|
|
|
|
|
|
=head2 Convert a file |
|
5549
|
|
|
|
|
|
|
|
|
5550
|
|
|
|
|
|
|
$conv->txt2html(infile=>[$text_file], |
|
5551
|
|
|
|
|
|
|
outfile=>$html_file, |
|
5552
|
|
|
|
|
|
|
title=>"Wonderful Things", |
|
5553
|
|
|
|
|
|
|
mail=>1 |
|
5554
|
|
|
|
|
|
|
); |
|
5555
|
|
|
|
|
|
|
|
|
5556
|
|
|
|
|
|
|
=head2 Make a pipleline |
|
5557
|
|
|
|
|
|
|
|
|
5558
|
|
|
|
|
|
|
open(IN, "ls |") or die "could not open!"; |
|
5559
|
|
|
|
|
|
|
$conv->txt2html(inhandle=>[\*IN], |
|
5560
|
|
|
|
|
|
|
outfile=>'-', |
|
5561
|
|
|
|
|
|
|
); |
|
5562
|
|
|
|
|
|
|
|
|
5563
|
|
|
|
|
|
|
=head1 NOTES |
|
5564
|
|
|
|
|
|
|
|
|
5565
|
|
|
|
|
|
|
=over |
|
5566
|
|
|
|
|
|
|
|
|
5567
|
|
|
|
|
|
|
=item * |
|
5568
|
|
|
|
|
|
|
|
|
5569
|
|
|
|
|
|
|
If the underline used to mark a header is off by more than 1, then |
|
5570
|
|
|
|
|
|
|
that part of the text will not be picked up as a header unless you |
|
5571
|
|
|
|
|
|
|
change the value of --underline_length_tolerance and/or |
|
5572
|
|
|
|
|
|
|
--underline_offset_tolerance. People tend to forget this. |
|
5573
|
|
|
|
|
|
|
|
|
5574
|
|
|
|
|
|
|
=back |
|
5575
|
|
|
|
|
|
|
|
|
5576
|
|
|
|
|
|
|
=head1 REQUIRES |
|
5577
|
|
|
|
|
|
|
|
|
5578
|
|
|
|
|
|
|
HTML::TextToHTML requires Perl 5.8.1 or later. |
|
5579
|
|
|
|
|
|
|
|
|
5580
|
|
|
|
|
|
|
For installation, it needs: |
|
5581
|
|
|
|
|
|
|
|
|
5582
|
|
|
|
|
|
|
Module::Build |
|
5583
|
|
|
|
|
|
|
|
|
5584
|
|
|
|
|
|
|
The txt2html script needs: |
|
5585
|
|
|
|
|
|
|
|
|
5586
|
|
|
|
|
|
|
Getopt::Long |
|
5587
|
|
|
|
|
|
|
Getopt::ArgvFile |
|
5588
|
|
|
|
|
|
|
Pod::Usage |
|
5589
|
|
|
|
|
|
|
File::Basename |
|
5590
|
|
|
|
|
|
|
|
|
5591
|
|
|
|
|
|
|
For testing, it also needs: |
|
5592
|
|
|
|
|
|
|
|
|
5593
|
|
|
|
|
|
|
Test::More |
|
5594
|
|
|
|
|
|
|
|
|
5595
|
|
|
|
|
|
|
For debugging, it also needs: |
|
5596
|
|
|
|
|
|
|
|
|
5597
|
|
|
|
|
|
|
YAML::Syck |
|
5598
|
|
|
|
|
|
|
|
|
5599
|
|
|
|
|
|
|
=head1 INSTALLATION |
|
5600
|
|
|
|
|
|
|
|
|
5601
|
|
|
|
|
|
|
Make sure you have the dependencies installed first! |
|
5602
|
|
|
|
|
|
|
(see REQUIRES above) |
|
5603
|
|
|
|
|
|
|
|
|
5604
|
|
|
|
|
|
|
Some of those modules come standard with more recent versions of perl, |
|
5605
|
|
|
|
|
|
|
but I thought I'd mention them anyway, just in case you may not have |
|
5606
|
|
|
|
|
|
|
them. |
|
5607
|
|
|
|
|
|
|
|
|
5608
|
|
|
|
|
|
|
If you don't know how to install these, try using the CPAN module, an |
|
5609
|
|
|
|
|
|
|
easy way of auto-installing modules from the Comprehensive Perl Archive |
|
5610
|
|
|
|
|
|
|
Network, where the above modules reside. |
|
5611
|
|
|
|
|
|
|
Do "perldoc perlmodinstall" or "perldoc CPAN" for more information. |
|
5612
|
|
|
|
|
|
|
|
|
5613
|
|
|
|
|
|
|
To install this module type the following: |
|
5614
|
|
|
|
|
|
|
|
|
5615
|
|
|
|
|
|
|
perl Build.PL |
|
5616
|
|
|
|
|
|
|
./Build |
|
5617
|
|
|
|
|
|
|
./Build test |
|
5618
|
|
|
|
|
|
|
./Build install |
|
5619
|
|
|
|
|
|
|
|
|
5620
|
|
|
|
|
|
|
Or, if you're on a platform (like DOS or Windows) that doesn't like the |
|
5621
|
|
|
|
|
|
|
"./" notation, you can do this: |
|
5622
|
|
|
|
|
|
|
|
|
5623
|
|
|
|
|
|
|
perl Build.PL |
|
5624
|
|
|
|
|
|
|
perl Build |
|
5625
|
|
|
|
|
|
|
perl Build test |
|
5626
|
|
|
|
|
|
|
perl Build install |
|
5627
|
|
|
|
|
|
|
|
|
5628
|
|
|
|
|
|
|
In order to install somewhere other than the default, such as |
|
5629
|
|
|
|
|
|
|
in a directory under your home directory, like "/home/fred/perl" |
|
5630
|
|
|
|
|
|
|
go |
|
5631
|
|
|
|
|
|
|
|
|
5632
|
|
|
|
|
|
|
perl Build.PL --install_base /home/fred/perl |
|
5633
|
|
|
|
|
|
|
|
|
5634
|
|
|
|
|
|
|
as the first step instead. |
|
5635
|
|
|
|
|
|
|
|
|
5636
|
|
|
|
|
|
|
This will install the files underneath /home/fred/perl. |
|
5637
|
|
|
|
|
|
|
|
|
5638
|
|
|
|
|
|
|
You will then need to make sure that you alter the PERL5LIB variable to |
|
5639
|
|
|
|
|
|
|
find the modules, and the PATH variable to find the script. |
|
5640
|
|
|
|
|
|
|
|
|
5641
|
|
|
|
|
|
|
Therefore you will need to change: |
|
5642
|
|
|
|
|
|
|
your path, to include /home/fred/perl/script (where the script will be) |
|
5643
|
|
|
|
|
|
|
|
|
5644
|
|
|
|
|
|
|
PATH=/home/fred/perl/script:${PATH} |
|
5645
|
|
|
|
|
|
|
|
|
5646
|
|
|
|
|
|
|
the PERL5LIB variable to add /home/fred/perl/lib |
|
5647
|
|
|
|
|
|
|
|
|
5648
|
|
|
|
|
|
|
PERL5LIB=/home/fred/perl/lib:${PERL5LIB} |
|
5649
|
|
|
|
|
|
|
|
|
5650
|
|
|
|
|
|
|
Note that the system links dictionary will be installed as |
|
5651
|
|
|
|
|
|
|
"/home/fred/perl/share/txt2html/txt2html.dict" |
|
5652
|
|
|
|
|
|
|
|
|
5653
|
|
|
|
|
|
|
If you want to install in a temporary install directory (such as |
|
5654
|
|
|
|
|
|
|
if you are building a package) then instead of going |
|
5655
|
|
|
|
|
|
|
|
|
5656
|
|
|
|
|
|
|
perl Build install |
|
5657
|
|
|
|
|
|
|
|
|
5658
|
|
|
|
|
|
|
go |
|
5659
|
|
|
|
|
|
|
|
|
5660
|
|
|
|
|
|
|
perl Build install destdir=/my/temp/dir |
|
5661
|
|
|
|
|
|
|
|
|
5662
|
|
|
|
|
|
|
and it will be installed there, with a directory structure under |
|
5663
|
|
|
|
|
|
|
/my/temp/dir the same as it would be if it were installed plain. |
|
5664
|
|
|
|
|
|
|
Note that this is NOT the same as setting --install_base, because |
|
5665
|
|
|
|
|
|
|
certain things are done at build-time which use the install_base info. |
|
5666
|
|
|
|
|
|
|
|
|
5667
|
|
|
|
|
|
|
See "perldoc perlrun" for more information on PERL5LIB, and |
|
5668
|
|
|
|
|
|
|
see "perldoc Module::Build" for more information on |
|
5669
|
|
|
|
|
|
|
installation options. |
|
5670
|
|
|
|
|
|
|
|
|
5671
|
|
|
|
|
|
|
=head1 BUGS |
|
5672
|
|
|
|
|
|
|
|
|
5673
|
|
|
|
|
|
|
Tell me about them. |
|
5674
|
|
|
|
|
|
|
|
|
5675
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
5676
|
|
|
|
|
|
|
|
|
5677
|
|
|
|
|
|
|
perl |
|
5678
|
|
|
|
|
|
|
L. |
|
5679
|
|
|
|
|
|
|
|
|
5680
|
|
|
|
|
|
|
=head1 AUTHOR |
|
5681
|
|
|
|
|
|
|
|
|
5682
|
|
|
|
|
|
|
Kathryn Andersen (RUBYKAT) |
|
5683
|
|
|
|
|
|
|
perlkat AT katspace dot com |
|
5684
|
|
|
|
|
|
|
http//www.katspace.com/ |
|
5685
|
|
|
|
|
|
|
|
|
5686
|
|
|
|
|
|
|
based on txt2html by Seth Golub |
|
5687
|
|
|
|
|
|
|
|
|
5688
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENCE |
|
5689
|
|
|
|
|
|
|
|
|
5690
|
|
|
|
|
|
|
Original txt2html script copyright (c) 1994-2000 Seth Golub |
|
5691
|
|
|
|
|
|
|
|
|
5692
|
|
|
|
|
|
|
Copyright (c) 2002-2005 by Kathryn Andersen |
|
5693
|
|
|
|
|
|
|
|
|
5694
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or |
|
5695
|
|
|
|
|
|
|
modify it under the same terms as Perl itself. |
|
5696
|
|
|
|
|
|
|
|
|
5697
|
|
|
|
|
|
|
=cut |
|
5698
|
|
|
|
|
|
|
|
|
5699
|
|
|
|
|
|
|
#------------------------------------------------------------------------ |
|
5700
|
|
|
|
|
|
|
1; |
|