line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package MKDoc::Text::Structured::Inline; |
2
|
21
|
|
|
21
|
|
57281
|
use URI::Find; |
|
21
|
|
|
|
|
311477
|
|
|
21
|
|
|
|
|
1622
|
|
3
|
21
|
|
|
21
|
|
235
|
use warnings; |
|
21
|
|
|
|
|
46
|
|
|
21
|
|
|
|
|
641
|
|
4
|
21
|
|
|
21
|
|
105
|
use strict; |
|
21
|
|
|
|
|
44
|
|
|
21
|
|
|
|
|
36553
|
|
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
our $Text = ''; |
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
our $LongestWord = 78; |
9
|
|
|
|
|
|
|
our $NoFollow = 0; |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
=head1 NAME |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
MKDoc::Text::Structured::Inline - convert text to HTML without handling block-level tags |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=head1 SYNOPSIS |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
my $text = some_structured_text(); |
18
|
|
|
|
|
|
|
my $this = MKDoc::Text::Structured::Inline::process ($text); |
19
|
|
|
|
|
|
|
my $that = MKDoc::Text::Structured::Inline::process_entities_only ($text); |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
=head1 SUMMARY |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
L<MKDoc::Text::Structured::Inline> is used by L<MKDoc::Text::Structured> to |
24
|
|
|
|
|
|
|
generate inline HTML elements such as hyperlinks, emphasis and entities. |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
This module is also useful directly when the full block-level rendering of |
27
|
|
|
|
|
|
|
L<MKDoc::Text::Structured> is unwanted. |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
=head1 USAGE |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=head2 Processing text and adding HTML tags |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
For example, when processing text that is going to end up in a header, |
34
|
|
|
|
|
|
|
you wouldn't want any block level tags generated: |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
$header = "< My (c) symbol should be *bold* > -- and http://example.com/ 'linked'"; |
37
|
|
|
|
|
|
|
$header = MKDoc::Text::Structured::Inline::process ($header); |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
$header is now: |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
&lt; My &copy; symbol should be <strong>bold</strong> &gt; &mdash; and <a href="http://example.com/">http://example.com/</a> &lsquo;linked&rsquo; |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
=cut |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
sub process |
46
|
|
|
|
|
|
|
{ |
47
|
93
|
|
|
93
|
0
|
160
|
local $Text; |
48
|
93
|
|
|
|
|
169
|
$Text = shift; |
49
|
93
|
|
|
|
|
247
|
$Text = " $Text "; |
50
|
93
|
|
|
|
|
206
|
$Text =~ s/\n/ /gsm; |
51
|
|
|
|
|
|
|
|
52
|
93
|
|
|
|
|
223
|
_make_entities(); |
53
|
|
|
|
|
|
|
|
54
|
93
|
|
|
|
|
167
|
$Text =~ s/>/ >/g; |
55
|
|
|
|
|
|
|
# automagically finds hyperlinks |
56
|
|
|
|
|
|
|
my $finder = URI::Find->new ( |
57
|
|
|
|
|
|
|
sub { |
58
|
10
|
|
|
10
|
|
62553
|
my ($uri, $orig_uri) = @_; |
59
|
10
|
|
|
|
|
34
|
$orig_uri =~ s/^mailto://; |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
# http://googleblog.blogspot.com/2005/01/preventing-comment-spam.html |
62
|
10
|
100
|
|
|
|
52
|
if ($NoFollow) |
63
|
|
|
|
|
|
|
{ |
64
|
3
|
|
|
|
|
14
|
return qq|$orig_uri|; |
65
|
|
|
|
|
|
|
} |
66
|
|
|
|
|
|
|
else |
67
|
|
|
|
|
|
|
{ |
68
|
7
|
|
|
|
|
31
|
return qq|$orig_uri|; |
69
|
|
|
|
|
|
|
} |
70
|
|
|
|
|
|
|
} |
71
|
93
|
|
|
|
|
1254
|
); |
72
|
93
|
|
|
|
|
1439
|
$finder->find (\$Text); |
73
|
93
|
|
|
|
|
33405
|
$Text =~ s/ >/>/g; |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
# abbreviations |
76
|
93
|
|
|
|
|
379
|
while ($Text =~ s/([[:upper:]][[:upper:]]+)\s+(\(.*?\))/_make_abbr_implicit ($1, $2)/e) {}; # implicit |
|
2
|
|
|
|
|
10
|
|
77
|
93
|
|
|
|
|
492
|
while ($Text =~ s/([[:upper:]][[:upper:]]+)(\(.*?\))/_make_abbr_explicit ($1, $2)/e) {}; # explicit |
|
9
|
|
|
|
|
37
|
|
78
|
93
|
|
|
|
|
245
|
_make_simplequotes(); |
79
|
93
|
|
|
|
|
286
|
_make_doublequotes(); |
80
|
93
|
|
|
|
|
272
|
_make_strong(); |
81
|
93
|
|
|
|
|
257
|
_make_em(); |
82
|
93
|
|
|
|
|
263
|
_make_smilies(); |
83
|
93
|
|
|
|
|
319
|
_break_long_words(); |
84
|
|
|
|
|
|
|
|
85
|
93
|
|
|
|
|
386
|
$Text =~ s/^ //; |
86
|
93
|
|
|
|
|
405
|
$Text =~ s/ $//; |
87
|
93
|
|
|
|
|
1138
|
return $Text; |
88
|
|
|
|
|
|
|
} |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
=pod |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
=head2 Processing text without adding tags |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
As another example, if you were processing text that will end up in an HTML <title> |
95
|
|
|
|
|
|
|
tag, this tag should never contain any other tags, so you should use |
96
|
|
|
|
|
|
|
the MKDoc::Text::Structured::Inline::process_entities_only() function: |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
$title = "< My (c) symbol shouldn't be *bold* > -- or http://example.com/ 'linked'"; |
99
|
|
|
|
|
|
|
$title = MKDoc::Text::Structured::Inline::process_entities_only ($title); |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
$title is now: |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
&lt; My &copy; symbol shouldn't be *bold* &gt; &mdash; or http://example.com/ &lsquo;linked&rsquo; |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
=cut |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
sub process_entities_only |
108
|
|
|
|
|
|
|
{ |
109
|
2
|
|
|
2
|
0
|
955
|
local $Text; |
110
|
2
|
|
|
|
|
2
|
$Text = shift; |
111
|
2
|
|
|
|
|
6
|
$Text = " $Text "; |
112
|
2
|
|
|
|
|
5
|
$Text =~ s/\n/ /gsm; |
113
|
|
|
|
|
|
|
|
114
|
2
|
|
|
|
|
5
|
_make_entities(); |
115
|
2
|
|
|
|
|
6
|
_make_simplequotes(); |
116
|
2
|
|
|
|
|
6
|
_make_doublequotes(); |
117
|
2
|
|
|
|
|
5
|
_break_long_words(); |
118
|
|
|
|
|
|
|
|
119
|
2
|
|
|
|
|
6
|
$Text =~ s/^ //; |
120
|
2
|
|
|
|
|
5
|
$Text =~ s/ $//; |
121
|
2
|
|
|
|
|
6
|
return $Text; |
122
|
|
|
|
|
|
|
} |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
sub _make_entities |
126
|
|
|
|
|
|
|
{ |
127
|
95
|
|
|
95
|
|
340
|
$Text =~ s/&/&/g; |
128
|
95
|
|
|
|
|
166
|
$Text =~ s/</g; |
129
|
95
|
|
|
|
|
166
|
$Text =~ s/>/>/g; |
130
|
95
|
|
|
|
|
165
|
$Text =~ s/"/"/g; |
131
|
|
|
|
|
|
|
|
132
|
95
|
|
|
|
|
183
|
$Text =~ s/(?<=(?:\s|\n))--(?=(?:\s|\n))/\—/g; # -- becomes em-dash |
133
|
95
|
|
|
|
|
184
|
$Text =~ s/(?<=(?:\s|\n))-(?=(?:\s|\n))/\–/g; # - becomes en-dash |
134
|
95
|
|
|
|
|
229
|
$Text =~ s/(?
|
135
|
|
|
|
|
|
|
|
136
|
21
|
|
|
21
|
|
40006
|
$Text =~ s/\(tm\)(?=(?:\s|\n|\p{IsPunct}))/\™/gi; # (tm) becomes trademark |
|
21
|
|
|
|
|
241
|
|
|
21
|
|
|
|
|
321
|
|
|
95
|
|
|
|
|
223
|
|
137
|
95
|
|
|
|
|
351
|
$Text =~ s/\(r\)(?=(?:\s|\n|\p{IsPunct}))/\®/gi; # (r) becomes registered |
138
|
95
|
|
|
|
|
210
|
$Text =~ s/\(c\)(?=(?:\s|\n|\p{IsPunct}))/\©/gi; # (c) becomes copyright |
139
|
95
|
|
|
|
|
210
|
$Text =~ s/(?<=(?:\s|\n))(\d+)\s*x\s*(\d+)(?=(?:\s|\n|\p{isPunct}))/$1\×$2/g; # x becomes dimension |
140
|
|
|
|
|
|
|
} |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
sub _make_abbr_implicit |
144
|
|
|
|
|
|
|
{ |
145
|
2
|
|
|
2
|
|
6
|
my $abbr = shift; |
146
|
2
|
|
|
|
|
5
|
my $brack = shift; |
147
|
2
|
|
|
|
|
4
|
my $title = $brack; |
148
|
2
|
|
|
|
|
11
|
$title =~ s/^\s*\(\s*//; |
149
|
2
|
|
|
|
|
12
|
$title =~ s/\s*\)\s*$//; |
150
|
2
|
|
|
|
|
31
|
return qq|$abbr ($title)|; |
151
|
|
|
|
|
|
|
} |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
sub _make_abbr_explicit |
155
|
|
|
|
|
|
|
{ |
156
|
9
|
|
|
9
|
|
24
|
my $abbr = shift; |
157
|
9
|
|
|
|
|
19
|
my $brack = shift; |
158
|
9
|
|
|
|
|
18
|
my $title = $brack; |
159
|
9
|
|
|
|
|
47
|
$title =~ s/^\s*\(\s*//; |
160
|
9
|
|
|
|
|
57
|
$title =~ s/\s*\)\s*$//; |
161
|
9
|
|
|
|
|
263
|
return qq|$abbr|; |
162
|
|
|
|
|
|
|
} |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
sub _make_simplequotes |
166
|
|
|
|
|
|
|
{ |
167
|
179
|
|
|
|
|
411
|
$Text = join '', map { |
168
|
95
|
|
|
95
|
|
272
|
my $stuff = $_; |
169
|
179
|
|
|
|
|
355
|
$stuff = " $stuff "; |
170
|
179
|
|
|
|
|
647
|
while ($stuff =~ s/ |
171
|
|
|
|
|
|
|
(?<=(?:\s|\n)) # must start with space or carriage return |
172
|
|
|
|
|
|
|
\' # simple quote |
173
|
|
|
|
|
|
|
([^ \t\n\']|[^ \t\n\'].*?[^ \t\n\']) # stuff to capture and smart-quotize |
174
|
|
|
|
|
|
|
\' # simple quote |
175
|
|
|
|
|
|
|
(?=(?:<|\s|\n|\p{IsPunct}(?:\s|\n|<))) # must be followed by space, \n or (punctuation + space or \n) |
176
|
6
|
|
|
|
|
22
|
/_make_simplequotes_wrap ($1)/xes) {} |
177
|
|
|
|
|
|
|
|
178
|
179
|
|
|
|
|
584
|
$stuff =~ s/^ //; |
179
|
179
|
|
|
|
|
774
|
$stuff =~ s/ $//; |
180
|
179
|
|
|
|
|
792
|
$stuff; |
181
|
|
|
|
|
|
|
} _tokenize ($Text); |
182
|
|
|
|
|
|
|
} |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
sub _make_simplequotes_wrap |
186
|
|
|
|
|
|
|
{ |
187
|
6
|
|
|
6
|
|
15
|
my $stuff = shift; |
188
|
6
|
|
|
|
|
14
|
local $Text = $stuff; |
189
|
6
|
|
|
|
|
53
|
return "‘$Text’"; |
190
|
|
|
|
|
|
|
} |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
sub _make_doublequotes |
195
|
|
|
|
|
|
|
{ |
196
|
179
|
|
|
|
|
256
|
$Text = join '', map { |
197
|
95
|
|
|
95
|
|
213
|
my $stuff = $_; |
198
|
179
|
|
|
|
|
342
|
$stuff = " $stuff "; |
199
|
179
|
|
|
|
|
462
|
$stuff =~ s/"//g; |
200
|
179
|
|
|
|
|
534
|
$stuff =~ s/"/"/g; |
201
|
179
|
|
|
|
|
534
|
while ($stuff =~ s/ |
202
|
|
|
|
|
|
|
(?<=(?:\s|\n)) # must start with space or carriage return |
203
|
|
|
|
|
|
|
\" # double quote |
204
|
|
|
|
|
|
|
([^ \t\n\"]|[^ \t\n\"].*?[^ \t\n\"]) # stuff to capture and smart-quotize |
205
|
|
|
|
|
|
|
\" # double quote |
206
|
|
|
|
|
|
|
(?=(?:<|\s|\n|\p{IsPunct}(?:\s|\n|<))) # must be followed by space, \n or (punctuation + space or \n) |
207
|
6
|
|
|
|
|
20
|
/_make_doublequotes_wrap ($1)/xes) {} |
208
|
|
|
|
|
|
|
|
209
|
179
|
|
|
|
|
554
|
$stuff =~ s/^ //; |
210
|
179
|
|
|
|
|
585
|
$stuff =~ s/ $//; |
211
|
179
|
|
|
|
|
346
|
$stuff =~ s/"/"/g; |
212
|
179
|
|
|
|
|
295
|
$stuff =~ s//"/g; |
213
|
179
|
|
|
|
|
571
|
$stuff; |
214
|
|
|
|
|
|
|
} _tokenize ($Text); |
215
|
|
|
|
|
|
|
} |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
sub _make_doublequotes_wrap |
219
|
|
|
|
|
|
|
{ |
220
|
6
|
|
|
6
|
|
60
|
my $stuff = shift; |
221
|
6
|
|
|
|
|
13
|
local $Text = $stuff; |
222
|
6
|
|
|
|
|
54
|
return "“$Text”"; |
223
|
|
|
|
|
|
|
} |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
sub _make_strong |
227
|
|
|
|
|
|
|
{ |
228
|
181
|
|
|
|
|
251
|
$Text = join '', map { |
229
|
97
|
|
|
97
|
|
218
|
my $stuff = $_; |
230
|
181
|
|
|
|
|
575
|
$stuff = " $stuff "; |
231
|
181
|
|
|
|
|
508
|
while ($stuff =~ s/ |
232
|
|
|
|
|
|
|
(?<=(?:\s|\n)) # must start with space or carriage return |
233
|
|
|
|
|
|
|
\* # star |
234
|
|
|
|
|
|
|
(\S|\S.*?\S) # stuff to capture and emphasize |
235
|
|
|
|
|
|
|
\* # star |
236
|
|
|
|
|
|
|
(?=(?:<|\s|\n|\p{IsPunct}(?:\s|\n|<))) # must be followed by space, \n or (punctuation + space or \n) |
237
|
5
|
|
|
|
|
16
|
/_make_strong_wrap ($1)/xes) {} |
238
|
|
|
|
|
|
|
|
239
|
181
|
|
|
|
|
14605
|
$stuff =~ s/^ //; |
240
|
181
|
|
|
|
|
543
|
$stuff =~ s/ $//; |
241
|
181
|
|
|
|
|
582
|
$stuff; |
242
|
|
|
|
|
|
|
} _tokenize ($Text); |
243
|
|
|
|
|
|
|
} |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
sub _make_strong_wrap |
247
|
|
|
|
|
|
|
{ |
248
|
5
|
|
|
5
|
|
12
|
my $stuff = shift; |
249
|
5
|
|
|
|
|
12
|
local $Text = $stuff; |
250
|
5
|
|
|
|
|
26
|
_make_em ($Text); |
251
|
5
|
|
|
|
|
44
|
return "$Text"; |
252
|
|
|
|
|
|
|
} |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
sub _make_em |
256
|
|
|
|
|
|
|
{ |
257
|
202
|
|
|
|
|
290
|
$Text = join '', map { |
258
|
98
|
|
|
98
|
|
258
|
my $stuff = $_; |
259
|
202
|
|
|
|
|
369
|
$stuff = " $stuff "; |
260
|
202
|
|
|
|
|
591
|
while ($stuff =~ s/ |
261
|
|
|
|
|
|
|
(?<=(?:\s|\n)) # must start with space or carriage return |
262
|
|
|
|
|
|
|
_ # underscore |
263
|
|
|
|
|
|
|
(\S|\S.*?\S) # stuff to capture and emphasize |
264
|
|
|
|
|
|
|
_ # underscore |
265
|
|
|
|
|
|
|
(?=(?:<|\s|\n|\p{IsPunct}(?:\s|\n))) # must be followed by space, \n or (punctuation + space or \n) |
266
|
4
|
|
|
|
|
12
|
/_make_em_wrap ($1)/xes) {} |
267
|
|
|
|
|
|
|
|
268
|
202
|
|
|
|
|
575
|
$stuff =~ s/^ //; |
269
|
202
|
|
|
|
|
568
|
$stuff =~ s/ $//; |
270
|
202
|
|
|
|
|
633
|
$stuff; |
271
|
|
|
|
|
|
|
} _tokenize ($Text); |
272
|
|
|
|
|
|
|
} |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
sub _make_em_wrap |
276
|
|
|
|
|
|
|
{ |
277
|
4
|
|
|
4
|
|
8
|
my $stuff = shift; |
278
|
|
|
|
|
|
|
|
279
|
4
|
|
|
|
|
7
|
local $Text = $stuff; |
280
|
4
|
|
|
|
|
17
|
_make_strong ($Text); |
281
|
4
|
|
|
|
|
26
|
return "$Text"; |
282
|
|
|
|
|
|
|
} |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
sub _make_smilies |
286
|
|
|
|
|
|
|
{ |
287
|
213
|
|
|
|
|
287
|
$Text = join '', map { |
288
|
93
|
|
|
93
|
|
208
|
my $stuff = $_; |
289
|
213
|
100
|
|
|
|
631
|
$stuff =~ s/:-\)/:-)<\/span>/g unless ($stuff =~ /^); |
290
|
213
|
100
|
|
|
|
588
|
$stuff =~ s/:-\(/:-(<\/span>/g unless ($stuff =~ /^); |
291
|
|
|
|
|
|
|
# don't do ;-) think about what happens with &-) |
292
|
213
|
|
|
|
|
488
|
$stuff; |
293
|
|
|
|
|
|
|
} _tokenize ($Text); |
294
|
|
|
|
|
|
|
} |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
sub _break_long_words |
298
|
|
|
|
|
|
|
{ |
299
|
241
|
|
|
|
|
332
|
$Text = join '', map { |
300
|
97
|
|
|
97
|
|
3678
|
my $stuff = $_; |
301
|
241
|
100
|
|
|
|
4147
|
$stuff = _insert_spaces ($stuff, $LongestWord) unless ($stuff =~ /^); |
302
|
241
|
|
|
|
|
631
|
$stuff; |
303
|
|
|
|
|
|
|
} _tokenize ($Text); |
304
|
|
|
|
|
|
|
} |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
sub _insert_spaces |
308
|
|
|
|
|
|
|
{ |
309
|
172
|
|
|
172
|
|
10365
|
my $text = shift; |
310
|
172
|
|
100
|
|
|
1270
|
my $length = shift || return $text; |
311
|
|
|
|
|
|
|
# we can break continuous non-space text after "/", ";" or "-" |
312
|
169
|
|
|
|
|
1435
|
$text =~ s/(\S{$length}[\/;-])(?=\S)/$1 /g; |
313
|
|
|
|
|
|
|
# we can break continuous non-space text so long as it doesn't contain an ampersand |
314
|
169
|
|
|
|
|
983
|
$text =~ s/([^[:space:]&]{$length})(?=\S)/$1 /g; |
315
|
169
|
|
|
|
|
475
|
return $text; |
316
|
|
|
|
|
|
|
} |
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
sub _tokenize |
320
|
|
|
|
|
|
|
{ |
321
|
575
|
|
|
575
|
|
822
|
my $text = shift; |
322
|
575
|
|
|
|
|
3544
|
my @res = $text =~ /([^<]+)|(<.+?>)/g; |
323
|
575
|
|
|
|
|
1130
|
return grep { defined $_ } @res; |
|
2390
|
|
|
|
|
7510
|
|
324
|
|
|
|
|
|
|
} |
325
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
1; |
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
__END__ |