| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Text::MediawikiFormat; |
|
2
|
|
|
|
|
|
|
|
|
3
|
14
|
|
|
14
|
|
208599
|
use strict; |
|
|
14
|
|
|
|
|
29
|
|
|
|
14
|
|
|
|
|
602
|
|
|
4
|
14
|
|
|
14
|
|
65
|
use warnings::register; |
|
|
14
|
|
|
|
|
21
|
|
|
|
14
|
|
|
|
|
2488
|
|
|
5
|
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
=head1 NAME |
|
7
|
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
Text::MediawikiFormat - Translate Wiki markup into other text formats |
|
9
|
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
=head1 VERSION |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
Version 1.04 |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=cut |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
our $VERSION = '1.04'; |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
use Text::MediawikiFormat 'wikiformat'; |
|
21
|
|
|
|
|
|
|
my $html = wikiformat ($raw); |
|
22
|
|
|
|
|
|
|
my $text = wikiformat ($raw, {}, {implicit_links => 1}); |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
L and its sister projects use the PHP Mediawiki to format |
|
27
|
|
|
|
|
|
|
their pages. This module attempts to duplicate the Mediawiki formatting rules. |
|
28
|
|
|
|
|
|
|
Those formatting rules can be simple and easy to use, while providing more |
|
29
|
|
|
|
|
|
|
advanced options for the power user. They are also easy to translate into |
|
30
|
|
|
|
|
|
|
other, more complicated markup languages with this module. It creates HTML by |
|
31
|
|
|
|
|
|
|
default, but could produce valid POD, DocBook, XML, or any other format |
|
32
|
|
|
|
|
|
|
imaginable. |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
The most important function is C. It is |
|
35
|
|
|
|
|
|
|
not exported by default, but will be exported as C if any |
|
36
|
|
|
|
|
|
|
options at all are passed to the exporter, unless the name is overridden |
|
37
|
|
|
|
|
|
|
explicitly. See L<"EXPORT"> for more information. |
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
It should be noted that this module is written as a drop in replacement for |
|
40
|
|
|
|
|
|
|
L that expands on that modules functionality and provides |
|
41
|
|
|
|
|
|
|
a default rule set that may be used to format text like the PHP Mediawiki. It |
|
42
|
|
|
|
|
|
|
is also well to note early that if you just want a Mediawiki clone (you don't |
|
43
|
|
|
|
|
|
|
need to customize it heavily and you want integration with a back end |
|
44
|
|
|
|
|
|
|
database), you should look at L. |
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
=cut |
|
47
|
|
|
|
|
|
|
|
|
48
|
14
|
|
|
14
|
|
75
|
use Carp qw(carp confess croak); |
|
|
14
|
|
|
|
|
21
|
|
|
|
14
|
|
|
|
|
1016
|
|
|
49
|
14
|
|
|
14
|
|
27225
|
use CGI qw(:standard); |
|
|
14
|
|
|
|
|
196326
|
|
|
|
14
|
|
|
|
|
109
|
|
|
50
|
14
|
|
|
14
|
|
40323
|
use Scalar::Util qw(blessed); |
|
|
14
|
|
|
|
|
28
|
|
|
|
14
|
|
|
|
|
1514
|
|
|
51
|
14
|
|
|
14
|
|
6749
|
use Text::MediawikiFormat::Blocks; |
|
|
14
|
|
|
|
|
29
|
|
|
|
14
|
|
|
|
|
78
|
|
|
52
|
14
|
|
|
14
|
|
8654
|
use URI; |
|
|
14
|
|
|
|
|
56265
|
|
|
|
14
|
|
|
|
|
562
|
|
|
53
|
14
|
|
|
14
|
|
122
|
use URI::Escape qw(uri_escape uri_escape_utf8); |
|
|
14
|
|
|
|
|
19
|
|
|
|
14
|
|
|
|
|
1162
|
|
|
54
|
|
|
|
|
|
|
|
|
55
|
14
|
|
|
|
|
1860
|
use vars qw($missing_html_packages %tags %opts %merge_matrix |
|
56
|
14
|
|
|
14
|
|
70
|
$uric $uricCheat $uriCruft); |
|
|
14
|
|
|
|
|
19
|
|
|
57
|
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
BEGIN { |
|
59
|
|
|
|
|
|
|
# Try to load optional HTML packages, recording any errors. |
|
60
|
14
|
|
|
14
|
|
30
|
eval { require HTML::Parser }; |
|
|
14
|
|
|
|
|
12637
|
|
|
61
|
14
|
|
|
|
|
75984
|
$missing_html_packages = $@; |
|
62
|
14
|
|
|
|
|
35
|
eval { require HTML::Tagset }; |
|
|
14
|
|
|
|
|
8202
|
|
|
63
|
14
|
|
|
|
|
34545
|
$missing_html_packages .= $@; |
|
64
|
|
|
|
|
|
|
} |
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
### |
|
67
|
|
|
|
|
|
|
### Defaults |
|
68
|
|
|
|
|
|
|
### |
|
69
|
|
|
|
|
|
|
%tags = ( |
|
70
|
|
|
|
|
|
|
indent => qr/^(?:[:*#;]*)(?=[:*#;])/, |
|
71
|
|
|
|
|
|
|
link => \&_make_html_link, |
|
72
|
|
|
|
|
|
|
strong => sub {"$_[0]"}, |
|
73
|
|
|
|
|
|
|
emphasized => sub {"$_[0]"}, |
|
74
|
|
|
|
|
|
|
strong_tag => qr/'''(.+?)'''/, |
|
75
|
|
|
|
|
|
|
emphasized_tag => qr/''(.+?)''/, |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
code => [ '', " \n", '', "\n" ], |
|
78
|
|
|
|
|
|
|
line => [ '', '', ' ', "\n" ], |
|
79
|
|
|
|
|
|
|
paragraph => [ " ", " \n", '', "\n", 1 ], |
|
80
|
|
|
|
|
|
|
paragraph_break => [ '', '', '', "\n" ], |
|
81
|
|
|
|
|
|
|
unordered => [ "\n", '', "\n" ], |
|
82
|
|
|
|
|
|
|
ordered => [ "\n", " \n", '', "\n" ], |
|
83
|
|
|
|
|
|
|
definition => [ "\n", " \n", \&_dl ], |
|
84
|
|
|
|
|
|
|
header => [ '', "\n", \&_make_header ], |
|
85
|
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
blocks => { |
|
87
|
|
|
|
|
|
|
code => qr/^ /, |
|
88
|
|
|
|
|
|
|
header => qr/^(=+)\s*(.+?)\s*\1$/, |
|
89
|
|
|
|
|
|
|
line => qr/^-{4,}$/, |
|
90
|
|
|
|
|
|
|
ordered => qr/^#\s*/, |
|
91
|
|
|
|
|
|
|
unordered => qr/^\*\s*/, |
|
92
|
|
|
|
|
|
|
definition => qr/^([;:])\s*/, |
|
93
|
|
|
|
|
|
|
paragraph => qr/^/, |
|
94
|
|
|
|
|
|
|
paragraph_break => qr/^\s*$/, |
|
95
|
|
|
|
|
|
|
}, |
|
96
|
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
indented => { map { $_ => 1 } qw(ordered unordered definition) }, |
|
98
|
|
|
|
|
|
|
nests => { map { $_ => 1 } qw(ordered unordered definition) }, |
|
99
|
|
|
|
|
|
|
nests_anywhere => { map { $_ => 1 } qw(nowiki) }, |
|
100
|
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
blockorder => [ |
|
102
|
|
|
|
|
|
|
qw(code header line ordered unordered definition |
|
103
|
|
|
|
|
|
|
paragraph_break paragraph) |
|
104
|
|
|
|
|
|
|
], |
|
105
|
|
|
|
|
|
|
implicit_link_delimiters => qr!\b(?:[A-Z][a-z0-9]\w*){2,}!, |
|
106
|
|
|
|
|
|
|
extended_link_delimiters => qr!\[(?:\[[^][]*\]|[^][]*)\]!, |
|
107
|
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
schemas => [qw(http https ftp mailto gopher)], |
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
unformatted_blocks => [qw(header nowiki pre)], |
|
111
|
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
allowed_tags => [ #HTML |
|
113
|
|
|
|
|
|
|
qw(b big blockquote br caption center cite code dd |
|
114
|
|
|
|
|
|
|
div dl dt em font h1 h2 h3 h4 h5 h6 hr i li ol p |
|
115
|
|
|
|
|
|
|
pre rb rp rt ruby s samp small strike strong sub |
|
116
|
|
|
|
|
|
|
sup table td th tr tt u ul var), |
|
117
|
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
# Mediawiki Specific |
|
119
|
|
|
|
|
|
|
qw(nowiki), |
|
120
|
|
|
|
|
|
|
], |
|
121
|
|
|
|
|
|
|
allowed_attrs => [ |
|
122
|
|
|
|
|
|
|
qw(title align lang dir width height bgcolor), |
|
123
|
|
|
|
|
|
|
qw(clear), # BR |
|
124
|
|
|
|
|
|
|
qw(noshade), # HR |
|
125
|
|
|
|
|
|
|
qw(cite), # BLOCKQUOTE, Q |
|
126
|
|
|
|
|
|
|
qw(size face color), # FONT |
|
127
|
|
|
|
|
|
|
# For various lists, mostly deprecated but safe |
|
128
|
|
|
|
|
|
|
qw(type start value compact), |
|
129
|
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
# Tables |
|
131
|
|
|
|
|
|
|
qw(summary width border frame rules cellspacing |
|
132
|
|
|
|
|
|
|
cellpadding valign char charoff colgroup col |
|
133
|
|
|
|
|
|
|
span abbr axis headers scope rowspan colspan), |
|
134
|
|
|
|
|
|
|
qw(id class name style), # For CSS |
|
135
|
|
|
|
|
|
|
], |
|
136
|
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
_toc => [], |
|
138
|
|
|
|
|
|
|
); |
|
139
|
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
%opts = ( |
|
141
|
|
|
|
|
|
|
extended => 1, |
|
142
|
|
|
|
|
|
|
implicit_links => 0, |
|
143
|
|
|
|
|
|
|
absolute_links => 1, |
|
144
|
|
|
|
|
|
|
prefix => '', |
|
145
|
|
|
|
|
|
|
process_html => 1, |
|
146
|
|
|
|
|
|
|
charset => 'utf-8', |
|
147
|
|
|
|
|
|
|
); |
|
148
|
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
# Make sure import's argument hash contains an `as' entry. `as' defaults to |
|
150
|
|
|
|
|
|
|
# `wikiformat' when none is given. |
|
151
|
|
|
|
|
|
|
sub _process_args { |
|
152
|
15
|
|
|
15
|
|
23
|
shift; # Class |
|
153
|
15
|
100
|
|
|
|
57
|
return as => shift if @_ == 1; |
|
154
|
14
|
|
|
|
|
92
|
return as => 'wikiformat', @_; |
|
155
|
|
|
|
|
|
|
} |
|
156
|
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
# Delete the options (prefix, extended, implicit_links, ...) from a hash, |
|
158
|
|
|
|
|
|
|
# returning a new hash with the deleted options. |
|
159
|
|
|
|
|
|
|
sub _extract_opts { |
|
160
|
15
|
|
|
15
|
|
29
|
my %newopts; |
|
161
|
|
|
|
|
|
|
|
|
162
|
15
|
|
|
|
|
47
|
for my $key ( |
|
163
|
|
|
|
|
|
|
qw{prefix extended implicit_links absolute_links |
|
164
|
|
|
|
|
|
|
process_html debug} |
|
165
|
|
|
|
|
|
|
) |
|
166
|
|
|
|
|
|
|
{ |
|
167
|
90
|
100
|
|
|
|
253
|
if ( defined( my $val = delete $_[0]->{$key} ) ) { |
|
168
|
19
|
|
|
|
|
53
|
$newopts{$key} = $val; |
|
169
|
|
|
|
|
|
|
} |
|
170
|
|
|
|
|
|
|
} |
|
171
|
|
|
|
|
|
|
|
|
172
|
15
|
|
|
|
|
69
|
return \%newopts; |
|
173
|
|
|
|
|
|
|
} |
|
174
|
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
# Shamelessly ripped from Hash::Merge, which doesn't work in a threaded |
|
176
|
|
|
|
|
|
|
# environment with two threads trying to use different merge matrices. |
|
177
|
|
|
|
|
|
|
%merge_matrix = ( |
|
178
|
|
|
|
|
|
|
SCALAR => { |
|
179
|
|
|
|
|
|
|
SCALAR => sub { return $_[0] }, |
|
180
|
|
|
|
|
|
|
ARRAY => sub { # Need to be able to replace scalar with array |
|
181
|
|
|
|
|
|
|
# for extended_link_delimiters (could be array |
|
182
|
|
|
|
|
|
|
# or regex). |
|
183
|
|
|
|
|
|
|
return $_[0]; |
|
184
|
|
|
|
|
|
|
}, |
|
185
|
|
|
|
|
|
|
HASH => sub { |
|
186
|
|
|
|
|
|
|
confess "Attempt to replace hash with scalar" |
|
187
|
|
|
|
|
|
|
if defined $_[0]; |
|
188
|
|
|
|
|
|
|
return _clone( $_[1] ); |
|
189
|
|
|
|
|
|
|
} |
|
190
|
|
|
|
|
|
|
}, |
|
191
|
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
ARRAY => { |
|
193
|
|
|
|
|
|
|
SCALAR => sub { # Need to be able to replace array with scalar |
|
194
|
|
|
|
|
|
|
# for extended_link_delimiters (could be array |
|
195
|
|
|
|
|
|
|
# or regex). |
|
196
|
|
|
|
|
|
|
return _clone( $_[0] ); |
|
197
|
|
|
|
|
|
|
}, |
|
198
|
|
|
|
|
|
|
ARRAY => sub { return _clone( $_[0] ); }, |
|
199
|
|
|
|
|
|
|
HASH => sub { confess "Attempt to replace hash with array" } |
|
200
|
|
|
|
|
|
|
}, |
|
201
|
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
HASH => { |
|
203
|
|
|
|
|
|
|
SCALAR => sub { confess "Attempt to replace scalar with hash" }, |
|
204
|
|
|
|
|
|
|
ARRAY => sub { confess "Attempt to replace array with hash" }, |
|
205
|
|
|
|
|
|
|
HASH => sub { _merge_hash_elements( $_[0], $_[1] ) } |
|
206
|
|
|
|
|
|
|
} |
|
207
|
|
|
|
|
|
|
); |
|
208
|
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
# Return arrays and a deep copy of hashes. |
|
210
|
|
|
|
|
|
|
sub _clone { |
|
211
|
2890
|
|
|
2890
|
|
2487
|
my ($obj) = @_; |
|
212
|
2890
|
|
|
|
|
2030
|
my $type; |
|
213
|
2890
|
50
|
|
|
|
6495
|
if ( !defined $obj ) { # Perl 5.005 compatibility |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
214
|
0
|
|
|
|
|
0
|
$type = 'SCALAR'; |
|
215
|
|
|
|
|
|
|
} |
|
216
|
|
|
|
|
|
|
elsif ( ref $obj eq 'HASH' ) { |
|
217
|
231
|
|
|
|
|
226
|
$type = 'HASH'; |
|
218
|
|
|
|
|
|
|
} |
|
219
|
|
|
|
|
|
|
elsif ( ref $obj eq 'ARRAY' ) { |
|
220
|
913
|
|
|
|
|
838
|
$type = 'ARRAY'; |
|
221
|
|
|
|
|
|
|
} |
|
222
|
|
|
|
|
|
|
else { |
|
223
|
1746
|
|
|
|
|
1475
|
$type = 'SCALAR'; |
|
224
|
|
|
|
|
|
|
} |
|
225
|
|
|
|
|
|
|
|
|
226
|
2890
|
100
|
|
|
|
6314
|
return $obj if $type eq 'SCALAR'; |
|
227
|
1144
|
100
|
|
|
|
2596
|
return $obj if $type eq 'ARRAY'; |
|
228
|
|
|
|
|
|
|
|
|
229
|
231
|
|
|
|
|
201
|
my %copy; |
|
230
|
231
|
|
|
|
|
492
|
foreach my $key ( keys %$obj ) { |
|
231
|
800
|
|
|
|
|
1105
|
$copy{$key} = _clone( $obj->{$key} ); |
|
232
|
|
|
|
|
|
|
} |
|
233
|
231
|
|
|
|
|
615
|
return \%copy; |
|
234
|
|
|
|
|
|
|
} |
|
235
|
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
# This does a straight merge of hashes, delegating the merge-specific |
|
237
|
|
|
|
|
|
|
# work to '_merge_hashes'. |
|
238
|
|
|
|
|
|
|
sub _merge_hash_elements { |
|
239
|
169
|
|
|
169
|
|
177
|
my ( $left, $right ) = @_; |
|
240
|
169
|
50
|
33
|
|
|
987
|
die "Arguments for _merge_hash_elements must be hash references" |
|
241
|
|
|
|
|
|
|
unless UNIVERSAL::isa( $left, 'HASH' ) && UNIVERSAL::isa( $right, 'HASH' ); |
|
242
|
|
|
|
|
|
|
|
|
243
|
169
|
|
|
|
|
158
|
my %newhash; |
|
244
|
169
|
|
|
|
|
512
|
foreach my $leftkey ( keys %$left ) { |
|
245
|
243
|
100
|
|
|
|
378
|
if ( exists $right->{$leftkey} ) { |
|
246
|
235
|
|
|
|
|
435
|
$newhash{$leftkey} = _merge_hashes( $left->{$leftkey}, $right->{$leftkey} ); |
|
247
|
|
|
|
|
|
|
} |
|
248
|
|
|
|
|
|
|
else { |
|
249
|
8
|
|
|
|
|
15
|
$newhash{$leftkey} = _clone( $left->{$leftkey} ); |
|
250
|
|
|
|
|
|
|
} |
|
251
|
|
|
|
|
|
|
} |
|
252
|
169
|
|
|
|
|
694
|
foreach my $rightkey ( keys %$right ) { |
|
253
|
2266
|
100
|
|
|
|
4650
|
$newhash{$rightkey} = _clone( $right->{$rightkey} ) |
|
254
|
|
|
|
|
|
|
if !exists $left->{$rightkey}; |
|
255
|
|
|
|
|
|
|
} |
|
256
|
169
|
|
|
|
|
508
|
return \%newhash; |
|
257
|
|
|
|
|
|
|
} |
|
258
|
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
sub _merge_hashes { |
|
260
|
371
|
|
|
371
|
|
3762
|
my ( $left, $right ) = @_; |
|
261
|
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
# if one argument or the other is undefined or empty, don't worry about |
|
263
|
|
|
|
|
|
|
# copying, just return the original. |
|
264
|
371
|
50
|
|
|
|
707
|
return $right unless defined $left; |
|
265
|
371
|
50
|
|
|
|
549
|
return $left unless defined $right; |
|
266
|
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
# For the general use of this function, we want to create duplicates |
|
268
|
|
|
|
|
|
|
# of all data that is merged. |
|
269
|
|
|
|
|
|
|
|
|
270
|
371
|
|
|
|
|
285
|
my ( $lefttype, $righttype ); |
|
271
|
371
|
100
|
|
|
|
829
|
if ( ref $left eq 'HASH' ) { |
|
|
|
100
|
|
|
|
|
|
|
272
|
169
|
|
|
|
|
204
|
$lefttype = 'HASH'; |
|
273
|
|
|
|
|
|
|
} |
|
274
|
|
|
|
|
|
|
elsif ( ref $left eq 'ARRAY' ) { |
|
275
|
51
|
|
|
|
|
54
|
$lefttype = 'ARRAY'; |
|
276
|
|
|
|
|
|
|
} |
|
277
|
|
|
|
|
|
|
else { |
|
278
|
151
|
|
|
|
|
157
|
$lefttype = 'SCALAR'; |
|
279
|
|
|
|
|
|
|
} |
|
280
|
|
|
|
|
|
|
|
|
281
|
371
|
100
|
|
|
|
664
|
if ( ref $right eq 'HASH' ) { |
|
|
|
100
|
|
|
|
|
|
|
282
|
169
|
|
|
|
|
176
|
$righttype = 'HASH'; |
|
283
|
|
|
|
|
|
|
} |
|
284
|
|
|
|
|
|
|
elsif ( ref $right eq 'ARRAY' ) { |
|
285
|
49
|
|
|
|
|
43
|
$righttype = 'ARRAY'; |
|
286
|
|
|
|
|
|
|
} |
|
287
|
|
|
|
|
|
|
else { |
|
288
|
153
|
|
|
|
|
155
|
$righttype = 'SCALAR'; |
|
289
|
|
|
|
|
|
|
} |
|
290
|
|
|
|
|
|
|
|
|
291
|
371
|
|
|
|
|
847
|
return $merge_matrix{$lefttype}->{$righttype}( $left, $right ); |
|
292
|
|
|
|
|
|
|
} |
|
293
|
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
sub _require_html_packages { |
|
295
|
1
|
50
|
|
1
|
|
4
|
croak "$missing_html_packages\n" . "HTML::Parser & HTML::Tagset is required for process_html\n" |
|
296
|
|
|
|
|
|
|
if $missing_html_packages; |
|
297
|
|
|
|
|
|
|
} |
|
298
|
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
sub import { |
|
300
|
18
|
100
|
|
18
|
|
4025
|
return unless @_ > 1; |
|
301
|
|
|
|
|
|
|
|
|
302
|
15
|
|
|
|
|
42
|
my $class = shift; |
|
303
|
15
|
|
|
|
|
71
|
my %args = $class->_process_args(@_); |
|
304
|
15
|
|
|
|
|
56
|
my $name = delete $args{as}; |
|
305
|
|
|
|
|
|
|
|
|
306
|
15
|
|
|
|
|
69
|
my $caller = caller(); |
|
307
|
15
|
|
|
|
|
229
|
my $iopts = _merge_hashes _extract_opts( \%args ), \%opts; |
|
308
|
15
|
|
|
|
|
67
|
my $itags = _merge_hashes \%args, \%tags; |
|
309
|
|
|
|
|
|
|
|
|
310
|
15
|
100
|
|
|
|
82
|
_require_html_packages |
|
311
|
|
|
|
|
|
|
if $iopts->{process_html}; |
|
312
|
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
# Could verify ITAGS here via _check_blocks, but what if a user |
|
314
|
|
|
|
|
|
|
# wants to add a block to block_order that they intend to override |
|
315
|
|
|
|
|
|
|
# the implementation of with every call to format()? |
|
316
|
|
|
|
|
|
|
|
|
317
|
14
|
|
|
14
|
|
121
|
no strict 'refs'; |
|
|
14
|
|
|
|
|
20
|
|
|
|
14
|
|
|
|
|
50271
|
|
|
318
|
15
|
|
|
|
|
17904
|
*{ $caller . "::" . $name } = sub { |
|
319
|
45
|
|
|
45
|
|
30730
|
Text::MediawikiFormat::_format( $itags, $iopts, @_ ); |
|
320
|
|
|
|
|
|
|
} |
|
321
|
15
|
|
|
|
|
80
|
} |
|
322
|
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
=head1 FUNCTIONS |
|
324
|
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
=head2 format |
|
326
|
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
C takes one required argument, the text to convert, and returns the |
|
328
|
|
|
|
|
|
|
converted text. It allows two optional arguments. The first is a reference to |
|
329
|
|
|
|
|
|
|
a hash of tags used to override the function's default behavior. Anything |
|
330
|
|
|
|
|
|
|
passed in here will override the default tags. The second argument is a hash |
|
331
|
|
|
|
|
|
|
reference of options. The options are currently: |
|
332
|
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
=over 4 |
|
334
|
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
=item prefix |
|
336
|
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
The prefix of any links to wiki pages. In HTML mode, this is the path to the |
|
338
|
|
|
|
|
|
|
Wiki. The actual linked item itself will be appended to the prefix. This is |
|
339
|
|
|
|
|
|
|
useful to create full URIs: |
|
340
|
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
{prefix => 'http://example.com/wiki.pl?page='} |
|
342
|
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
=item extended |
|
344
|
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
A boolean flag, true by default, to let square brackets mark links. |
|
346
|
|
|
|
|
|
|
An optional title may occur after the Wiki targets, preceded by an open pipe. |
|
347
|
|
|
|
|
|
|
URI titles are separated from their title with a space. These are valid |
|
348
|
|
|
|
|
|
|
extended links: |
|
349
|
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
[[A wiki page|and the title to display]] |
|
351
|
|
|
|
|
|
|
[http://ximbiot.com URI title] |
|
352
|
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
Where the linking semantics of the destination format allow it, the result will |
|
354
|
|
|
|
|
|
|
display the title instead of the URI. In HTML terms, the title is the content |
|
355
|
|
|
|
|
|
|
of an C element (not the content of its C attribute). |
|
356
|
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
You can use delimiters other than single square brackets for marking extended |
|
358
|
|
|
|
|
|
|
links by passing a value for C in the C<%tags> hash |
|
359
|
|
|
|
|
|
|
when calling C. |
|
360
|
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
Note that if you disable this flag, you should probably enable |
|
362
|
|
|
|
|
|
|
C or there will be no automated way to link to other pages in |
|
363
|
|
|
|
|
|
|
your wiki. |
|
364
|
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
=item implicit_links |
|
366
|
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
A boolean flag, false by default, to create links from StudlyCapsStrings. |
|
368
|
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
=item absolute_links |
|
370
|
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
A boolean flag, true by default, which treats any links that are absolute URIs |
|
372
|
|
|
|
|
|
|
(such as C) specially. Any prefix will not apply. |
|
373
|
|
|
|
|
|
|
This should maybe be called implicit_absolute_links since the C |
|
374
|
|
|
|
|
|
|
option enables absolute links inside square brackets by default. |
|
375
|
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
A link is any text that starts with a known schema followed by a colon and one |
|
377
|
|
|
|
|
|
|
or more non-whitespace characters. This is a distinct subset of what L |
|
378
|
|
|
|
|
|
|
recognizes as a URI, but is a good first-order approximation. If you need to |
|
379
|
|
|
|
|
|
|
recognize more complex URIs, use the standard wiki formatting explained |
|
380
|
|
|
|
|
|
|
earlier. |
|
381
|
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
The recognized schemas are those defined in the C value in the C<%tags> |
|
383
|
|
|
|
|
|
|
hash. C defaults to C, C, C, C, and |
|
384
|
|
|
|
|
|
|
C. |
|
385
|
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
=item process_html |
|
387
|
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
This flag, true by default, causes the formatter to ignore block level wiki |
|
389
|
|
|
|
|
|
|
markup (code, ordered, unordered, etc...) when they occur on lines which also |
|
390
|
|
|
|
|
|
|
contain allowed block-level HTML tags (, , , etc...). |
|
391
|
|
|
|
|
|
|
Phrase level wiki markup (emphasis, strong, & links) is unaffected by this |
|
392
|
|
|
|
|
|
|
flag. |
|
393
|
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
=back |
|
395
|
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
=cut |
|
397
|
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
sub format { |
|
399
|
7
|
|
|
7
|
1
|
22763
|
_format( \%tags, \%opts, @_ ); |
|
400
|
|
|
|
|
|
|
} |
|
401
|
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
# Turn the contents after a ; or : into a dictionary list. |
|
403
|
|
|
|
|
|
|
# Using : without ; just looks like an indent. |
|
404
|
|
|
|
|
|
|
sub _dl { |
|
405
|
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
#my ($line, $indent, $lead) = @_; |
|
407
|
23
|
|
|
23
|
|
17
|
my ( $term, $def ); |
|
408
|
|
|
|
|
|
|
|
|
409
|
23
|
100
|
|
|
|
31
|
if ( $_[2] eq ';' ) { |
|
410
|
11
|
100
|
|
|
|
35
|
if ( $_[0] =~ /^(.*?)\s+:\s+(.*)$/ ) { |
|
411
|
6
|
|
|
|
|
10
|
$term = $1; |
|
412
|
6
|
|
|
|
|
9
|
$def = $2; |
|
413
|
|
|
|
|
|
|
} |
|
414
|
|
|
|
|
|
|
else { |
|
415
|
5
|
|
|
|
|
7
|
$term = $_[0]; |
|
416
|
|
|
|
|
|
|
} |
|
417
|
|
|
|
|
|
|
} |
|
418
|
|
|
|
|
|
|
else { |
|
419
|
12
|
|
|
|
|
12
|
$def = $_[0]; |
|
420
|
|
|
|
|
|
|
} |
|
421
|
|
|
|
|
|
|
|
|
422
|
23
|
|
|
|
|
18
|
my @retval; |
|
423
|
23
|
100
|
|
|
|
47
|
push @retval, "", $term, "\n" if defined $term; |
|
424
|
23
|
100
|
|
|
|
43
|
push @retval, "", $def, "\n" if defined $def; |
|
425
|
23
|
|
|
|
|
63
|
return @retval; |
|
426
|
|
|
|
|
|
|
} |
|
427
|
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
# Makes a regex out of the allowed schema array. |
|
429
|
|
|
|
|
|
|
sub _make_schema_regex { |
|
430
|
47
|
|
|
47
|
|
85
|
my $re = join "|", map {qr/\Q$_\E/} @_; |
|
|
231
|
|
|
|
|
1998
|
|
|
431
|
47
|
|
|
|
|
871
|
return qr/(?:$re)/; |
|
432
|
|
|
|
|
|
|
} |
|
433
|
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
$uric = $URI::uric; |
|
435
|
|
|
|
|
|
|
$uricCheat = $uric; |
|
436
|
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
# We need to avoid picking up 'HTTP::Request::Common' so we have a |
|
438
|
|
|
|
|
|
|
# subset of uric without a colon. |
|
439
|
|
|
|
|
|
|
$uricCheat =~ tr/://d; |
|
440
|
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
# Identifying characters often accidentally picked up trailing a URI. |
|
442
|
|
|
|
|
|
|
$uriCruft = q/]),.!'";}/; |
|
443
|
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
# escape a URI based on our charset. |
|
445
|
|
|
|
|
|
|
sub _escape_uri { |
|
446
|
28
|
|
|
28
|
|
43
|
my ( $opts, $uri ) = @_; |
|
447
|
28
|
50
|
|
|
|
76
|
confess "charset not initialized" unless $opts->{charset}; |
|
448
|
28
|
50
|
|
|
|
247
|
return uri_escape_utf8 $uri if $opts->{charset} =~ /^utf-?8$/i; |
|
449
|
0
|
|
|
|
|
0
|
return uri_escape $uri; |
|
450
|
|
|
|
|
|
|
} |
|
451
|
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
# Turn [[Wiki Link|Title]], [URI Title], scheme:url, or StudlyCaps into links. |
|
453
|
|
|
|
|
|
|
sub _make_html_link { |
|
454
|
32
|
|
|
32
|
|
77
|
my ( $tag, $opts, $tags ) = @_; |
|
455
|
|
|
|
|
|
|
|
|
456
|
32
|
|
|
|
|
48
|
my ( $class, $trailing ) = ( '', '' ); |
|
457
|
32
|
|
|
|
|
34
|
my ( $href, $title ); |
|
458
|
32
|
100
|
|
|
|
147
|
if ( $tag =~ /^\[\[([^|#]*)(?:(#)([^|]*))?(?:(\|)(.*))?\]\]$/ ) { |
|
|
|
100
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
# Wiki link |
|
461
|
7
|
50
|
|
|
|
55
|
$href = $opts->{prefix} . _escape_uri $opts, $1 if $1; |
|
462
|
7
|
50
|
|
|
|
194
|
$href .= $2 . _escape_uri $opts, $3 if $2; |
|
463
|
|
|
|
|
|
|
|
|
464
|
7
|
100
|
|
|
|
21
|
if ($4) { |
|
465
|
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
# Title specified explicitly. |
|
467
|
3
|
50
|
|
|
|
10
|
if ( length $5 ) { |
|
468
|
3
|
|
|
|
|
8
|
$title = $5; |
|
469
|
|
|
|
|
|
|
} |
|
470
|
|
|
|
|
|
|
else { |
|
471
|
|
|
|
|
|
|
# An empty title asks Mediawiki to strip any parens off the end |
|
472
|
|
|
|
|
|
|
# of the node name. |
|
473
|
0
|
|
|
|
|
0
|
$1 =~ /^([^(]*)(?:\s*\()?/; |
|
474
|
0
|
|
|
|
|
0
|
$title = $1; |
|
475
|
|
|
|
|
|
|
} |
|
476
|
|
|
|
|
|
|
} |
|
477
|
|
|
|
|
|
|
else { |
|
478
|
|
|
|
|
|
|
# Title defaults to the node name. |
|
479
|
4
|
|
|
|
|
7
|
$title = $1; |
|
480
|
|
|
|
|
|
|
} |
|
481
|
|
|
|
|
|
|
} |
|
482
|
|
|
|
|
|
|
elsif ( $tag =~ /^\[(\S*)(?:(\s+)(.*))?\]$/ ) { |
|
483
|
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
# URI |
|
485
|
5
|
|
|
|
|
10
|
$href = $1; |
|
486
|
5
|
50
|
|
|
|
13
|
if ($2) { |
|
487
|
5
|
|
|
|
|
8
|
$title = $3; |
|
488
|
|
|
|
|
|
|
} |
|
489
|
|
|
|
|
|
|
else { |
|
490
|
0
|
|
|
|
|
0
|
$title = ++$opts->{_uri_refs}; |
|
491
|
|
|
|
|
|
|
} |
|
492
|
5
|
|
|
|
|
10
|
$href =~ s/'/%27/g; |
|
493
|
|
|
|
|
|
|
} |
|
494
|
|
|
|
|
|
|
else { |
|
495
|
|
|
|
|
|
|
# Shouldn't be able to get here without either $opts->{absolute_links} |
|
496
|
|
|
|
|
|
|
# or $opts->{implicit_links}; |
|
497
|
20
|
|
33
|
|
|
50
|
$tags->{_schema_regex} ||= _make_schema_regex @{ $tags->{schemas} }; |
|
|
0
|
|
|
|
|
0
|
|
|
498
|
20
|
|
|
|
|
25
|
my $s = $tags->{_schema_regex}; |
|
499
|
|
|
|
|
|
|
|
|
500
|
20
|
100
|
|
|
|
525
|
if ( $tag =~ /^$s:[$uricCheat][$uric]*$/ ) { |
|
501
|
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
# absolute link |
|
503
|
8
|
|
|
|
|
17
|
$href = $&; |
|
504
|
8
|
100
|
|
|
|
67
|
$trailing = $& if $href =~ s/[$uriCruft]$//; |
|
505
|
8
|
|
|
|
|
16
|
$title = $href; |
|
506
|
|
|
|
|
|
|
} |
|
507
|
|
|
|
|
|
|
else { |
|
508
|
|
|
|
|
|
|
# StudlyCaps |
|
509
|
12
|
|
|
|
|
38
|
$href = $opts->{prefix} . _escape_uri $opts, $tag; |
|
510
|
12
|
|
|
|
|
290
|
$title = $tag; |
|
511
|
|
|
|
|
|
|
} |
|
512
|
|
|
|
|
|
|
} |
|
513
|
|
|
|
|
|
|
|
|
514
|
32
|
|
|
|
|
223
|
return "$title$trailing"; |
|
515
|
|
|
|
|
|
|
} |
|
516
|
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
# Store a TOC line for later. |
|
518
|
|
|
|
|
|
|
# |
|
519
|
|
|
|
|
|
|
# ASSUMPTIONS |
|
520
|
|
|
|
|
|
|
# $level >= 1 |
|
521
|
|
|
|
|
|
|
sub _store_toc_line { |
|
522
|
12
|
|
|
12
|
|
20
|
my ( $toc, $level, $title, $name ) = @_; |
|
523
|
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
# TODO: Strip formatting from $title. |
|
525
|
|
|
|
|
|
|
|
|
526
|
12
|
100
|
100
|
|
|
63
|
if ( @$toc && $level > $toc->[-1]->{level} ) { |
|
527
|
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
# Nest a sublevel. |
|
529
|
3
|
100
|
|
|
|
19
|
$toc->[-1]->{sublevel} = [] |
|
530
|
|
|
|
|
|
|
unless exists $toc->[-1]->{sublevel}; |
|
531
|
3
|
|
|
|
|
13
|
_store_toc_line( $toc->[-1]->{sublevel}, $level, $title, $name ); |
|
532
|
|
|
|
|
|
|
} |
|
533
|
|
|
|
|
|
|
else { |
|
534
|
9
|
|
|
|
|
54
|
push @$toc, { level => $level, title => $title, name => $name }; |
|
535
|
|
|
|
|
|
|
} |
|
536
|
|
|
|
|
|
|
|
|
537
|
12
|
|
|
|
|
23
|
return $level; |
|
538
|
|
|
|
|
|
|
} |
|
539
|
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
# Make header text, storing the line for the TOC. |
|
541
|
|
|
|
|
|
|
# |
|
542
|
|
|
|
|
|
|
# ASSUMPTIONS |
|
543
|
|
|
|
|
|
|
# $tags->{_toc} has been initialized to an array ref. |
|
544
|
|
|
|
|
|
|
sub _make_header { |
|
545
|
9
|
|
|
9
|
|
16
|
my $level = length $_[2]; |
|
546
|
9
|
|
|
|
|
29
|
my $n = _escape_uri $_[-1], $_[3]; |
|
547
|
|
|
|
|
|
|
|
|
548
|
9
|
|
|
|
|
249
|
_store_toc_line( $_[-2]->{_toc}, $level, $_[3], $n ); |
|
549
|
|
|
|
|
|
|
|
|
550
|
9
|
|
|
|
|
50
|
return "", Text::MediawikiFormat::format_line( $_[3], @_[ -2, -1 ] ), "\n"; |
|
551
|
|
|
|
|
|
|
} |
|
552
|
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
sub _format { |
|
554
|
50
|
|
|
50
|
|
117
|
my ( $itags, $iopts, $text, $tags, $opts ) = @_; |
|
555
|
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
# Overwriting the caller's hashes locally after merging its contents |
|
557
|
|
|
|
|
|
|
# is okay. |
|
558
|
50
|
|
100
|
|
|
366
|
$tags = _merge_hashes( $tags || {}, $itags ); |
|
559
|
50
|
|
100
|
|
|
253
|
$opts = _merge_hashes( $opts || {}, $iopts ); |
|
560
|
|
|
|
|
|
|
|
|
561
|
50
|
50
|
|
|
|
157
|
_require_html_packages |
|
562
|
|
|
|
|
|
|
if $opts->{process_html}; |
|
563
|
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
# Always verify the blocks since the user may have slagged the |
|
565
|
|
|
|
|
|
|
# default hash on import. |
|
566
|
50
|
|
|
|
|
114
|
_check_blocks($tags); |
|
567
|
|
|
|
|
|
|
|
|
568
|
50
|
|
|
|
|
157
|
my @blocks = _find_blocks( $text, $tags, $opts ); |
|
569
|
50
|
|
|
|
|
144
|
@blocks = _nest_blocks( \@blocks ); |
|
570
|
50
|
|
|
|
|
200
|
return _process_blocks( \@blocks, $tags, $opts ); |
|
571
|
|
|
|
|
|
|
} |
|
572
|
|
|
|
|
|
|
|
|
573
|
|
|
|
|
|
|
sub _check_blocks { |
|
574
|
52
|
|
|
52
|
|
2467
|
my $tags = shift; |
|
575
|
52
|
|
|
|
|
64
|
my %blocks = %{ $tags->{blocks} }; |
|
|
52
|
|
|
|
|
305
|
|
|
576
|
52
|
|
|
|
|
80
|
delete @blocks{ @{ $tags->{blockorder} } }; |
|
|
52
|
|
|
|
|
212
|
|
|
577
|
|
|
|
|
|
|
|
|
578
|
52
|
100
|
|
|
|
666
|
carp "No order specified for blocks: " . join( ', ', keys %blocks ) . ".\n" |
|
579
|
|
|
|
|
|
|
if keys %blocks; |
|
580
|
|
|
|
|
|
|
} |
|
581
|
|
|
|
|
|
|
|
|
582
|
|
|
|
|
|
|
# This sub recognizes three states: |
|
583
|
|
|
|
|
|
|
# |
|
584
|
|
|
|
|
|
|
# 1. undef |
|
585
|
|
|
|
|
|
|
# Normal wiki processing will be done on this line. |
|
586
|
|
|
|
|
|
|
# |
|
587
|
|
|
|
|
|
|
# 2. html |
|
588
|
|
|
|
|
|
|
# Links and phrasal processing will be done, but formatting should be |
|
589
|
|
|
|
|
|
|
# ignored. |
|
590
|
|
|
|
|
|
|
# |
|
591
|
|
|
|
|
|
|
# 3. nowiki |
|
592
|
|
|
|
|
|
|
# No further wiki processing should be done. |
|
593
|
|
|
|
|
|
|
# |
|
594
|
|
|
|
|
|
|
# Each state may override the lower ones if already set on a given line. |
|
595
|
|
|
|
|
|
|
# |
|
596
|
|
|
|
|
|
|
sub _append_processed_line { |
|
597
|
0
|
|
|
0
|
|
0
|
my ( $parser, $text, $state ) = @_; |
|
598
|
0
|
|
|
|
|
0
|
my $lines = $parser->{processed_lines}; |
|
599
|
|
|
|
|
|
|
|
|
600
|
0
|
|
0
|
|
|
0
|
$state ||= ''; |
|
601
|
|
|
|
|
|
|
|
|
602
|
0
|
|
|
|
|
0
|
my @newlines = split /(?<=\n)/, $text; |
|
603
|
0
|
0
|
0
|
|
|
0
|
if ( |
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
604
|
|
|
|
|
|
|
@$lines |
|
605
|
|
|
|
|
|
|
&& $lines->[-1]->[1] !~ /\n$/ |
|
606
|
|
|
|
|
|
|
&& # State not changing from or to 'nowiki' |
|
607
|
|
|
|
|
|
|
!( $state ne $lines->[-1]->[0] && grep /^nowiki$/, $state, $lines->[-1]->[0] ) |
|
608
|
|
|
|
|
|
|
) |
|
609
|
|
|
|
|
|
|
{ |
|
610
|
0
|
|
|
|
|
0
|
$lines->[-1]->[1] .= shift @newlines; |
|
611
|
0
|
0
|
|
|
|
0
|
$lines->[-1]->[0] = $state if $state eq 'html'; |
|
612
|
|
|
|
|
|
|
} |
|
613
|
|
|
|
|
|
|
|
|
614
|
0
|
|
|
|
|
0
|
foreach my $line (@newlines) { |
|
615
|
0
|
0
|
|
|
|
0
|
$lines->[-1]->[2] = '1' if @$lines; |
|
616
|
0
|
|
|
|
|
0
|
push @$lines, [ $state, $line ]; |
|
617
|
|
|
|
|
|
|
} |
|
618
|
0
|
0
|
0
|
|
|
0
|
$lines->[-1]->[2] = '1' |
|
619
|
|
|
|
|
|
|
if @$lines && $lines->[-1]->[1] =~ /\n$/; |
|
620
|
|
|
|
|
|
|
} |
|
621
|
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
sub _html_tag { |
|
623
|
0
|
|
|
0
|
|
0
|
my ( $parser, $type, $tagname, $orig, $attr ) = @_; |
|
624
|
0
|
|
|
|
|
0
|
my $tags = $parser->{tags}; |
|
625
|
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
# $tagname may have been generated by an empty tag. If so, HTML::Parser |
|
627
|
|
|
|
|
|
|
# will sometimes include the trailing / in the tag name. |
|
628
|
0
|
|
|
|
|
0
|
my $isEmptyTag = $orig =~ m#/>$#; |
|
629
|
0
|
0
|
|
|
|
0
|
$tagname =~ s#/$## if $isEmptyTag; |
|
630
|
|
|
|
|
|
|
|
|
631
|
0
|
0
|
|
|
|
0
|
unless ( grep /^\Q$tagname\E$/, @{ $tags->{allowed_tags} } ) { |
|
|
0
|
|
|
|
|
0
|
|
|
632
|
0
|
|
|
|
|
0
|
_append_processed_line $parser, CGI::escapeHTML $orig; |
|
633
|
0
|
|
|
|
|
0
|
return; |
|
634
|
|
|
|
|
|
|
} |
|
635
|
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
# Any $tagname must now be in the allowed list, including . |
|
637
|
|
|
|
|
|
|
|
|
638
|
0
|
|
|
|
|
0
|
my $tagstack = $parser->{tag_stack}; |
|
639
|
0
|
0
|
|
|
|
0
|
my $stacktop = @$tagstack ? $tagstack->[-1] : ''; |
|
640
|
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
# First, process end tags, since they can change our state. |
|
642
|
0
|
0
|
0
|
|
|
0
|
if ( $type eq 'E' && $stacktop eq $tagname ) { |
|
643
|
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
# The closing tag is at the top of the stack, like it should be. |
|
645
|
|
|
|
|
|
|
# Pop it and append the close tag to the output. |
|
646
|
0
|
|
|
|
|
0
|
pop @$tagstack; |
|
647
|
0
|
|
|
|
|
0
|
my $newtag; |
|
648
|
|
|
|
|
|
|
|
|
649
|
0
|
0
|
|
|
|
0
|
if ( $tagname eq 'nowiki' ) { |
|
650
|
|
|
|
|
|
|
|
|
651
|
|
|
|
|
|
|
# The browser doesn't need to see the tag. |
|
652
|
0
|
|
|
|
|
0
|
$newtag = ''; |
|
653
|
|
|
|
|
|
|
} |
|
654
|
|
|
|
|
|
|
else { |
|
655
|
0
|
|
|
|
|
0
|
$newtag = "$tagname>"; |
|
656
|
|
|
|
|
|
|
} |
|
657
|
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
# Can't close a state into or |
|
659
|
0
|
|
|
|
|
0
|
_append_processed_line $parser, $newtag, 'html'; |
|
660
|
0
|
|
|
|
|
0
|
return; |
|
661
|
|
|
|
|
|
|
} |
|
662
|
|
|
|
|
|
|
|
|
663
|
0
|
0
|
0
|
|
|
0
|
if ( @$tagstack && grep /^\Q$stacktop\E$/, qw{nowiki pre} ) { |
|
664
|
|
|
|
|
|
|
|
|
665
|
|
|
|
|
|
|
# Ignore all markup within or tags. |
|
666
|
0
|
|
|
|
|
0
|
_append_processed_line $parser, CGI::escapeHTML($orig), 'nowiki'; |
|
667
|
0
|
|
|
|
|
0
|
return; |
|
668
|
|
|
|
|
|
|
} |
|
669
|
|
|
|
|
|
|
|
|
670
|
0
|
0
|
0
|
|
|
0
|
if ( $type eq 'E' && $HTML::Tagset::isPhraseMarkup{$tagname} ) |
|
671
|
|
|
|
|
|
|
|
|
672
|
|
|
|
|
|
|
# If we ask for artificial end element events for self-closed elements, |
|
673
|
|
|
|
|
|
|
# then we need to check $HTML::Tagset::emptyElement($tagname) here too. |
|
674
|
|
|
|
|
|
|
{ |
|
675
|
|
|
|
|
|
|
# We didn't record phrase markup on the stack, so it's okay to just |
|
676
|
|
|
|
|
|
|
# let it close. |
|
677
|
0
|
|
|
|
|
0
|
_append_processed_line $parser, "$tagname>"; |
|
678
|
0
|
|
|
|
|
0
|
return; |
|
679
|
|
|
|
|
|
|
} |
|
680
|
|
|
|
|
|
|
|
|
681
|
0
|
0
|
|
|
|
0
|
if ( $type eq 'E' ) { |
|
682
|
|
|
|
|
|
|
|
|
683
|
|
|
|
|
|
|
# We got a non-phrase end tag that wasn't on the stack. Escape it. |
|
684
|
0
|
|
|
|
|
0
|
_append_processed_line $parser, CGI::escapeHTML($orig); |
|
685
|
0
|
|
|
|
|
0
|
return; |
|
686
|
|
|
|
|
|
|
} |
|
687
|
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
### |
|
689
|
|
|
|
|
|
|
### $type must now eq 'S'. |
|
690
|
|
|
|
|
|
|
### |
|
691
|
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
# The browser doesn't need to see the tag. |
|
693
|
0
|
0
|
|
|
|
0
|
if ( $tagname eq 'nowiki' ) { |
|
694
|
0
|
0
|
|
|
|
0
|
push @$tagstack, $tagname |
|
695
|
|
|
|
|
|
|
unless $isEmptyTag; |
|
696
|
0
|
|
|
|
|
0
|
return; |
|
697
|
|
|
|
|
|
|
} |
|
698
|
|
|
|
|
|
|
|
|
699
|
|
|
|
|
|
|
# Strip disallowed attributes. |
|
700
|
0
|
|
|
|
|
0
|
my $newtag = "<$tagname"; |
|
701
|
0
|
|
|
|
|
0
|
foreach ( @{ $tags->{allowed_attrs} } ) { |
|
|
0
|
|
|
|
|
0
|
|
|
702
|
0
|
0
|
|
|
|
0
|
if ( defined $attr->{$_} ) { |
|
703
|
0
|
|
|
|
|
0
|
$newtag .= " $_"; |
|
704
|
0
|
0
|
|
|
|
0
|
unless ( $attr->{$_} eq '__TEXT_MEDIAWIKIFORMAT_BOOL__' ) { |
|
705
|
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
# CGI::escapeHTML escapes single quotes. |
|
707
|
0
|
|
|
|
|
0
|
$attr->{$_} = CGI::escapeHTML $attr->{$_}; |
|
708
|
0
|
|
|
|
|
0
|
$newtag .= "='" . $attr->{$_} . "'"; |
|
709
|
|
|
|
|
|
|
} |
|
710
|
|
|
|
|
|
|
} |
|
711
|
|
|
|
|
|
|
} |
|
712
|
0
|
0
|
0
|
|
|
0
|
$newtag .= " /" if $HTML::Tagset::emptyElement{$tagname} || $isEmptyTag; |
|
713
|
0
|
|
|
|
|
0
|
$newtag .= ">"; |
|
714
|
|
|
|
|
|
|
|
|
715
|
|
|
|
|
|
|
# If this isn't a block level element, there's no need to track nesting. |
|
716
|
0
|
0
|
0
|
|
|
0
|
if ( $HTML::Tagset::isPhraseMarkup{$tagname} |
|
717
|
|
|
|
|
|
|
|| $HTML::Tagset::emptyElement{$tagname} ) |
|
718
|
|
|
|
|
|
|
{ |
|
719
|
0
|
|
|
|
|
0
|
_append_processed_line $parser, $newtag; |
|
720
|
0
|
|
|
|
|
0
|
return; |
|
721
|
|
|
|
|
|
|
} |
|
722
|
|
|
|
|
|
|
|
|
723
|
|
|
|
|
|
|
# Some elements can close implicitly |
|
724
|
0
|
0
|
|
|
|
0
|
if (@$tagstack) { |
|
725
|
0
|
0
|
0
|
|
|
0
|
if ( $tagname eq $stacktop |
|
|
|
0
|
|
|
|
|
|
|
726
|
|
|
|
|
|
|
&& $HTML::Tagset::optionalEndTag{$tagname} ) |
|
727
|
|
|
|
|
|
|
{ |
|
728
|
0
|
|
|
|
|
0
|
pop @$tagstack; |
|
729
|
|
|
|
|
|
|
} |
|
730
|
|
|
|
|
|
|
elsif ( !$HTML::Tagset::is_Possible_Strict_P_Content{$tagname} ) { |
|
731
|
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
# Need to check more than the last item for paragraphs. |
|
733
|
0
|
|
|
|
|
0
|
for ( my $i = $#{$tagstack}; $i >= 0; $i-- ) { |
|
|
0
|
|
|
|
|
0
|
|
|
734
|
0
|
|
|
|
|
0
|
my $checking = $tagstack->[$i]; |
|
735
|
0
|
0
|
|
|
|
0
|
last if grep /^\Q$checking\E$/, @HTML::Tagset::p_closure_barriers; |
|
736
|
|
|
|
|
|
|
|
|
737
|
0
|
0
|
|
|
|
0
|
if ( $checking eq 'p' ) { |
|
738
|
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
# pop 'em all. |
|
740
|
0
|
|
|
|
|
0
|
splice @$tagstack, $i; |
|
741
|
0
|
|
|
|
|
0
|
last; |
|
742
|
|
|
|
|
|
|
} |
|
743
|
|
|
|
|
|
|
} |
|
744
|
|
|
|
|
|
|
} |
|
745
|
|
|
|
|
|
|
} |
|
746
|
|
|
|
|
|
|
|
|
747
|
|
|
|
|
|
|
# Could verify here that and sub-elements only appear where
|
748
|
|
|
|
|
|
|
# they belong. |
|
749
|
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
# Push the new tag onto the stack. |
|
751
|
0
|
0
|
|
|
|
0
|
push @$tagstack, $tagname |
|
752
|
|
|
|
|
|
|
unless $isEmptyTag; |
|
753
|
|
|
|
|
|
|
|
|
754
|
0
|
0
|
|
|
|
0
|
_append_processed_line $parser, $newtag, $tagname eq 'pre' ? 'nowiki' : 'html'; |
|
755
|
0
|
|
|
|
|
0
|
return; |
|
756
|
|
|
|
|
|
|
} |
|
757
|
|
|
|
|
|
|
|
|
758
|
|
|
|
|
|
|
sub _html_comment { |
|
759
|
0
|
|
|
0
|
|
0
|
my ( $parser, $text ) = @_; |
|
760
|
|
|
|
|
|
|
|
|
761
|
0
|
|
|
|
|
0
|
_append_processed_line $parser, $text, 'nowiki'; |
|
762
|
|
|
|
|
|
|
} |
|
763
|
|
|
|
|
|
|
|
|
764
|
|
|
|
|
|
|
sub _html_text { |
|
765
|
0
|
|
|
0
|
|
0
|
my ( $parser, $dtext, $skipped_text, $is_cdata ) = @_; |
|
766
|
0
|
|
|
|
|
0
|
my $tagstack = $parser->{tag_stack}; |
|
767
|
0
|
|
|
|
|
0
|
my ( $newtext, $newstate ); |
|
768
|
|
|
|
|
|
|
|
|
769
|
0
|
0
|
|
|
|
0
|
warnings::warnif("Got skipped_text: `$skipped_text'") |
|
770
|
|
|
|
|
|
|
if $skipped_text; |
|
771
|
|
|
|
|
|
|
|
|
772
|
0
|
0
|
|
|
|
0
|
if (@$tagstack) { |
|
773
|
0
|
0
|
0
|
|
|
0
|
if ( grep /\Q$tagstack->[-1]\E/, qw{nowiki pre} ) { |
|
|
|
0
|
|
|
|
|
|
|
774
|
0
|
|
|
|
|
0
|
$newstate = 'nowiki'; |
|
775
|
|
|
|
|
|
|
} |
|
776
|
|
|
|
|
|
|
elsif ( $is_cdata && $HTML::Tagset::isCDATA_Parent{ $tagstack->[-1] } ) { |
|
777
|
|
|
|
|
|
|
|
|
778
|
|
|
|
|
|
|
# If the user hadn't specifically allowed a tag which contains |
|
779
|
|
|
|
|
|
|
# CDATA, then it won't be on the tag stack. |
|
780
|
0
|
|
|
|
|
0
|
$newtext = $dtext; |
|
781
|
|
|
|
|
|
|
} |
|
782
|
|
|
|
|
|
|
} |
|
783
|
|
|
|
|
|
|
|
|
784
|
0
|
0
|
|
|
|
0
|
unless ( defined $newtext ) { |
|
785
|
0
|
0
|
|
|
|
0
|
$newtext = CGI::escapeHTML $dtext unless defined $newtext; |
|
786
|
|
|
|
|
|
|
|
|
787
|
|
|
|
|
|
|
# CGI::escapeHTML escapes single quotes so the text may be included |
|
788
|
|
|
|
|
|
|
# in attribute values, but we know we aren't processing an attribute |
|
789
|
|
|
|
|
|
|
# value here. |
|
790
|
0
|
|
|
|
|
0
|
$newtext =~ s/'/'/g; |
|
791
|
|
|
|
|
|
|
} |
|
792
|
|
|
|
|
|
|
|
|
793
|
0
|
|
|
|
|
0
|
_append_processed_line $parser, $newtext, $newstate; |
|
794
|
|
|
|
|
|
|
} |
|
795
|
|
|
|
|
|
|
|
|
796
|
|
|
|
|
|
|
sub _find_blocks_in_html { |
|
797
|
0
|
|
|
0
|
|
0
|
my ( $text, $tags, $opts ) = @_; |
|
798
|
|
|
|
|
|
|
|
|
799
|
0
|
|
|
|
|
0
|
my $parser = HTML::Parser->new( |
|
800
|
|
|
|
|
|
|
start_h => [ \&_html_tag, 'self, "S", tagname, text, attr' ], |
|
801
|
|
|
|
|
|
|
end_h => [ \&_html_tag, 'self, "E", tagname, text' ], |
|
802
|
|
|
|
|
|
|
comment_h => [ \&_html_comment, 'self, text' ], |
|
803
|
|
|
|
|
|
|
text_h => [ \&_html_text, 'self, dtext, skipped_text, is_cdata' ], |
|
804
|
|
|
|
|
|
|
marked_sections => 1, |
|
805
|
|
|
|
|
|
|
boolean_attribute_value => '__TEXT_MEDIAWIKIFORMAT_BOOL__', |
|
806
|
|
|
|
|
|
|
); |
|
807
|
0
|
|
|
|
|
0
|
$parser->{opts} = $opts; |
|
808
|
0
|
|
|
|
|
0
|
$parser->{tags} = $tags; |
|
809
|
0
|
|
|
|
|
0
|
$parser->{processed_lines} = []; |
|
810
|
0
|
|
|
|
|
0
|
$parser->{tag_stack} = []; |
|
811
|
|
|
|
|
|
|
|
|
812
|
0
|
|
|
|
|
0
|
my @blocks; |
|
813
|
0
|
|
|
|
|
0
|
my @lines = split /\r?\n/, $text; |
|
814
|
0
|
|
|
|
|
0
|
for ( my $i = 0; $i < @lines; $i++ ) { |
|
815
|
0
|
|
|
|
|
0
|
$parser->parse( $lines[$i] ); |
|
816
|
0
|
|
|
|
|
0
|
$parser->parse("\n"); |
|
817
|
0
|
0
|
|
|
|
0
|
$parser->eof if $i == $#lines; |
|
818
|
|
|
|
|
|
|
|
|
819
|
|
|
|
|
|
|
# @{$parser->{processed_lines}} may be empty when tags are |
|
820
|
|
|
|
|
|
|
# still open. |
|
821
|
0
|
|
0
|
|
|
0
|
while ( @{ $parser->{processed_lines} } |
|
|
0
|
|
|
|
|
0
|
|
|
822
|
|
|
|
|
|
|
&& $parser->{processed_lines}->[0]->[2] ) |
|
823
|
|
|
|
|
|
|
{ |
|
824
|
0
|
|
|
|
|
0
|
my ( $type, $dtext ) |
|
825
|
0
|
|
|
|
|
0
|
= @{ shift @{ $parser->{processed_lines} } }; |
|
|
0
|
|
|
|
|
0
|
|
|
826
|
|
|
|
|
|
|
|
|
827
|
0
|
|
|
|
|
0
|
my $block; |
|
828
|
0
|
0
|
|
|
|
0
|
if ($type) { |
|
829
|
0
|
|
|
|
|
0
|
$block = _start_block( $dtext, $tags, $opts, $type ); |
|
830
|
|
|
|
|
|
|
} |
|
831
|
|
|
|
|
|
|
else { |
|
832
|
0
|
|
|
|
|
0
|
chomp $dtext; |
|
833
|
0
|
|
|
|
|
0
|
$block = _start_block( $dtext, $tags, $opts ); |
|
834
|
|
|
|
|
|
|
} |
|
835
|
0
|
0
|
|
|
|
0
|
push @blocks, $block if $block; |
|
836
|
|
|
|
|
|
|
} |
|
837
|
|
|
|
|
|
|
} |
|
838
|
|
|
|
|
|
|
|
|
839
|
0
|
|
|
|
|
0
|
return @blocks; |
|
840
|
|
|
|
|
|
|
} |
|
841
|
|
|
|
|
|
|
|
|
842
|
|
|
|
|
|
|
sub _find_blocks { |
|
843
|
50
|
|
|
50
|
|
74
|
my ( $text, $tags, $opts ) = @_; |
|
844
|
50
|
|
|
|
|
64
|
my @blocks; |
|
845
|
|
|
|
|
|
|
|
|
846
|
50
|
50
|
|
|
|
143
|
if ( $opts->{process_html} ) { |
|
847
|
0
|
|
|
|
|
0
|
@blocks = _find_blocks_in_html $text, $tags, $opts; |
|
848
|
|
|
|
|
|
|
} |
|
849
|
|
|
|
|
|
|
else { |
|
850
|
|
|
|
|
|
|
# The original behavior. |
|
851
|
50
|
|
|
|
|
599
|
for my $line ( split /\r?\n/, $text ) { |
|
852
|
300
|
|
|
|
|
501
|
my $block = _start_block( $line, $tags, $opts ); |
|
853
|
300
|
100
|
|
|
|
926
|
push @blocks, $block if $block; |
|
854
|
|
|
|
|
|
|
} |
|
855
|
|
|
|
|
|
|
} |
|
856
|
|
|
|
|
|
|
|
|
857
|
50
|
|
|
|
|
265
|
return @blocks; |
|
858
|
|
|
|
|
|
|
} |
|
859
|
|
|
|
|
|
|
|
|
860
|
|
|
|
|
|
|
sub _start_block { |
|
861
|
305
|
|
|
305
|
|
1458
|
my ( $text, $tags, $opts, $type ) = @_; |
|
862
|
|
|
|
|
|
|
|
|
863
|
305
|
100
|
|
|
|
723
|
return new_block( 'end', level => 0 ) unless $text; |
|
864
|
218
|
50
|
|
|
|
381
|
return new_block( |
|
865
|
|
|
|
|
|
|
$type, |
|
866
|
|
|
|
|
|
|
level => 0, |
|
867
|
|
|
|
|
|
|
opts => $opts, |
|
868
|
|
|
|
|
|
|
text => $text, |
|
869
|
|
|
|
|
|
|
tags => $tags, |
|
870
|
|
|
|
|
|
|
) if $type; |
|
871
|
|
|
|
|
|
|
|
|
872
|
218
|
|
|
|
|
194
|
for my $block ( @{ $tags->{blockorder} } ) { |
|
|
218
|
|
|
|
|
421
|
|
|
873
|
1216
|
|
|
|
|
1277
|
my ( $line, $level, $indentation ) = ( $text, 0, '' ); |
|
874
|
|
|
|
|
|
|
|
|
875
|
1216
|
100
|
|
|
|
2385
|
( $level, $line, $indentation ) = _get_indentation( $tags, $line ) |
|
876
|
|
|
|
|
|
|
if $tags->{indented}{$block}; |
|
877
|
|
|
|
|
|
|
|
|
878
|
1216
|
|
|
|
|
3785
|
my $marker_removed = length( $line =~ s/$tags->{blocks}{$block}// ); |
|
879
|
|
|
|
|
|
|
|
|
880
|
1216
|
100
|
|
|
|
2175
|
next unless $marker_removed; |
|
881
|
|
|
|
|
|
|
|
|
882
|
1944
|
|
|
|
|
3448
|
return new_block( |
|
883
|
|
|
|
|
|
|
$block, |
|
884
|
216
|
|
100
|
|
|
502
|
args => [ grep {defined} $1, $2, $3, $4, $5, $6, $7, $8, $9 ], |
|
885
|
|
|
|
|
|
|
level => $level || 0, |
|
886
|
|
|
|
|
|
|
opts => $opts, |
|
887
|
|
|
|
|
|
|
text => $line, |
|
888
|
|
|
|
|
|
|
tags => $tags, |
|
889
|
|
|
|
|
|
|
); |
|
890
|
|
|
|
|
|
|
} |
|
891
|
|
|
|
|
|
|
} |
|
892
|
|
|
|
|
|
|
|
|
893
|
|
|
|
|
|
|
sub _nest_blocks { |
|
894
|
54
|
|
|
54
|
|
1442
|
my $blocks = shift; |
|
895
|
54
|
100
|
|
|
|
141
|
return unless @$blocks; |
|
896
|
|
|
|
|
|
|
|
|
897
|
53
|
|
|
|
|
93
|
my @processed = shift @$blocks; |
|
898
|
|
|
|
|
|
|
|
|
899
|
53
|
|
|
|
|
108
|
for my $block (@$blocks) { |
|
900
|
251
|
|
|
|
|
925
|
push @processed, $processed[-1]->nest($block); |
|
901
|
|
|
|
|
|
|
} |
|
902
|
|
|
|
|
|
|
|
|
903
|
53
|
|
|
|
|
337
|
return @processed; |
|
904
|
|
|
|
|
|
|
} |
|
905
|
|
|
|
|
|
|
|
|
906
|
|
|
|
|
|
|
sub _process_blocks { |
|
907
|
51
|
|
|
51
|
|
93
|
my ( $blocks, $tags, $opts ) = @_; |
|
908
|
|
|
|
|
|
|
|
|
909
|
51
|
|
|
|
|
55
|
my @open; |
|
910
|
51
|
|
|
|
|
86
|
for my $block (@$blocks) { |
|
911
|
205
|
100
|
|
|
|
461
|
push @open, _process_block( $block, $tags, $opts ) |
|
912
|
|
|
|
|
|
|
unless $block->type() eq 'end'; |
|
913
|
|
|
|
|
|
|
} |
|
914
|
|
|
|
|
|
|
|
|
915
|
51
|
|
|
|
|
280
|
return join '', @open; |
|
916
|
|
|
|
|
|
|
} |
|
917
|
|
|
|
|
|
|
|
|
918
|
|
|
|
|
|
|
sub _process_block { |
|
919
|
135
|
|
|
135
|
|
172
|
my ( $block, $tags, $opts ) = @_; |
|
920
|
135
|
|
|
|
|
262
|
my $type = $block->type(); |
|
921
|
|
|
|
|
|
|
|
|
922
|
135
|
|
|
|
|
144
|
my ( $start, $end, $start_line, $end_line, $between ); |
|
923
|
135
|
50
|
|
|
|
284
|
if ( $tags->{$type} ) { |
|
924
|
135
|
|
|
|
|
136
|
( $start, $end, $start_line, $end_line, $between ) = @{ $tags->{$type} }; |
|
|
135
|
|
|
|
|
385
|
|
|
925
|
|
|
|
|
|
|
} |
|
926
|
|
|
|
|
|
|
else { |
|
927
|
0
|
|
|
|
|
0
|
( $start, $end, $start_line, $end_line ) = ( '', '', '', '' ); |
|
928
|
|
|
|
|
|
|
} |
|
929
|
|
|
|
|
|
|
|
|
930
|
135
|
|
|
|
|
185
|
my @text = (); |
|
931
|
135
|
100
|
|
|
|
135
|
for my $line ( |
|
|
135
|
|
|
|
|
2638
|
|
|
932
|
|
|
|
|
|
|
grep ( /^\Q$type\E$/, @{ $tags->{unformatted_blocks} } ) |
|
933
|
|
|
|
|
|
|
? $block->text() |
|
934
|
|
|
|
|
|
|
: $block->formatted_text() |
|
935
|
|
|
|
|
|
|
) |
|
936
|
|
|
|
|
|
|
{ |
|
937
|
240
|
100
|
|
|
|
538
|
if ( blessed $line) { |
|
938
|
18
|
|
33
|
|
|
44
|
my $prev_end = pop @text || (); |
|
939
|
18
|
|
|
|
|
47
|
push @text, _process_block( $line, $tags, $opts ), $prev_end; |
|
940
|
18
|
|
|
|
|
31
|
next; |
|
941
|
|
|
|
|
|
|
} |
|
942
|
|
|
|
|
|
|
|
|
943
|
222
|
|
|
|
|
201
|
my @triplets; |
|
944
|
222
|
100
|
100
|
|
|
785
|
if ( ( ref($start_line) || '' ) eq 'CODE' ) { |
|
945
|
32
|
|
|
|
|
78
|
@triplets = $start_line->( $line, $block->level(), $block->shift_args(), $tags, $opts ); |
|
946
|
|
|
|
|
|
|
} |
|
947
|
|
|
|
|
|
|
else { |
|
948
|
190
|
|
|
|
|
380
|
@triplets = ( $start_line, $line, $end_line ); |
|
949
|
|
|
|
|
|
|
} |
|
950
|
222
|
|
|
|
|
469
|
push @text, @triplets; |
|
951
|
|
|
|
|
|
|
} |
|
952
|
|
|
|
|
|
|
|
|
953
|
135
|
100
|
|
|
|
285
|
pop @text if $between; |
|
954
|
135
|
|
|
|
|
514
|
return join '', $start, @text, $end; |
|
955
|
|
|
|
|
|
|
} |
|
956
|
|
|
|
|
|
|
|
|
957
|
|
|
|
|
|
|
sub _get_indentation { |
|
958
|
443
|
|
|
443
|
|
442
|
my ( $tags, $text ) = @_; |
|
959
|
|
|
|
|
|
|
|
|
960
|
443
|
100
|
|
|
|
2882
|
return 1, $text unless $text =~ s/($tags->{indent})//; |
|
961
|
238
|
|
|
|
|
797
|
return length($1) + 1, $text, $1; |
|
962
|
|
|
|
|
|
|
} |
|
963
|
|
|
|
|
|
|
|
|
964
|
|
|
|
|
|
|
=head2 format_line |
|
965
|
|
|
|
|
|
|
|
|
966
|
|
|
|
|
|
|
$formatted = format_line ($raw, $tags, $opts); |
|
967
|
|
|
|
|
|
|
|
|
968
|
|
|
|
|
|
|
This function is never exported. It formats the phrase elements of a single |
|
969
|
|
|
|
|
|
|
line of text (emphasised, strong, and links). |
|
970
|
|
|
|
|
|
|
|
|
971
|
|
|
|
|
|
|
This is only meant to be called from L and so |
|
972
|
|
|
|
|
|
|
requires $tags and $opts to have all elements filled in. If you find a use for |
|
973
|
|
|
|
|
|
|
it, please let me know and maybe I will have it default the missing elements as |
|
974
|
|
|
|
|
|
|
C does. |
|
975
|
|
|
|
|
|
|
|
|
976
|
|
|
|
|
|
|
=cut |
|
977
|
|
|
|
|
|
|
|
|
978
|
|
|
|
|
|
|
sub format_line { |
|
979
|
227
|
|
|
227
|
1
|
3702
|
my ( $text, $tags, $opts ) = @_; |
|
980
|
|
|
|
|
|
|
|
|
981
|
227
|
|
|
|
|
712
|
$text =~ s!$tags->{strong_tag}!$tags->{strong}->($1, $opts)!eg; |
|
|
5
|
|
|
|
|
28
|
|
|
982
|
227
|
|
|
|
|
508
|
$text =~ s!$tags->{emphasized_tag}!$tags->{emphasized}->($1, $opts)!eg; |
|
|
6
|
|
|
|
|
22
|
|
|
983
|
|
|
|
|
|
|
|
|
984
|
227
|
50
|
66
|
|
|
717
|
$text = _find_links( $text, $tags, $opts ) |
|
|
|
|
33
|
|
|
|
|
|
985
|
|
|
|
|
|
|
if $opts->{extended} |
|
986
|
|
|
|
|
|
|
|| $opts->{absolute_links} |
|
987
|
|
|
|
|
|
|
|| $opts->{implicit_links}; |
|
988
|
|
|
|
|
|
|
|
|
989
|
227
|
|
|
|
|
676
|
return $text; |
|
990
|
|
|
|
|
|
|
} |
|
991
|
|
|
|
|
|
|
|
|
992
|
|
|
|
|
|
|
sub _find_innermost_balanced_pair { |
|
993
|
11
|
|
|
11
|
|
12
|
my ( $text, $open, $close ) = @_; |
|
994
|
|
|
|
|
|
|
|
|
995
|
11
|
|
|
|
|
18
|
my $start_pos = rindex $text, $open; |
|
996
|
11
|
100
|
|
|
|
29
|
return if $start_pos == -1; |
|
997
|
|
|
|
|
|
|
|
|
998
|
7
|
|
|
|
|
8
|
my $end_pos = index $text, $close, $start_pos; |
|
999
|
7
|
50
|
|
|
|
20
|
return if $end_pos == -1; |
|
1000
|
|
|
|
|
|
|
|
|
1001
|
7
|
|
|
|
|
8
|
my $open_length = length $open; |
|
1002
|
7
|
|
|
|
|
8
|
my $close_length = length $close; |
|
1003
|
7
|
|
|
|
|
9
|
my $close_pos = $end_pos + $close_length; |
|
1004
|
7
|
|
|
|
|
7
|
my $enclosed_length = $close_pos - $start_pos; |
|
1005
|
|
|
|
|
|
|
|
|
1006
|
7
|
|
|
|
|
12
|
my $enclosed_atom = substr $text, $start_pos, $enclosed_length; |
|
1007
|
7
|
|
|
|
|
28
|
return substr( $enclosed_atom, $open_length, 0 - $close_length ), |
|
1008
|
|
|
|
|
|
|
substr( $text, 0, $start_pos ), |
|
1009
|
|
|
|
|
|
|
substr( $text, $close_pos ); |
|
1010
|
|
|
|
|
|
|
} |
|
1011
|
|
|
|
|
|
|
|
|
1012
|
|
|
|
|
|
|
sub _find_links { |
|
1013
|
227
|
|
|
227
|
|
253
|
my ( $text, $tags, $opts ) = @_; |
|
1014
|
|
|
|
|
|
|
|
|
1015
|
|
|
|
|
|
|
# Build Regexp |
|
1016
|
227
|
|
|
|
|
190
|
my @res; |
|
1017
|
|
|
|
|
|
|
|
|
1018
|
227
|
100
|
|
|
|
460
|
if ( $opts->{absolute_links} ) { |
|
1019
|
|
|
|
|
|
|
|
|
1020
|
|
|
|
|
|
|
# URI |
|
1021
|
225
|
|
|
|
|
182
|
my $s; |
|
1022
|
225
|
|
66
|
|
|
493
|
$tags->{_schema_regex} ||= _make_schema_regex @{ $tags->{schemas} }; |
|
|
47
|
|
|
|
|
163
|
|
|
1023
|
225
|
|
|
|
|
243
|
$s = $tags->{_schema_regex}; |
|
1024
|
225
|
|
|
|
|
1700
|
push @res, qr/\b$s:[$uricCheat][$uric]*/; |
|
1025
|
|
|
|
|
|
|
} |
|
1026
|
|
|
|
|
|
|
|
|
1027
|
227
|
100
|
|
|
|
454
|
if ( $opts->{implicit_links} ) { |
|
1028
|
|
|
|
|
|
|
|
|
1029
|
|
|
|
|
|
|
# StudlyCaps |
|
1030
|
17
|
50
|
|
|
|
36
|
if ( $tags->{implicit_link_delimiters} ) { |
|
1031
|
17
|
|
|
|
|
47
|
push @res, qr/$tags->{implicit_link_delimiters}/; |
|
1032
|
|
|
|
|
|
|
} |
|
1033
|
|
|
|
|
|
|
else { |
|
1034
|
0
|
|
|
|
|
0
|
warnings::warnif("Ignoring implicit_links option since implicit_link_delimiters is empty"); |
|
1035
|
|
|
|
|
|
|
} |
|
1036
|
|
|
|
|
|
|
} |
|
1037
|
|
|
|
|
|
|
|
|
1038
|
227
|
100
|
|
|
|
400
|
if ( $opts->{extended} ) { |
|
1039
|
|
|
|
|
|
|
|
|
1040
|
|
|
|
|
|
|
# [[Wiki Page]] |
|
1041
|
226
|
100
|
|
|
|
658
|
if ( !$tags->{extended_link_delimiters} ) { |
|
|
|
100
|
|
|
|
|
|
|
1042
|
3
|
|
|
|
|
70
|
warnings::warnif("Ignoring extended option since extended_link_delimiters is empty"); |
|
1043
|
|
|
|
|
|
|
} |
|
1044
|
|
|
|
|
|
|
elsif ( ref $tags->{extended_link_delimiters} eq "ARRAY" ) { |
|
1045
|
|
|
|
|
|
|
|
|
1046
|
|
|
|
|
|
|
# Backwards compatibility for extended links. |
|
1047
|
|
|
|
|
|
|
# Bypasses the regex substitution used by absolute and implicit |
|
1048
|
|
|
|
|
|
|
# links. |
|
1049
|
4
|
|
|
|
|
6
|
my ( $start, $end ) = @{ $tags->{extended_link_delimiters} }; |
|
|
4
|
|
|
|
|
8
|
|
|
1050
|
4
|
|
|
|
|
10
|
while ( my @pieces = _find_innermost_balanced_pair( $text, $start, $end ) ) { |
|
1051
|
7
|
50
|
|
|
|
21
|
my ( $tag, $before, $after ) = map { defined $_ ? $_ : '' } @pieces; |
|
|
21
|
|
|
|
|
36
|
|
|
1052
|
7
|
|
50
|
|
|
20
|
my $extended = $tags->{link}->( $tag, $opts, $tags ) || ''; |
|
1053
|
7
|
|
|
|
|
50
|
$text = $before . $extended . $after; |
|
1054
|
|
|
|
|
|
|
} |
|
1055
|
|
|
|
|
|
|
} |
|
1056
|
|
|
|
|
|
|
else { |
|
1057
|
219
|
|
|
|
|
677
|
push @res, qr/$tags->{extended_link_delimiters}/; |
|
1058
|
|
|
|
|
|
|
} |
|
1059
|
|
|
|
|
|
|
} |
|
1060
|
|
|
|
|
|
|
|
|
1061
|
227
|
50
|
|
|
|
633
|
if (@res) { |
|
1062
|
227
|
|
|
|
|
395
|
my $re = join "|", @res; |
|
1063
|
227
|
|
|
|
|
2783
|
$text =~ s/$re/$tags->{link}->($&, $opts, $tags)/ge; |
|
|
30
|
|
|
|
|
104
|
|
|
1064
|
|
|
|
|
|
|
} |
|
1065
|
|
|
|
|
|
|
|
|
1066
|
227
|
|
|
|
|
577
|
return $text; |
|
1067
|
|
|
|
|
|
|
} |
|
1068
|
|
|
|
|
|
|
|
|
1069
|
|
|
|
|
|
|
=head1 Wiki Format |
|
1070
|
|
|
|
|
|
|
|
|
1071
|
|
|
|
|
|
|
Refer to L for |
|
1072
|
|
|
|
|
|
|
description of the default wiki format, as interpreted by this module. Any |
|
1073
|
|
|
|
|
|
|
discrepencies will be considered bugs in this module, with a few exceptions. |
|
1074
|
|
|
|
|
|
|
|
|
1075
|
|
|
|
|
|
|
=head2 Unimplemented Wiki Markup |
|
1076
|
|
|
|
|
|
|
|
|
1077
|
|
|
|
|
|
|
=over 4 |
|
1078
|
|
|
|
|
|
|
|
|
1079
|
|
|
|
|
|
|
=item Templates, Magic Words, and Wanted Links |
|
1080
|
|
|
|
|
|
|
|
|
1081
|
|
|
|
|
|
|
Templates, magic words, and the colorization of wanted links all require a back |
|
1082
|
|
|
|
|
|
|
end data store that can be consulted on the existance and content of named |
|
1083
|
|
|
|
|
|
|
pages. C has deliberately been constructed such that it |
|
1084
|
|
|
|
|
|
|
operates independantly from such a back end. For an interface to |
|
1085
|
|
|
|
|
|
|
C which implements these features, see |
|
1086
|
|
|
|
|
|
|
L. |
|
1087
|
|
|
|
|
|
|
|
|
1088
|
|
|
|
|
|
|
=item Tables |
|
1089
|
|
|
|
|
|
|
|
|
1090
|
|
|
|
|
|
|
This is on the TODO list. |
|
1091
|
|
|
|
|
|
|
|
|
1092
|
|
|
|
|
|
|
=back |
|
1093
|
|
|
|
|
|
|
|
|
1094
|
|
|
|
|
|
|
=head1 EXPORT |
|
1095
|
|
|
|
|
|
|
|
|
1096
|
|
|
|
|
|
|
If you'd like to make your life more convenient, you can optionally import a |
|
1097
|
|
|
|
|
|
|
subroutine that already has default tags and options set up. This is |
|
1098
|
|
|
|
|
|
|
especially handy if you use a prefix: |
|
1099
|
|
|
|
|
|
|
|
|
1100
|
|
|
|
|
|
|
use Text::MediawikiFormat prefix => 'http://www.example.com/'; |
|
1101
|
|
|
|
|
|
|
wikiformat ('some text'); |
|
1102
|
|
|
|
|
|
|
|
|
1103
|
|
|
|
|
|
|
Tags are interpreted as default members of the $tags hash normally passed to |
|
1104
|
|
|
|
|
|
|
C, except for the five options (see above) and the C key, who's |
|
1105
|
|
|
|
|
|
|
value is interpreted as an alternate name for the imported function. |
|
1106
|
|
|
|
|
|
|
|
|
1107
|
|
|
|
|
|
|
To use the C flag to control the name by which your code calls the imported |
|
1108
|
|
|
|
|
|
|
function, for example, |
|
1109
|
|
|
|
|
|
|
|
|
1110
|
|
|
|
|
|
|
use Text::MediawikiFormat as => 'formatTextWithWikiStyle'; |
|
1111
|
|
|
|
|
|
|
formatTextWithWikiStyle ('some text'); |
|
1112
|
|
|
|
|
|
|
|
|
1113
|
|
|
|
|
|
|
You might choose a better name, though. |
|
1114
|
|
|
|
|
|
|
|
|
1115
|
|
|
|
|
|
|
The calling semantics are effectively the same as those of the C |
|
1116
|
|
|
|
|
|
|
function. Any additional tags or options to the imported function will |
|
1117
|
|
|
|
|
|
|
override the defaults. This code: |
|
1118
|
|
|
|
|
|
|
|
|
1119
|
|
|
|
|
|
|
use Text::MediawikiFormat as => 'wf', extended => 0; |
|
1120
|
|
|
|
|
|
|
wf ('some text', {}, {extended => 1}); |
|
1121
|
|
|
|
|
|
|
|
|
1122
|
|
|
|
|
|
|
enables extended links, after specifying that the default behavior should be |
|
1123
|
|
|
|
|
|
|
to disable them. |
|
1124
|
|
|
|
|
|
|
|
|
1125
|
|
|
|
|
|
|
=head1 GORY DETAILS |
|
1126
|
|
|
|
|
|
|
|
|
1127
|
|
|
|
|
|
|
=head2 Tags |
|
1128
|
|
|
|
|
|
|
|
|
1129
|
|
|
|
|
|
|
There are two types of Wiki markup: phrase markup and blocks. Blocks include |
|
1130
|
|
|
|
|
|
|
lists, which are made up of lines and can also contain other lists. |
|
1131
|
|
|
|
|
|
|
|
|
1132
|
|
|
|
|
|
|
=head3 Phrase Markup |
|
1133
|
|
|
|
|
|
|
|
|
1134
|
|
|
|
|
|
|
The are currently three types of wiki phrase markup. These are the |
|
1135
|
|
|
|
|
|
|
strong and emphasized markup and links. Links may additionally be of three |
|
1136
|
|
|
|
|
|
|
subtypes, extended, implicit, or absolute. |
|
1137
|
|
|
|
|
|
|
|
|
1138
|
|
|
|
|
|
|
You can change the regular expressions used to find strong and emphasized tags: |
|
1139
|
|
|
|
|
|
|
|
|
1140
|
|
|
|
|
|
|
%tags = ( |
|
1141
|
|
|
|
|
|
|
strong_tag => qr/\*([^*]+?)\*/, |
|
1142
|
|
|
|
|
|
|
emphasized_tag => qr|/([^/]+?)/|, |
|
1143
|
|
|
|
|
|
|
); |
|
1144
|
|
|
|
|
|
|
|
|
1145
|
|
|
|
|
|
|
$wikitext = 'this is *strong*, /emphasized/, and */em+strong/*'; |
|
1146
|
|
|
|
|
|
|
$htmltext = wikiformat ($wikitext, \%tags, {}); |
|
1147
|
|
|
|
|
|
|
|
|
1148
|
|
|
|
|
|
|
You can also change the regular expressions used to find links. The following |
|
1149
|
|
|
|
|
|
|
just sets them to their default states (but enables parsing of implicit links, |
|
1150
|
|
|
|
|
|
|
which is I the default): |
|
1151
|
|
|
|
|
|
|
|
|
1152
|
|
|
|
|
|
|
my $html = wikiformat |
|
1153
|
|
|
|
|
|
|
( |
|
1154
|
|
|
|
|
|
|
$raw, |
|
1155
|
|
|
|
|
|
|
{implicit_link_delimiters => qr!\b(?:[A-Z][a-z0-9]\w*){2,}!, |
|
1156
|
|
|
|
|
|
|
extended_link_delimiters => qr!\[(?:\[[^][]*\]|[^][]*)\]!, |
|
1157
|
|
|
|
|
|
|
}, |
|
1158
|
|
|
|
|
|
|
{implicit_links => 1} |
|
1159
|
|
|
|
|
|
|
); |
|
1160
|
|
|
|
|
|
|
|
|
1161
|
|
|
|
|
|
|
In addition, you may set the function references that format strong and |
|
1162
|
|
|
|
|
|
|
emphasized text and links. The strong and emphasized functions receive only |
|
1163
|
|
|
|
|
|
|
the text to be formatted as an argument and are expected to return the |
|
1164
|
|
|
|
|
|
|
formatted text. The link formatter also recieves references to the C<$tags> |
|
1165
|
|
|
|
|
|
|
and C<$opts> arrays. For example, the following sets the strong and |
|
1166
|
|
|
|
|
|
|
emphasized formatters to their default state while replacing the link formatter |
|
1167
|
|
|
|
|
|
|
with one which strips href information and returns only the title text: |
|
1168
|
|
|
|
|
|
|
|
|
1169
|
|
|
|
|
|
|
my $html = wikiformat |
|
1170
|
|
|
|
|
|
|
( |
|
1171
|
|
|
|
|
|
|
$raw, |
|
1172
|
|
|
|
|
|
|
{strong => sub {"$_[0]"}, |
|
1173
|
|
|
|
|
|
|
emphasized => sub {"$_[0]"}, |
|
1174
|
|
|
|
|
|
|
link => sub |
|
1175
|
|
|
|
|
|
|
{ |
|
1176
|
|
|
|
|
|
|
my ($tag, $opts, $tags) = @_; |
|
1177
|
|
|
|
|
|
|
if ($tag =~ s/^\[\[([^][]+)\]\]$/$1/) |
|
1178
|
|
|
|
|
|
|
{ |
|
1179
|
|
|
|
|
|
|
my ($page, $title) = split qr/\|/, $tag, 2; |
|
1180
|
|
|
|
|
|
|
return $title if $title; |
|
1181
|
|
|
|
|
|
|
return $page; |
|
1182
|
|
|
|
|
|
|
} |
|
1183
|
|
|
|
|
|
|
elsif ($tag =~ s/^\[([^][]+)\]$/$1/) |
|
1184
|
|
|
|
|
|
|
{ |
|
1185
|
|
|
|
|
|
|
my ($href, $title) = split qr/ /, $tag, 2; |
|
1186
|
|
|
|
|
|
|
return $title if $title; |
|
1187
|
|
|
|
|
|
|
return $href; |
|
1188
|
|
|
|
|
|
|
} |
|
1189
|
|
|
|
|
|
|
else |
|
1190
|
|
|
|
|
|
|
{ |
|
1191
|
|
|
|
|
|
|
return $tag; |
|
1192
|
|
|
|
|
|
|
} |
|
1193
|
|
|
|
|
|
|
}, |
|
1194
|
|
|
|
|
|
|
}, |
|
1195
|
|
|
|
|
|
|
); |
|
1196
|
|
|
|
|
|
|
|
|
1197
|
|
|
|
|
|
|
=head3 Blocks |
|
1198
|
|
|
|
|
|
|
|
|
1199
|
|
|
|
|
|
|
The default block types are C, C, C, C, |
|
1200
|
|
|
|
|
|
|
C, C, C, and C |
|
1201
|
|
|
|
|
|
|
|
|
1202
|
|
|
|
|
|
|
Block entries in the tag hashes must contain array references. The first two |
|
1203
|
|
|
|
|
|
|
items are the tags used at the start and end of the block. The third and |
|
1204
|
|
|
|
|
|
|
fourth contain the tags used at the start and end of each line. Where there |
|
1205
|
|
|
|
|
|
|
needs to be more processing of individual lines, use a subref as the third |
|
1206
|
|
|
|
|
|
|
item. This is how the module processes ordered lines in HTML lists and |
|
1207
|
|
|
|
|
|
|
headers: |
|
1208
|
|
|
|
|
|
|
|
|
1209
|
|
|
|
|
|
|
my $html = wikiformat |
|
1210
|
|
|
|
|
|
|
( |
|
1211
|
|
|
|
|
|
|
$raw, |
|
1212
|
|
|
|
|
|
|
{ordered => ['', " \n", '', "\n"], |
|
1213
|
|
|
|
|
|
|
header => ['', "\n", \&_make_header], |
|
1214
|
|
|
|
|
|
|
}, |
|
1215
|
|
|
|
|
|
|
); |
|
1216
|
|
|
|
|
|
|
|
|
1217
|
|
|
|
|
|
|
The first argument to these subrefs is the post-processed text of the line |
|
1218
|
|
|
|
|
|
|
itself. (Processing removes the indentation and tokens used to mark this as a |
|
1219
|
|
|
|
|
|
|
list and checks the rest of the line for other line formattings.) The second |
|
1220
|
|
|
|
|
|
|
argument is the indentation level (see below). The subsequent arguments are |
|
1221
|
|
|
|
|
|
|
captured variables in the regular expression used to find this list type. The |
|
1222
|
|
|
|
|
|
|
regexp for headers is: |
|
1223
|
|
|
|
|
|
|
|
|
1224
|
|
|
|
|
|
|
$html = wikiformat |
|
1225
|
|
|
|
|
|
|
( |
|
1226
|
|
|
|
|
|
|
$raw, |
|
1227
|
|
|
|
|
|
|
{blocks => {header => qr/^(=+)\s*(.+?)\s*\1$/}} |
|
1228
|
|
|
|
|
|
|
); |
|
1229
|
|
|
|
|
|
|
|
|
1230
|
|
|
|
|
|
|
The module processes indentation first, if applicable, and stores the |
|
1231
|
|
|
|
|
|
|
indentation level (the length of the indentation removed). |
|
1232
|
|
|
|
|
|
|
|
|
1233
|
|
|
|
|
|
|
Lists automatically start and end as necessary. |
|
1234
|
|
|
|
|
|
|
|
|
1235
|
|
|
|
|
|
|
Because regular expressions could conceivably match more than one line, block |
|
1236
|
|
|
|
|
|
|
level markup is processed in a specific order. The C tag governs |
|
1237
|
|
|
|
|
|
|
this order. It contains a reference to an array of the names of the |
|
1238
|
|
|
|
|
|
|
appropriate blocks to process. If you add a block type, be sure to add an |
|
1239
|
|
|
|
|
|
|
entry for it in C: |
|
1240
|
|
|
|
|
|
|
|
|
1241
|
|
|
|
|
|
|
my $html = wikiformat |
|
1242
|
|
|
|
|
|
|
( |
|
1243
|
|
|
|
|
|
|
$raw, |
|
1244
|
|
|
|
|
|
|
{invisible => ['', '', '', ''], |
|
1245
|
|
|
|
|
|
|
blocks => {invisible => qr!^--(.*?)--$!}, |
|
1246
|
|
|
|
|
|
|
blockorder => [qw(code header line ordered |
|
1247
|
|
|
|
|
|
|
unordered definition invisible |
|
1248
|
|
|
|
|
|
|
paragraph_break paragraph)] |
|
1249
|
|
|
|
|
|
|
}, |
|
1250
|
|
|
|
|
|
|
}, |
|
1251
|
|
|
|
|
|
|
); |
|
1252
|
|
|
|
|
|
|
|
|
1253
|
|
|
|
|
|
|
=head3 Finding blocks |
|
1254
|
|
|
|
|
|
|
|
|
1255
|
|
|
|
|
|
|
As has already been mentioned in passing, C uses regular |
|
1256
|
|
|
|
|
|
|
expressions to find blocks. These are in the C<%tags> hash under the C |
|
1257
|
|
|
|
|
|
|
key. For example, to change the regular expression to find code block items, |
|
1258
|
|
|
|
|
|
|
use: |
|
1259
|
|
|
|
|
|
|
|
|
1260
|
|
|
|
|
|
|
my $html = wikiformat ($raw, {blocks => {code => qr/^:\s+/}}); |
|
1261
|
|
|
|
|
|
|
|
|
1262
|
|
|
|
|
|
|
This will require a leading colon to mark code lines (note that as writted |
|
1263
|
|
|
|
|
|
|
here, this would interfere with the default processing of definition lists). |
|
1264
|
|
|
|
|
|
|
|
|
1265
|
|
|
|
|
|
|
=head3 Finding Blocks in the Correct Order |
|
1266
|
|
|
|
|
|
|
|
|
1267
|
|
|
|
|
|
|
As intrepid bug reporter Tom Hukins pointed out in CPAN RT bug #671, the order |
|
1268
|
|
|
|
|
|
|
in which C searches for blocks varies by platform and |
|
1269
|
|
|
|
|
|
|
version of Perl. Because some block-finding regular expressions are more |
|
1270
|
|
|
|
|
|
|
specific than others, what you intend to be one type of block may turn into a |
|
1271
|
|
|
|
|
|
|
different list type. |
|
1272
|
|
|
|
|
|
|
|
|
1273
|
|
|
|
|
|
|
If you're adding new block types, be aware of this. The C entry in |
|
1274
|
|
|
|
|
|
|
C<%tags> exists to force C to apply its regexes from |
|
1275
|
|
|
|
|
|
|
most specific to least specific. It contains an array reference. By default, |
|
1276
|
|
|
|
|
|
|
it looks for ordered lists first, unordered lists second, and code references |
|
1277
|
|
|
|
|
|
|
at the end. |
|
1278
|
|
|
|
|
|
|
|
|
1279
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
1280
|
|
|
|
|
|
|
|
|
1281
|
|
|
|
|
|
|
L |
|
1282
|
|
|
|
|
|
|
|
|
1283
|
|
|
|
|
|
|
=head1 SUPPORT |
|
1284
|
|
|
|
|
|
|
|
|
1285
|
|
|
|
|
|
|
You can find documentation for this module with the perldoc command. |
|
1286
|
|
|
|
|
|
|
|
|
1287
|
|
|
|
|
|
|
perldoc Text::MediawikiFormat |
|
1288
|
|
|
|
|
|
|
|
|
1289
|
|
|
|
|
|
|
You can also look for information at: |
|
1290
|
|
|
|
|
|
|
|
|
1291
|
|
|
|
|
|
|
=over 4 |
|
1292
|
|
|
|
|
|
|
|
|
1293
|
|
|
|
|
|
|
=item * AnnoCPAN: Annotated CPAN documentation |
|
1294
|
|
|
|
|
|
|
|
|
1295
|
|
|
|
|
|
|
L |
|
1296
|
|
|
|
|
|
|
|
|
1297
|
|
|
|
|
|
|
=item * CPAN Ratings |
|
1298
|
|
|
|
|
|
|
|
|
1299
|
|
|
|
|
|
|
L |
|
1300
|
|
|
|
|
|
|
|
|
1301
|
|
|
|
|
|
|
=item * RT: CPAN's request tracker |
|
1302
|
|
|
|
|
|
|
|
|
1303
|
|
|
|
|
|
|
L |
|
1304
|
|
|
|
|
|
|
|
|
1305
|
|
|
|
|
|
|
=item * Search CPAN |
|
1306
|
|
|
|
|
|
|
|
|
1307
|
|
|
|
|
|
|
L |
|
1308
|
|
|
|
|
|
|
|
|
1309
|
|
|
|
|
|
|
=back |
|
1310
|
|
|
|
|
|
|
|
|
1311
|
|
|
|
|
|
|
=head1 AUTHOR |
|
1312
|
|
|
|
|
|
|
|
|
1313
|
|
|
|
|
|
|
Derek Price C is the author. |
|
1314
|
|
|
|
|
|
|
|
|
1315
|
|
|
|
|
|
|
=head1 ACKNOWLEDGEMENTS |
|
1316
|
|
|
|
|
|
|
|
|
1317
|
|
|
|
|
|
|
This module is derived from L, written by chromatic. |
|
1318
|
|
|
|
|
|
|
chromatic's original credits are below: |
|
1319
|
|
|
|
|
|
|
|
|
1320
|
|
|
|
|
|
|
chromatic, C, with much input from the Jellybean team |
|
1321
|
|
|
|
|
|
|
(including Jonathan Paulett). Kate L Pugh has also provided several patches, |
|
1322
|
|
|
|
|
|
|
many failing tests, and is usually the driving force behind new features and |
|
1323
|
|
|
|
|
|
|
releases. If you think this module is worth buying me a beer, she deserves at |
|
1324
|
|
|
|
|
|
|
least half of it. |
|
1325
|
|
|
|
|
|
|
|
|
1326
|
|
|
|
|
|
|
Alex Vandiver added a nice patch and tests for extended links. |
|
1327
|
|
|
|
|
|
|
|
|
1328
|
|
|
|
|
|
|
Tony Bowden, Tom Hukins, and Andy H. all suggested useful features that are now |
|
1329
|
|
|
|
|
|
|
implemented. |
|
1330
|
|
|
|
|
|
|
|
|
1331
|
|
|
|
|
|
|
Sam Vilain, Chris Winters, Paul Schmidt, and Art Henry have all found and |
|
1332
|
|
|
|
|
|
|
reported silly bugs. |
|
1333
|
|
|
|
|
|
|
|
|
1334
|
|
|
|
|
|
|
Blame me for the implementation. |
|
1335
|
|
|
|
|
|
|
|
|
1336
|
|
|
|
|
|
|
=head1 BUGS |
|
1337
|
|
|
|
|
|
|
|
|
1338
|
|
|
|
|
|
|
The link checker in C may fail to detect existing links that do |
|
1339
|
|
|
|
|
|
|
not follow HTML, XML, or SGML style. They may die with some SGML styles too. |
|
1340
|
|
|
|
|
|
|
I. |
|
1341
|
|
|
|
|
|
|
|
|
1342
|
|
|
|
|
|
|
=head1 TODO |
|
1343
|
|
|
|
|
|
|
|
|
1344
|
|
|
|
|
|
|
=over 4 |
|
1345
|
|
|
|
|
|
|
|
|
1346
|
|
|
|
|
|
|
=item * Optimize C to work on a list of lines |
|
1347
|
|
|
|
|
|
|
|
|
1348
|
|
|
|
|
|
|
=back |
|
1349
|
|
|
|
|
|
|
|
|
1350
|
|
|
|
|
|
|
=head1 COPYRIGHT & LICENSE |
|
1351
|
|
|
|
|
|
|
|
|
1352
|
|
|
|
|
|
|
Copyright (c) 2006-2008 Derek R. Price, all rights reserved. |
|
1353
|
|
|
|
|
|
|
Copyright (c) 2002 - 2006, chromatic, all rights reserved. |
|
1354
|
|
|
|
|
|
|
|
|
1355
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it |
|
1356
|
|
|
|
|
|
|
under the same terms as Perl itself. |
|
1357
|
|
|
|
|
|
|
|
|
1358
|
|
|
|
|
|
|
=cut |
|
1359
|
|
|
|
|
|
|
|
|
1360
|
|
|
|
|
|
|
1; # End of Text::MediaiwkiFormat |
|