line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
## |
2
|
|
|
|
|
|
|
# name: Stardoc::Kwim::Parser |
3
|
|
|
|
|
|
|
# abstract: Stardoc Kwim Parser Module |
4
|
|
|
|
|
|
|
# author: Ingy döt Net |
5
|
|
|
|
|
|
|
# license: perl |
6
|
|
|
|
|
|
|
# copyright: 2008, 2010, 2011 |
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
package Stardoc::Kwim::Parser; |
9
|
1
|
|
|
1
|
|
1475
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
33
|
|
10
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
25
|
|
11
|
1
|
|
|
1
|
|
5
|
use base 'WikiText::Parser'; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
1154
|
|
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
# Reusable regexp generators used by the grammar |
14
|
|
|
|
|
|
|
my $ALPHANUM = '\p{Letter}\p{Number}\pM'; |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
# These are all stolen from URI.pm |
17
|
|
|
|
|
|
|
my $reserved = q{;/?:@&=+$,[]#}; |
18
|
|
|
|
|
|
|
my $mark = q{-_.!~*'()}; |
19
|
|
|
|
|
|
|
my $unreserved = "A-Za-z0-9\Q$mark\E"; |
20
|
|
|
|
|
|
|
my $uric = quotemeta($reserved) . $unreserved . "%"; |
21
|
|
|
|
|
|
|
my %im_types = ( |
22
|
|
|
|
|
|
|
yahoo => 'yahoo', |
23
|
|
|
|
|
|
|
ymsgr => 'yahoo', |
24
|
|
|
|
|
|
|
callto => 'callto', |
25
|
|
|
|
|
|
|
skype => 'callto', |
26
|
|
|
|
|
|
|
callme => 'callto', |
27
|
|
|
|
|
|
|
aim => 'aim', |
28
|
|
|
|
|
|
|
msn => 'msn', |
29
|
|
|
|
|
|
|
asap => 'asap', |
30
|
|
|
|
|
|
|
); |
31
|
|
|
|
|
|
|
my $im_re = join '|', keys %im_types; |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
sub create_grammar { |
34
|
0
|
|
|
0
|
0
|
|
my $all_phrases = [ |
35
|
|
|
|
|
|
|
qw(waflphrase asis wikilink a im mail tt b i del) |
36
|
|
|
|
|
|
|
]; |
37
|
0
|
|
|
|
|
|
my $all_blocks = [ |
38
|
|
|
|
|
|
|
qw( |
39
|
|
|
|
|
|
|
pre wafl_block |
40
|
|
|
|
|
|
|
hr hx |
41
|
|
|
|
|
|
|
waflparagraph |
42
|
|
|
|
|
|
|
ul ol |
43
|
|
|
|
|
|
|
blockquote table |
44
|
|
|
|
|
|
|
p empty |
45
|
|
|
|
|
|
|
else |
46
|
|
|
|
|
|
|
) |
47
|
|
|
|
|
|
|
]; |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
return { |
50
|
|
|
|
|
|
|
_all_blocks => $all_blocks, |
51
|
|
|
|
|
|
|
_all_phrases => $all_phrases, |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
top => { |
54
|
|
|
|
|
|
|
blocks => $all_blocks, |
55
|
|
|
|
|
|
|
}, |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
empty => { |
58
|
|
|
|
|
|
|
match => qr/^\s*\n/, |
59
|
|
|
|
|
|
|
filter => sub { |
60
|
0
|
|
|
0
|
|
|
my $node = shift; |
61
|
0
|
|
|
|
|
|
$node->{type} = ''; |
62
|
|
|
|
|
|
|
}, |
63
|
|
|
|
|
|
|
}, |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
wafl_block => { |
66
|
|
|
|
|
|
|
match => qr/(?:^\.([\w\-]+)\ *\n)((?:.*\n)*?)(?:\.\1\ *\n|\z)/, |
67
|
|
|
|
|
|
|
}, |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
p => { |
70
|
|
|
|
|
|
|
match => qr/^( # Capture whole thing |
71
|
|
|
|
|
|
|
(?m: |
72
|
|
|
|
|
|
|
^(?! # All consecutive lines *not* starting with |
73
|
|
|
|
|
|
|
(?: |
74
|
|
|
|
|
|
|
[\#\-\*]+[\ ] | |
75
|
|
|
|
|
|
|
[\^\|\>] | |
76
|
|
|
|
|
|
|
\.\w+\s*\n | |
77
|
|
|
|
|
|
|
\{[^\}]+\}\s*\n |
78
|
|
|
|
|
|
|
) |
79
|
|
|
|
|
|
|
) |
80
|
|
|
|
|
|
|
.*\S.*\n |
81
|
|
|
|
|
|
|
)+ |
82
|
|
|
|
|
|
|
) |
83
|
|
|
|
|
|
|
(\s*\n)* # and all blank lines after |
84
|
|
|
|
|
|
|
/x, |
85
|
|
|
|
|
|
|
phrases => $all_phrases, |
86
|
0
|
|
|
0
|
|
|
filter => sub { chomp }, |
87
|
|
|
|
|
|
|
}, |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
else => { |
90
|
|
|
|
|
|
|
match => qr/^(.*)\n/, |
91
|
|
|
|
|
|
|
phrases => [], |
92
|
|
|
|
|
|
|
filter => sub { |
93
|
0
|
|
|
0
|
|
|
my $node = shift; |
94
|
0
|
|
|
|
|
|
$node->{type} = 'p'; |
95
|
|
|
|
|
|
|
}, |
96
|
|
|
|
|
|
|
}, |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
pre => { |
99
|
|
|
|
|
|
|
match => qr/^(?m:^\.pre\ *\n)((?:.*\n)*?)(?m:^\.pre\ *\n)(?:\s*\n)?/, |
100
|
|
|
|
|
|
|
}, |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
blockquote => { |
103
|
|
|
|
|
|
|
match => qr/^((?m:^>.*\n)+)(\s*\n)?/, |
104
|
|
|
|
|
|
|
blocks => $all_blocks, |
105
|
|
|
|
|
|
|
filter => sub { |
106
|
0
|
|
|
0
|
|
|
s/^>\ ?//gm; |
107
|
|
|
|
|
|
|
}, |
108
|
|
|
|
|
|
|
}, |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
waflparagraph => { |
111
|
|
|
|
|
|
|
match => qr/^\{(.*)\}[\ \t]*\n(?:\s*\n)?/, |
112
|
|
|
|
|
|
|
filter => sub { |
113
|
0
|
|
|
0
|
|
|
my $node = shift; |
114
|
0
|
|
|
|
|
|
my ($function, $options) = split /[: ]/, $node->{text}, 2; |
115
|
0
|
0
|
|
|
|
|
my $replacement = defined $1 ? $1 : ''; |
116
|
0
|
0
|
|
|
|
|
$options = '' unless defined $options; # protect against an undefined here |
117
|
0
|
|
|
|
|
|
$options =~ s/\s*(.*?)\s*/$replacement/; |
118
|
0
|
|
|
|
|
|
$node->{attributes}{function} = $function; |
119
|
0
|
|
|
|
|
|
$node->{attributes}{options} = $options; |
120
|
0
|
|
|
|
|
|
undef $_; |
121
|
|
|
|
|
|
|
}, |
122
|
|
|
|
|
|
|
}, |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
hx => { |
125
|
|
|
|
|
|
|
match => qr/^(\=+) *(.*?)(\s+=+)?\s*?\n+/, |
126
|
|
|
|
|
|
|
phrases => $all_phrases, |
127
|
|
|
|
|
|
|
filter => sub { |
128
|
0
|
|
|
0
|
|
|
my $node = shift; |
129
|
0
|
|
|
|
|
|
$node->{type} = 'h' . length($node->{1}); |
130
|
0
|
|
|
|
|
|
$_ = $node->{text} = $node->{2}; |
131
|
|
|
|
|
|
|
}, |
132
|
|
|
|
|
|
|
}, |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
ul => { |
135
|
|
|
|
|
|
|
match => re_list('[\*\-\+]'), |
136
|
|
|
|
|
|
|
blocks => [qw(ul ol subl li)], |
137
|
0
|
|
|
0
|
|
|
filter => sub { s/^[\*\-\+\#] *//mg }, |
138
|
|
|
|
|
|
|
}, |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
ol => { |
141
|
|
|
|
|
|
|
match => re_list('\#'), |
142
|
|
|
|
|
|
|
blocks => [qw(ul ol subl li)], |
143
|
0
|
|
|
0
|
|
|
filter => sub { s/^[\*\#] *//mg }, |
144
|
|
|
|
|
|
|
}, |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
subl => { |
147
|
|
|
|
|
|
|
type => 'li', |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
match => qr/^( # Block must start at beginning |
150
|
|
|
|
|
|
|
# Capture everything in $1 |
151
|
|
|
|
|
|
|
(.*)\n # Capture the whole first line |
152
|
|
|
|
|
|
|
[\*\#]+\ .*\n # Line starting with one or more bullet |
153
|
|
|
|
|
|
|
(?:[\*\#]+\ .*\n)* # Lines starting with '*' or '#' |
154
|
|
|
|
|
|
|
)(?:\s*\n)?/x, # Eat trailing lines |
155
|
|
|
|
|
|
|
blocks => [qw(ul ol li2)], |
156
|
|
|
|
|
|
|
}, |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
li => { |
159
|
|
|
|
|
|
|
match => qr/(.*)\n/, # Capture the whole line |
160
|
|
|
|
|
|
|
phrases => $all_phrases, |
161
|
|
|
|
|
|
|
}, |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
li2 => { |
164
|
|
|
|
|
|
|
type => '', |
165
|
|
|
|
|
|
|
match => qr/(.*)\n/, # Capture the whole line |
166
|
|
|
|
|
|
|
phrases => $all_phrases, |
167
|
|
|
|
|
|
|
}, |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
hr => { |
170
|
|
|
|
|
|
|
match => qr/^--+(?:\s*\n)?/, |
171
|
|
|
|
|
|
|
}, |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
table => { |
174
|
|
|
|
|
|
|
match => qr/^( |
175
|
|
|
|
|
|
|
( |
176
|
|
|
|
|
|
|
(?m:^\|.*\|\ \n(?=\|)) |
177
|
|
|
|
|
|
|
| |
178
|
|
|
|
|
|
|
(?m:^\|.*\|\ \ +\n) |
179
|
|
|
|
|
|
|
| |
180
|
|
|
|
|
|
|
(?ms:^\|.*?\|\n) |
181
|
|
|
|
|
|
|
)+ |
182
|
|
|
|
|
|
|
)(?:\s*\n)?/x, |
183
|
|
|
|
|
|
|
blocks => ['tr'], |
184
|
|
|
|
|
|
|
}, |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
tr => { |
187
|
|
|
|
|
|
|
match => qr/^((?m:^\|.*?\|(?:\n| \n(?=\|)| +\n)))/s, |
188
|
|
|
|
|
|
|
blocks => ['td'], |
189
|
0
|
|
|
0
|
|
|
filter => sub { s/\s+\z// }, |
190
|
|
|
|
|
|
|
}, |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
# XXX Need to support blocks in TD |
193
|
|
|
|
|
|
|
td => { |
194
|
|
|
|
|
|
|
match => qr/\|?\s*(.*?)\s*\|\n?/s, |
195
|
|
|
|
|
|
|
phrases => $all_phrases, |
196
|
|
|
|
|
|
|
}, |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
wikilink => { |
199
|
|
|
|
|
|
|
match => qr/ |
200
|
|
|
|
|
|
|
(?:"([^"]*)"\s*)?(?:^|(?<=[^$ALPHANUM]))\[(?=[^\s\[\]]) |
201
|
|
|
|
|
|
|
(.*?) |
202
|
|
|
|
|
|
|
\](?=[^$ALPHANUM]|\z) |
203
|
|
|
|
|
|
|
/x, |
204
|
|
|
|
|
|
|
filter => sub { |
205
|
0
|
|
|
0
|
|
|
my $node = shift; |
206
|
0
|
|
|
|
|
|
$node->{attributes}{target} = $node->{2}; |
207
|
0
|
|
0
|
|
|
|
$_ = $node->{1} || $node->{2}; |
208
|
|
|
|
|
|
|
}, |
209
|
|
|
|
|
|
|
}, |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
b => { |
212
|
|
|
|
|
|
|
match => re_huggy(q{\*}), |
213
|
|
|
|
|
|
|
phrases => $all_phrases, |
214
|
|
|
|
|
|
|
}, |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
tt => { |
217
|
|
|
|
|
|
|
match => re_huggy(q{\`}), |
218
|
|
|
|
|
|
|
}, |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
i => { |
221
|
|
|
|
|
|
|
match => Stardoc::Kwim::Parser::re_huggy(q{\_}), |
222
|
|
|
|
|
|
|
phrases => $all_phrases, |
223
|
|
|
|
|
|
|
}, |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
del => { |
226
|
|
|
|
|
|
|
match => re_huggy(q{\-}), |
227
|
|
|
|
|
|
|
phrases => $all_phrases, |
228
|
|
|
|
|
|
|
}, |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
im => { |
231
|
|
|
|
|
|
|
match => qr/(\b(?:$im_re)\:[^\s\>\)]+)/, |
232
|
|
|
|
|
|
|
filter => sub { |
233
|
0
|
|
|
0
|
|
|
my $node = shift; |
234
|
0
|
|
|
|
|
|
my ($type, $id) = split /:/, $node->{text}, 2; |
235
|
0
|
|
|
|
|
|
$node->{attributes}{type} = $type; |
236
|
0
|
|
|
|
|
|
$node->{attributes}{id} = $id; |
237
|
0
|
|
|
|
|
|
undef $_; |
238
|
|
|
|
|
|
|
}, |
239
|
|
|
|
|
|
|
}, |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
waflphrase => { |
242
|
|
|
|
|
|
|
match => qr/ |
243
|
|
|
|
|
|
|
(?:^|(?<=[\s\-])) |
244
|
|
|
|
|
|
|
(?:"(.+?)")? |
245
|
|
|
|
|
|
|
\{ |
246
|
|
|
|
|
|
|
([\w-]+) |
247
|
|
|
|
|
|
|
(?=[\:\ \}]) |
248
|
|
|
|
|
|
|
(?:\s*:)? |
249
|
|
|
|
|
|
|
\s*(.*?)\s* |
250
|
|
|
|
|
|
|
\} |
251
|
|
|
|
|
|
|
(?=[^A-Za-z0-9]|\z) |
252
|
|
|
|
|
|
|
/x, |
253
|
|
|
|
|
|
|
filter => sub { |
254
|
0
|
|
|
0
|
|
|
my $node = shift; |
255
|
0
|
|
|
|
|
|
my ($label, $function, $options) = @{$node}{qw(1 2 3)}; |
|
0
|
|
|
|
|
|
|
256
|
0
|
|
0
|
|
|
|
$label ||= ''; |
257
|
0
|
|
|
|
|
|
$node->{attributes}{function} = $function; |
258
|
0
|
|
|
|
|
|
$node->{attributes}{options} = $options; |
259
|
0
|
|
|
|
|
|
$_ = $label; |
260
|
|
|
|
|
|
|
}, |
261
|
|
|
|
|
|
|
}, |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
mail => { |
264
|
|
|
|
|
|
|
match => qr/ |
265
|
|
|
|
|
|
|
(?:"([^"]*)"\s*)? |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
(?:mailto:)? |
268
|
|
|
|
|
|
|
([\w+%\-\.]+@(?:[\w\-]+\.)+[\w\-]+) |
269
|
|
|
|
|
|
|
>? |
270
|
|
|
|
|
|
|
/x, |
271
|
|
|
|
|
|
|
filter => sub { |
272
|
0
|
|
|
0
|
|
|
my $node = shift; |
273
|
0
|
|
0
|
|
|
|
$_ = $node->{1} || $node->{2}; |
274
|
0
|
|
|
|
|
|
$node->{attributes}{address} = $node->{2}; |
275
|
|
|
|
|
|
|
}, |
276
|
|
|
|
|
|
|
}, |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
a => { |
279
|
|
|
|
|
|
|
type => 'hyperlink', |
280
|
|
|
|
|
|
|
match => qr{ |
281
|
|
|
|
|
|
|
(?:"([^"]*)"\s*)? |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
( |
284
|
|
|
|
|
|
|
(?:http|https|ftp|irc|file): |
285
|
|
|
|
|
|
|
(?://)? |
286
|
|
|
|
|
|
|
[$uric]+ |
287
|
|
|
|
|
|
|
[A-Za-z0-9/#] |
288
|
|
|
|
|
|
|
) |
289
|
|
|
|
|
|
|
>? |
290
|
|
|
|
|
|
|
}x, |
291
|
|
|
|
|
|
|
filter => sub { |
292
|
0
|
|
|
0
|
|
|
my $node = shift; |
293
|
0
|
|
0
|
|
|
|
$_ = $node->{1} || $node->{2}; |
294
|
0
|
|
|
|
|
|
$node->{attributes}{target} = $node->{2}; |
295
|
|
|
|
|
|
|
}, |
296
|
|
|
|
|
|
|
}, |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
asis => { |
299
|
|
|
|
|
|
|
match => qr/ |
300
|
|
|
|
|
|
|
\{\{ |
301
|
|
|
|
|
|
|
(.*?) |
302
|
|
|
|
|
|
|
\}\}(\}*) |
303
|
|
|
|
|
|
|
/xs, |
304
|
|
|
|
|
|
|
filter => sub { |
305
|
0
|
|
|
0
|
|
|
my $node = shift; |
306
|
0
|
|
|
|
|
|
$node->{type} = ''; |
307
|
0
|
|
|
|
|
|
$_ = $node->{1} . $node->{2}; |
308
|
|
|
|
|
|
|
}, |
309
|
|
|
|
|
|
|
}, |
310
|
|
|
|
|
|
|
|
311
|
0
|
|
|
|
|
|
}; |
312
|
|
|
|
|
|
|
} |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
sub re_huggy { |
315
|
0
|
|
|
0
|
0
|
|
my $brace1 = shift; |
316
|
0
|
|
0
|
|
|
|
my $brace2 = shift || $brace1; |
317
|
|
|
|
|
|
|
|
318
|
0
|
|
|
|
|
|
qr/ |
319
|
|
|
|
|
|
|
(?:^|(?<=[^{$ALPHANUM}$brace1]))$brace1(?=\S)(?!$brace2) |
320
|
|
|
|
|
|
|
(.*?) |
321
|
|
|
|
|
|
|
$brace2(?=[^{$ALPHANUM}$brace2]|\z) |
322
|
|
|
|
|
|
|
/x; |
323
|
|
|
|
|
|
|
} |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
sub re_list { |
326
|
0
|
|
|
0
|
0
|
|
my $bullet = shift; |
327
|
0
|
|
|
|
|
|
return qr/^( # Block must start at beginning |
328
|
|
|
|
|
|
|
# Capture everything in $1 |
329
|
|
|
|
|
|
|
^$bullet+\ .*\n # Line starting with one or more bullet |
330
|
|
|
|
|
|
|
(?:[\*\-\+\#]+\ .*\n)* # Lines starting with '*' or '#' |
331
|
|
|
|
|
|
|
)(?:\s*\n)?/x, # Eat trailing lines |
332
|
|
|
|
|
|
|
} |
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
1; |