line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
## Base Class for Creating Text Format Parsers |
2
|
|
|
|
|
|
|
# |
3
|
|
|
|
|
|
|
# Document::Parser is a base class that you can use to easily generate a |
4
|
|
|
|
|
|
|
# parser for text document markups (like Wiki or POD markups). |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# See this parser as an example: |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# http://svn.kwiki.org/kwiki/trunk/src/core/Spork/lib/Spork/Parser.pm |
9
|
|
|
|
|
|
|
# |
10
|
|
|
|
|
|
|
# And this module for usage of the parser: |
11
|
|
|
|
|
|
|
# |
12
|
|
|
|
|
|
|
# http://svn.kwiki.org/kwiki/trunk/src/core/Spork/lib/Spork/Formatter2.pm |
13
|
|
|
|
|
|
|
# |
14
|
|
|
|
|
|
|
# Copyright (c) 2007. Ingy döt Net. All rights reserved. |
15
|
|
|
|
|
|
|
# |
16
|
|
|
|
|
|
|
# Licensed under the same terms as Perl itself. |
17
|
|
|
|
|
|
|
## |
18
|
|
|
|
|
|
|
package Document::Parser; |
19
|
1
|
|
|
1
|
|
367
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
27
|
|
20
|
1
|
|
|
1
|
|
4
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
1120
|
|
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
## Synopsis: |
23
|
|
|
|
|
|
|
# |
24
|
|
|
|
|
|
|
# package MyParser; |
25
|
|
|
|
|
|
|
# use base 'Document::Parser'; |
26
|
|
|
|
|
|
|
# |
27
|
|
|
|
|
|
|
# sub create_grammar { |
28
|
|
|
|
|
|
|
# return { |
29
|
|
|
|
|
|
|
# # ... define a grammar hash here ... |
30
|
|
|
|
|
|
|
# }; |
31
|
|
|
|
|
|
|
# } |
32
|
|
|
|
|
|
|
## |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
##------------------------------------------------------------------------------ |
35
|
|
|
|
|
|
|
# Parser object constructor/initializer |
36
|
|
|
|
|
|
|
##------------------------------------------------------------------------------ |
37
|
|
|
|
|
|
|
##
# Construct a parser object.
#
# May be called on a class name or on an existing object; in the latter
# case the new object is blessed into that object's class. All remaining
# arguments are taken as key/value pairs and become the object's fields
# (nothing is copied over from the invocant).
sub new {
    my ($invocant, @fields) = @_;
    my $package = ref($invocant) || $invocant;
    my $self = { @fields };
    return bless $self, $package;
}
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
##------------------------------------------------------------------------------ |
43
|
|
|
|
|
|
|
# $parsed = $parser->parse($wikitext); |
44
|
|
|
|
|
|
|
##------------------------------------------------------------------------------ |
45
|
|
|
|
|
|
|
##
# Parse a complete document and return the receiver's content.
#
#   $parsed = $parser->parse($text);
#
# The text argument is only used when the object has no (true) `input`
# already. Likewise the grammar and receiver are only created, via
# `set_grammar` / `set_receiver`, when the object does not hold one yet.
# The receiver is then initialized, the input is parsed as the blocks of
# the 'top' container, and whatever the receiver's `content` method
# returns is handed back.
sub parse {
    my ($self, $text) = @_;

    $self->{input}    = $text               unless $self->{input};
    $self->{grammar}  = $self->set_grammar  unless $self->{grammar};
    $self->{receiver} = $self->set_receiver unless $self->{receiver};

    $self->{receiver}->init;
    $self->parse_blocks('top');

    my $receiver = $self->{receiver};
    return $receiver->content;
}
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
## |
56
|
|
|
|
|
|
|
# Call `set_receiver` to reset the receiver for a new parse. |
57
|
|
|
|
|
|
|
##
# Install the receiver object for this parser and return it.
#
# Uses the receiver passed as the first argument when it is true;
# otherwise asks `create_receiver` (not defined in this base class
# section) for one.
sub set_receiver {
    my ($self, $receiver) = @_;
    $receiver = $self->create_receiver unless $receiver;
    return $self->{receiver} = $receiver;
}
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
##
# Install the grammar for this parser and return it.
#
# Uses the grammar passed as the first argument when it is true;
# otherwise asks `create_grammar` (expected to be supplied by the
# subclass) for one.
sub set_grammar {
    my ($self, $grammar) = @_;
    $grammar = $self->create_grammar unless $grammar;
    return $self->{grammar} = $grammar;
}
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
68
|
|
|
|
|
|
|
# Parse input into a series of blocks. With each iteration the parser must |
69
|
|
|
|
|
|
|
# match a block at position 0 of the text, and remove that block from the |
70
|
|
|
|
|
|
|
# input to reparse it further. This continues until there is no input left. |
71
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
72
|
|
|
|
|
|
|
##
# Consume the whole input as a sequence of blocks.
#
# $container_type names the grammar entry whose `blocks` list gives the
# candidate block types, in priority order. On every pass the first type
# for which `find_match` succeeds wins: its matched span (0 .. end) is cut
# off the front of the input and the match is passed to `handle_match`.
#
# Dies with the `reduction_error` message if a pass leaves the input no
# shorter than it was, i.e. nothing matched or the match consumed nothing.
sub parse_blocks {
    my ($self, $container_type) = @_;
    my $block_types = $self->{grammar}{$container_type}{blocks};

    while (my $remaining = length $self->{input}) {
        TYPE:
        for my $type (@$block_types) {
            my $matched = $self->find_match(matched_block => $type);
            next TYPE unless $matched;

            # Remove the matched block before handing it on for subparsing.
            substr($self->{input}, 0, $matched->{end}, '');
            $self->handle_match($type, $matched);
            last TYPE;
        }

        if (length($self->{input}) >= $remaining) {
            die $self->reduction_error;
        }
    }

    return;
}
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
90
|
|
|
|
|
|
|
# This code parses a chunk into interleaved pieces of plain text and |
91
|
|
|
|
|
|
|
# phrases. It repeatedly tries to match every possible phrase and |
92
|
|
|
|
|
|
|
# then takes the match closest to the start. Everything before a |
93
|
|
|
|
|
|
|
# match is written as text. Matched phrases are subparsed according |
94
|
|
|
|
|
|
|
# to their rules. This continues until the input is all eaten. |
95
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
96
|
|
|
|
|
|
|
##
# Consume the whole input as interleaved plain text and phrases.
#
# $container_type names the grammar entry whose `phrases` list gives the
# candidate phrase types. On every pass each type is tried with
# `find_match` and the match that begins closest to the start of the
# input is kept (the first type in list order wins ties; the search stops
# early on a match at offset 0). Text in front of the match is sent to the
# receiver's `text_node`, the input up to the end of the match is
# removed, and the match is passed to `handle_match`. When no phrase
# matches, the remaining input is sent to `text_node` and parsing stops.
#
# Dies with the `reduction_error` message if a pass that found a match
# leaves the input no shorter than before (for example a zero-length
# match at offset 0). `parse_blocks` has the same guard; without it such
# a match would repeat forever.
sub parse_phrases {
    my $self = shift;
    my $container_type = shift;
    my $types = $self->{grammar}{$container_type}{phrases};

    while (my $length = length $self->{input}) {
        my $match;
        for my $type (@$types) {
            my $matched = $self->find_match(matched_phrase => $type) or next;
            if (not defined $match or $matched->{begin} < $match->{begin}) {
                $match = $matched;
                $match->{type} = $type;
                # Nothing can start earlier than offset 0.
                last if $match->{begin} == 0;
            }
        }

        if (! $match) {
            $self->{receiver}->text_node($self->{input});
            last;
        }

        my ($begin, $end, $type) = @{$match}{qw(begin end type)};
        $self->{receiver}->text_node(substr($self->{input}, 0, $begin))
            unless $begin == 0;
        substr($self->{input}, 0, $end, '');
        $self->handle_match($type, $match);

        # Every matching pass must make progress.
        die $self->reduction_error
            unless length($self->{input}) < $length;
    }

    return;
}
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
##
# Try to match one grammar type at the current input.
#
#   $matched = $self->find_match($matched_func, $type);
#
# If the grammar entry for $type has a true `match` value, it is either a
# single pattern or an array reference of patterns tried in order. On the
# first pattern that matches the input, the method named by $matched_func
# is called with no arguments, immediately after the match, so that it
# can read the regex capture variables of that match; its result is
# returned.
#
# If the entry has no `match` value, the method `match_<type>` is called
# instead and its result is returned.
#
# Returns nothing when there is no match.
sub find_match {
    my ($self, $matched_func, $type) = @_;
    my $pattern = $self->{grammar}{$type}{match};

    # No pattern declared: delegate to a hand-written matcher method.
    unless ($pattern) {
        my $matcher = "match_$type";
        my $found = $self->$matcher or return;
        return $found;
    }

    # A single pattern.
    if (ref($pattern) ne 'ARRAY') {
        $self->{input} =~ $pattern or return;
        my $found = $self->$matched_func;
        return $found;
    }

    # A list of alternative patterns: the first one that matches decides.
    for my $candidate (@$pattern) {
        next unless $self->{input} =~ $candidate;
        my $found = $self->$matched_func;
        return unless $found;
        return $found;
    }

    return;
}
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
##
# Dispatch a successful match of the given type.
#
# If the object can `handle_<type>`, that method is called with
# ($match, $type). Otherwise the match text is subparsed via `subparse`:
# as blocks when the grammar entry for $type has a true `blocks` value,
# as phrases when it does not. The entry's `filter`, when true, is passed
# along as an extra argument.
sub handle_match {
    my ($self, $type, $match) = @_;

    my $handler = "handle_$type";
    return $self->$handler($match, $type) if $self->can($handler);

    my $rules  = $self->{grammar}{$type};
    my $method = 'parse_phrases';
    $method = 'parse_blocks' if $rules->{blocks};

    my @args = ($method, $match, $type);
    push @args, $rules->{filter} if $rules->{filter};
    return $self->subparse(@args);
}
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
##
# Parse the text of a match with a fresh sub-parser and splice the result
# into this parser's receiver.
#
#   $self->subparse($func, $match, $type, $filter);
#
#   $func    name of the parse method to run on the sub-parser
#            (called as $parser->$func($type))
#   $match   match hash; its `text` entry is the sub-parser's input
#   $type    node type, reported to the receiver via begin_node/end_node
#   $filter  optional code reference called with no arguments; it sees a
#            copy of the match text in $_ and may modify it in place
#
# The sub-parser shares this parser's grammar and gets a new receiver
# obtained from the current receiver's `new`. After parsing, that receiver
# is passed to the current receiver's `insert`. Returns whatever
# `end_node` returns.
sub subparse {
    my ($self, $func, $match, $type, $filter) = @_;
    $self->{receiver}->begin_node($type);

    my $sub_receiver = $self->{receiver}->new;

    my $text = $match->{text};
    if ($filter) {
        # Alias $_ to our private copy for the duration of the call, so
        # the filter works on $_ without clobbering the caller's $_ (or a
        # list element the caller's $_ is aliased to).
        $filter->() for $text;
    }

    my $parser = $self->new(
        grammar  => $self->{grammar},
        receiver => $sub_receiver,
        input    => $text,
    );
    $parser->$func($type);

    $self->{receiver}->insert($parser->{receiver});
    return $self->{receiver}->end_node($type);
}
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
183
|
|
|
|
|
|
|
# Helper functions |
184
|
|
|
|
|
|
|
# |
185
|
|
|
|
|
|
|
# These are the odds and ends called by the code above. |
186
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
##
# Build the error message used when a parse pass fails to consume any
# input: the object's class name followed by the remaining input in
# double quotes.
sub reduction_error {
    my ($self) = @_;
    my $class = ref $self;
    return qq[$class reduction error for:\n"$self->{input}"];
}
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
##
# Build the match hash ({ text, end }) for a block.
#
#   $self->matched_block;                       # after a regex match
#   $self->matched_block($text, $begin, $end);  # explicit values
#
# Each value that is not passed (or is undef) is taken from the most
# recent successful regex match: text from $1, begin from $-[0], end from
# $+[0]. Explicit values are honored whenever they are defined, so a text
# of '0' or '' and an end of 0 are kept rather than silently replaced by
# stale regex variables.
#
# Dies unless the block begins at position 0.
sub matched_block {
    my $begin = defined $_[2] ? $_[2] : $-[0];
    die "All blocks must match at position 0"
        if "$begin" ne "0";

    return +{
        text => (defined $_[1] ? $_[1] : $1),
        end  => (defined $_[3] ? $_[3] : $+[0]),
    };
}
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
##
# Build the match hash ({ text, begin, end }) for a phrase.
#
#   $self->matched_phrase;                       # after a regex match
#   $self->matched_phrase($text, $begin, $end);  # explicit values
#
# Each value that is not passed (or is undef) is taken from the most
# recent successful regex match: text from $1, begin from $-[0], end from
# $+[0]. Explicit values are honored whenever they are defined, so a text
# of '0' or '' and an end of 0 are kept rather than silently replaced by
# stale regex variables.
sub matched_phrase {
    return +{
        text  => (defined $_[1] ? $_[1] : $1),
        begin => (defined $_[2] ? $_[2] : $-[0]),
        end   => (defined $_[3] ? $_[3] : $+[0]),
    };
}
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
1; |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
=for perldoc |
215
|
|
|
|
|
|
|
This POD generated by Perldoc-0.21. |
216
|
|
|
|
|
|
|
DO NOT EDIT. Your changes will be lost. |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
=encoding utf8 |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
=head1 NAME |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
Document::Parser - Base Class for Creating Text Format Parsers |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
=head1 SYNOPSIS |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
package MyParser; |
227
|
|
|
|
|
|
|
use base 'Document::Parser'; |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
sub create_grammar { |
230
|
|
|
|
|
|
|
return { |
231
|
|
|
|
|
|
|
# ... define a grammar hash here ... |
232
|
|
|
|
|
|
|
}; |
233
|
|
|
|
|
|
|
} |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
=head1 DESCRIPTION |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
Document::Parser is a base class that you can use to easily generate a |
238
|
|
|
|
|
|
|
parser for text document markups (like Wiki or POD markups). |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
See this parser as an example: |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
http://svn.kwiki.org/kwiki/trunk/src/core/Spork/lib/Spork/Parser.pm |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
And this module for usage of the parser: |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
http://svn.kwiki.org/kwiki/trunk/src/core/Spork/lib/Spork/Formatter2.pm |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
=head1 AUTHOR |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
Ingy döt Net |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
=head1 COPYRIGHT |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
Copyright (c) 2007. Ingy döt Net. All rights reserved. |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it |
257
|
|
|
|
|
|
|
under the same terms as Perl itself. |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
See http://www.perl.com/perl/misc/Artistic.html |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
=cut |