| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
5
|
|
|
5
|
|
1213
|
use strict; use warnings; |
|
|
5
|
|
|
5
|
|
12
|
|
|
|
5
|
|
|
|
|
216
|
|
|
|
5
|
|
|
|
|
26
|
|
|
|
5
|
|
|
|
|
8
|
|
|
|
5
|
|
|
|
|
7328
|
|
|
2
|
|
|
|
|
|
|
package WikiText::Parser; |
|
3
|
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
sub new { |
|
5
|
74
|
|
|
74
|
0
|
105
|
my $class = shift; |
|
6
|
74
|
|
66
|
|
|
458
|
return bless { @_ }, ref($class) || $class; |
|
7
|
|
|
|
|
|
|
} |
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
sub parse { |
|
10
|
18
|
|
|
18
|
0
|
79
|
my $self = shift; |
|
11
|
18
|
|
66
|
|
|
118
|
$self->{input} ||= shift; |
|
12
|
18
|
100
|
|
|
|
77
|
$self->{input} .= "\n" |
|
13
|
|
|
|
|
|
|
if substr($self->{input}, -1) ne "\n"; |
|
14
|
18
|
|
33
|
|
|
98
|
$self->{grammar} ||= $self->set_grammar; |
|
15
|
18
|
|
33
|
|
|
125
|
$self->{receiver} ||= $self->set_receiver; |
|
16
|
18
|
|
|
|
|
94
|
$self->{receiver}->init; |
|
17
|
18
|
|
|
|
|
59
|
$self->parse_blocks('top'); |
|
18
|
17
|
|
|
|
|
74
|
return $self->{receiver}->content; |
|
19
|
|
|
|
|
|
|
} |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
sub set_receiver { |
|
22
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
|
23
|
0
|
|
0
|
|
|
0
|
$self->{receiver} = shift || $self->create_receiver; |
|
24
|
|
|
|
|
|
|
} |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
sub set_grammar { |
|
27
|
18
|
|
|
18
|
0
|
31
|
my $self = shift; |
|
28
|
18
|
|
33
|
|
|
107
|
$self->{grammar} = shift || $self->create_grammar; |
|
29
|
|
|
|
|
|
|
} |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
sub parse_blocks { |
|
32
|
18
|
|
|
18
|
0
|
27
|
my $self = shift; |
|
33
|
18
|
|
|
|
|
28
|
my $container_type = shift; |
|
34
|
18
|
|
|
|
|
46
|
my $types = $self->{grammar}{$container_type}{blocks}; |
|
35
|
18
|
|
|
|
|
73
|
while (my $length = length $self->{input}) { |
|
36
|
33
|
|
|
|
|
63
|
for my $type (@$types) { |
|
37
|
137
|
100
|
|
|
|
298
|
my $matched = $self->find_match(matched_block => $type) or next; |
|
38
|
32
|
|
|
|
|
94
|
substr($self->{input}, 0, $matched->{end}, ''); |
|
39
|
32
|
|
|
|
|
82
|
$self->handle_match($type, $matched); |
|
40
|
32
|
|
|
|
|
87
|
last; |
|
41
|
|
|
|
|
|
|
} |
|
42
|
33
|
100
|
|
|
|
170
|
die $self->reduction_error |
|
43
|
|
|
|
|
|
|
unless length($self->{input}) < $length; |
|
44
|
|
|
|
|
|
|
} |
|
45
|
17
|
|
|
|
|
97
|
return; |
|
46
|
|
|
|
|
|
|
} |
|
47
|
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
sub parse_phrases { |
|
49
|
56
|
|
|
56
|
0
|
73
|
my $self = shift; |
|
50
|
56
|
|
|
|
|
76
|
my $container_type = shift; |
|
51
|
56
|
|
|
|
|
108
|
my $types = $self->{grammar}{$container_type}{phrases}; |
|
52
|
56
|
|
100
|
|
|
272
|
while (defined $self->{input} and length $self->{input}) { |
|
53
|
72
|
|
|
|
|
83
|
my $match; |
|
54
|
72
|
|
|
|
|
137
|
for my $type (@$types) { |
|
55
|
114
|
100
|
|
|
|
250
|
my $matched = $self->find_match(matched_phrase => $type) or next; |
|
56
|
36
|
100
|
100
|
|
|
209
|
if (not defined $match or $matched->{begin} < $match->{begin}) { |
|
57
|
30
|
|
|
|
|
38
|
$match = $matched; |
|
58
|
30
|
|
|
|
|
68
|
$match->{type} = $type; |
|
59
|
30
|
100
|
|
|
|
91
|
last if $match->{begin} == 0; |
|
60
|
|
|
|
|
|
|
} |
|
61
|
|
|
|
|
|
|
} |
|
62
|
72
|
100
|
|
|
|
154
|
if (! $match) { |
|
63
|
48
|
|
|
|
|
157
|
$self->{receiver}->text_node($self->{input}); |
|
64
|
48
|
|
|
|
|
67
|
last; |
|
65
|
|
|
|
|
|
|
} |
|
66
|
24
|
|
|
|
|
32
|
my ($begin, $end, $type) = @{$match}{qw(begin end type)}; |
|
|
24
|
|
|
|
|
58
|
|
|
67
|
24
|
100
|
|
|
|
100
|
$self->{receiver}->text_node(substr($self->{input}, 0, $begin)) |
|
68
|
|
|
|
|
|
|
unless $begin == 0; |
|
69
|
24
|
|
|
|
|
56
|
substr($self->{input}, 0, $end, ''); |
|
70
|
24
|
|
|
|
|
37
|
$type = $match->{type}; |
|
71
|
24
|
|
|
|
|
86
|
$self->handle_match($type, $match); |
|
72
|
|
|
|
|
|
|
} |
|
73
|
56
|
|
|
|
|
108
|
return; |
|
74
|
|
|
|
|
|
|
} |
|
75
|
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
sub find_match { |
|
77
|
251
|
|
|
251
|
0
|
383
|
my ($self, $matched_func, $type) = @_; |
|
78
|
251
|
|
|
|
|
231
|
my $matched; |
|
79
|
251
|
50
|
|
|
|
680
|
if (my $regexp = $self->{grammar}{$type}{match}) { |
|
80
|
251
|
50
|
|
|
|
452
|
if (ref($regexp) eq 'ARRAY') { |
|
81
|
0
|
|
|
|
|
0
|
for my $re (@$regexp) { |
|
82
|
0
|
0
|
|
|
|
0
|
if ($self->{input} =~ $re) { |
|
83
|
0
|
|
|
|
|
0
|
$matched = $self->$matched_func; |
|
84
|
0
|
|
|
|
|
0
|
last; |
|
85
|
|
|
|
|
|
|
} |
|
86
|
|
|
|
|
|
|
} |
|
87
|
0
|
0
|
|
|
|
0
|
return unless $matched; |
|
88
|
|
|
|
|
|
|
} |
|
89
|
|
|
|
|
|
|
else { |
|
90
|
251
|
100
|
|
|
|
1937
|
return unless $self->{input} =~ $regexp; |
|
91
|
68
|
|
|
|
|
310
|
$matched = $self->$matched_func; |
|
92
|
|
|
|
|
|
|
} |
|
93
|
|
|
|
|
|
|
} |
|
94
|
|
|
|
|
|
|
else { |
|
95
|
0
|
|
|
|
|
0
|
my $func = "match_$type"; |
|
96
|
0
|
0
|
|
|
|
0
|
$matched = $self->$func or return; |
|
97
|
|
|
|
|
|
|
} |
|
98
|
68
|
|
|
|
|
342
|
return $matched; |
|
99
|
|
|
|
|
|
|
} |
|
100
|
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
sub handle_match { |
|
102
|
56
|
|
|
56
|
0
|
88
|
my ($self, $type, $match) = @_; |
|
103
|
56
|
|
|
|
|
135
|
my $func = "handle_$type"; |
|
104
|
56
|
50
|
|
|
|
308
|
if ($self->can($func)) { |
|
105
|
0
|
|
|
|
|
0
|
$self->$func($match, $type); |
|
106
|
|
|
|
|
|
|
} |
|
107
|
|
|
|
|
|
|
else { |
|
108
|
56
|
|
|
|
|
106
|
my $grammar = $self->{grammar}{$type}; |
|
109
|
56
|
50
|
|
|
|
105
|
my $parse = $grammar->{blocks} |
|
110
|
|
|
|
|
|
|
? 'parse_blocks' |
|
111
|
|
|
|
|
|
|
: 'parse_phrases'; |
|
112
|
56
|
100
|
|
|
|
152
|
my @filter = $grammar->{filter} |
|
113
|
|
|
|
|
|
|
? ($grammar->{filter}) |
|
114
|
|
|
|
|
|
|
: (); |
|
115
|
56
|
|
|
|
|
160
|
$self->subparse($parse, $match, $type, @filter); |
|
116
|
|
|
|
|
|
|
} |
|
117
|
|
|
|
|
|
|
} |
|
118
|
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
sub subparse { |
|
120
|
56
|
|
|
56
|
0
|
90
|
my ($self, $func, $match, $type, $filter) = @_; |
|
121
|
56
|
50
|
|
|
|
245
|
$match->{type} = |
|
122
|
|
|
|
|
|
|
exists $self->{grammar}{$type}{type} |
|
123
|
|
|
|
|
|
|
? $self->{grammar}{$type}{type} |
|
124
|
|
|
|
|
|
|
: $type; |
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
my $parser = $self->new( |
|
127
|
|
|
|
|
|
|
grammar => $self->{grammar}, |
|
128
|
|
|
|
|
|
|
receiver => $self->{receiver}->new, |
|
129
|
|
|
|
|
|
|
input => $filter |
|
130
|
56
|
100
|
|
|
|
194
|
? do { $_ = $match->{text}; &$filter($match); $_ } |
|
|
21
|
|
|
|
|
45
|
|
|
|
21
|
|
|
|
|
62
|
|
|
|
21
|
|
|
|
|
59
|
|
|
131
|
|
|
|
|
|
|
: $match->{text}, |
|
132
|
|
|
|
|
|
|
); |
|
133
|
56
|
50
|
|
|
|
282
|
$self->{receiver}->begin_node($match) |
|
134
|
|
|
|
|
|
|
if $match->{type}; |
|
135
|
56
|
|
|
|
|
193
|
$parser->$func($type); |
|
136
|
56
|
|
|
|
|
226
|
$self->{receiver}->insert($parser->{receiver}); |
|
137
|
56
|
50
|
|
|
|
283
|
$self->{receiver}->end_node($match) |
|
138
|
|
|
|
|
|
|
if $match->{type}; |
|
139
|
|
|
|
|
|
|
} |
|
140
|
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
sub reduction_error { |
|
142
|
1
|
|
|
1
|
0
|
3
|
my $self = shift; |
|
143
|
1
|
|
|
|
|
3
|
my $input = $self->{input}; |
|
144
|
1
|
|
|
|
|
10
|
$input =~ s/^((.*\n){2}).*/$1/; |
|
145
|
1
|
|
|
|
|
5
|
chomp $input; |
|
146
|
1
|
|
|
|
|
61
|
return ref($self) . qq[ reduction error for:\n"$input"]; |
|
147
|
|
|
|
|
|
|
} |
|
148
|
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
sub matched_block { |
|
150
|
32
|
50
|
|
32
|
0
|
128
|
my $begin = defined $_[2] ? $_[2] : $-[0]; |
|
151
|
32
|
50
|
|
|
|
108
|
die "All blocks must match at position 0" |
|
152
|
|
|
|
|
|
|
if "$begin" ne "0"; |
|
153
|
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
return +{ |
|
155
|
32
|
|
33
|
|
|
415
|
text => ($_[1] || $1), |
|
|
|
|
33
|
|
|
|
|
|
156
|
|
|
|
|
|
|
end => ($_[3] || $+[0]), |
|
157
|
|
|
|
|
|
|
1 => $1, |
|
158
|
|
|
|
|
|
|
2 => $2, |
|
159
|
|
|
|
|
|
|
3 => $3, |
|
160
|
|
|
|
|
|
|
}; |
|
161
|
|
|
|
|
|
|
} |
|
162
|
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
sub matched_phrase { |
|
164
|
|
|
|
|
|
|
return +{ |
|
165
|
36
|
50
|
33
|
36
|
0
|
482
|
text => ($_[1] || $1), |
|
|
|
|
33
|
|
|
|
|
|
166
|
|
|
|
|
|
|
begin => (defined $_[2] ? $_[2] : $-[0]), |
|
167
|
|
|
|
|
|
|
end => ($_[3] || $+[0]), |
|
168
|
|
|
|
|
|
|
1 => $1, |
|
169
|
|
|
|
|
|
|
2 => $2, |
|
170
|
|
|
|
|
|
|
3 => $3, |
|
171
|
|
|
|
|
|
|
}; |
|
172
|
|
|
|
|
|
|
} |
|
173
|
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
1; |