line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
5
|
|
|
5
|
|
1213
|
use strict; use warnings; |
|
5
|
|
|
5
|
|
12
|
|
|
5
|
|
|
|
|
216
|
|
|
5
|
|
|
|
|
26
|
|
|
5
|
|
|
|
|
8
|
|
|
5
|
|
|
|
|
7328
|
|
2
|
|
|
|
|
|
|
package WikiText::Parser; |
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
sub new { |
5
|
74
|
|
|
74
|
0
|
105
|
my $class = shift; |
6
|
74
|
|
66
|
|
|
458
|
return bless { @_ }, ref($class) || $class; |
7
|
|
|
|
|
|
|
} |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
sub parse { |
10
|
18
|
|
|
18
|
0
|
79
|
my $self = shift; |
11
|
18
|
|
66
|
|
|
118
|
$self->{input} ||= shift; |
12
|
18
|
100
|
|
|
|
77
|
$self->{input} .= "\n" |
13
|
|
|
|
|
|
|
if substr($self->{input}, -1) ne "\n"; |
14
|
18
|
|
33
|
|
|
98
|
$self->{grammar} ||= $self->set_grammar; |
15
|
18
|
|
33
|
|
|
125
|
$self->{receiver} ||= $self->set_receiver; |
16
|
18
|
|
|
|
|
94
|
$self->{receiver}->init; |
17
|
18
|
|
|
|
|
59
|
$self->parse_blocks('top'); |
18
|
17
|
|
|
|
|
74
|
return $self->{receiver}->content; |
19
|
|
|
|
|
|
|
} |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
sub set_receiver { |
22
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
23
|
0
|
|
0
|
|
|
0
|
$self->{receiver} = shift || $self->create_receiver; |
24
|
|
|
|
|
|
|
} |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
sub set_grammar { |
27
|
18
|
|
|
18
|
0
|
31
|
my $self = shift; |
28
|
18
|
|
33
|
|
|
107
|
$self->{grammar} = shift || $self->create_grammar; |
29
|
|
|
|
|
|
|
} |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
sub parse_blocks { |
32
|
18
|
|
|
18
|
0
|
27
|
my $self = shift; |
33
|
18
|
|
|
|
|
28
|
my $container_type = shift; |
34
|
18
|
|
|
|
|
46
|
my $types = $self->{grammar}{$container_type}{blocks}; |
35
|
18
|
|
|
|
|
73
|
while (my $length = length $self->{input}) { |
36
|
33
|
|
|
|
|
63
|
for my $type (@$types) { |
37
|
137
|
100
|
|
|
|
298
|
my $matched = $self->find_match(matched_block => $type) or next; |
38
|
32
|
|
|
|
|
94
|
substr($self->{input}, 0, $matched->{end}, ''); |
39
|
32
|
|
|
|
|
82
|
$self->handle_match($type, $matched); |
40
|
32
|
|
|
|
|
87
|
last; |
41
|
|
|
|
|
|
|
} |
42
|
33
|
100
|
|
|
|
170
|
die $self->reduction_error |
43
|
|
|
|
|
|
|
unless length($self->{input}) < $length; |
44
|
|
|
|
|
|
|
} |
45
|
17
|
|
|
|
|
97
|
return; |
46
|
|
|
|
|
|
|
} |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
sub parse_phrases { |
49
|
56
|
|
|
56
|
0
|
73
|
my $self = shift; |
50
|
56
|
|
|
|
|
76
|
my $container_type = shift; |
51
|
56
|
|
|
|
|
108
|
my $types = $self->{grammar}{$container_type}{phrases}; |
52
|
56
|
|
100
|
|
|
272
|
while (defined $self->{input} and length $self->{input}) { |
53
|
72
|
|
|
|
|
83
|
my $match; |
54
|
72
|
|
|
|
|
137
|
for my $type (@$types) { |
55
|
114
|
100
|
|
|
|
250
|
my $matched = $self->find_match(matched_phrase => $type) or next; |
56
|
36
|
100
|
100
|
|
|
209
|
if (not defined $match or $matched->{begin} < $match->{begin}) { |
57
|
30
|
|
|
|
|
38
|
$match = $matched; |
58
|
30
|
|
|
|
|
68
|
$match->{type} = $type; |
59
|
30
|
100
|
|
|
|
91
|
last if $match->{begin} == 0; |
60
|
|
|
|
|
|
|
} |
61
|
|
|
|
|
|
|
} |
62
|
72
|
100
|
|
|
|
154
|
if (! $match) { |
63
|
48
|
|
|
|
|
157
|
$self->{receiver}->text_node($self->{input}); |
64
|
48
|
|
|
|
|
67
|
last; |
65
|
|
|
|
|
|
|
} |
66
|
24
|
|
|
|
|
32
|
my ($begin, $end, $type) = @{$match}{qw(begin end type)}; |
|
24
|
|
|
|
|
58
|
|
67
|
24
|
100
|
|
|
|
100
|
$self->{receiver}->text_node(substr($self->{input}, 0, $begin)) |
68
|
|
|
|
|
|
|
unless $begin == 0; |
69
|
24
|
|
|
|
|
56
|
substr($self->{input}, 0, $end, ''); |
70
|
24
|
|
|
|
|
37
|
$type = $match->{type}; |
71
|
24
|
|
|
|
|
86
|
$self->handle_match($type, $match); |
72
|
|
|
|
|
|
|
} |
73
|
56
|
|
|
|
|
108
|
return; |
74
|
|
|
|
|
|
|
} |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
sub find_match { |
77
|
251
|
|
|
251
|
0
|
383
|
my ($self, $matched_func, $type) = @_; |
78
|
251
|
|
|
|
|
231
|
my $matched; |
79
|
251
|
50
|
|
|
|
680
|
if (my $regexp = $self->{grammar}{$type}{match}) { |
80
|
251
|
50
|
|
|
|
452
|
if (ref($regexp) eq 'ARRAY') { |
81
|
0
|
|
|
|
|
0
|
for my $re (@$regexp) { |
82
|
0
|
0
|
|
|
|
0
|
if ($self->{input} =~ $re) { |
83
|
0
|
|
|
|
|
0
|
$matched = $self->$matched_func; |
84
|
0
|
|
|
|
|
0
|
last; |
85
|
|
|
|
|
|
|
} |
86
|
|
|
|
|
|
|
} |
87
|
0
|
0
|
|
|
|
0
|
return unless $matched; |
88
|
|
|
|
|
|
|
} |
89
|
|
|
|
|
|
|
else { |
90
|
251
|
100
|
|
|
|
1937
|
return unless $self->{input} =~ $regexp; |
91
|
68
|
|
|
|
|
310
|
$matched = $self->$matched_func; |
92
|
|
|
|
|
|
|
} |
93
|
|
|
|
|
|
|
} |
94
|
|
|
|
|
|
|
else { |
95
|
0
|
|
|
|
|
0
|
my $func = "match_$type"; |
96
|
0
|
0
|
|
|
|
0
|
$matched = $self->$func or return; |
97
|
|
|
|
|
|
|
} |
98
|
68
|
|
|
|
|
342
|
return $matched; |
99
|
|
|
|
|
|
|
} |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
sub handle_match { |
102
|
56
|
|
|
56
|
0
|
88
|
my ($self, $type, $match) = @_; |
103
|
56
|
|
|
|
|
135
|
my $func = "handle_$type"; |
104
|
56
|
50
|
|
|
|
308
|
if ($self->can($func)) { |
105
|
0
|
|
|
|
|
0
|
$self->$func($match, $type); |
106
|
|
|
|
|
|
|
} |
107
|
|
|
|
|
|
|
else { |
108
|
56
|
|
|
|
|
106
|
my $grammar = $self->{grammar}{$type}; |
109
|
56
|
50
|
|
|
|
105
|
my $parse = $grammar->{blocks} |
110
|
|
|
|
|
|
|
? 'parse_blocks' |
111
|
|
|
|
|
|
|
: 'parse_phrases'; |
112
|
56
|
100
|
|
|
|
152
|
my @filter = $grammar->{filter} |
113
|
|
|
|
|
|
|
? ($grammar->{filter}) |
114
|
|
|
|
|
|
|
: (); |
115
|
56
|
|
|
|
|
160
|
$self->subparse($parse, $match, $type, @filter); |
116
|
|
|
|
|
|
|
} |
117
|
|
|
|
|
|
|
} |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
sub subparse { |
120
|
56
|
|
|
56
|
0
|
90
|
my ($self, $func, $match, $type, $filter) = @_; |
121
|
56
|
50
|
|
|
|
245
|
$match->{type} = |
122
|
|
|
|
|
|
|
exists $self->{grammar}{$type}{type} |
123
|
|
|
|
|
|
|
? $self->{grammar}{$type}{type} |
124
|
|
|
|
|
|
|
: $type; |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
my $parser = $self->new( |
127
|
|
|
|
|
|
|
grammar => $self->{grammar}, |
128
|
|
|
|
|
|
|
receiver => $self->{receiver}->new, |
129
|
|
|
|
|
|
|
input => $filter |
130
|
56
|
100
|
|
|
|
194
|
? do { $_ = $match->{text}; &$filter($match); $_ } |
|
21
|
|
|
|
|
45
|
|
|
21
|
|
|
|
|
62
|
|
|
21
|
|
|
|
|
59
|
|
131
|
|
|
|
|
|
|
: $match->{text}, |
132
|
|
|
|
|
|
|
); |
133
|
56
|
50
|
|
|
|
282
|
$self->{receiver}->begin_node($match) |
134
|
|
|
|
|
|
|
if $match->{type}; |
135
|
56
|
|
|
|
|
193
|
$parser->$func($type); |
136
|
56
|
|
|
|
|
226
|
$self->{receiver}->insert($parser->{receiver}); |
137
|
56
|
50
|
|
|
|
283
|
$self->{receiver}->end_node($match) |
138
|
|
|
|
|
|
|
if $match->{type}; |
139
|
|
|
|
|
|
|
} |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
sub reduction_error { |
142
|
1
|
|
|
1
|
0
|
3
|
my $self = shift; |
143
|
1
|
|
|
|
|
3
|
my $input = $self->{input}; |
144
|
1
|
|
|
|
|
10
|
$input =~ s/^((.*\n){2}).*/$1/; |
145
|
1
|
|
|
|
|
5
|
chomp $input; |
146
|
1
|
|
|
|
|
61
|
return ref($self) . qq[ reduction error for:\n"$input"]; |
147
|
|
|
|
|
|
|
} |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
sub matched_block { |
150
|
32
|
50
|
|
32
|
0
|
128
|
my $begin = defined $_[2] ? $_[2] : $-[0]; |
151
|
32
|
50
|
|
|
|
108
|
die "All blocks must match at position 0" |
152
|
|
|
|
|
|
|
if "$begin" ne "0"; |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
return +{ |
155
|
32
|
|
33
|
|
|
415
|
text => ($_[1] || $1), |
|
|
|
33
|
|
|
|
|
156
|
|
|
|
|
|
|
end => ($_[3] || $+[0]), |
157
|
|
|
|
|
|
|
1 => $1, |
158
|
|
|
|
|
|
|
2 => $2, |
159
|
|
|
|
|
|
|
3 => $3, |
160
|
|
|
|
|
|
|
}; |
161
|
|
|
|
|
|
|
} |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
sub matched_phrase { |
164
|
|
|
|
|
|
|
return +{ |
165
|
36
|
50
|
33
|
36
|
0
|
482
|
text => ($_[1] || $1), |
|
|
|
33
|
|
|
|
|
166
|
|
|
|
|
|
|
begin => (defined $_[2] ? $_[2] : $-[0]), |
167
|
|
|
|
|
|
|
end => ($_[3] || $+[0]), |
168
|
|
|
|
|
|
|
1 => $1, |
169
|
|
|
|
|
|
|
2 => $2, |
170
|
|
|
|
|
|
|
3 => $3, |
171
|
|
|
|
|
|
|
}; |
172
|
|
|
|
|
|
|
} |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
1; |