line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
##################################################################### |
2
|
|
|
|
|
|
|
# Base class for markers |
3
|
|
|
|
|
|
|
##################################################################### |
4
|
|
|
|
|
|
|
package Grammar::Graph::Marker; |
5
|
1
|
|
|
1
|
|
47370
|
use Modern::Perl; |
|
1
|
|
|
|
|
26483
|
|
|
1
|
|
|
|
|
81
|
|
6
|
1
|
|
|
1
|
|
1020
|
use Moose; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
extends 'Grammar::Formal::Empty'; |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
has 'of' => ( |
10
|
|
|
|
|
|
|
is => 'ro', |
11
|
|
|
|
|
|
|
required => 1, |
12
|
|
|
|
|
|
|
isa => 'Str' |
13
|
|
|
|
|
|
|
); |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
##################################################################### |
16
|
|
|
|
|
|
|
# StartOf |
17
|
|
|
|
|
|
|
##################################################################### |
18
|
|
|
|
|
|
|
package Grammar::Graph::StartOf; |
19
|
|
|
|
|
|
|
use Modern::Perl; |
20
|
|
|
|
|
|
|
use Moose; |
21
|
|
|
|
|
|
|
extends 'Grammar::Graph::Marker'; |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
##################################################################### |
24
|
|
|
|
|
|
|
# FinalOf |
25
|
|
|
|
|
|
|
##################################################################### |
26
|
|
|
|
|
|
|
package Grammar::Graph::FinalOf; |
27
|
|
|
|
|
|
|
use Modern::Perl; |
28
|
|
|
|
|
|
|
use Moose; |
29
|
|
|
|
|
|
|
extends 'Grammar::Graph::Marker'; |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
##################################################################### |
32
|
|
|
|
|
|
|
# Base class for sentinels |
33
|
|
|
|
|
|
|
##################################################################### |
34
|
|
|
|
|
|
|
package Grammar::Graph::Sentinel; |
35
|
|
|
|
|
|
|
use Modern::Perl; |
36
|
|
|
|
|
|
|
use Moose; |
37
|
|
|
|
|
|
|
extends 'Grammar::Formal::Empty'; |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
has 'link' => ( |
40
|
|
|
|
|
|
|
is => 'ro', |
41
|
|
|
|
|
|
|
required => 1, |
42
|
|
|
|
|
|
|
isa => 'Str' |
43
|
|
|
|
|
|
|
); |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
##################################################################### |
46
|
|
|
|
|
|
|
# Prefix |
47
|
|
|
|
|
|
|
##################################################################### |
48
|
|
|
|
|
|
|
package Grammar::Graph::Prefix; |
49
|
|
|
|
|
|
|
use Modern::Perl; |
50
|
|
|
|
|
|
|
use Moose; |
51
|
|
|
|
|
|
|
extends 'Grammar::Graph::Sentinel'; |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
##################################################################### |
54
|
|
|
|
|
|
|
# Suffix |
55
|
|
|
|
|
|
|
##################################################################### |
56
|
|
|
|
|
|
|
package Grammar::Graph::Suffix; |
57
|
|
|
|
|
|
|
use Modern::Perl; |
58
|
|
|
|
|
|
|
use Moose; |
59
|
|
|
|
|
|
|
extends 'Grammar::Graph::Sentinel'; |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
##################################################################### |
62
|
|
|
|
|
|
|
# Grammar::Graph |
63
|
|
|
|
|
|
|
##################################################################### |
64
|
|
|
|
|
|
|
package Grammar::Graph; |
65
|
|
|
|
|
|
|
use 5.012000; |
66
|
|
|
|
|
|
|
use strict; |
67
|
|
|
|
|
|
|
use warnings; |
68
|
|
|
|
|
|
|
use base qw(Graph::Directed); |
69
|
|
|
|
|
|
|
use Grammar::Formal; |
70
|
|
|
|
|
|
|
use List::UtilsBy qw/partition_by/; |
71
|
|
|
|
|
|
|
use List::MoreUtils qw/uniq/; |
72
|
|
|
|
|
|
|
use List::Util qw/shuffle sum max/; |
73
|
|
|
|
|
|
|
use Storable qw/freeze thaw/; |
74
|
|
|
|
|
|
|
use Graph::SomeUtils qw/:all/; |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
local $Storable::canonical = 1; |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
our $PREFIX_SUFFIX_SEP = " # "; |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
our $VERSION = '0.02'; |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
our %EXPORT_TAGS = ( 'all' => [ qw( |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
) ] ); |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
our @EXPORT = qw( |
89
|
|
|
|
|
|
|
); |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
sub _fa_next_id { |
92
|
|
|
|
|
|
|
my ($self) = @_; |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
my $next_id = $self->get_graph_attribute('fa_next_id'); |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
$next_id = do { |
97
|
|
|
|
|
|
|
my $max = max(grep { /^[0-9]+$/ } $self->vertices) // 0; |
98
|
|
|
|
|
|
|
$max + 1; |
99
|
|
|
|
|
|
|
} if not defined $next_id or $self->has_vertex($next_id); |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
$self->set_graph_attribute('fa_next_id', $next_id + 1); |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
return $next_id; |
104
|
|
|
|
|
|
|
} |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
sub fa_add_state { |
107
|
|
|
|
|
|
|
my ($self, $expect) = @_; |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
$expect //= Grammar::Formal::Empty->new; |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
my $id = $self->_fa_next_id(); |
112
|
|
|
|
|
|
|
$self->add_vertex($id); |
113
|
|
|
|
|
|
|
$self->set_vertex_attribute($id, 'label', $expect) |
114
|
|
|
|
|
|
|
if defined $expect; |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
return $id; |
117
|
|
|
|
|
|
|
} |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
sub fa_all_e_reachable { |
120
|
|
|
|
|
|
|
my ($self, $v) = @_; |
121
|
|
|
|
|
|
|
my %seen; |
122
|
|
|
|
|
|
|
my @todo = ($v); |
123
|
|
|
|
|
|
|
while (@todo) { |
124
|
|
|
|
|
|
|
my $c = pop @todo; |
125
|
|
|
|
|
|
|
my $label = $self->get_vertex_attribute($c, 'label'); |
126
|
|
|
|
|
|
|
next if $label and not $label->isa('Grammar::Formal::Empty'); |
127
|
|
|
|
|
|
|
push @todo, grep { not $seen{$_}++ } $self->successors($c); |
128
|
|
|
|
|
|
|
} |
129
|
|
|
|
|
|
|
keys %seen; |
130
|
|
|
|
|
|
|
} |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
##################################################################### |
133
|
|
|
|
|
|
|
# Clone a subgraph between two vertices |
134
|
|
|
|
|
|
|
##################################################################### |
135
|
|
|
|
|
|
|
sub _clone_subgraph_between { |
136
|
|
|
|
|
|
|
my ($self, $src, $dst) = @_; |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
my %want = map { $_ => 1 } |
139
|
|
|
|
|
|
|
graph_vertices_between($self, $src, $dst); |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
my %map; |
142
|
|
|
|
|
|
|
my @prefixes; |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
while (my ($k, $yes) = each %want) { |
145
|
|
|
|
|
|
|
next unless $yes; |
146
|
|
|
|
|
|
|
my $label = $self->get_vertex_attribute($k, 'label'); |
147
|
|
|
|
|
|
|
push @prefixes, $label if |
148
|
|
|
|
|
|
|
$label and $label->isa('Grammar::Graph::Prefix'); |
149
|
|
|
|
|
|
|
$map{$k} = $self->fa_add_state($label); |
150
|
|
|
|
|
|
|
} |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
for my $p (@prefixes) { |
153
|
|
|
|
|
|
|
my @old_link = split/$PREFIX_SUFFIX_SEP/, $p->link; |
154
|
|
|
|
|
|
|
my @new_link = map { $map{$_} } @old_link; |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
die "Trying to clone Prefix without Suffix" |
157
|
|
|
|
|
|
|
unless defined $new_link[1]; |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
$self->set_vertex_attribute($new_link[0], 'label', |
160
|
|
|
|
|
|
|
Grammar::Graph::Prefix->new(link => |
161
|
|
|
|
|
|
|
join $PREFIX_SUFFIX_SEP, @new_link) |
162
|
|
|
|
|
|
|
); |
163
|
|
|
|
|
|
|
$self->set_vertex_attribute($new_link[1], 'label', |
164
|
|
|
|
|
|
|
Grammar::Graph::Suffix->new(link => |
165
|
|
|
|
|
|
|
join $PREFIX_SUFFIX_SEP, @new_link) |
166
|
|
|
|
|
|
|
); |
167
|
|
|
|
|
|
|
} |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
while (my ($old, $new) = each %map) { |
170
|
|
|
|
|
|
|
for (grep { $want{$_} } $self->successors($old)) { |
171
|
|
|
|
|
|
|
$self->add_edge($new, $map{$_}); |
172
|
|
|
|
|
|
|
} |
173
|
|
|
|
|
|
|
} |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
return ($map{$src}, $map{$dst}); |
176
|
|
|
|
|
|
|
}; |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
##################################################################### |
179
|
|
|
|
|
|
|
# Generate a graph with all rules with edges over ::References |
180
|
|
|
|
|
|
|
##################################################################### |
181
|
|
|
|
|
|
|
sub _fa_ref_graph { |
182
|
|
|
|
|
|
|
my ($self) = @_; |
183
|
|
|
|
|
|
|
my $symbols = $self->get_graph_attribute('symbol_table'); |
184
|
|
|
|
|
|
|
my $ref_graph = Graph::Directed->new; |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
while (my ($r1, $v) = each %{$symbols}) { |
187
|
|
|
|
|
|
|
for (graph_all_successors_and_self($self, $v->{start_vertex})) { |
188
|
|
|
|
|
|
|
my $label = $self->get_vertex_attribute($_, 'label'); |
189
|
|
|
|
|
|
|
if ($label and $label->isa('Grammar::Formal::Reference')) { |
190
|
|
|
|
|
|
|
my $r2 = $self->get_vertex_attribute($_, 'label')->expand; |
191
|
|
|
|
|
|
|
$ref_graph->add_edge($r1 . '', $r2 . ''); |
192
|
|
|
|
|
|
|
} |
193
|
|
|
|
|
|
|
} |
194
|
|
|
|
|
|
|
} |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
return $ref_graph; |
197
|
|
|
|
|
|
|
} |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
##################################################################### |
200
|
|
|
|
|
|
|
# ... |
201
|
|
|
|
|
|
|
##################################################################### |
202
|
|
|
|
|
|
|
sub _do_replace_thing { |
203
|
|
|
|
|
|
|
my ($self, $direct, $start, $final) = @_; |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
my $label = $self->get_vertex_attribute($direct, 'label'); |
206
|
|
|
|
|
|
|
my $prefix = $self->fa_add_state(); |
207
|
|
|
|
|
|
|
my $suffix = $self->fa_add_state(); |
208
|
|
|
|
|
|
|
my $link = join $PREFIX_SUFFIX_SEP, $prefix, $suffix; |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
my $prefix_p = Grammar::Graph::Prefix->new(link => $link); |
211
|
|
|
|
|
|
|
my $suffix_p = Grammar::Graph::Suffix->new(link => $link); |
212
|
|
|
|
|
|
|
$self->set_vertex_attribute($prefix, 'label', $prefix_p); |
213
|
|
|
|
|
|
|
$self->set_vertex_attribute($suffix, 'label', $suffix_p); |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
$self->add_edge($_, $prefix) for $self->predecessors($direct); |
216
|
|
|
|
|
|
|
$self->add_edge($suffix, $_) for $self->successors($direct); |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
$self->add_edge($final, $suffix); |
219
|
|
|
|
|
|
|
$self->add_edge($prefix, $start); |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
graph_delete_vertex_fast($self, $direct); |
222
|
|
|
|
|
|
|
} |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
##################################################################### |
225
|
|
|
|
|
|
|
# ... |
226
|
|
|
|
|
|
|
##################################################################### |
227
|
|
|
|
|
|
|
sub _find_refs { |
228
|
|
|
|
|
|
|
my ($self, $id, $v) = @_; |
229
|
|
|
|
|
|
|
grep { |
230
|
|
|
|
|
|
|
$self->has_vertex_attribute($_, 'label') and |
231
|
|
|
|
|
|
|
$self->get_vertex_attribute($_, 'label') |
232
|
|
|
|
|
|
|
->isa('Grammar::Formal::Reference') and |
233
|
|
|
|
|
|
|
$self->get_vertex_attribute($_, 'label') |
234
|
|
|
|
|
|
|
->expand eq $id; |
235
|
|
|
|
|
|
|
} graph_all_successors_and_self($self, $v); |
236
|
|
|
|
|
|
|
} |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
sub _replace_direct_recursion { |
239
|
|
|
|
|
|
|
my ($self, $id) = @_; |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
my $symbols = $self->get_graph_attribute('symbol_table'); |
242
|
|
|
|
|
|
|
my $v = $symbols->{$id}{start_vertex}; |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
my @direct_refs = _find_refs($self, $id, $v); |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
for my $direct (@direct_refs) { |
247
|
|
|
|
|
|
|
my $final = $symbols->{$id}{final_vertex}; |
248
|
|
|
|
|
|
|
my $start = $symbols->{$id}{start_vertex}; |
249
|
|
|
|
|
|
|
_do_replace_thing($self, $direct, $start, $final); |
250
|
|
|
|
|
|
|
} |
251
|
|
|
|
|
|
|
} |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
sub _replace_strongly_connected_component { |
254
|
|
|
|
|
|
|
my ($self, $comb) = @_; |
255
|
|
|
|
|
|
|
my $symbols = $self->get_graph_attribute('symbol_table'); |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
my %backup = map { $_ => [ |
258
|
|
|
|
|
|
|
_clone_subgraph_between($self, |
259
|
|
|
|
|
|
|
$symbols->{$_}{start_vertex}, |
260
|
|
|
|
|
|
|
$symbols->{$_}{final_vertex}) |
261
|
|
|
|
|
|
|
] } split/\+/, $comb; |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
my %expanded; |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
for my $last (split/\+/, $comb) { |
266
|
|
|
|
|
|
|
my @other = grep { $_ ne $last } split/\+/, $comb; |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
for my $id (@other) { |
269
|
|
|
|
|
|
|
_replace_direct_recursion($self, $id); |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
my @things = map { _find_refs($self, $id, |
272
|
|
|
|
|
|
|
$symbols->{$_}{start_vertex}) } |
273
|
|
|
|
|
|
|
split/\+/, $comb; |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
for (@things) { |
276
|
|
|
|
|
|
|
my ($start, $final) = _clone_subgraph_between($self, |
277
|
|
|
|
|
|
|
$symbols->{$id}{start_vertex}, |
278
|
|
|
|
|
|
|
$symbols->{$id}{final_vertex}); |
279
|
|
|
|
|
|
|
_do_replace_thing($self, $_, $start, $final); |
280
|
|
|
|
|
|
|
} |
281
|
|
|
|
|
|
|
} |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
_replace_direct_recursion($self, $last); |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
# ... |
286
|
|
|
|
|
|
|
$expanded{$last} = [ |
287
|
|
|
|
|
|
|
_clone_subgraph_between($self, |
288
|
|
|
|
|
|
|
$symbols->{$last}{start_vertex}, |
289
|
|
|
|
|
|
|
$symbols->{$last}{final_vertex}) |
290
|
|
|
|
|
|
|
]; |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
# restore from backup |
293
|
|
|
|
|
|
|
while (my ($id, $start_final) = each %backup) { |
294
|
|
|
|
|
|
|
graph_delete_vertices_fast($self, |
295
|
|
|
|
|
|
|
graph_all_successors_and_self($self, $symbols->{$id}{start_vertex}) |
296
|
|
|
|
|
|
|
); |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
my ($start, $final) = _clone_subgraph_between($self, |
299
|
|
|
|
|
|
|
@$start_final); |
300
|
|
|
|
|
|
|
$symbols->{$id}{start_vertex} = $start; |
301
|
|
|
|
|
|
|
$symbols->{$id}{final_vertex} = $final; |
302
|
|
|
|
|
|
|
} |
303
|
|
|
|
|
|
|
} |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
while (my ($id, $start_final) = each %expanded) { |
306
|
|
|
|
|
|
|
graph_delete_vertices_fast($self, |
307
|
|
|
|
|
|
|
graph_all_successors_and_self($self, $symbols->{$id}{start_vertex}) |
308
|
|
|
|
|
|
|
); |
309
|
|
|
|
|
|
|
$symbols->{$id}{start_vertex} = $start_final->[0]; |
310
|
|
|
|
|
|
|
$symbols->{$id}{final_vertex} = $start_final->[1]; |
311
|
|
|
|
|
|
|
} |
312
|
|
|
|
|
|
|
} |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
sub fa_expand_references { |
315
|
|
|
|
|
|
|
my ($self) = @_; |
316
|
|
|
|
|
|
|
my $ref_graph = $self->_fa_ref_graph; |
317
|
|
|
|
|
|
|
my $symbols = $self->get_graph_attribute('symbol_table'); |
318
|
|
|
|
|
|
|
my $scg = $ref_graph->strongly_connected_graph; |
319
|
|
|
|
|
|
|
my @topo = reverse $scg->toposort; |
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
for (my $ix = 0; $ix < @topo; ++$ix) { |
322
|
|
|
|
|
|
|
my $comp = $topo[$ix]; |
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
if ($comp =~ /\+/) { |
325
|
|
|
|
|
|
|
_replace_strongly_connected_component($self, $comp); |
326
|
|
|
|
|
|
|
} else { |
327
|
|
|
|
|
|
|
_replace_direct_recursion($self, $comp); |
328
|
|
|
|
|
|
|
} |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
for my $id (split/\+/, $comp) { |
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
my @things = map { _find_refs($self, $id, |
333
|
|
|
|
|
|
|
$symbols->{$_}{start_vertex}) } |
334
|
|
|
|
|
|
|
$ref_graph->predecessors($id); |
335
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
for my $v (@things) { |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
next unless $self->has_vertex($v); |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
my ($src, $dst) = $self->_clone_subgraph_between( |
341
|
|
|
|
|
|
|
$symbols->{$id}{start_vertex}, |
342
|
|
|
|
|
|
|
$symbols->{$id}{final_vertex}); |
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
for my $o ($self->predecessors($v)) { |
345
|
|
|
|
|
|
|
$self->add_edge($o, $src); |
346
|
|
|
|
|
|
|
} |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
for my $o ($self->successors($v)) { |
349
|
|
|
|
|
|
|
$self->add_edge($dst, $o); |
350
|
|
|
|
|
|
|
} |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
graph_delete_vertex_fast($self, $v); |
353
|
|
|
|
|
|
|
} |
354
|
|
|
|
|
|
|
} |
355
|
|
|
|
|
|
|
} |
356
|
|
|
|
|
|
|
} |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
##################################################################### |
359
|
|
|
|
|
|
|
# Remove unlabeled vertices |
360
|
|
|
|
|
|
|
##################################################################### |
361
|
|
|
|
|
|
|
sub fa_remove_useless_epsilons { |
362
|
|
|
|
|
|
|
my ($graph, @todo) = @_; |
363
|
|
|
|
|
|
|
my %deleted; |
364
|
|
|
|
|
|
|
for my $v (sort @todo) { |
365
|
|
|
|
|
|
|
my $label = $graph->get_vertex_attribute($v, 'label'); |
366
|
|
|
|
|
|
|
next if defined $label and ref($label) ne 'Grammar::Formal::Empty'; |
367
|
|
|
|
|
|
|
next unless $graph->successors($v); |
368
|
|
|
|
|
|
|
next unless $graph->predecessors($v); |
369
|
|
|
|
|
|
|
for my $src ($graph->predecessors($v)) { |
370
|
|
|
|
|
|
|
for my $dst ($graph->successors($v)) { |
371
|
|
|
|
|
|
|
$graph->add_edge($src, $dst); |
372
|
|
|
|
|
|
|
} |
373
|
|
|
|
|
|
|
} |
374
|
|
|
|
|
|
|
$deleted{$v}++; |
375
|
|
|
|
|
|
|
} |
376
|
|
|
|
|
|
|
graph_delete_vertices_fast($graph, keys %deleted); |
377
|
|
|
|
|
|
|
}; |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
##################################################################### |
380
|
|
|
|
|
|
|
# Merge character classes |
381
|
|
|
|
|
|
|
##################################################################### |
382
|
|
|
|
|
|
|
sub fa_merge_character_classes { |
383
|
|
|
|
|
|
|
my ($self) = @_; |
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
my %groups = partition_by { |
386
|
|
|
|
|
|
|
freeze [[sort $self->predecessors($_)], [sort $self->successors($_)]]; |
387
|
|
|
|
|
|
|
} grep { |
388
|
|
|
|
|
|
|
my $label = $self->get_vertex_attribute($_, 'label'); |
389
|
|
|
|
|
|
|
$label and $label->isa('Grammar::Formal::CharClass'); |
390
|
|
|
|
|
|
|
} $self->vertices; |
391
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
require Set::IntSpan; |
393
|
|
|
|
|
|
|
while (my ($k, $v) = each %groups) { |
394
|
|
|
|
|
|
|
next unless @$v > 1; |
395
|
|
|
|
|
|
|
my $union = Set::IntSpan->new; |
396
|
|
|
|
|
|
|
for my $vertex (@$v) { |
397
|
|
|
|
|
|
|
my $label = $self->get_vertex_attribute($vertex, 'label'); |
398
|
|
|
|
|
|
|
$union->U($label->spans); |
399
|
|
|
|
|
|
|
} |
400
|
|
|
|
|
|
|
my $class = Grammar::Formal::CharClass->new(spans => $union); |
401
|
|
|
|
|
|
|
my $state = $self->fa_add_state($class); |
402
|
|
|
|
|
|
|
$self->add_edge($_, $state) for $self->predecessors($v->[0]); |
403
|
|
|
|
|
|
|
$self->add_edge($state, $_) for $self->successors($v->[0]); |
404
|
|
|
|
|
|
|
graph_delete_vertices_fast($self, @$v); |
405
|
|
|
|
|
|
|
} |
406
|
|
|
|
|
|
|
} |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
##################################################################### |
409
|
|
|
|
|
|
|
# Merge character classes |
410
|
|
|
|
|
|
|
##################################################################### |
411
|
|
|
|
|
|
|
sub fa_separate_character_classes { |
412
|
|
|
|
|
|
|
my ($self) = @_; |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
require Set::IntSpan::Partition; |
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
my @vertices = grep { |
417
|
|
|
|
|
|
|
my $label = $self->get_vertex_attribute($_, 'label'); |
418
|
|
|
|
|
|
|
$label and $label->isa('Grammar::Formal::CharClass') |
419
|
|
|
|
|
|
|
} $self->vertices; |
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
my @classes = map { |
422
|
|
|
|
|
|
|
$self->get_vertex_attribute($_, 'label')->spans; |
423
|
|
|
|
|
|
|
} @vertices; |
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
my %map = Set::IntSpan::Partition::intspan_partition_map(@classes); |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
for (my $ix = 0; $ix < @vertices; ++$ix) { |
428
|
|
|
|
|
|
|
for (@{ $map{$ix} }) { |
429
|
|
|
|
|
|
|
my $state = $self->fa_add_state( |
430
|
|
|
|
|
|
|
Grammar::Formal::CharClass->new(spans => $_)); |
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
for my $p ($self->predecessors($vertices[$ix])) { |
433
|
|
|
|
|
|
|
$self->add_edge($p, $state); |
434
|
|
|
|
|
|
|
} |
435
|
|
|
|
|
|
|
for my $s ($self->successors($vertices[$ix])) { |
436
|
|
|
|
|
|
|
$self->add_edge($state, $s); |
437
|
|
|
|
|
|
|
} |
438
|
|
|
|
|
|
|
} |
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
graph_delete_vertex_fast($self, $vertices[$ix]); |
441
|
|
|
|
|
|
|
} |
442
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
} |
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
##################################################################### |
446
|
|
|
|
|
|
|
# ... |
447
|
|
|
|
|
|
|
##################################################################### |
448
|
|
|
|
|
|
|
sub _delete_not_allowed { |
449
|
|
|
|
|
|
|
my ($self) = @_; |
450
|
|
|
|
|
|
|
graph_delete_vertex_fast($self, $_) for grep { |
451
|
|
|
|
|
|
|
my $label = $self->get_vertex_attribute($_, 'label'); |
452
|
|
|
|
|
|
|
$label and $label->isa('Grammar::Formal::NotAllowed'); |
453
|
|
|
|
|
|
|
} $self->vertices; |
454
|
|
|
|
|
|
|
} |
455
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
##################################################################### |
457
|
|
|
|
|
|
|
# ... |
458
|
|
|
|
|
|
|
##################################################################### |
459
|
|
|
|
|
|
|
sub _delete_unreachables { |
460
|
|
|
|
|
|
|
my ($self) = @_; |
461
|
|
|
|
|
|
|
my $symbols = $self->get_graph_attribute('symbol_table'); |
462
|
|
|
|
|
|
|
my %keep; |
463
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
$keep{$_}++ for map { |
465
|
|
|
|
|
|
|
my @suc = graph_all_successors_and_self($self, $_->{start_vertex}); |
466
|
|
|
|
|
|
|
# Always keep final vertices |
467
|
|
|
|
|
|
|
my @fin = $_->{final_vertex}; |
468
|
|
|
|
|
|
|
(@suc, @fin); |
469
|
|
|
|
|
|
|
} values %$symbols; |
470
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
graph_delete_vertices_fast($self, grep { |
472
|
|
|
|
|
|
|
not $keep{$_} |
473
|
|
|
|
|
|
|
} $self->vertices); |
474
|
|
|
|
|
|
|
} |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
##################################################################### |
477
|
|
|
|
|
|
|
# Constructor |
478
|
|
|
|
|
|
|
##################################################################### |
479
|
|
|
|
|
|
|
sub from_grammar_formal { |
480
|
|
|
|
|
|
|
my ($class, $formal, %options) = @_; |
481
|
|
|
|
|
|
|
my $self = $class->new; |
482
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
_add_to_automaton($formal, $self); |
484
|
|
|
|
|
|
|
_delete_not_allowed($self); |
485
|
|
|
|
|
|
|
fa_remove_useless_epsilons($self, $self->vertices); |
486
|
|
|
|
|
|
|
_delete_unreachables($self); |
487
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
return $self; |
489
|
|
|
|
|
|
|
} |
490
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
##################################################################### |
492
|
|
|
|
|
|
|
# Helper function to write some forms of repetition to the graph |
493
|
|
|
|
|
|
|
##################################################################### |
494
|
|
|
|
|
|
|
sub _bound_repetition { |
495
|
|
|
|
|
|
|
my ($min, $max, $child, $fa, $root) = @_; |
496
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
die if defined $max and $min > $max; |
498
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
if ($min <= 1 and not defined $max) { |
500
|
|
|
|
|
|
|
my $s1 = $fa->fa_add_state; |
501
|
|
|
|
|
|
|
my $s2 = $fa->fa_add_state; |
502
|
|
|
|
|
|
|
my $s3 = $fa->fa_add_state; |
503
|
|
|
|
|
|
|
my $s4 = $fa->fa_add_state; |
504
|
|
|
|
|
|
|
my ($ps, $pf) = _add_to_automaton($child, $fa, $root); |
505
|
|
|
|
|
|
|
$fa->add_edge($s1, $s2); |
506
|
|
|
|
|
|
|
$fa->add_edge($s2, $ps); |
507
|
|
|
|
|
|
|
$fa->add_edge($pf, $s3); |
508
|
|
|
|
|
|
|
$fa->add_edge($s3, $s4); |
509
|
|
|
|
|
|
|
$fa->add_edge($s2, $s3) if $min == 0; |
510
|
|
|
|
|
|
|
$fa->add_edge($s3, $s2); # loop |
511
|
|
|
|
|
|
|
return ($s1, $s4); |
512
|
|
|
|
|
|
|
} |
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
my $s1 = $fa->fa_add_state; |
515
|
|
|
|
|
|
|
my $first = $s1; |
516
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
while ($min--) { |
518
|
|
|
|
|
|
|
my ($src, $dst) = _add_to_automaton($child, $fa, $root); |
519
|
|
|
|
|
|
|
$fa->add_edge($s1, $src); |
520
|
|
|
|
|
|
|
$s1 = $dst; |
521
|
|
|
|
|
|
|
$max-- if defined $max; |
522
|
|
|
|
|
|
|
} |
523
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
if (defined $max and $max == 0) { |
525
|
|
|
|
|
|
|
my $s2 = $fa->fa_add_state; |
526
|
|
|
|
|
|
|
$fa->add_edge($s1, $s2); |
527
|
|
|
|
|
|
|
return ($first, $s2); |
528
|
|
|
|
|
|
|
} |
529
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
do { |
531
|
|
|
|
|
|
|
my ($src, $dst) = _add_to_automaton($child, $fa, $root); |
532
|
|
|
|
|
|
|
$fa->add_edge($s1, $src); |
533
|
|
|
|
|
|
|
my $sx = $fa->fa_add_state; |
534
|
|
|
|
|
|
|
$fa->add_edge($dst, $sx); |
535
|
|
|
|
|
|
|
$fa->add_edge($s1, $sx); # optional because min <= 0 now |
536
|
|
|
|
|
|
|
$fa->add_edge($sx, $s1) if not defined $max; # loop |
537
|
|
|
|
|
|
|
$s1 = $sx; |
538
|
|
|
|
|
|
|
} while (defined $max and --$max); |
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
my $s2 = $fa->fa_add_state; |
541
|
|
|
|
|
|
|
$fa->add_edge($s1, $s2); |
542
|
|
|
|
|
|
|
|
543
|
|
|
|
|
|
|
return ($first, $s2); |
544
|
|
|
|
|
|
|
} |
545
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
##################################################################### |
547
|
|
|
|
|
|
|
# Collection of sub routines that write patterns to the graph |
548
|
|
|
|
|
|
|
##################################################################### |
549
|
|
|
|
|
|
|
my %pattern_converters = ( |
550
|
|
|
|
|
|
|
|
551
|
|
|
|
|
|
|
'Grammar::Formal::CharClass' => sub { |
552
|
|
|
|
|
|
|
my ($pattern, $fa, $root) = @_; |
553
|
|
|
|
|
|
|
my $s1 = $fa->fa_add_state; |
554
|
|
|
|
|
|
|
my $s2 = $fa->fa_add_state($pattern); |
555
|
|
|
|
|
|
|
my $s3 = $fa->fa_add_state; |
556
|
|
|
|
|
|
|
$fa->add_edge($s1, $s2); |
557
|
|
|
|
|
|
|
$fa->add_edge($s2, $s3); |
558
|
|
|
|
|
|
|
return ($s1, $s3); |
559
|
|
|
|
|
|
|
}, |
560
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
'Grammar::Formal::ProseValue' => sub { |
562
|
|
|
|
|
|
|
my ($pattern, $fa, $root) = @_; |
563
|
|
|
|
|
|
|
my $s1 = $fa->fa_add_state; |
564
|
|
|
|
|
|
|
my $s2 = $fa->fa_add_state($pattern); |
565
|
|
|
|
|
|
|
my $s3 = $fa->fa_add_state; |
566
|
|
|
|
|
|
|
$fa->add_edge($s1, $s2); |
567
|
|
|
|
|
|
|
$fa->add_edge($s2, $s3); |
568
|
|
|
|
|
|
|
return ($s1, $s3); |
569
|
|
|
|
|
|
|
}, |
570
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
'Grammar::Formal::Reference' => sub { |
572
|
|
|
|
|
|
|
my ($pattern, $fa, $root) = @_; |
573
|
|
|
|
|
|
|
my $s1 = $fa->fa_add_state; |
574
|
|
|
|
|
|
|
my $s2 = $fa->fa_add_state($pattern); |
575
|
|
|
|
|
|
|
my $s3 = $fa->fa_add_state; |
576
|
|
|
|
|
|
|
$fa->add_edge($s1, $s2); |
577
|
|
|
|
|
|
|
$fa->add_edge($s2, $s3); |
578
|
|
|
|
|
|
|
return ($s1, $s3); |
579
|
|
|
|
|
|
|
}, |
580
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
'Grammar::Formal::NotAllowed' => sub { |
582
|
|
|
|
|
|
|
my ($pattern, $fa, $root) = @_; |
583
|
|
|
|
|
|
|
my $s1 = $fa->fa_add_state; |
584
|
|
|
|
|
|
|
my $s2 = $fa->fa_add_state($pattern); |
585
|
|
|
|
|
|
|
my $s3 = $fa->fa_add_state; |
586
|
|
|
|
|
|
|
$fa->add_edge($s1, $s2); |
587
|
|
|
|
|
|
|
$fa->add_edge($s2, $s3); |
588
|
|
|
|
|
|
|
return ($s1, $s3); |
589
|
|
|
|
|
|
|
}, |
590
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
'Grammar::Formal::Whatever' => sub { |
592
|
|
|
|
|
|
|
my ($pattern, $fa, $root) = @_; |
593
|
|
|
|
|
|
|
my $s1 = $fa->fa_add_state; |
594
|
|
|
|
|
|
|
my $s2 = $fa->fa_add_state($pattern); |
595
|
|
|
|
|
|
|
my $s3 = $fa->fa_add_state; |
596
|
|
|
|
|
|
|
|
597
|
|
|
|
|
|
|
$fa->add_edge($s1, $s2); |
598
|
|
|
|
|
|
|
$fa->add_edge($s2, $s3); |
599
|
|
|
|
|
|
|
$fa->add_edge($s1, $s3); |
600
|
|
|
|
|
|
|
$fa->add_edge($s2, $s2); |
601
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
return ($s1, $s3); |
603
|
|
|
|
|
|
|
}, |
604
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
'Grammar::Formal::Range' => sub { |
606
|
|
|
|
|
|
|
my ($pattern, $fa, $root) = @_; |
607
|
|
|
|
|
|
|
my $char_class = Grammar::Formal::CharClass |
608
|
|
|
|
|
|
|
->from_numbers($pattern->min .. $pattern->max); |
609
|
|
|
|
|
|
|
return _add_to_automaton($char_class, $fa, $root); |
610
|
|
|
|
|
|
|
}, |
611
|
|
|
|
|
|
|
|
612
|
|
|
|
|
|
|
'Grammar::Formal::AsciiInsensitiveString' => sub { |
613
|
|
|
|
|
|
|
my ($pattern, $fa, $root) = @_; |
614
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
use bytes; |
616
|
|
|
|
|
|
|
|
617
|
|
|
|
|
|
|
my @spans = map { |
618
|
|
|
|
|
|
|
Grammar::Formal::CharClass |
619
|
|
|
|
|
|
|
->from_numbers(ord(lc), ord(uc)) |
620
|
|
|
|
|
|
|
} split//, $pattern->value; |
621
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
my $group = Grammar::Formal::Empty->new; |
623
|
|
|
|
|
|
|
|
624
|
|
|
|
|
|
|
while (@spans) { |
625
|
|
|
|
|
|
|
$group = Grammar::Formal::Group->new(p1 => pop(@spans), p2 => $group); |
626
|
|
|
|
|
|
|
} |
627
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
return _add_to_automaton($group, $fa, $root); |
629
|
|
|
|
|
|
|
}, |
630
|
|
|
|
|
|
|
|
631
|
|
|
|
|
|
|
'Grammar::Formal::CaseSensitiveString' => sub { |
632
|
|
|
|
|
|
|
my ($pattern, $fa, $root) = @_; |
633
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
my @spans = map { |
635
|
|
|
|
|
|
|
Grammar::Formal::CharClass |
636
|
|
|
|
|
|
|
->from_numbers(ord) |
637
|
|
|
|
|
|
|
} split//, $pattern->value; |
638
|
|
|
|
|
|
|
|
639
|
|
|
|
|
|
|
my $group = Grammar::Formal::Empty->new; |
640
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
while (@spans) { |
642
|
|
|
|
|
|
|
$group = Grammar::Formal::Group->new(p1 => pop(@spans), p2 => $group); |
643
|
|
|
|
|
|
|
} |
644
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
return _add_to_automaton($group, $fa, $root); |
646
|
|
|
|
|
|
|
}, |
647
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
'Grammar::Formal::Grammar' => sub { |
649
|
|
|
|
|
|
|
my ($pattern, $fa, $root) = @_; |
650
|
|
|
|
|
|
|
|
651
|
|
|
|
|
|
|
my %map = map { |
652
|
|
|
|
|
|
|
$_ => [ _add_to_automaton($pattern->rules->{$_}, $fa) ] |
653
|
|
|
|
|
|
|
} keys %{ $pattern->rules }; |
654
|
|
|
|
|
|
|
|
655
|
|
|
|
|
|
|
return unless defined $pattern->start; |
656
|
|
|
|
|
|
|
|
657
|
|
|
|
|
|
|
my $s1 = $fa->fa_add_state; |
658
|
|
|
|
|
|
|
my $s2 = $fa->fa_add_state; |
659
|
|
|
|
|
|
|
my ($ps, $pf) = @{ $map{ $pattern->start } }; |
660
|
|
|
|
|
|
|
$fa->add_edge($s1, $ps); |
661
|
|
|
|
|
|
|
$fa->add_edge($pf, $s2); |
662
|
|
|
|
|
|
|
|
663
|
|
|
|
|
|
|
return ($s1, $s2); |
664
|
|
|
|
|
|
|
}, |
665
|
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
'Grammar::Formal' => sub { |
667
|
|
|
|
|
|
|
my ($pattern, $fa, $root) = @_; |
668
|
|
|
|
|
|
|
|
669
|
|
|
|
|
|
|
my %map = map { |
670
|
|
|
|
|
|
|
$_ => [ _add_to_automaton($pattern->rules->{$_}, $fa) ] |
671
|
|
|
|
|
|
|
} keys %{ $pattern->rules }; |
672
|
|
|
|
|
|
|
|
673
|
|
|
|
|
|
|
# root, so we do not return src and dst |
674
|
|
|
|
|
|
|
return; |
675
|
|
|
|
|
|
|
}, |
676
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
'Grammar::Formal::Rule' => sub { |
678
|
|
|
|
|
|
|
my ($pattern, $fa, $root) = @_; |
679
|
|
|
|
|
|
|
my $s1 = $fa->fa_add_state; |
680
|
|
|
|
|
|
|
my $s2 = $fa->fa_add_state; |
681
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
my $table = $fa->get_graph_attribute('symbol_table') // {}; |
683
|
|
|
|
|
|
|
$fa->set_graph_attribute('symbol_table', $table); |
684
|
|
|
|
|
|
|
$table->{$pattern} //= {}; |
685
|
|
|
|
|
|
|
$table->{$pattern}{start_vertex} = $s1; |
686
|
|
|
|
|
|
|
$table->{$pattern}{final_vertex} = $s2; |
687
|
|
|
|
|
|
|
$table->{$pattern}{shortname} = $pattern->name; |
688
|
|
|
|
|
|
|
$fa->set_vertex_attribute($s1, 'label', |
689
|
|
|
|
|
|
|
Grammar::Graph::StartOf->new(of => "$pattern")); |
690
|
|
|
|
|
|
|
$fa->set_vertex_attribute($s2, 'label', |
691
|
|
|
|
|
|
|
Grammar::Graph::FinalOf->new(of => "$pattern")); |
692
|
|
|
|
|
|
|
|
693
|
|
|
|
|
|
|
my ($ps, $pf) = _add_to_automaton($pattern->p, $fa, [$pattern, $s1, $s2]); |
694
|
|
|
|
|
|
|
$fa->add_edge($s1, $ps); |
695
|
|
|
|
|
|
|
$fa->add_edge($pf, $s2); |
696
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
return ($s1, $s2); |
698
|
|
|
|
|
|
|
}, |
699
|
|
|
|
|
|
|
|
700
|
|
|
|
|
|
|
'Grammar::Formal::BoundRepetition' => sub { |
701
|
|
|
|
|
|
|
my ($pattern, $fa, $root) = @_; |
702
|
|
|
|
|
|
|
return _bound_repetition($pattern->min, $pattern->max, $pattern->p, $fa, $root); |
703
|
|
|
|
|
|
|
}, |
704
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
'Grammar::Formal::SomeOrMore' => sub { |
706
|
|
|
|
|
|
|
my ($pattern, $fa, $root) = @_; |
707
|
|
|
|
|
|
|
return _bound_repetition($pattern->min, undef, $pattern->p, $fa, $root); |
708
|
|
|
|
|
|
|
}, |
709
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
'Grammar::Formal::OneOrMore' => sub { |
711
|
|
|
|
|
|
|
my ($self, $fa, $root) = @_; |
712
|
|
|
|
|
|
|
my $s1 = $fa->add_state; |
713
|
|
|
|
|
|
|
my $s2 = $fa->add_state; |
714
|
|
|
|
|
|
|
my $s3 = $fa->add_state; |
715
|
|
|
|
|
|
|
my $s4 = $fa->add_state; |
716
|
|
|
|
|
|
|
my ($ps, $pf) = $self->p->add_to_automaton($fa, $root); |
717
|
|
|
|
|
|
|
$fa->add_e_transition($s1, $s2); |
718
|
|
|
|
|
|
|
$fa->add_e_transition($s2, $ps); |
719
|
|
|
|
|
|
|
$fa->add_e_transition($pf, $s3); |
720
|
|
|
|
|
|
|
$fa->add_e_transition($s3, $s4); |
721
|
|
|
|
|
|
|
$fa->add_e_transition($s3, $s2); |
722
|
|
|
|
|
|
|
|
723
|
|
|
|
|
|
|
return ($s1, $s4); |
724
|
|
|
|
|
|
|
}, |
725
|
|
|
|
|
|
|
|
726
|
|
|
|
|
|
|
'Grammar::Formal::ZeroOrMore' => sub { |
727
|
|
|
|
|
|
|
my ($self, $fa, $root) = @_; |
728
|
|
|
|
|
|
|
my $s1 = $fa->add_state; |
729
|
|
|
|
|
|
|
my $s2 = $fa->add_state; |
730
|
|
|
|
|
|
|
my $s3 = $fa->add_state; |
731
|
|
|
|
|
|
|
my $s4 = $fa->add_state; |
732
|
|
|
|
|
|
|
my ($ps, $pf) = $self->p->add_to_automaton($fa, $root); |
733
|
|
|
|
|
|
|
$fa->add_e_transition($s1, $s2); |
734
|
|
|
|
|
|
|
$fa->add_e_transition($s2, $ps); |
735
|
|
|
|
|
|
|
$fa->add_e_transition($pf, $s3); |
736
|
|
|
|
|
|
|
$fa->add_e_transition($s3, $s4); |
737
|
|
|
|
|
|
|
$fa->add_e_transition($s3, $s2); |
738
|
|
|
|
|
|
|
$fa->add_e_transition($s2, $s3); # zero |
739
|
|
|
|
|
|
|
|
740
|
|
|
|
|
|
|
return ($s1, $s4); |
741
|
|
|
|
|
|
|
}, |
742
|
|
|
|
|
|
|
|
743
|
|
|
|
|
|
|
'Grammar::Formal::Empty' => sub { |
744
|
|
|
|
|
|
|
my ($pattern, $fa, $root) = @_; |
745
|
|
|
|
|
|
|
my $s1 = $fa->fa_add_state; |
746
|
|
|
|
|
|
|
my $s3 = $fa->fa_add_state; |
747
|
|
|
|
|
|
|
my $s2 = $fa->fa_add_state; |
748
|
|
|
|
|
|
|
$fa->add_edge($s1, $s2); |
749
|
|
|
|
|
|
|
$fa->add_edge($s2, $s3); |
750
|
|
|
|
|
|
|
return ($s1, $s3); |
751
|
|
|
|
|
|
|
}, |
752
|
|
|
|
|
|
|
|
753
|
|
|
|
|
|
|
'Grammar::Formal::Choice' => sub { |
754
|
|
|
|
|
|
|
my ($pattern, $fa, $root) = @_; |
755
|
|
|
|
|
|
|
my $s1 = $fa->fa_add_state; |
756
|
|
|
|
|
|
|
my $s2 = $fa->fa_add_state; |
757
|
|
|
|
|
|
|
my ($p1s, $p1f) = _add_to_automaton($pattern->p1, $fa, $root); |
758
|
|
|
|
|
|
|
my ($p2s, $p2f) = _add_to_automaton($pattern->p2, $fa, $root); |
759
|
|
|
|
|
|
|
$fa->add_edge($s1, $p1s); |
760
|
|
|
|
|
|
|
$fa->add_edge($s1, $p2s); |
761
|
|
|
|
|
|
|
$fa->add_edge($p1f, $s2); |
762
|
|
|
|
|
|
|
$fa->add_edge($p2f, $s2); |
763
|
|
|
|
|
|
|
return ($s1, $s2); |
764
|
|
|
|
|
|
|
}, |
765
|
|
|
|
|
|
|
|
766
|
|
|
|
|
|
|
'Grammar::Formal::Group' => sub { |
767
|
|
|
|
|
|
|
my ($pattern, $fa, $root) = @_; |
768
|
|
|
|
|
|
|
my $s1 = $fa->fa_add_state; |
769
|
|
|
|
|
|
|
my $s2 = $fa->fa_add_state; |
770
|
|
|
|
|
|
|
my ($p1s, $p1f) = _add_to_automaton($pattern->p1, $fa, $root); |
771
|
|
|
|
|
|
|
my ($p2s, $p2f) = _add_to_automaton($pattern->p2, $fa, $root); |
772
|
|
|
|
|
|
|
$fa->add_edge($p1f, $p2s); |
773
|
|
|
|
|
|
|
$fa->add_edge($s1, $p1s); |
774
|
|
|
|
|
|
|
$fa->add_edge($p2f, $s2); |
775
|
|
|
|
|
|
|
return ($s1, $s2); |
776
|
|
|
|
|
|
|
}, |
777
|
|
|
|
|
|
|
|
778
|
|
|
|
|
|
|
); |
779
|
|
|
|
|
|
|
|
780
|
|
|
|
|
|
|
|
781
|
|
|
|
|
|
|
sub _add_to_automaton { |
782
|
|
|
|
|
|
|
my ($pattern, $self, $root) = @_; |
783
|
|
|
|
|
|
|
if ($pattern_converters{ref $pattern}) { |
784
|
|
|
|
|
|
|
return $pattern_converters{ref $pattern}->($pattern, $self, $root); |
785
|
|
|
|
|
|
|
} |
786
|
|
|
|
|
|
|
my $s1 = $self->fa_add_state; |
787
|
|
|
|
|
|
|
my $s2 = $self->fa_add_state($pattern); |
788
|
|
|
|
|
|
|
my $s3 = $self->fa_add_state; |
789
|
|
|
|
|
|
|
$self->add_edge($s1, $s2); |
790
|
|
|
|
|
|
|
$self->add_edge($s2, $s3); |
791
|
|
|
|
|
|
|
return ($s1, $s3); |
792
|
|
|
|
|
|
|
} |
793
|
|
|
|
|
|
|
|
794
|
|
|
|
|
|
|
1; |
795
|
|
|
|
|
|
|
|
796
|
|
|
|
|
|
|
__END__ |
797
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
=head1 NAME |
799
|
|
|
|
|
|
|
|
800
|
|
|
|
|
|
|
Grammar::Graph - Graph representation of formal grammars |
801
|
|
|
|
|
|
|
|
802
|
|
|
|
|
|
|
=head1 SYNOPSIS |
803
|
|
|
|
|
|
|
|
804
|
|
|
|
|
|
|
use Grammar::Graph; |
805
|
|
|
|
|
|
|
my $g = Grammar::Graph->from_grammar_formal($formal); |
806
|
|
|
|
|
|
|
my $symbols = $g->get_graph_attribute('symbol_table'); |
807
|
|
|
|
|
|
|
my $new_state = $g->fa_add_state(); |
808
|
|
|
|
|
|
|
... |
809
|
|
|
|
|
|
|
|
810
|
|
|
|
|
|
|
=head1 DESCRIPTION |
811
|
|
|
|
|
|
|
|
812
|
|
|
|
|
|
|
Graph representation of formal grammars. |
813
|
|
|
|
|
|
|
|
814
|
|
|
|
|
|
|
=head1 METHODS |
815
|
|
|
|
|
|
|
|
816
|
|
|
|
|
|
|
=over |
817
|
|
|
|
|
|
|
|
818
|
|
|
|
|
|
|
=item C<from_grammar_formal($grammar_formal)> |
819
|
|
|
|
|
|
|
|
820
|
|
|
|
|
|
|
Constructs a new C<Grammar::Graph> object from a L<Grammar::Formal> |
821
|
|
|
|
|
|
|
object. C<Grammar::Graph> derives from L<Graph>. The graph has a |
822
|
|
|
|
|
|
|
graph attribute C<symbol_table> with an entry for each rule identifying |
823
|
|
|
|
|
|
|
C<start_vertex>, C<final_vertex>, C<shortname>, and other properties. |
824
|
|
|
|
|
|
|
|
825
|
|
|
|
|
|
|
=item C<fa_add_state($label)> |
826
|
|
|
|
|
|
|
|
827
|
|
|
|
|
|
|
Adds a new vertex to the graph and optionally labeles it with the |
828
|
|
|
|
|
|
|
supplied label. The vertex should be assumed to be a random integer. |
829
|
|
|
|
|
|
|
Care should be taken when adding vertices to the graph through other |
830
|
|
|
|
|
|
|
means to avoid clashes. |
831
|
|
|
|
|
|
|
|
832
|
|
|
|
|
|
|
=item C<fa_all_e_reachable($v)> |
833
|
|
|
|
|
|
|
|
834
|
|
|
|
|
|
|
Returns the successors of $v and transitively any successors that can |
835
|
|
|
|
|
|
|
be reached without going over a vertex labeled by something other than |
836
|
|
|
|
|
|
|
C<Grammar::Formal::Empty>-derived objects. In other words, all the |
837
|
|
|
|
|
|
|
vertices that can be reached without going over an input symbol. |
838
|
|
|
|
|
|
|
|
839
|
|
|
|
|
|
|
=item C<fa_expand_references()> |
840
|
|
|
|
|
|
|
|
841
|
|
|
|
|
|
|
Modifies the graph such that vertices are no longer labeled with |
842
|
|
|
|
|
|
|
C<Grammar::Formal::Reference> nodes provided there is an entry for |
843
|
|
|
|
|
|
|
the referenced symbol in the Graph's C<symbol_table>. Recursive and |
844
|
|
|
|
|
|
|
cyclic references are linearised by vertices labeled with special |
845
|
|
|
|
|
|
|
C<Grammar::Graph::StartOf> and C<Grammar::Graph::FinalOf> nodes, and |
846
|
|
|
|
|
|
|
they in turn are protected by C<Grammar::Graph::Prefix> and linked |
847
|
|
|
|
|
|
|
C<Grammar::Graph::Suffix> nodes (the former identify the rule, the |
848
|
|
|
|
|
|
|
latter identify the reference) to ensure the nesting relationship |
849
|
|
|
|
|
|
|
can be fully recovered. |
850
|
|
|
|
|
|
|
|
851
|
|
|
|
|
|
|
=item C<fa_merge_character_classes()> |
852
|
|
|
|
|
|
|
|
853
|
|
|
|
|
|
|
Vertices labeled with a C<Grammar::Formal::CharClass> node that share |
854
|
|
|
|
|
|
|
the same set of predecessors and successors are merged into a single |
855
|
|
|
|
|
|
|
vertex labeled with a C<Grammar::Formal::CharClass> node that is the |
856
|
|
|
|
|
|
|
union of original vertices. |
857
|
|
|
|
|
|
|
|
858
|
|
|
|
|
|
|
=item C<fa_separate_character_classes()> |
859
|
|
|
|
|
|
|
|
860
|
|
|
|
|
|
|
Collects all vertices labeled with a C<Grammar::Formal::CharClass> node |
861
|
|
|
|
|
|
|
in the graph and replaces them with vertices labeled with |
862
|
|
|
|
|
|
|
C<Grammar::Formal::CharClass> nodes such that an input symbol matches |
863
|
|
|
|
|
|
|
at most a single C<Grammar::Formal::CharClass>. |
864
|
|
|
|
|
|
|
|
865
|
|
|
|
|
|
|
=item C<fa_remove_useless_epsilons()> |
866
|
|
|
|
|
|
|
|
867
|
|
|
|
|
|
|
Removes vertices labeled with nothing or C<Grammar::Formal::Empty> nodes |
868
|
|
|
|
|
|
|
by connecting all predecessors to all successors directly. The check for |
869
|
|
|
|
|
|
|
C<Grammar::Formal::Empty> is exact, derived classes do not match. |
870
|
|
|
|
|
|
|
|
871
|
|
|
|
|
|
|
=back |
872
|
|
|
|
|
|
|
|
873
|
|
|
|
|
|
|
=head1 EXPORTS |
874
|
|
|
|
|
|
|
|
875
|
|
|
|
|
|
|
None. |
876
|
|
|
|
|
|
|
|
877
|
|
|
|
|
|
|
=head1 AUTHOR / COPYRIGHT / LICENSE |
878
|
|
|
|
|
|
|
|
879
|
|
|
|
|
|
|
Copyright (c) 2014 Bjoern Hoehrmann <bjoern@hoehrmann.de>. |
880
|
|
|
|
|
|
|
This module is licensed under the same terms as Perl itself. |
881
|
|
|
|
|
|
|
|
882
|
|
|
|
|
|
|
=cut |