line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
50
|
|
|
50
|
|
661
|
use v5.14; |
|
50
|
|
|
|
|
163
|
|
2
|
50
|
|
|
50
|
|
287
|
use warnings; |
|
50
|
|
|
|
|
114
|
|
|
50
|
|
|
|
|
2413
|
|
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
=head1 NAME |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
Attean::API::IDPJoinPlanner - Iterative dynamic programming query planning role |
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
=head1 VERSION |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
This document describes Attean::API::IDPJoinPlanner version 0.032 |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
=head1 SYNOPSIS |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
extends 'Attean::QueryPlanner'; |
15
|
|
|
|
|
|
|
with 'Attean::API::IDPJoinPlanner'; |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
=head1 DESCRIPTION |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
The Attean::API::IDPJoinPlanner role provides a query planner the |
20
|
|
|
|
|
|
|
C<< joins_for_plan_alternatives >> method, as well as the cost estimation |
21
|
|
|
|
|
|
|
methods that consume the L<Attean::API::CostPlanner> role. |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
=head1 ATTRIBUTES |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
=over 4 |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
=back |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
=head1 METHODS |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=over 4 |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
=cut |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
use Types::Standard qw(CodeRef Object); |
36
|
50
|
|
|
50
|
|
288
|
|
|
50
|
|
|
|
|
92
|
|
|
50
|
|
|
|
|
368
|
|
37
|
|
|
|
|
|
|
use Moo::Role; |
38
|
50
|
|
|
50
|
|
27102
|
|
|
50
|
|
|
|
|
105
|
|
|
50
|
|
|
|
|
283
|
|
39
|
|
|
|
|
|
|
requires 'plan_for_algebra'; # plan_for_algebra($algebra, $model, \@default_graphs) |
40
|
|
|
|
|
|
|
has 'request_signer' => (is => 'rw', isa => Object); |
41
|
|
|
|
|
|
|
} |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
use Scalar::Util qw(refaddr); |
44
|
|
|
|
|
|
|
use Types::Standard qw(CodeRef); |
45
|
50
|
|
|
50
|
|
20160
|
|
|
50
|
|
|
|
|
121
|
|
|
50
|
|
|
|
|
2475
|
|
46
|
50
|
|
|
50
|
|
298
|
use Moo::Role; |
|
50
|
|
|
|
|
138
|
|
|
50
|
|
|
|
|
250
|
|
47
|
|
|
|
|
|
|
use namespace::clean; |
48
|
50
|
|
|
50
|
|
20057
|
with 'Attean::API::QueryPlanner'; |
|
50
|
|
|
|
|
117
|
|
|
50
|
|
|
|
|
190
|
|
49
|
50
|
|
|
50
|
|
15845
|
|
|
50
|
|
|
|
|
106
|
|
|
50
|
|
|
|
|
457
|
|
50
|
|
|
|
|
|
|
requires 'plans_for_algebra'; # plans_for_algebra($algebra, $model, \@active_graphs, \@default_graphs) |
51
|
|
|
|
|
|
|
requires 'cost_for_plan'; # cost_for_plan($plan, $model) |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
before 'cost_for_plan' => sub { |
54
|
|
|
|
|
|
|
my $self = shift; |
55
|
|
|
|
|
|
|
my $plan = shift; |
56
|
|
|
|
|
|
|
my $model = shift; |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
if (refaddr($self) == refaddr($model)) { |
59
|
|
|
|
|
|
|
Carp::confess "Model and planner objects cannot be the same in call to cost_for_plan"; |
60
|
|
|
|
|
|
|
} elsif ($self->does('Attean::API::Model') and $model->does('Attean::API::Model')) { |
61
|
|
|
|
|
|
|
Carp::confess "Model and planner objects cannot both consume Attean::API::Model in call to cost_for_plan"; |
62
|
|
|
|
|
|
|
} |
63
|
|
|
|
|
|
|
}; |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
my $self = shift; |
66
|
|
|
|
|
|
|
my $algebra = shift; |
67
|
|
|
|
|
|
|
my $model = shift; |
68
|
41
|
|
|
41
|
0
|
29526
|
my $default_graphs = shift; |
69
|
41
|
|
|
|
|
76
|
my $active_graphs = $default_graphs; |
70
|
41
|
|
|
|
|
61
|
my @plans = sort { $self->cost_for_plan($a, $model) <=> $self->cost_for_plan($b, $model) } $self->plans_for_algebra($algebra, $model, $active_graphs, $default_graphs); |
71
|
41
|
|
|
|
|
66
|
my $plan = shift(@plans); |
72
|
41
|
|
|
|
|
70
|
return $plan; |
73
|
41
|
|
|
|
|
234
|
} |
|
93
|
|
|
|
|
1718
|
|
74
|
41
|
|
|
|
|
974
|
} |
75
|
41
|
|
|
|
|
344
|
|
76
|
|
|
|
|
|
|
use Moo::Role; |
77
|
|
|
|
|
|
|
requires 'joins_for_plan_alternatives'; |
78
|
|
|
|
|
|
|
} |
79
|
|
|
|
|
|
|
|
80
|
50
|
|
|
50
|
|
40859
|
use Math::Cartesian::Product; |
|
50
|
|
|
|
|
111
|
|
|
50
|
|
|
|
|
224
|
|
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
use Moo::Role; |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
with 'Attean::API::JoinPlanner'; |
85
|
50
|
|
|
50
|
|
16774
|
with 'Attean::API::QueryPlanner'; |
|
50
|
|
|
|
|
141
|
|
|
50
|
|
|
|
|
2428
|
|
86
|
|
|
|
|
|
|
|
87
|
50
|
|
|
50
|
|
346
|
my $self = shift; |
|
50
|
|
|
|
|
133
|
|
|
50
|
|
|
|
|
264
|
|
88
|
|
|
|
|
|
|
my $model = shift; |
89
|
|
|
|
|
|
|
my $active_graphs = shift; |
90
|
|
|
|
|
|
|
my $default_graphs = shift; |
91
|
|
|
|
|
|
|
my $interesting = shift; |
92
|
|
|
|
|
|
|
my @args = @_; # each $args[$i] here is an array reference containing alternate plans for element $i |
93
|
47
|
|
|
47
|
0
|
95
|
|
94
|
47
|
|
|
|
|
79
|
my $plans = shift(@args); |
95
|
47
|
|
|
|
|
81
|
while (scalar(@args)) { |
96
|
47
|
|
|
|
|
74
|
my $next = shift(@args); |
97
|
47
|
|
|
|
|
79
|
my @plans = $self->join_plans($model, $active_graphs, $default_graphs, $plans, $next, 'inner'); |
98
|
47
|
|
|
|
|
105
|
$plans = \@plans; |
99
|
|
|
|
|
|
|
} |
100
|
47
|
|
|
|
|
89
|
|
101
|
47
|
|
|
|
|
137
|
my @plans = @$plans; |
102
|
35
|
|
|
|
|
67
|
return @plans; |
103
|
35
|
|
|
|
|
283
|
} |
104
|
35
|
|
|
|
|
130
|
} |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
use Types::Standard qw(Int); |
107
|
47
|
|
|
|
|
139
|
use Scalar::Util qw(blessed); |
108
|
47
|
|
|
|
|
235
|
|
109
|
|
|
|
|
|
|
use Moo::Role; |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
with 'Attean::API::CostPlanner'; |
112
|
|
|
|
|
|
|
with 'MooX::Log::Any'; |
113
|
50
|
|
|
50
|
|
23816
|
|
|
50
|
|
|
|
|
123
|
|
|
50
|
|
|
|
|
306
|
|
114
|
50
|
|
|
50
|
|
21846
|
has 'keep' => (is => 'ro', isa => Int, default => 5); |
|
50
|
|
|
|
|
112
|
|
|
50
|
|
|
|
|
2024
|
|
115
|
|
|
|
|
|
|
|
116
|
50
|
|
|
50
|
|
284
|
around 'joins_for_plan_alternatives' => sub { |
|
50
|
|
|
|
|
150
|
|
|
50
|
|
|
|
|
213
|
|
117
|
|
|
|
|
|
|
my $orig = shift; |
118
|
|
|
|
|
|
|
my $self = shift; |
119
|
|
|
|
|
|
|
my $model = shift; |
120
|
|
|
|
|
|
|
my @plans = $orig->($self, $model, @_); |
121
|
|
|
|
|
|
|
return $self->prune_plans($model, [], \@plans); |
122
|
|
|
|
|
|
|
}; |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
my $self = shift; |
125
|
|
|
|
|
|
|
my $model = shift; |
126
|
|
|
|
|
|
|
my $interesting = shift; |
127
|
|
|
|
|
|
|
my @plans = @{ shift || [] }; |
128
|
|
|
|
|
|
|
no sort 'stable'; |
129
|
|
|
|
|
|
|
my @sorted = map { $_->[1] } sort { $a->[0] <=> $b->[0] } map { [$self->cost_for_plan($_, $model), $_] } @plans; |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
return ($self->keep) ? splice(@sorted, 0, $self->keep) : @sorted; |
132
|
26
|
|
|
26
|
0
|
49
|
} |
133
|
26
|
|
|
|
|
36
|
|
134
|
26
|
|
|
|
|
40
|
my $self = shift; |
135
|
26
|
50
|
|
|
|
46
|
my $plan = shift; |
|
26
|
|
|
|
|
92
|
|
136
|
50
|
|
|
50
|
|
48352
|
my $model = shift; |
|
50
|
|
|
|
|
25636
|
|
|
50
|
|
|
|
|
286
|
|
137
|
26
|
|
|
|
|
63
|
Carp::confess "No model given" unless (blessed($model) and $model->does('Attean::API::Model')); |
|
691
|
|
|
|
|
889
|
|
|
3668
|
|
|
|
|
4005
|
|
|
691
|
|
|
|
|
10865
|
|
138
|
|
|
|
|
|
|
|
139
|
26
|
50
|
|
|
|
1572
|
if ($plan->has_cost) { |
140
|
|
|
|
|
|
|
return $plan->cost; |
141
|
|
|
|
|
|
|
} else { |
142
|
|
|
|
|
|
|
if ($model->does('Attean::API::CostPlanner')) { |
143
|
2082
|
|
|
2082
|
0
|
112190
|
if (defined(my $cost = $model->cost_for_plan($plan, $self))) { |
144
|
2082
|
|
|
|
|
2471
|
$plan->cost($cost); |
145
|
2082
|
|
|
|
|
2478
|
$self->log->info('Model \''.ref($model).'\' did cost planning for \''.ref($plan).'\' and got cost '.$cost); |
146
|
2082
|
50
|
33
|
|
|
8357
|
return $cost; |
147
|
|
|
|
|
|
|
} |
148
|
2082
|
100
|
|
|
|
29314
|
} |
149
|
1283
|
|
|
|
|
18246
|
|
150
|
|
|
|
|
|
|
my $cost = 1; |
151
|
799
|
50
|
|
|
|
1422
|
my @children = @{ $plan->children }; |
152
|
799
|
100
|
|
|
|
21427
|
if ($plan->isa('Attean::Plan::Quad')) { |
153
|
27
|
|
|
|
|
550
|
my @vars = map { $_->value } grep { blessed($_) and $_->does('Attean::API::Variable') } $plan->values; |
154
|
27
|
|
|
|
|
1411
|
return scalar(@vars); |
155
|
27
|
|
|
|
|
394
|
} elsif ($plan->isa('Attean::Plan::Table')) { |
156
|
|
|
|
|
|
|
my $rows = $plan->rows; |
157
|
|
|
|
|
|
|
$cost = scalar(@$rows); |
158
|
|
|
|
|
|
|
} elsif ($plan->isa('Attean::Plan::NestedLoopJoin')) { |
159
|
772
|
|
|
|
|
5609
|
my $lcost = $self->cost_for_plan($children[0], $model); |
160
|
772
|
|
|
|
|
943
|
my $rcost = $self->cost_for_plan($children[1], $model); |
|
772
|
|
|
|
|
2978
|
|
161
|
772
|
50
|
|
|
|
4010
|
if ($lcost == 0) { |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
162
|
0
|
0
|
|
|
|
0
|
$cost = $rcost; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
163
|
0
|
|
|
|
|
0
|
} elsif ($rcost == 0) { |
164
|
|
|
|
|
|
|
$cost = $lcost; |
165
|
0
|
|
|
|
|
0
|
} else { |
166
|
0
|
|
|
|
|
0
|
$cost = $lcost * $rcost; |
167
|
|
|
|
|
|
|
} |
168
|
408
|
|
|
|
|
6978
|
|
169
|
408
|
|
|
|
|
10842
|
# a cartesian nested loop join is bad, but the algorithm already |
170
|
408
|
100
|
|
|
|
5534
|
# has to check for all possible joins, so it's not as bad as |
|
|
50
|
|
|
|
|
|
171
|
26
|
|
|
|
|
43
|
# a cartesian hash join (below) |
172
|
|
|
|
|
|
|
$cost *= 10 unless ($plan->children_are_variable_connected); |
173
|
0
|
|
|
|
|
0
|
} elsif ($plan->isa('Attean::Plan::HashJoin')) { |
174
|
|
|
|
|
|
|
my $joined = $plan->children_are_variable_connected; |
175
|
382
|
|
|
|
|
575
|
my $lcost = $self->cost_for_plan($children[0], $model); |
176
|
|
|
|
|
|
|
my $rcost = $self->cost_for_plan($children[1], $model); |
177
|
|
|
|
|
|
|
$cost = ($lcost + $rcost); |
178
|
|
|
|
|
|
|
$cost += ($lcost < $rcost); # To let the plan with cheaper rhs win |
179
|
|
|
|
|
|
|
$cost *= 100 unless ($plan->children_are_variable_connected); |
180
|
|
|
|
|
|
|
} elsif ($plan->isa('Attean::Plan::Service')) { |
181
|
408
|
100
|
|
|
|
1010
|
my $scost = 10; |
182
|
|
|
|
|
|
|
foreach my $c (@{ $plan->children }) { |
183
|
356
|
|
|
|
|
827
|
$scost += $self->cost_for_plan($c, $model); |
184
|
356
|
|
|
|
|
6872
|
} |
185
|
356
|
|
|
|
|
10411
|
$cost = 5 * $scost; |
186
|
356
|
|
|
|
|
4859
|
} elsif ($plan->isa('Attean::Plan::Unique')) { |
187
|
356
|
|
|
|
|
445
|
$cost = 0; # consider a filter on the iterator (like unique) to be essentially free |
188
|
356
|
50
|
|
|
|
883
|
foreach my $c (@{ $plan->children }) { |
189
|
|
|
|
|
|
|
$cost += $self->cost_for_plan($c, $model); |
190
|
3
|
|
|
|
|
6
|
} |
191
|
3
|
|
|
|
|
6
|
} else { |
|
3
|
|
|
|
|
10
|
|
192
|
0
|
|
|
|
|
0
|
foreach my $c (@{ $plan->children }) { |
193
|
|
|
|
|
|
|
$cost += $self->cost_for_plan($c, $model); |
194
|
3
|
|
|
|
|
8
|
} |
195
|
|
|
|
|
|
|
} |
196
|
0
|
|
|
|
|
0
|
|
197
|
0
|
|
|
|
|
0
|
$plan->cost($cost); |
|
0
|
|
|
|
|
0
|
|
198
|
0
|
|
|
|
|
0
|
if ($self->log->is_trace) { |
199
|
|
|
|
|
|
|
$self->log->trace("Cost $cost estimated for\n".$plan->as_string); |
200
|
|
|
|
|
|
|
} else { |
201
|
5
|
|
|
|
|
7
|
$self->log->debug('Estimated cost for \''.ref($plan).'\' is '.$cost); |
|
5
|
|
|
|
|
12
|
|
202
|
5
|
|
|
|
|
85
|
} |
203
|
|
|
|
|
|
|
return $cost; |
204
|
|
|
|
|
|
|
} |
205
|
|
|
|
|
|
|
} |
206
|
772
|
|
|
|
|
12336
|
} |
207
|
772
|
50
|
|
|
|
26285
|
|
208
|
0
|
|
|
|
|
0
|
use Encode qw(encode); |
209
|
|
|
|
|
|
|
use Attean::RDF; |
210
|
772
|
|
|
|
|
18408
|
use LWP::UserAgent; |
211
|
|
|
|
|
|
|
use Scalar::Util qw(blessed reftype); |
212
|
772
|
|
|
|
|
8997
|
use List::Util qw(reduce); |
213
|
|
|
|
|
|
|
use List::MoreUtils qw(all any); |
214
|
|
|
|
|
|
|
use Types::Standard qw(Int ConsumerOf InstanceOf); |
215
|
|
|
|
|
|
|
use URI::Escape; |
216
|
|
|
|
|
|
|
use Algorithm::Combinatorics qw(subsets); |
217
|
|
|
|
|
|
|
use List::Util qw(min); |
218
|
50
|
|
|
50
|
|
28632
|
use Math::Cartesian::Product; |
|
50
|
|
|
|
|
161
|
|
|
50
|
|
|
|
|
2561
|
|
219
|
50
|
|
|
50
|
|
354
|
|
|
50
|
|
|
|
|
124
|
|
|
50
|
|
|
|
|
407
|
|
220
|
50
|
|
|
50
|
|
37536
|
use Moo::Role; |
|
50
|
|
|
|
|
105
|
|
|
50
|
|
|
|
|
1089
|
|
221
|
50
|
|
|
50
|
|
261
|
|
|
50
|
|
|
|
|
96
|
|
|
50
|
|
|
|
|
2210
|
|
222
|
50
|
|
|
50
|
|
313
|
with 'Attean::API::JoinPlanner'; |
|
50
|
|
|
|
|
104
|
|
|
50
|
|
|
|
|
2592
|
|
223
|
50
|
|
|
50
|
|
281
|
with 'Attean::API::SimpleCostPlanner'; |
|
50
|
|
|
|
|
97
|
|
|
50
|
|
|
|
|
360
|
|
224
|
50
|
|
|
50
|
|
49308
|
|
|
50
|
|
|
|
|
114
|
|
|
50
|
|
|
|
|
310
|
|
225
|
50
|
|
|
50
|
|
31726
|
my $self = shift; |
|
50
|
|
|
|
|
134
|
|
|
50
|
|
|
|
|
2456
|
|
226
|
50
|
|
|
50
|
|
24797
|
my $model = shift; |
|
50
|
|
|
|
|
161108
|
|
|
50
|
|
|
|
|
3105
|
|
227
|
50
|
|
|
50
|
|
424
|
my $active_graphs = shift; |
|
50
|
|
|
|
|
110
|
|
|
50
|
|
|
|
|
2114
|
|
228
|
50
|
|
|
50
|
|
289
|
my $default_graphs = shift; |
|
50
|
|
|
|
|
103
|
|
|
50
|
|
|
|
|
1643
|
|
229
|
|
|
|
|
|
|
my $interesting = shift; |
230
|
50
|
|
|
50
|
|
270
|
my @args = @_; # each $args[$i] here is an array reference containing alternate plans for element $i |
|
50
|
|
|
|
|
135
|
|
|
50
|
|
|
|
|
362
|
|
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
my $k = 3; # this is the batch size over which to do full dynamic programming |
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
# initialize $optPlan{$i} to be a set of alternate plans for evaluating element $i |
235
|
|
|
|
|
|
|
my %optPlan; |
236
|
25
|
|
|
25
|
0
|
37
|
foreach my $i (0 .. $#args) { |
237
|
25
|
|
|
|
|
34
|
$optPlan{$i} = [$self->prune_plans($model, $interesting, $args[$i])]; |
238
|
25
|
|
|
|
|
36
|
} |
239
|
25
|
|
|
|
|
33
|
|
240
|
25
|
|
|
|
|
40
|
my @todo = (0 .. $#args); # initialize the todo list to all elements |
241
|
25
|
|
|
|
|
71
|
my $next_symbol = 'a'; # when we start batching together sub-plans, we'll rename them with letters (e.g. elements 1, 2, and 4 might become 'a', and then 3, 5, and 'a' become 'b') |
242
|
|
|
|
|
|
|
|
243
|
25
|
|
|
|
|
40
|
# until we've joined all the elements in todo and are left with a set of plans for the join of all elements |
244
|
|
|
|
|
|
|
while (scalar(@todo) > 1) { |
245
|
|
|
|
|
|
|
$k = ($k < scalar(@todo)) ? $k : scalar(@todo); # in case we're joining fewer than the batch size |
246
|
25
|
|
|
|
|
38
|
foreach my $i (2 .. $k) { # we've already initialized plans for evaluating single elements; now consider plans for groups of elements (with group sizes 2, 3, ..., $k) |
247
|
25
|
|
|
|
|
77
|
foreach my $s (subsets(\@todo, $i)) { # pick a subset of size $i of the elements that need to be planned |
248
|
39
|
|
|
|
|
105
|
my $s_key = join('.', sort @$s); |
249
|
|
|
|
|
|
|
$optPlan{$s_key} = []; |
250
|
|
|
|
|
|
|
foreach my $o (subsets($s)) { # partition the subset s into two (o and not_o) |
251
|
25
|
|
|
|
|
65
|
next if (scalar(@$o) == 0); # only consider proper, non-empty subsets |
252
|
25
|
|
|
|
|
52
|
next if (scalar(@$o) == scalar(@$s)); # only consider proper, non-empty subsets |
253
|
|
|
|
|
|
|
my $o_key = join('.', sort @$o); |
254
|
|
|
|
|
|
|
my %o = map { $_ => 1 } @$o; |
255
|
25
|
|
|
|
|
67
|
my $not_o_key = join('.', sort grep { not exists $o{$_} } @$s); |
256
|
13
|
50
|
|
|
|
46
|
|
257
|
13
|
|
|
|
|
39
|
my $lhs = $optPlan{$o_key}; # get the plans for evaluating o |
258
|
14
|
|
|
|
|
64
|
my $rhs = $optPlan{$not_o_key}; # get the plans for evaluating not_o |
259
|
16
|
|
|
|
|
1291
|
|
260
|
16
|
|
|
|
|
42
|
# compute and store all the possible ways to evaluate s (o ⋈ not_o) |
261
|
16
|
|
|
|
|
45
|
push(@{ $optPlan{$s_key} }, $self->join_plans($model, $active_graphs, $default_graphs, $lhs, $rhs, 'inner')); |
262
|
68
|
100
|
|
|
|
1410
|
$optPlan{$s_key} = [$self->prune_plans($model, $interesting, $optPlan{$s_key})]; |
263
|
52
|
100
|
|
|
|
120
|
} |
264
|
36
|
|
|
|
|
93
|
} |
265
|
36
|
|
|
|
|
61
|
} |
|
39
|
|
|
|
|
124
|
|
266
|
36
|
|
|
|
|
77
|
|
|
78
|
|
|
|
|
171
|
|
267
|
|
|
|
|
|
|
# find the minimum cost plan $p that computes the join over $k elements (the elements end up in @v) |
268
|
36
|
|
|
|
|
73
|
my %min_plans; |
269
|
36
|
|
|
|
|
54
|
foreach my $w (subsets(\@todo, $k)) { |
270
|
|
|
|
|
|
|
my $w_key = join('.', sort @$w); |
271
|
|
|
|
|
|
|
my $plans = $optPlan{$w_key}; |
272
|
36
|
|
|
|
|
44
|
my @costs = map { $self->cost_for_plan($_, $model) => [$_, $w] } @$plans; |
|
36
|
|
|
|
|
152
|
|
273
|
36
|
|
|
|
|
144
|
my %costs = @costs; |
274
|
|
|
|
|
|
|
my $min = min keys %costs; |
275
|
|
|
|
|
|
|
my @min_plans; |
276
|
|
|
|
|
|
|
while (my ($cost, $data) = splice(@costs, 0, 2)) { |
277
|
|
|
|
|
|
|
if ($cost == $min) { |
278
|
|
|
|
|
|
|
push(@min_plans, $data); |
279
|
13
|
|
|
|
|
28
|
} |
280
|
13
|
|
|
|
|
52
|
} |
281
|
13
|
|
|
|
|
1038
|
$min_plans{ $min } = \@min_plans; |
282
|
13
|
|
|
|
|
32
|
} |
283
|
13
|
|
|
|
|
41
|
my $min_cost = min keys %min_plans; |
|
60
|
|
|
|
|
1133
|
|
284
|
13
|
|
|
|
|
124
|
my $min_plans = $min_plans{$min_cost}; |
285
|
13
|
|
|
|
|
92
|
my @min_plans; |
286
|
13
|
|
|
|
|
36
|
my $min_key; |
287
|
13
|
|
|
|
|
55
|
foreach my $d (@$min_plans) { |
288
|
60
|
100
|
|
|
|
122
|
my ($p, $v) = @$d; |
289
|
51
|
|
|
|
|
119
|
my $v_key = join('.', sort @$v); |
290
|
|
|
|
|
|
|
if (not(defined($min_key)) or $min_key eq $v_key) { |
291
|
|
|
|
|
|
|
push(@min_plans, $p); |
292
|
13
|
|
|
|
|
52
|
$min_key = $v_key; |
293
|
|
|
|
|
|
|
} |
294
|
13
|
|
|
|
|
46
|
} |
295
|
13
|
|
|
|
|
27
|
# my ($p, $v) = @$min_plan; |
296
|
13
|
|
|
|
|
35
|
# my $v_key = join('.', sort @$v); |
297
|
|
|
|
|
|
|
# warn "Choosing join for $v_key\n"; |
298
|
13
|
|
|
|
|
35
|
|
299
|
51
|
|
|
|
|
105
|
# generate a new symbol $t to stand in for $p, the join over the elements in @v |
300
|
51
|
|
|
|
|
114
|
my $t = $next_symbol++; |
301
|
51
|
50
|
66
|
|
|
146
|
|
302
|
51
|
|
|
|
|
65
|
# remove elements in @v from the todo list, and replace them by the new composite element $t |
303
|
51
|
|
|
|
|
75
|
$optPlan{$t} = [@min_plans]; |
304
|
|
|
|
|
|
|
my %v = map { $_ => 1 } split(/[.]/, $min_key); |
305
|
|
|
|
|
|
|
push(@todo, $t); |
306
|
|
|
|
|
|
|
@todo = grep { not exists $v{$_} } @todo; |
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
# also remove subsets of @v from the optPlan hash as they are now covered by $optPlan{$t} |
309
|
|
|
|
|
|
|
foreach my $o (subsets([keys %v])) { |
310
|
|
|
|
|
|
|
my $o_key = join('.', sort @$o); |
311
|
13
|
|
|
|
|
44
|
# warn "deleting $o_key\n"; |
312
|
|
|
|
|
|
|
delete $optPlan{$o_key}; |
313
|
|
|
|
|
|
|
} |
314
|
13
|
|
|
|
|
36
|
} |
315
|
13
|
|
|
|
|
60
|
|
|
27
|
|
|
|
|
68
|
|
316
|
13
|
|
|
|
|
35
|
my $final_key = join('.', sort @todo); |
317
|
13
|
|
|
|
|
28
|
# use Data::Dumper; |
|
40
|
|
|
|
|
95
|
|
318
|
|
|
|
|
|
|
# warn Dumper($optPlan{$final_key}); |
319
|
|
|
|
|
|
|
return $self->prune_plans($model, $interesting, $optPlan{$final_key}); |
320
|
13
|
|
|
|
|
55
|
} |
321
|
56
|
|
|
|
|
1196
|
|
322
|
|
|
|
|
|
|
my $self = shift; |
323
|
56
|
|
|
|
|
214
|
my $model = shift; |
324
|
|
|
|
|
|
|
my $interesting = shift; |
325
|
|
|
|
|
|
|
my @plans = @{ shift || [] }; |
326
|
|
|
|
|
|
|
no sort 'stable'; |
327
|
25
|
|
|
|
|
66
|
my @sorted = map { $_->[1] } sort { $a->[0] <=> $b->[0] } map { [$self->cost_for_plan($_, $model), $_] } @plans; |
328
|
|
|
|
|
|
|
if ($self->log->is_trace) { |
329
|
|
|
|
|
|
|
$self->log->trace('============= Plan iteration separator =============='); |
330
|
25
|
|
|
|
|
71
|
foreach my $plan (@sorted){ |
331
|
|
|
|
|
|
|
$self->log->trace("Cost: " . $self->cost_for_plan($plan, $model) . " for plan:\n". $plan->as_string); |
332
|
|
|
|
|
|
|
} |
333
|
|
|
|
|
|
|
} |
334
|
125
|
|
|
125
|
0
|
202
|
return splice(@sorted, 0, 5); |
335
|
125
|
|
|
|
|
154
|
} |
336
|
125
|
|
|
|
|
155
|
|
337
|
125
|
50
|
|
|
|
149
|
# Return a cost value for $plan. This value is basically opaque, except |
|
125
|
|
|
|
|
368
|
|
338
|
50
|
|
|
50
|
|
54966
|
# that it will be used to sort plans by cost when determining which is the |
|
50
|
|
|
|
|
141
|
|
|
50
|
|
|
|
|
355
|
|
339
|
125
|
|
|
|
|
219
|
# cheapest plan to evaluate. |
|
470
|
|
|
|
|
887
|
|
|
810
|
|
|
|
|
1144
|
|
|
470
|
|
|
|
|
8051
|
|
340
|
125
|
50
|
|
|
|
1918
|
my $self = shift; |
341
|
0
|
|
|
|
|
0
|
my $plan = shift; |
342
|
0
|
|
|
|
|
0
|
my $model = shift; |
343
|
0
|
|
|
|
|
0
|
Carp::confess "No model given" unless (blessed($model) and $model->does('Attean::API::Model')); |
344
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
if ($plan->has_cost) { |
346
|
125
|
|
|
|
|
2525
|
return $plan->cost; |
347
|
|
|
|
|
|
|
} else { |
348
|
|
|
|
|
|
|
if ($model->does('Attean::API::CostPlanner')) { |
349
|
|
|
|
|
|
|
if (defined(my $cost = $model->cost_for_plan($plan, $self))) { |
350
|
|
|
|
|
|
|
$plan->cost($cost); |
351
|
|
|
|
|
|
|
$self->log->info('Model \''.ref($model).'\' did cost planning for \''.ref($plan).'\' and got cost '.$cost); |
352
|
|
|
|
|
|
|
return $cost; |
353
|
1075
|
|
|
1075
|
0
|
38250
|
} |
354
|
1075
|
|
|
|
|
1333
|
} |
355
|
1075
|
|
|
|
|
1256
|
|
356
|
1075
|
50
|
33
|
|
|
3782
|
my $cost = 1; |
357
|
|
|
|
|
|
|
my @children = @{ $plan->children }; |
358
|
1075
|
100
|
|
|
|
15343
|
if ($plan->isa('Attean::Plan::Quad')) { |
359
|
796
|
|
|
|
|
10777
|
my @vars = map { $_->value } grep { blessed($_) and $_->does('Attean::API::Variable') } $plan->values; |
360
|
|
|
|
|
|
|
# This gives a cost increasing at a reasonable pace |
361
|
279
|
50
|
|
|
|
531
|
$cost = $self->_hsp_heuristic_triple_sum($plan) * scalar(@vars); |
362
|
279
|
100
|
|
|
|
7763
|
} elsif ($plan->isa('Attean::Plan::Table')) { |
363
|
29
|
|
|
|
|
650
|
my $rows = $plan->rows; |
364
|
29
|
|
|
|
|
1104
|
$cost = scalar(@$rows); |
365
|
29
|
|
|
|
|
430
|
} elsif ($plan->isa('Attean::Plan::NestedLoopJoin')) { |
366
|
|
|
|
|
|
|
my $lcost = $self->cost_for_plan($children[0], $model); |
367
|
|
|
|
|
|
|
my $rcost = $self->cost_for_plan($children[1], $model); |
368
|
|
|
|
|
|
|
if ($lcost == 0) { |
369
|
250
|
|
|
|
|
1920
|
$cost = $rcost; |
370
|
250
|
|
|
|
|
318
|
} elsif ($rcost == 0) { |
|
250
|
|
|
|
|
810
|
|
371
|
250
|
100
|
|
|
|
1510
|
$cost = $lcost; |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
372
|
4
|
100
|
|
|
|
10
|
} else { |
|
4
|
|
|
|
|
14
|
|
|
16
|
|
|
|
|
219
|
|
373
|
|
|
|
|
|
|
my $mult = $self->_penalize_joins($plan); |
374
|
4
|
|
|
|
|
34
|
# warn "$mult * ($lcost * $rcost) [$children[0] $children[1]]"; |
375
|
|
|
|
|
|
|
$cost = $mult * $lcost * $rcost; |
376
|
0
|
|
|
|
|
0
|
} |
377
|
0
|
|
|
|
|
0
|
} elsif ($plan->isa('Attean::Plan::HashJoin')) { |
378
|
|
|
|
|
|
|
my $lcost = $self->cost_for_plan($children[0], $model); |
379
|
124
|
|
|
|
|
2136
|
my $rcost = $self->cost_for_plan($children[1], $model); |
380
|
124
|
|
|
|
|
2270
|
if ($lcost == 0) { |
381
|
124
|
100
|
|
|
|
761
|
$cost = $rcost; |
|
|
100
|
|
|
|
|
|
382
|
2
|
|
|
|
|
4
|
} elsif ($rcost == 0) { |
383
|
|
|
|
|
|
|
$cost = $lcost; |
384
|
2
|
|
|
|
|
5
|
} else { |
385
|
|
|
|
|
|
|
my $mult = $self->_penalize_joins($plan); |
386
|
120
|
|
|
|
|
248
|
# warn "$mult * ($lcost + $rcost)"; |
387
|
|
|
|
|
|
|
$cost = $mult * ($lcost + $rcost); |
388
|
120
|
|
|
|
|
197
|
$cost += ($lcost < $rcost); # To let the plan with cheaper rhs win |
389
|
|
|
|
|
|
|
} |
390
|
|
|
|
|
|
|
} elsif ($plan->isa('Attean::Plan::Service')) { |
391
|
104
|
|
|
|
|
1790
|
my $scost = 10; |
392
|
104
|
|
|
|
|
1901
|
foreach my $c (@{ $plan->children }) { |
393
|
104
|
50
|
|
|
|
663
|
$scost += $self->cost_for_plan($c, $model); |
|
|
50
|
|
|
|
|
|
394
|
0
|
|
|
|
|
0
|
} |
395
|
|
|
|
|
|
|
$cost = 5 * $scost; |
396
|
0
|
|
|
|
|
0
|
} elsif ($plan->isa('Attean::Plan::Unique')) { |
397
|
|
|
|
|
|
|
$cost = 0; # consider a filter on the iterator (like unique) to be essentially free |
398
|
104
|
|
|
|
|
218
|
foreach my $c (@{ $plan->children }) { |
399
|
|
|
|
|
|
|
$cost += $self->cost_for_plan($c, $model); |
400
|
104
|
|
|
|
|
184
|
} |
401
|
104
|
|
|
|
|
129
|
} else { |
402
|
|
|
|
|
|
|
foreach my $c (@{ $plan->children }) { |
403
|
|
|
|
|
|
|
$cost += $self->cost_for_plan($c, $model); |
404
|
1
|
|
|
|
|
2
|
} |
405
|
1
|
|
|
|
|
3
|
} |
|
1
|
|
|
|
|
5
|
|
406
|
0
|
|
|
|
|
0
|
|
407
|
|
|
|
|
|
|
# Costs must be integers for comparisons to work in the IDP planning algorithm |
408
|
1
|
|
|
|
|
3
|
$cost = int($cost); |
409
|
|
|
|
|
|
|
$plan->cost($cost); |
410
|
0
|
|
|
|
|
0
|
return $cost; |
411
|
0
|
|
|
|
|
0
|
} |
|
0
|
|
|
|
|
0
|
|
412
|
0
|
|
|
|
|
0
|
} |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
|
415
|
17
|
|
|
|
|
19
|
# The below function finds a number to aid sorting |
|
17
|
|
|
|
|
34
|
|
416
|
17
|
|
|
|
|
292
|
# It takes into account Heuristic 1 and 4 of the HSP paper, see REFERENCES |
417
|
|
|
|
|
|
|
# as well as that it was noted in the text that rdf:type is usually less selective. |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
# By assigning the integers to nodes, depending on whether they are in |
420
|
|
|
|
|
|
|
# triple (subject, predicate, object), variables, rdf:type and |
421
|
250
|
|
|
|
|
396
|
# literals, and sum them, they may be sorted. See code for the actual |
422
|
250
|
|
|
|
|
3903
|
# values used. |
423
|
250
|
|
|
|
|
6004
|
|
424
|
|
|
|
|
|
|
# Denoting s for bound subject, p for bound predicate, a for rdf:type |
425
|
|
|
|
|
|
|
# as predicate, o for bound object and l for literal object and ? for |
426
|
|
|
|
|
|
|
# variable, we get the following order, most of which are identical to |
427
|
|
|
|
|
|
|
# the HSP: |
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
# spl: 6 |
430
|
|
|
|
|
|
|
# spo: 8 |
431
|
|
|
|
|
|
|
# sao: 10 |
432
|
|
|
|
|
|
|
# s?l: 14 |
433
|
|
|
|
|
|
|
# s?o: 16 |
434
|
|
|
|
|
|
|
# ?pl: 25 |
435
|
|
|
|
|
|
|
# ?po: 27 |
436
|
|
|
|
|
|
|
# ?ao: 29 |
437
|
|
|
|
|
|
|
# sp?: 30 |
438
|
|
|
|
|
|
|
# sa?: 32 |
439
|
|
|
|
|
|
|
# ??l: 33 |
440
|
|
|
|
|
|
|
# ??o: 35 |
441
|
|
|
|
|
|
|
# s??: 38 |
442
|
|
|
|
|
|
|
# ?p?: 49 |
443
|
|
|
|
|
|
|
# ?a?: 51 |
444
|
|
|
|
|
|
|
# ???: 57 |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
# Note that this number is not intended as an estimate of selectivity, |
447
|
|
|
|
|
|
|
# merely a sorting key, but further research may possibly create such |
448
|
|
|
|
|
|
|
# numbers. |
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
my ($self, $t) = @_; |
451
|
|
|
|
|
|
|
my $sum = 0; |
452
|
|
|
|
|
|
|
if ($t->subject->does('Attean::API::Variable')) { |
453
|
|
|
|
|
|
|
$sum = 20; |
454
|
|
|
|
|
|
|
} else { |
455
|
|
|
|
|
|
|
$sum = 1; |
456
|
|
|
|
|
|
|
} |
457
|
|
|
|
|
|
|
if ($t->predicate->does('Attean::API::Variable')) { |
458
|
|
|
|
|
|
|
$sum += 10; |
459
|
|
|
|
|
|
|
} else { |
460
|
|
|
|
|
|
|
if ($t->predicate->equals(iri('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'))) { |
461
|
|
|
|
|
|
|
$sum += 4; |
462
|
|
|
|
|
|
|
} else { |
463
|
|
|
|
|
|
|
$sum += 2; |
464
|
4
|
|
|
4
|
|
9
|
} |
465
|
4
|
|
|
|
|
7
|
} |
466
|
4
|
50
|
|
|
|
16
|
if ($t->object->does('Attean::API::Variable')) { |
467
|
0
|
|
|
|
|
0
|
$sum += 27; |
468
|
|
|
|
|
|
|
} elsif ($t->object->does('Attean::API::Literal')) { |
469
|
4
|
|
|
|
|
76
|
$sum += 3; |
470
|
|
|
|
|
|
|
} else { |
471
|
4
|
50
|
|
|
|
13
|
$sum += 5; |
472
|
0
|
|
|
|
|
0
|
} |
473
|
|
|
|
|
|
|
return $sum; |
474
|
4
|
50
|
|
|
|
72
|
} |
475
|
0
|
|
|
|
|
0
|
|
476
|
|
|
|
|
|
|
# The following method returns a factor used to penalize certain types of joins. |
477
|
4
|
|
|
|
|
6
|
# It penalizes cartesian joins heavily, but also uses HSP Heuristic 2 (see REFERENCES) |
478
|
|
|
|
|
|
|
my ($self, $plan) = @_; |
479
|
|
|
|
|
|
|
my $jv = $plan->join_variables; |
480
|
4
|
50
|
|
|
|
22
|
my @children = @{ $plan->children }; |
|
|
0
|
|
|
|
|
|
481
|
4
|
|
|
|
|
59
|
my $mult = 1; |
482
|
|
|
|
|
|
|
if (scalar(@$jv)) { |
483
|
0
|
|
|
|
|
0
|
if ( all { $_->isa('Attean::Plan::Quad') } @children[0..1]) { |
484
|
|
|
|
|
|
|
my $var = ${$jv}[0]; # We will join on this |
485
|
0
|
|
|
|
|
0
|
my @lnodes = $children[0]->values; |
486
|
|
|
|
|
|
|
my @rnodes = $children[1]->values; |
487
|
4
|
|
|
|
|
11
|
# Now, find where the join variables are in the triple patterns |
488
|
|
|
|
|
|
|
my %joinpos; |
489
|
|
|
|
|
|
|
for (my $i = 0; $i <= 2; $i++) { |
490
|
|
|
|
|
|
|
if ($lnodes[$i]->does('Attean::API::Variable') && $lnodes[$i]->value eq $var) { |
491
|
|
|
|
|
|
|
$joinpos{l} = $i; |
492
|
|
|
|
|
|
|
} |
493
|
224
|
|
|
224
|
|
331
|
if ($rnodes[$i]->does('Attean::API::Variable') && $rnodes[$i]->value eq $var) { |
494
|
224
|
|
|
|
|
408
|
$joinpos{r} = $i; |
495
|
224
|
|
|
|
|
268
|
} |
|
224
|
|
|
|
|
441
|
|
496
|
224
|
|
|
|
|
342
|
last if scalar keys(%joinpos) >= 2; # Perhaps a bit premature optimization |
497
|
224
|
100
|
|
|
|
357
|
} |
498
|
208
|
100
|
|
344
|
|
1021
|
my $joinpos = join("", sort values(%joinpos)); # We can now match on this string |
|
344
|
|
|
|
|
975
|
|
499
|
64
|
|
|
|
|
84
|
my %costs = ('12' => 1.1, # The penalty numbers come mostly out from thin air |
|
64
|
|
|
|
|
115
|
|
500
|
64
|
|
|
|
|
174
|
'01' => 1.2, |
501
|
64
|
|
|
|
|
168
|
'02' => 1.5, |
502
|
|
|
|
|
|
|
'22' => 1.6, |
503
|
64
|
|
|
|
|
106
|
'00' => 1.8, |
504
|
64
|
|
|
|
|
134
|
'11' => 2); |
505
|
64
|
50
|
33
|
|
|
193
|
if (exists $costs{$joinpos}) { |
506
|
64
|
|
|
|
|
1041
|
$mult = $costs{$joinpos}; |
507
|
|
|
|
|
|
|
} |
508
|
64
|
50
|
33
|
|
|
115
|
#warn "Penalty: $mult for quads:\n" . $children[0]->as_string . $children[1]->as_string |
509
|
64
|
|
|
|
|
793
|
} |
510
|
|
|
|
|
|
|
} else { |
511
|
64
|
50
|
|
|
|
151
|
$mult = 5; # penalize cartesian joins |
512
|
|
|
|
|
|
|
} |
513
|
64
|
|
|
|
|
241
|
return $mult; |
514
|
64
|
|
|
|
|
214
|
} |
515
|
|
|
|
|
|
|
} |
516
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
1; |
519
|
|
|
|
|
|
|
|
520
|
64
|
50
|
|
|
|
111
|
|
521
|
64
|
|
|
|
|
198
|
=back |
522
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
=head1 BUGS |
524
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
Please report any bugs or feature requests to through the GitHub web interface |
526
|
16
|
|
|
|
|
20
|
at L<https://github.com/kasei/attean/issues>. |
527
|
|
|
|
|
|
|
|
528
|
224
|
|
|
|
|
626
|
=head1 REFERENCES |
529
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
The seminal reference for Iterative Dynamic Programming is "Iterative |
531
|
|
|
|
|
|
|
dynamic programming: a new class of query optimization algorithms" by |
532
|
|
|
|
|
|
|
D. Kossmann and K. Stocker, ACM Transactions on Database Systems |
533
|
|
|
|
|
|
|
(2000). |
534
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
The heuristics to order triple patterns in this module is |
536
|
|
|
|
|
|
|
influenced by L<The ICS-FORTH Heuristics-based SPARQL Planner |
537
|
|
|
|
|
|
|
(HSP)|http://www.ics.forth.gr/isl/index_main.php?l=e&c=645>. |
538
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
=head1 SEE ALSO |
541
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
|
543
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
=head1 AUTHOR |
545
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
Gregory Todd Williams C<< <gwilliams@cpan.org> >> |
547
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
=head1 COPYRIGHT |
549
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
Copyright (c) 2014--2022 Gregory Todd Williams. |
551
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it under |
552
|
|
|
|
|
|
|
the same terms as Perl itself. |
553
|
|
|
|
|
|
|
|
554
|
|
|
|
|
|
|
=cut |