line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
50
|
|
|
50
|
|
614
|
use v5.14; |
|
50
|
|
|
|
|
154
|
|
2
|
50
|
|
|
50
|
|
274
|
use warnings; |
|
50
|
|
|
|
|
102
|
|
|
50
|
|
|
|
|
2269
|
|
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
=head1 NAME |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
Attean::API::IDPJoinPlanner - Iterative dynamic programming query planning role |
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
=head1 VERSION |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
This document describes Attean::API::IDPJoinPlanner version 0.032 |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
=head1 SYNOPSIS |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
extends 'Attean::QueryPlanner'; |
15
|
|
|
|
|
|
|
with 'Attean::API::IDPJoinPlanner'; |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
=head1 DESCRIPTION |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
The Attean::API::IDPJoinPlanner role provides a query planner the |
20
|
|
|
|
|
|
|
C<< joins_for_plan_alternatives >> method, as well as the cost estimation |
21
|
|
|
|
|
|
|
methods that consume the L<Attean::API::CostPlanner> role. |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
=head1 ATTRIBUTES |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
=over 4 |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
=back |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
=head1 METHODS |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=over 4 |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
=cut |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
use Types::Standard qw(CodeRef); |
36
|
50
|
|
|
50
|
|
274
|
|
|
50
|
|
|
|
|
91
|
|
|
50
|
|
|
|
|
365
|
|
37
|
|
|
|
|
|
|
use Moo::Role; |
38
|
50
|
|
|
50
|
|
22094
|
|
|
50
|
|
|
|
|
106
|
|
|
50
|
|
|
|
|
259
|
|
39
|
|
|
|
|
|
|
requires 'plan_for_algebra'; # plan_for_algebra($algebra, $model, \@default_graphs) |
40
|
|
|
|
|
|
|
} |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
use Scalar::Util qw(refaddr); |
43
|
|
|
|
|
|
|
use Types::Standard qw(CodeRef); |
44
|
50
|
|
|
50
|
|
19178
|
|
|
50
|
|
|
|
|
111
|
|
|
50
|
|
|
|
|
2580
|
|
45
|
50
|
|
|
50
|
|
312
|
use Moo::Role; |
|
50
|
|
|
|
|
124
|
|
|
50
|
|
|
|
|
250
|
|
46
|
|
|
|
|
|
|
use namespace::clean; |
47
|
50
|
|
|
50
|
|
19692
|
with 'Attean::API::QueryPlanner'; |
|
50
|
|
|
|
|
115
|
|
|
50
|
|
|
|
|
201
|
|
48
|
50
|
|
|
50
|
|
15615
|
|
|
50
|
|
|
|
|
126
|
|
|
50
|
|
|
|
|
798
|
|
49
|
|
|
|
|
|
|
requires 'plans_for_algebra'; # plans_for_algebra($algebra, $model, \@active_graphs, \@default_graphs) |
50
|
|
|
|
|
|
|
requires 'cost_for_plan'; # cost_for_plan($plan, $model) |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
before 'cost_for_plan' => sub { |
53
|
|
|
|
|
|
|
my $self = shift; |
54
|
|
|
|
|
|
|
my $plan = shift; |
55
|
|
|
|
|
|
|
my $model = shift; |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
if (refaddr($self) == refaddr($model)) { |
58
|
|
|
|
|
|
|
Carp::confess "Model and planner objects cannot be the same in call to cost_for_plan"; |
59
|
|
|
|
|
|
|
} elsif ($self->does('Attean::API::Model') and $model->does('Attean::API::Model')) { |
60
|
|
|
|
|
|
|
Carp::confess "Model and planner objects cannot both consume Attean::API::Model in call to cost_for_plan"; |
61
|
|
|
|
|
|
|
} |
62
|
|
|
|
|
|
|
}; |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
my $self = shift; |
65
|
|
|
|
|
|
|
my $algebra = shift; |
66
|
|
|
|
|
|
|
my $model = shift; |
67
|
41
|
|
|
41
|
0
|
25809
|
my $default_graphs = shift; |
68
|
41
|
|
|
|
|
82
|
my $active_graphs = $default_graphs; |
69
|
41
|
|
|
|
|
76
|
my @plans = sort { $self->cost_for_plan($a, $model) <=> $self->cost_for_plan($b, $model) } $self->plans_for_algebra($algebra, $model, $active_graphs, $default_graphs); |
70
|
41
|
|
|
|
|
70
|
my $plan = shift(@plans); |
71
|
41
|
|
|
|
|
75
|
return $plan; |
72
|
41
|
|
|
|
|
236
|
} |
|
93
|
|
|
|
|
1755
|
|
73
|
41
|
|
|
|
|
1171
|
} |
74
|
41
|
|
|
|
|
430
|
|
75
|
|
|
|
|
|
|
use Moo::Role; |
76
|
|
|
|
|
|
|
requires 'joins_for_plan_alternatives'; |
77
|
|
|
|
|
|
|
} |
78
|
|
|
|
|
|
|
|
79
|
50
|
|
|
50
|
|
41144
|
use Math::Cartesian::Product; |
|
50
|
|
|
|
|
105
|
|
|
50
|
|
|
|
|
191
|
|
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
use Moo::Role; |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
with 'Attean::API::JoinPlanner'; |
84
|
50
|
|
|
50
|
|
16804
|
with 'Attean::API::QueryPlanner'; |
|
50
|
|
|
|
|
147
|
|
|
50
|
|
|
|
|
2428
|
|
85
|
|
|
|
|
|
|
|
86
|
50
|
|
|
50
|
|
328
|
my $self = shift; |
|
50
|
|
|
|
|
123
|
|
|
50
|
|
|
|
|
236
|
|
87
|
|
|
|
|
|
|
my $model = shift; |
88
|
|
|
|
|
|
|
my $active_graphs = shift; |
89
|
|
|
|
|
|
|
my $default_graphs = shift; |
90
|
|
|
|
|
|
|
my $interesting = shift; |
91
|
|
|
|
|
|
|
my @args = @_; # each $args[$i] here is an array reference containing alternate plans for element $i |
92
|
47
|
|
|
47
|
0
|
68
|
|
93
|
47
|
|
|
|
|
62
|
my $plans = shift(@args); |
94
|
47
|
|
|
|
|
66
|
while (scalar(@args)) { |
95
|
47
|
|
|
|
|
61
|
my $next = shift(@args); |
96
|
47
|
|
|
|
|
70
|
my @plans = $self->join_plans($model, $active_graphs, $default_graphs, $plans, $next, 'inner'); |
97
|
47
|
|
|
|
|
103
|
$plans = \@plans; |
98
|
|
|
|
|
|
|
} |
99
|
47
|
|
|
|
|
75
|
|
100
|
47
|
|
|
|
|
115
|
my @plans = @$plans; |
101
|
35
|
|
|
|
|
51
|
return @plans; |
102
|
35
|
|
|
|
|
206
|
} |
103
|
35
|
|
|
|
|
129
|
} |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
use Types::Standard qw(Int); |
106
|
47
|
|
|
|
|
119
|
use Scalar::Util qw(blessed); |
107
|
47
|
|
|
|
|
212
|
|
108
|
|
|
|
|
|
|
use Moo::Role; |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
with 'Attean::API::CostPlanner'; |
111
|
|
|
|
|
|
|
with 'MooX::Log::Any'; |
112
|
50
|
|
|
50
|
|
22273
|
|
|
50
|
|
|
|
|
122
|
|
|
50
|
|
|
|
|
298
|
|
113
|
50
|
|
|
50
|
|
21105
|
has 'keep' => (is => 'ro', isa => Int, default => 5); |
|
50
|
|
|
|
|
108
|
|
|
50
|
|
|
|
|
2053
|
|
114
|
|
|
|
|
|
|
|
115
|
50
|
|
|
50
|
|
290
|
around 'joins_for_plan_alternatives' => sub { |
|
50
|
|
|
|
|
84
|
|
|
50
|
|
|
|
|
223
|
|
116
|
|
|
|
|
|
|
my $orig = shift; |
117
|
|
|
|
|
|
|
my $self = shift; |
118
|
|
|
|
|
|
|
my $model = shift; |
119
|
|
|
|
|
|
|
my @plans = $orig->($self, $model, @_); |
120
|
|
|
|
|
|
|
return $self->prune_plans($model, [], \@plans); |
121
|
|
|
|
|
|
|
}; |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
my $self = shift; |
124
|
|
|
|
|
|
|
my $model = shift; |
125
|
|
|
|
|
|
|
my $interesting = shift; |
126
|
|
|
|
|
|
|
my @plans = @{ shift || [] }; |
127
|
|
|
|
|
|
|
no sort 'stable'; |
128
|
|
|
|
|
|
|
my @sorted = map { $_->[1] } sort { $a->[0] <=> $b->[0] } map { [$self->cost_for_plan($_, $model), $_] } @plans; |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
return ($self->keep) ? splice(@sorted, 0, $self->keep) : @sorted; |
131
|
26
|
|
|
26
|
0
|
41
|
} |
132
|
26
|
|
|
|
|
33
|
|
133
|
26
|
|
|
|
|
34
|
my $self = shift; |
134
|
26
|
50
|
|
|
|
42
|
my $plan = shift; |
|
26
|
|
|
|
|
92
|
|
135
|
50
|
|
|
50
|
|
47192
|
my $model = shift; |
|
50
|
|
|
|
|
25111
|
|
|
50
|
|
|
|
|
294
|
|
136
|
26
|
|
|
|
|
62
|
Carp::confess "No model given" unless (blessed($model) and $model->does('Attean::API::Model')); |
|
691
|
|
|
|
|
825
|
|
|
3668
|
|
|
|
|
3842
|
|
|
691
|
|
|
|
|
10556
|
|
137
|
|
|
|
|
|
|
|
138
|
26
|
50
|
|
|
|
1402
|
if ($plan->has_cost) { |
139
|
|
|
|
|
|
|
return $plan->cost; |
140
|
|
|
|
|
|
|
} else { |
141
|
|
|
|
|
|
|
if ($model->does('Attean::API::CostPlanner')) { |
142
|
2082
|
|
|
2082
|
0
|
110329
|
if (defined(my $cost = $model->cost_for_plan($plan, $self))) { |
143
|
2082
|
|
|
|
|
2500
|
$plan->cost($cost); |
144
|
2082
|
|
|
|
|
2276
|
$self->log->info('Model \''.ref($model).'\' did cost planning for \''.ref($plan).'\' and got cost '.$cost); |
145
|
2082
|
50
|
33
|
|
|
7392
|
return $cost; |
146
|
|
|
|
|
|
|
} |
147
|
2082
|
100
|
|
|
|
29587
|
} |
148
|
1283
|
|
|
|
|
17850
|
|
149
|
|
|
|
|
|
|
my $cost = 1; |
150
|
799
|
50
|
|
|
|
1445
|
my @children = @{ $plan->children }; |
151
|
799
|
100
|
|
|
|
21281
|
if ($plan->isa('Attean::Plan::Quad')) { |
152
|
27
|
|
|
|
|
491
|
my @vars = map { $_->value } grep { blessed($_) and $_->does('Attean::API::Variable') } $plan->values; |
153
|
27
|
|
|
|
|
1021
|
return scalar(@vars); |
154
|
27
|
|
|
|
|
383
|
} elsif ($plan->isa('Attean::Plan::Table')) { |
155
|
|
|
|
|
|
|
my $rows = $plan->rows; |
156
|
|
|
|
|
|
|
$cost = scalar(@$rows); |
157
|
|
|
|
|
|
|
} elsif ($plan->isa('Attean::Plan::NestedLoopJoin')) { |
158
|
772
|
|
|
|
|
5595
|
my $lcost = $self->cost_for_plan($children[0], $model); |
159
|
772
|
|
|
|
|
901
|
my $rcost = $self->cost_for_plan($children[1], $model); |
|
772
|
|
|
|
|
2286
|
|
160
|
772
|
50
|
|
|
|
4236
|
if ($lcost == 0) { |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
161
|
0
|
0
|
|
|
|
0
|
$cost = $rcost; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
162
|
0
|
|
|
|
|
0
|
} elsif ($rcost == 0) { |
163
|
|
|
|
|
|
|
$cost = $lcost; |
164
|
0
|
|
|
|
|
0
|
} else { |
165
|
0
|
|
|
|
|
0
|
$cost = $lcost * $rcost; |
166
|
|
|
|
|
|
|
} |
167
|
408
|
|
|
|
|
6658
|
|
168
|
408
|
|
|
|
|
10556
|
# a cartesian nested loop join is bad, but the algorithm already |
169
|
408
|
100
|
|
|
|
5789
|
# has to check for all possible joins, so it's not as bad as |
|
|
50
|
|
|
|
|
|
170
|
26
|
|
|
|
|
36
|
# a cartesian hash join (below) |
171
|
|
|
|
|
|
|
$cost *= 10 unless ($plan->children_are_variable_connected); |
172
|
0
|
|
|
|
|
0
|
} elsif ($plan->isa('Attean::Plan::HashJoin')) { |
173
|
|
|
|
|
|
|
my $joined = $plan->children_are_variable_connected; |
174
|
382
|
|
|
|
|
524
|
my $lcost = $self->cost_for_plan($children[0], $model); |
175
|
|
|
|
|
|
|
my $rcost = $self->cost_for_plan($children[1], $model); |
176
|
|
|
|
|
|
|
$cost = ($lcost + $rcost); |
177
|
|
|
|
|
|
|
$cost += ($lcost < $rcost); # To let the plan with cheaper rhs win |
178
|
|
|
|
|
|
|
$cost *= 100 unless ($plan->children_are_variable_connected); |
179
|
|
|
|
|
|
|
} elsif ($plan->isa('Attean::Plan::Service')) { |
180
|
408
|
100
|
|
|
|
1024
|
my $scost = 10; |
181
|
|
|
|
|
|
|
foreach my $c (@{ $plan->children }) { |
182
|
356
|
|
|
|
|
863
|
$scost += $self->cost_for_plan($c, $model); |
183
|
356
|
|
|
|
|
6786
|
} |
184
|
356
|
|
|
|
|
9790
|
$cost = 5 * $scost; |
185
|
356
|
|
|
|
|
4801
|
} elsif ($plan->isa('Attean::Plan::Unique')) { |
186
|
356
|
|
|
|
|
404
|
$cost = 0; # consider a filter on the iterator (like unique) to be essentially free |
187
|
356
|
50
|
|
|
|
832
|
foreach my $c (@{ $plan->children }) { |
188
|
|
|
|
|
|
|
$cost += $self->cost_for_plan($c, $model); |
189
|
3
|
|
|
|
|
7
|
} |
190
|
3
|
|
|
|
|
5
|
} else { |
|
3
|
|
|
|
|
11
|
|
191
|
0
|
|
|
|
|
0
|
foreach my $c (@{ $plan->children }) { |
192
|
|
|
|
|
|
|
$cost += $self->cost_for_plan($c, $model); |
193
|
3
|
|
|
|
|
10
|
} |
194
|
|
|
|
|
|
|
} |
195
|
0
|
|
|
|
|
0
|
|
196
|
0
|
|
|
|
|
0
|
$plan->cost($cost); |
|
0
|
|
|
|
|
0
|
|
197
|
0
|
|
|
|
|
0
|
if ($self->log->is_trace) { |
198
|
|
|
|
|
|
|
$self->log->trace("Cost $cost estimated for\n".$plan->as_string); |
199
|
|
|
|
|
|
|
} else { |
200
|
5
|
|
|
|
|
8
|
$self->log->debug('Estimated cost for \''.ref($plan).'\' is '.$cost); |
|
5
|
|
|
|
|
12
|
|
201
|
5
|
|
|
|
|
80
|
} |
202
|
|
|
|
|
|
|
return $cost; |
203
|
|
|
|
|
|
|
} |
204
|
|
|
|
|
|
|
} |
205
|
772
|
|
|
|
|
12007
|
} |
206
|
772
|
50
|
|
|
|
26047
|
|
207
|
0
|
|
|
|
|
0
|
use Encode qw(encode); |
208
|
|
|
|
|
|
|
use Attean::RDF; |
209
|
772
|
|
|
|
|
17691
|
use LWP::UserAgent; |
210
|
|
|
|
|
|
|
use Scalar::Util qw(blessed reftype); |
211
|
772
|
|
|
|
|
8755
|
use List::Util qw(reduce); |
212
|
|
|
|
|
|
|
use List::MoreUtils qw(all any); |
213
|
|
|
|
|
|
|
use Types::Standard qw(Int ConsumerOf InstanceOf); |
214
|
|
|
|
|
|
|
use URI::Escape; |
215
|
|
|
|
|
|
|
use Algorithm::Combinatorics qw(subsets); |
216
|
|
|
|
|
|
|
use List::Util qw(min); |
217
|
50
|
|
|
50
|
|
28456
|
use Math::Cartesian::Product; |
|
50
|
|
|
|
|
106
|
|
|
50
|
|
|
|
|
2500
|
|
218
|
50
|
|
|
50
|
|
348
|
|
|
50
|
|
|
|
|
110
|
|
|
50
|
|
|
|
|
425
|
|
219
|
50
|
|
|
50
|
|
37224
|
use Moo::Role; |
|
50
|
|
|
|
|
105
|
|
|
50
|
|
|
|
|
1091
|
|
220
|
50
|
|
|
50
|
|
240
|
|
|
50
|
|
|
|
|
106
|
|
|
50
|
|
|
|
|
2116
|
|
221
|
50
|
|
|
50
|
|
321
|
with 'Attean::API::JoinPlanner'; |
|
50
|
|
|
|
|
129
|
|
|
50
|
|
|
|
|
2819
|
|
222
|
50
|
|
|
50
|
|
321
|
with 'Attean::API::SimpleCostPlanner'; |
|
50
|
|
|
|
|
93
|
|
|
50
|
|
|
|
|
364
|
|
223
|
50
|
|
|
50
|
|
48639
|
|
|
50
|
|
|
|
|
103
|
|
|
50
|
|
|
|
|
308
|
|
224
|
50
|
|
|
50
|
|
31436
|
my $self = shift; |
|
50
|
|
|
|
|
114
|
|
|
50
|
|
|
|
|
2509
|
|
225
|
50
|
|
|
50
|
|
23862
|
my $model = shift; |
|
50
|
|
|
|
|
160117
|
|
|
50
|
|
|
|
|
3066
|
|
226
|
50
|
|
|
50
|
|
415
|
my $active_graphs = shift; |
|
50
|
|
|
|
|
101
|
|
|
50
|
|
|
|
|
2092
|
|
227
|
50
|
|
|
50
|
|
291
|
my $default_graphs = shift; |
|
50
|
|
|
|
|
108
|
|
|
50
|
|
|
|
|
1655
|
|
228
|
|
|
|
|
|
|
my $interesting = shift; |
229
|
50
|
|
|
50
|
|
275
|
my @args = @_; # each $args[$i] here is an array reference containing alternate plans for element $i |
|
50
|
|
|
|
|
127
|
|
|
50
|
|
|
|
|
384
|
|
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
my $k = 3; # this is the batch size over which to do full dynamic programming |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
# initialize $optPlan{$i} to be a set of alternate plans for evaluating element $i |
234
|
|
|
|
|
|
|
my %optPlan; |
235
|
25
|
|
|
25
|
0
|
49
|
foreach my $i (0 .. $#args) { |
236
|
25
|
|
|
|
|
41
|
$optPlan{$i} = [$self->prune_plans($model, $interesting, $args[$i])]; |
237
|
25
|
|
|
|
|
44
|
} |
238
|
25
|
|
|
|
|
46
|
|
239
|
25
|
|
|
|
|
87
|
my @todo = (0 .. $#args); # initialize the todo list to all elements |
240
|
25
|
|
|
|
|
64
|
my $next_symbol = 'a'; # when we start batching together sub-plans, we'll rename them with letters (e.g. elements 1, 2, and 4 might become 'a', and then 3, 5, and 'a' become 'b') |
241
|
|
|
|
|
|
|
|
242
|
25
|
|
|
|
|
49
|
# until we've joined all the elements in todo and are left with a set of plans for the join of all elements |
243
|
|
|
|
|
|
|
while (scalar(@todo) > 1) { |
244
|
|
|
|
|
|
|
$k = ($k < scalar(@todo)) ? $k : scalar(@todo); # in case we're joining fewer than the batch size |
245
|
25
|
|
|
|
|
43
|
foreach my $i (2 .. $k) { # we've already initialized plans for evaluating single elements; now consider plans for groups of elements (with group sizes 2, 3, ..., $k) |
246
|
25
|
|
|
|
|
95
|
foreach my $s (subsets(\@todo, $i)) { # pick a subset of size $i of the elements that need to be planned |
247
|
39
|
|
|
|
|
148
|
my $s_key = join('.', sort @$s); |
248
|
|
|
|
|
|
|
$optPlan{$s_key} = []; |
249
|
|
|
|
|
|
|
foreach my $o (subsets($s)) { # partition the subset s into two (o and not_o) |
250
|
25
|
|
|
|
|
83
|
next if (scalar(@$o) == 0); # only consider proper, non-empty subsets |
251
|
25
|
|
|
|
|
62
|
next if (scalar(@$o) == scalar(@$s)); # only consider proper, non-empty subsets |
252
|
|
|
|
|
|
|
my $o_key = join('.', sort @$o); |
253
|
|
|
|
|
|
|
my %o = map { $_ => 1 } @$o; |
254
|
25
|
|
|
|
|
80
|
my $not_o_key = join('.', sort grep { not exists $o{$_} } @$s); |
255
|
13
|
50
|
|
|
|
56
|
|
256
|
13
|
|
|
|
|
47
|
my $lhs = $optPlan{$o_key}; # get the plans for evaluating o |
257
|
14
|
|
|
|
|
90
|
my $rhs = $optPlan{$not_o_key}; # get the plans for evaluating not_o |
258
|
16
|
|
|
|
|
1739
|
|
259
|
16
|
|
|
|
|
59
|
# compute and store all the possible ways to evaluate s (o ⋈ not_o) |
260
|
16
|
|
|
|
|
58
|
push(@{ $optPlan{$s_key} }, $self->join_plans($model, $active_graphs, $default_graphs, $lhs, $rhs, 'inner')); |
261
|
68
|
100
|
|
|
|
1641
|
$optPlan{$s_key} = [$self->prune_plans($model, $interesting, $optPlan{$s_key})]; |
262
|
52
|
100
|
|
|
|
211
|
} |
263
|
36
|
|
|
|
|
137
|
} |
264
|
36
|
|
|
|
|
105
|
} |
|
39
|
|
|
|
|
170
|
|
265
|
36
|
|
|
|
|
112
|
|
|
78
|
|
|
|
|
218
|
|
266
|
|
|
|
|
|
|
# find the minimum cost plan $p that computes the join over $k elements (the elements end up in @v) |
267
|
36
|
|
|
|
|
100
|
my %min_plans; |
268
|
36
|
|
|
|
|
67
|
foreach my $w (subsets(\@todo, $k)) { |
269
|
|
|
|
|
|
|
my $w_key = join('.', sort @$w); |
270
|
|
|
|
|
|
|
my $plans = $optPlan{$w_key}; |
271
|
36
|
|
|
|
|
58
|
my @costs = map { $self->cost_for_plan($_, $model) => [$_, $w] } @$plans; |
|
36
|
|
|
|
|
234
|
|
272
|
36
|
|
|
|
|
221
|
my %costs = @costs; |
273
|
|
|
|
|
|
|
my $min = min keys %costs; |
274
|
|
|
|
|
|
|
my @min_plans; |
275
|
|
|
|
|
|
|
while (my ($cost, $data) = splice(@costs, 0, 2)) { |
276
|
|
|
|
|
|
|
if ($cost == $min) { |
277
|
|
|
|
|
|
|
push(@min_plans, $data); |
278
|
13
|
|
|
|
|
31
|
} |
279
|
13
|
|
|
|
|
81
|
} |
280
|
13
|
|
|
|
|
1379
|
$min_plans{ $min } = \@min_plans; |
281
|
13
|
|
|
|
|
37
|
} |
282
|
13
|
|
|
|
|
37
|
my $min_cost = min keys %min_plans; |
|
60
|
|
|
|
|
1266
|
|
283
|
13
|
|
|
|
|
149
|
my $min_plans = $min_plans{$min_cost}; |
284
|
13
|
|
|
|
|
120
|
my @min_plans; |
285
|
13
|
|
|
|
|
32
|
my $min_key; |
286
|
13
|
|
|
|
|
68
|
foreach my $d (@$min_plans) { |
287
|
60
|
100
|
|
|
|
122
|
my ($p, $v) = @$d; |
288
|
51
|
|
|
|
|
143
|
my $v_key = join('.', sort @$v); |
289
|
|
|
|
|
|
|
if (not(defined($min_key)) or $min_key eq $v_key) { |
290
|
|
|
|
|
|
|
push(@min_plans, $p); |
291
|
13
|
|
|
|
|
66
|
$min_key = $v_key; |
292
|
|
|
|
|
|
|
} |
293
|
13
|
|
|
|
|
57
|
} |
294
|
13
|
|
|
|
|
36
|
# my ($p, $v) = @$min_plan; |
295
|
13
|
|
|
|
|
27
|
# my $v_key = join('.', sort @$v); |
296
|
|
|
|
|
|
|
# warn "Choosing join for $v_key\n"; |
297
|
13
|
|
|
|
|
595
|
|
298
|
51
|
|
|
|
|
89
|
# generate a new symbol $t to stand in for $p, the join over the elements in @v |
299
|
51
|
|
|
|
|
121
|
my $t = $next_symbol++; |
300
|
51
|
50
|
66
|
|
|
167
|
|
301
|
51
|
|
|
|
|
77
|
# remove elements in @v from the todo list, and replace them by the new composite element $t |
302
|
51
|
|
|
|
|
96
|
$optPlan{$t} = [@min_plans]; |
303
|
|
|
|
|
|
|
my %v = map { $_ => 1 } split(/[.]/, $min_key); |
304
|
|
|
|
|
|
|
push(@todo, $t); |
305
|
|
|
|
|
|
|
@todo = grep { not exists $v{$_} } @todo; |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
# also remove subsets of @v from the optPlan hash as they are now covered by $optPlan{$t} |
308
|
|
|
|
|
|
|
foreach my $o (subsets([keys %v])) { |
309
|
|
|
|
|
|
|
my $o_key = join('.', sort @$o); |
310
|
13
|
|
|
|
|
45
|
# warn "deleting $o_key\n"; |
311
|
|
|
|
|
|
|
delete $optPlan{$o_key}; |
312
|
|
|
|
|
|
|
} |
313
|
13
|
|
|
|
|
51
|
} |
314
|
13
|
|
|
|
|
76
|
|
|
27
|
|
|
|
|
133
|
|
315
|
13
|
|
|
|
|
40
|
my $final_key = join('.', sort @todo); |
316
|
13
|
|
|
|
|
30
|
# use Data::Dumper; |
|
40
|
|
|
|
|
94
|
|
317
|
|
|
|
|
|
|
# warn Dumper($optPlan{$final_key}); |
318
|
|
|
|
|
|
|
return $self->prune_plans($model, $interesting, $optPlan{$final_key}); |
319
|
13
|
|
|
|
|
62
|
} |
320
|
56
|
|
|
|
|
1220
|
|
321
|
|
|
|
|
|
|
my $self = shift; |
322
|
56
|
|
|
|
|
270
|
my $model = shift; |
323
|
|
|
|
|
|
|
my $interesting = shift; |
324
|
|
|
|
|
|
|
my @plans = @{ shift || [] }; |
325
|
|
|
|
|
|
|
no sort 'stable'; |
326
|
25
|
|
|
|
|
101
|
my @sorted = map { $_->[1] } sort { $a->[0] <=> $b->[0] } map { [$self->cost_for_plan($_, $model), $_] } @plans; |
327
|
|
|
|
|
|
|
if ($self->log->is_trace) { |
328
|
|
|
|
|
|
|
$self->log->trace('============= Plan iteration separator =============='); |
329
|
25
|
|
|
|
|
87
|
foreach my $plan (@sorted){ |
330
|
|
|
|
|
|
|
$self->log->trace("Cost: " . $self->cost_for_plan($plan, $model) . " for plan:\n". $plan->as_string); |
331
|
|
|
|
|
|
|
} |
332
|
|
|
|
|
|
|
} |
333
|
125
|
|
|
125
|
0
|
223
|
return splice(@sorted, 0, 5); |
334
|
125
|
|
|
|
|
172
|
} |
335
|
125
|
|
|
|
|
181
|
|
336
|
125
|
50
|
|
|
|
178
|
# Return a cost value for $plan. This value is basically opaque, except |
|
125
|
|
|
|
|
357
|
|
337
|
50
|
|
|
50
|
|
53895
|
# that it will be used to sort plans by cost when determining which is the |
|
50
|
|
|
|
|
145
|
|
|
50
|
|
|
|
|
332
|
|
338
|
125
|
|
|
|
|
271
|
# cheapest plan to evaluate. |
|
470
|
|
|
|
|
935
|
|
|
810
|
|
|
|
|
1790
|
|
|
470
|
|
|
|
|
8228
|
|
339
|
125
|
50
|
|
|
|
2001
|
my $self = shift; |
340
|
0
|
|
|
|
|
0
|
my $plan = shift; |
341
|
0
|
|
|
|
|
0
|
my $model = shift; |
342
|
0
|
|
|
|
|
0
|
Carp::confess "No model given" unless (blessed($model) and $model->does('Attean::API::Model')); |
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
if ($plan->has_cost) { |
345
|
125
|
|
|
|
|
3120
|
return $plan->cost; |
346
|
|
|
|
|
|
|
} else { |
347
|
|
|
|
|
|
|
if ($model->does('Attean::API::CostPlanner')) { |
348
|
|
|
|
|
|
|
if (defined(my $cost = $model->cost_for_plan($plan, $self))) { |
349
|
|
|
|
|
|
|
$plan->cost($cost); |
350
|
|
|
|
|
|
|
$self->log->info('Model \''.ref($model).'\' did cost planning for \''.ref($plan).'\' and got cost '.$cost); |
351
|
|
|
|
|
|
|
return $cost; |
352
|
1075
|
|
|
1075
|
0
|
38064
|
} |
353
|
1075
|
|
|
|
|
1273
|
} |
354
|
1075
|
|
|
|
|
1259
|
|
355
|
1075
|
50
|
33
|
|
|
3938
|
my $cost = 1; |
356
|
|
|
|
|
|
|
my @children = @{ $plan->children }; |
357
|
1075
|
100
|
|
|
|
15350
|
if ($plan->isa('Attean::Plan::Quad')) { |
358
|
796
|
|
|
|
|
11074
|
my @vars = map { $_->value } grep { blessed($_) and $_->does('Attean::API::Variable') } $plan->values; |
359
|
|
|
|
|
|
|
# This gives a cost increasing at a reasonable pace |
360
|
279
|
50
|
|
|
|
551
|
$cost = $self->_hsp_heuristic_triple_sum($plan) * scalar(@vars); |
361
|
279
|
100
|
|
|
|
7730
|
} elsif ($plan->isa('Attean::Plan::Table')) { |
362
|
29
|
|
|
|
|
702
|
my $rows = $plan->rows; |
363
|
29
|
|
|
|
|
1233
|
$cost = scalar(@$rows); |
364
|
29
|
|
|
|
|
514
|
} elsif ($plan->isa('Attean::Plan::NestedLoopJoin')) { |
365
|
|
|
|
|
|
|
my $lcost = $self->cost_for_plan($children[0], $model); |
366
|
|
|
|
|
|
|
my $rcost = $self->cost_for_plan($children[1], $model); |
367
|
|
|
|
|
|
|
if ($lcost == 0) { |
368
|
250
|
|
|
|
|
2007
|
$cost = $rcost; |
369
|
250
|
|
|
|
|
305
|
} elsif ($rcost == 0) { |
|
250
|
|
|
|
|
771
|
|
370
|
250
|
100
|
|
|
|
1630
|
$cost = $lcost; |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
371
|
4
|
100
|
|
|
|
19
|
} else { |
|
4
|
|
|
|
|
19
|
|
|
16
|
|
|
|
|
373
|
|
372
|
|
|
|
|
|
|
my $mult = $self->_penalize_joins($plan); |
373
|
4
|
|
|
|
|
15
|
# warn "$mult * ($lcost * $rcost) [$children[0] $children[1]]"; |
374
|
|
|
|
|
|
|
$cost = $mult * $lcost * $rcost; |
375
|
0
|
|
|
|
|
0
|
} |
376
|
0
|
|
|
|
|
0
|
} elsif ($plan->isa('Attean::Plan::HashJoin')) { |
377
|
|
|
|
|
|
|
my $lcost = $self->cost_for_plan($children[0], $model); |
378
|
124
|
|
|
|
|
2107
|
my $rcost = $self->cost_for_plan($children[1], $model); |
379
|
124
|
|
|
|
|
2383
|
if ($lcost == 0) { |
380
|
124
|
100
|
|
|
|
823
|
$cost = $rcost; |
|
|
100
|
|
|
|
|
|
381
|
2
|
|
|
|
|
4
|
} elsif ($rcost == 0) { |
382
|
|
|
|
|
|
|
$cost = $lcost; |
383
|
2
|
|
|
|
|
5
|
} else { |
384
|
|
|
|
|
|
|
my $mult = $self->_penalize_joins($plan); |
385
|
120
|
|
|
|
|
283
|
# warn "$mult * ($lcost + $rcost)"; |
386
|
|
|
|
|
|
|
$cost = $mult * ($lcost + $rcost); |
387
|
120
|
|
|
|
|
214
|
$cost += ($lcost < $rcost); # To let the plan with cheaper rhs win |
388
|
|
|
|
|
|
|
} |
389
|
|
|
|
|
|
|
} elsif ($plan->isa('Attean::Plan::Service')) { |
390
|
104
|
|
|
|
|
1791
|
my $scost = 10; |
391
|
104
|
|
|
|
|
2006
|
foreach my $c (@{ $plan->children }) { |
392
|
104
|
50
|
|
|
|
815
|
$scost += $self->cost_for_plan($c, $model); |
|
|
50
|
|
|
|
|
|
393
|
0
|
|
|
|
|
0
|
} |
394
|
|
|
|
|
|
|
$cost = 5 * $scost; |
395
|
0
|
|
|
|
|
0
|
} elsif ($plan->isa('Attean::Plan::Unique')) { |
396
|
|
|
|
|
|
|
$cost = 0; # consider a filter on the iterator (like unique) to be essentially free |
397
|
104
|
|
|
|
|
290
|
foreach my $c (@{ $plan->children }) { |
398
|
|
|
|
|
|
|
$cost += $self->cost_for_plan($c, $model); |
399
|
104
|
|
|
|
|
212
|
} |
400
|
104
|
|
|
|
|
174
|
} else { |
401
|
|
|
|
|
|
|
foreach my $c (@{ $plan->children }) { |
402
|
|
|
|
|
|
|
$cost += $self->cost_for_plan($c, $model); |
403
|
1
|
|
|
|
|
3
|
} |
404
|
1
|
|
|
|
|
3
|
} |
|
1
|
|
|
|
|
5
|
|
405
|
0
|
|
|
|
|
0
|
|
406
|
|
|
|
|
|
|
# Costs must be integers for comparisons to work in the IDP planning algorithm |
407
|
1
|
|
|
|
|
2
|
$cost = int($cost); |
408
|
|
|
|
|
|
|
$plan->cost($cost); |
409
|
0
|
|
|
|
|
0
|
return $cost; |
410
|
0
|
|
|
|
|
0
|
} |
|
0
|
|
|
|
|
0
|
|
411
|
0
|
|
|
|
|
0
|
} |
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
|
414
|
17
|
|
|
|
|
23
|
# The below function finds a number to aid sorting |
|
17
|
|
|
|
|
33
|
|
415
|
17
|
|
|
|
|
291
|
# It takes into account Heuristic 1 and 4 of the HSP paper, see REFERENCES |
416
|
|
|
|
|
|
|
# as well as that it was noted in the text that rdf:type is usually less selective. |
417
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
# By assigning the integers to nodes, depending on whether they are in |
419
|
|
|
|
|
|
|
# triple (subject, predicate, object), variables, rdf:type and |
420
|
250
|
|
|
|
|
402
|
# literals, and sum them, they may be sorted. See code for the actual |
421
|
250
|
|
|
|
|
4038
|
# values used. |
422
|
250
|
|
|
|
|
6343
|
|
423
|
|
|
|
|
|
|
# Denoting s for bound subject, p for bound predicate, a for rdf:type |
424
|
|
|
|
|
|
|
# as predicate, o for bound object and l for literal object and ? for |
425
|
|
|
|
|
|
|
# variable, we get the following order, most of which are identical to |
426
|
|
|
|
|
|
|
# the HSP: |
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
# spl: 6 |
429
|
|
|
|
|
|
|
# spo: 8 |
430
|
|
|
|
|
|
|
# sao: 10 |
431
|
|
|
|
|
|
|
# s?l: 14 |
432
|
|
|
|
|
|
|
# s?o: 16 |
433
|
|
|
|
|
|
|
# ?pl: 25 |
434
|
|
|
|
|
|
|
# ?po: 27 |
435
|
|
|
|
|
|
|
# ?ao: 29 |
436
|
|
|
|
|
|
|
# sp?: 30 |
437
|
|
|
|
|
|
|
# sa?: 32 |
438
|
|
|
|
|
|
|
# ??l: 33 |
439
|
|
|
|
|
|
|
# ??o: 35 |
440
|
|
|
|
|
|
|
# s??: 38 |
441
|
|
|
|
|
|
|
# ?p?: 49 |
442
|
|
|
|
|
|
|
# ?a?: 51 |
443
|
|
|
|
|
|
|
# ???: 57 |
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
# Note that this number is not intended as an estimate of selectivity, |
446
|
|
|
|
|
|
|
# merely a sorting key, but further research may possibly create such |
447
|
|
|
|
|
|
|
# numbers. |
448
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
my ($self, $t) = @_; |
450
|
|
|
|
|
|
|
my $sum = 0; |
451
|
|
|
|
|
|
|
if ($t->subject->does('Attean::API::Variable')) { |
452
|
|
|
|
|
|
|
$sum = 20; |
453
|
|
|
|
|
|
|
} else { |
454
|
|
|
|
|
|
|
$sum = 1; |
455
|
|
|
|
|
|
|
} |
456
|
|
|
|
|
|
|
if ($t->predicate->does('Attean::API::Variable')) { |
457
|
|
|
|
|
|
|
$sum += 10; |
458
|
|
|
|
|
|
|
} else { |
459
|
|
|
|
|
|
|
if ($t->predicate->equals(iri('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'))) { |
460
|
|
|
|
|
|
|
$sum += 4; |
461
|
|
|
|
|
|
|
} else { |
462
|
|
|
|
|
|
|
$sum += 2; |
463
|
4
|
|
|
4
|
|
11
|
} |
464
|
4
|
|
|
|
|
6
|
} |
465
|
4
|
50
|
|
|
|
15
|
if ($t->object->does('Attean::API::Variable')) { |
466
|
0
|
|
|
|
|
0
|
$sum += 27; |
467
|
|
|
|
|
|
|
} elsif ($t->object->does('Attean::API::Literal')) { |
468
|
4
|
|
|
|
|
69
|
$sum += 3; |
469
|
|
|
|
|
|
|
} else { |
470
|
4
|
50
|
|
|
|
16
|
$sum += 5; |
471
|
0
|
|
|
|
|
0
|
} |
472
|
|
|
|
|
|
|
return $sum; |
473
|
4
|
50
|
|
|
|
96
|
} |
474
|
0
|
|
|
|
|
0
|
|
475
|
|
|
|
|
|
|
# The following method returns a factor used to penalize certain types of joins. |
476
|
4
|
|
|
|
|
9
|
# It penalizes cartesian joins heavily, but also uses HSP Heuristic 2 (see REFERENCES) |
477
|
|
|
|
|
|
|
my ($self, $plan) = @_; |
478
|
|
|
|
|
|
|
my $jv = $plan->join_variables; |
479
|
4
|
50
|
|
|
|
28
|
my @children = @{ $plan->children }; |
|
|
0
|
|
|
|
|
|
480
|
4
|
|
|
|
|
67
|
my $mult = 1; |
481
|
|
|
|
|
|
|
if (scalar(@$jv)) { |
482
|
0
|
|
|
|
|
0
|
if ( all { $_->isa('Attean::Plan::Quad') } @children[0..1]) { |
483
|
|
|
|
|
|
|
my $var = ${$jv}[0]; # We will join on this |
484
|
0
|
|
|
|
|
0
|
my @lnodes = $children[0]->values; |
485
|
|
|
|
|
|
|
my @rnodes = $children[1]->values; |
486
|
4
|
|
|
|
|
14
|
# Now, find where the join variables are in the triple patterns |
487
|
|
|
|
|
|
|
my %joinpos; |
488
|
|
|
|
|
|
|
for (my $i = 0; $i <= 2; $i++) { |
489
|
|
|
|
|
|
|
if ($lnodes[$i]->does('Attean::API::Variable') && $lnodes[$i]->value eq $var) { |
490
|
|
|
|
|
|
|
$joinpos{l} = $i; |
491
|
|
|
|
|
|
|
} |
492
|
224
|
|
|
224
|
|
377
|
if ($rnodes[$i]->does('Attean::API::Variable') && $rnodes[$i]->value eq $var) { |
493
|
224
|
|
|
|
|
494
|
$joinpos{r} = $i; |
494
|
224
|
|
|
|
|
264
|
} |
|
224
|
|
|
|
|
448
|
|
495
|
224
|
|
|
|
|
297
|
last if scalar keys(%joinpos) >= 2; # Perhaps a bit premature optimization |
496
|
224
|
100
|
|
|
|
377
|
} |
497
|
208
|
100
|
|
344
|
|
1221
|
my $joinpos = join("", sort values(%joinpos)); # We can now match on this string |
|
344
|
|
|
|
|
971
|
|
498
|
64
|
|
|
|
|
73
|
my %costs = ('12' => 1.1, # The penalty numbers come mostly out from thin air |
|
64
|
|
|
|
|
487
|
|
499
|
64
|
|
|
|
|
203
|
'01' => 1.2, |
500
|
64
|
|
|
|
|
155
|
'02' => 1.5, |
501
|
|
|
|
|
|
|
'22' => 1.6, |
502
|
64
|
|
|
|
|
105
|
'00' => 1.8, |
503
|
64
|
|
|
|
|
145
|
'11' => 2); |
504
|
64
|
50
|
33
|
|
|
185
|
if (exists $costs{$joinpos}) { |
505
|
64
|
|
|
|
|
1098
|
$mult = $costs{$joinpos}; |
506
|
|
|
|
|
|
|
} |
507
|
64
|
50
|
33
|
|
|
143
|
#warn "Penalty: $mult for quads:\n" . $children[0]->as_string . $children[1]->as_string |
508
|
64
|
|
|
|
|
856
|
} |
509
|
|
|
|
|
|
|
} else { |
510
|
64
|
50
|
|
|
|
169
|
$mult = 5; # penalize cartesian joins |
511
|
|
|
|
|
|
|
} |
512
|
64
|
|
|
|
|
273
|
return $mult; |
513
|
64
|
|
|
|
|
315
|
} |
514
|
|
|
|
|
|
|
} |
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
1; |
518
|
|
|
|
|
|
|
|
519
|
64
|
50
|
|
|
|
121
|
|
520
|
64
|
|
|
|
|
209
|
=back |
521
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
=head1 BUGS |
523
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
Please report any bugs or feature requests to through the GitHub web interface |
525
|
16
|
|
|
|
|
31
|
at L<https://github.com/kasei/attean/issues>. |
526
|
|
|
|
|
|
|
|
527
|
224
|
|
|
|
|
665
|
=head1 REFERENCES |
528
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
The seminal reference for Iterative Dynamic Programming is "Iterative |
530
|
|
|
|
|
|
|
dynamic programming: a new class of query optimization algorithms" by |
531
|
|
|
|
|
|
|
D. Kossmann and K. Stocker, ACM Transactions on Database Systems |
532
|
|
|
|
|
|
|
(2000). |
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
The heuristics to order triple patterns in this module is |
535
|
|
|
|
|
|
|
influenced by L<The ICS-FORTH Heuristics-based SPARQL Planner |
536
|
|
|
|
|
|
|
(HSP)|http://www.ics.forth.gr/isl/index_main.php?l=e&c=645>. |
537
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
=head1 SEE ALSO |
540
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
|
543
|
|
|
|
|
|
|
=head1 AUTHOR |
544
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
Gregory Todd Williams C<< <gwilliams@cpan.org> >> |
546
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
=head1 COPYRIGHT |
548
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
Copyright (c) 2014--2022 Gregory Todd Williams. |
550
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it under |
551
|
|
|
|
|
|
|
the same terms as Perl itself. |
552
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
=cut |