line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# RDF::Query::BGPOptimizer |
2
|
|
|
|
|
|
|
# ----------------------------------------------------------------------------- |
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
=head1 NAME |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
RDF::Query::BGPOptimizer - Optimizer for ordering the joins of triple patterns in a BGP |
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
=head1 VERSION |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
This document describes RDF::Query::BGPOptimizer version 2.915_01. |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
=head1 STATUS |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
This module's API and functionality should be considered unstable. |
15
|
|
|
|
|
|
|
In the future, this module may change in backwards-incompatible ways, |
16
|
|
|
|
|
|
|
or be removed entirely. If you need functionality that this module provides, |
17
|
|
|
|
|
|
|
please L<get in touch|http://www.perlrdf.org/>. |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=head1 METHODS |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
=over 4 |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
=cut |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
package RDF::Query::BGPOptimizer; |
26
|
|
|
|
|
|
|
|
27
|
35
|
|
|
35
|
|
183
|
use strict; |
|
35
|
|
|
|
|
76
|
|
|
35
|
|
|
|
|
910
|
|
28
|
35
|
|
|
35
|
|
180
|
use warnings; |
|
35
|
|
|
|
|
80
|
|
|
35
|
|
|
|
|
820
|
|
29
|
35
|
|
|
35
|
|
197
|
use Data::Dumper; |
|
35
|
|
|
|
|
72
|
|
|
35
|
|
|
|
|
1642
|
|
30
|
35
|
|
|
35
|
|
186
|
use List::Util qw(reduce); |
|
35
|
|
|
|
|
84
|
|
|
35
|
|
|
|
|
1934
|
|
31
|
35
|
|
|
35
|
|
184
|
use Scalar::Util qw(blessed reftype refaddr); |
|
35
|
|
|
|
|
96
|
|
|
35
|
|
|
|
|
2092
|
|
32
|
35
|
|
|
35
|
|
189
|
use RDF::Query::Error qw(:try); |
|
35
|
|
|
|
|
89
|
|
|
35
|
|
|
|
|
255
|
|
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
###################################################################### |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
our ($VERSION); |
37
|
|
|
|
|
|
|
BEGIN { |
38
|
35
|
|
|
35
|
|
36187
|
$VERSION = '2.915_01'; |
39
|
|
|
|
|
|
|
} |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
###################################################################### |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
=item C<< ordered_triples ( $context, @triples ) >> |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
Returns a list of triples, ordered so as to optimize a left-deep join plan based |
46
|
|
|
|
|
|
|
on the frequency counts provided by the underlying model. |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
=cut |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
sub ordered_triples { |
51
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
52
|
0
|
|
|
|
|
|
my $context = shift; |
53
|
0
|
|
|
|
|
|
my @triples = @_; |
54
|
|
|
|
|
|
|
|
55
|
0
|
|
|
|
|
|
my $model = $context->model; |
56
|
|
|
|
|
|
|
|
57
|
0
|
|
|
|
|
|
my %vars; |
58
|
|
|
|
|
|
|
my %seen; |
59
|
|
|
|
|
|
|
my @weighted = map { |
60
|
0
|
|
|
|
|
|
my $triple = RDF::Query::Plan::Triple->new( $_->nodes ); |
|
0
|
|
|
|
|
|
|
61
|
0
|
|
|
|
|
|
[ $_, $self->_cost( $triple, $context ) ] |
62
|
|
|
|
|
|
|
} @triples; |
63
|
0
|
|
|
|
|
|
my %triples = map { refaddr($_->[0]) => $_ } @weighted; |
|
0
|
|
|
|
|
|
|
64
|
0
|
|
|
|
|
|
my @ordered = sort { $a->[1] <=> $b->[1] } @weighted; |
|
0
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
|
66
|
0
|
|
|
|
|
|
foreach my $t (@triples) { |
67
|
0
|
|
|
|
|
|
my @vars = $self->_triple_vars( $t ); |
68
|
0
|
|
|
|
|
|
foreach my $name (@vars) { |
69
|
0
|
0
|
|
|
|
|
push( @{ $vars{ $name } }, $t ) unless ($seen{ $name }{ refaddr($t) }++); |
|
0
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
} |
71
|
|
|
|
|
|
|
} |
72
|
|
|
|
|
|
|
|
73
|
0
|
|
|
|
|
|
my %edges; |
74
|
0
|
|
|
|
|
|
foreach my $name (keys %vars) { |
75
|
0
|
|
|
|
|
|
my @triples = @{ $vars{ $name } }; |
|
0
|
|
|
|
|
|
|
76
|
0
|
|
|
|
|
|
foreach my $t (@triples) { |
77
|
0
|
|
|
|
|
|
my $ta = refaddr($t); |
78
|
0
|
|
|
|
|
|
foreach my $u (@triples) { |
79
|
0
|
|
|
|
|
|
my $ua = refaddr($u); |
80
|
0
|
0
|
|
|
|
|
next if ($ta == $ua); |
81
|
0
|
|
|
|
|
|
$edges{ $ta }{ $ua } = $u; |
82
|
|
|
|
|
|
|
} |
83
|
|
|
|
|
|
|
} |
84
|
|
|
|
|
|
|
} |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
|
87
|
0
|
|
|
|
|
|
my @final; |
88
|
|
|
|
|
|
|
my %used; |
89
|
0
|
|
|
|
|
|
my $start = shift(@ordered); |
90
|
0
|
|
|
|
|
|
$used{ refaddr($start) }++; |
91
|
0
|
|
|
|
|
|
push(@final, $start); |
92
|
|
|
|
|
|
|
|
93
|
0
|
|
|
|
|
|
my @seen = refaddr($start->[0]); |
94
|
0
|
|
|
|
|
|
my $count = 0; |
95
|
0
|
|
|
|
|
|
while (@ordered) { |
96
|
0
|
0
|
|
|
|
|
if (++$count > scalar(@triples)) { |
97
|
0
|
|
|
|
|
|
die "loop in BGPOptimizer (?)"; |
98
|
|
|
|
|
|
|
} |
99
|
|
|
|
|
|
|
|
100
|
0
|
|
|
|
|
|
my @frontier = grep { not($used{refaddr($_)}) } map { $triples{ $_ } } map { keys(%{ $edges{ $_ } }) } @seen; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
101
|
0
|
|
|
|
|
|
my @orderedf = sort { $a->[1] <=> $b->[1] } @frontier; |
|
0
|
|
|
|
|
|
|
102
|
0
|
0
|
|
|
|
|
if (@orderedf) { |
103
|
0
|
|
|
|
|
|
my $next = shift(@orderedf); |
104
|
0
|
|
|
|
|
|
my $addr = refaddr($next); |
105
|
0
|
|
|
|
|
|
$used{ $addr }++; |
106
|
0
|
|
|
|
|
|
push(@final, $next); |
107
|
0
|
|
|
|
|
|
push(@seen, refaddr($next->[0])); |
108
|
0
|
|
|
|
|
|
@ordered = grep { refaddr($_) != $addr } @ordered; |
|
0
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
} else { |
110
|
0
|
|
|
|
|
|
my $next = shift(@ordered); |
111
|
0
|
|
|
|
|
|
my $addr = refaddr($next); |
112
|
0
|
|
|
|
|
|
$used{ $addr }++; |
113
|
0
|
|
|
|
|
|
push(@final, $next); |
114
|
0
|
|
|
|
|
|
push(@seen, refaddr($next->[0])); |
115
|
|
|
|
|
|
|
} |
116
|
|
|
|
|
|
|
} |
117
|
|
|
|
|
|
|
|
118
|
0
|
|
|
|
|
|
return map { $_->[0] } @final; |
|
0
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
} |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
sub _cost { |
122
|
0
|
|
|
0
|
|
|
my $self = shift; |
123
|
0
|
|
|
|
|
|
my $pattern = shift; |
124
|
0
|
|
|
|
|
|
my $context = shift; |
125
|
0
|
|
|
|
|
|
my $l = Log::Log4perl->get_logger("rdf.query.bgpoptimizer"); |
126
|
0
|
|
|
|
|
|
my $bf = $pattern->bf( $context ); |
127
|
0
|
|
|
|
|
|
my $f = ($bf =~ tr/f//); |
128
|
0
|
|
|
|
|
|
my $r = $f / 3; |
129
|
0
|
|
|
|
|
|
$l->debug( "Pattern has bf representation '$bf'" ); |
130
|
0
|
|
|
|
|
|
$l->debug( "There are $f of 3 free variables" ); |
131
|
0
|
|
|
|
|
|
$l->debug( 'Estimated cardinality of triple is : ' . $r ); |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
# round the cardinality to an integer |
134
|
0
|
|
|
|
|
|
return int($r + .5 * ($r <=> 0)); |
135
|
|
|
|
|
|
|
} |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
sub _triple_vars { |
138
|
0
|
|
|
0
|
|
|
my $self = shift; |
139
|
0
|
|
|
|
|
|
my $t = shift; |
140
|
0
|
|
|
|
|
|
my @nodes = $t->nodes; |
141
|
0
|
|
|
|
|
|
my (@vars, %seen); |
142
|
0
|
|
|
|
|
|
foreach my $n (@nodes) { |
143
|
0
|
0
|
|
|
|
|
if ($n->isa('RDF::Trine::Node::Variable')) { |
144
|
0
|
|
|
|
|
|
my $name = $n->name; |
145
|
0
|
0
|
|
|
|
|
push(@vars, $name) unless ($seen{ $name }++); |
146
|
|
|
|
|
|
|
} |
147
|
|
|
|
|
|
|
} |
148
|
0
|
|
|
|
|
|
return @vars; |
149
|
|
|
|
|
|
|
} |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
1; |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
__END__ |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
=back |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=head1 AUTHOR |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
Gregory Todd Williams <gwilliams@cpan.org> |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
=cut |