| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package AtteanX::Query::Cache::Analyzer; |
|
2
|
|
|
|
|
|
|
|
|
3
|
5
|
|
|
5
|
|
5378551
|
use 5.010001; |
|
|
5
|
|
|
|
|
12
|
|
|
4
|
5
|
|
|
5
|
|
23
|
use strict; |
|
|
5
|
|
|
|
|
7
|
|
|
|
5
|
|
|
|
|
88
|
|
|
5
|
5
|
|
|
5
|
|
16
|
use warnings; |
|
|
5
|
|
|
|
|
7
|
|
|
|
5
|
|
|
|
|
220
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
our $AUTHORITY = 'cpan:KJETILK'; |
|
8
|
|
|
|
|
|
|
our $VERSION = '0.001_04'; |
|
9
|
|
|
|
|
|
|
|
|
10
|
5
|
|
|
5
|
|
19
|
use Moo; |
|
|
5
|
|
|
|
|
7
|
|
|
|
5
|
|
|
|
|
27
|
|
|
11
|
5
|
|
|
5
|
|
1134
|
use Attean::RDF qw(triplepattern variable iri); |
|
|
5
|
|
|
|
|
8
|
|
|
|
5
|
|
|
|
|
277
|
|
|
12
|
5
|
|
|
5
|
|
24
|
use Types::Standard qw(Str Int InstanceOf ConsumerOf); |
|
|
5
|
|
|
|
|
5
|
|
|
|
5
|
|
|
|
|
33
|
|
|
13
|
5
|
|
|
5
|
|
3072
|
use Types::URI -all; |
|
|
5
|
|
|
|
|
6
|
|
|
|
5
|
|
|
|
|
78
|
|
|
14
|
5
|
|
|
5
|
|
12182
|
use AtteanX::Parser::SPARQL; |
|
|
5
|
|
|
|
|
232028
|
|
|
|
5
|
|
|
|
|
213
|
|
|
15
|
5
|
|
|
5
|
|
2789
|
use AtteanX::Query::Cache::Analyzer::Model; |
|
|
5
|
|
|
|
|
13
|
|
|
|
5
|
|
|
|
|
171
|
|
|
16
|
5
|
|
|
5
|
|
2329
|
use AtteanX::QueryPlanner::Cache; |
|
|
5
|
|
|
|
|
11
|
|
|
|
5
|
|
|
|
|
159
|
|
|
17
|
5
|
|
|
5
|
|
1997
|
use AtteanX::Query::Cache::Analyzer::QueryPlanner; |
|
|
5
|
|
|
|
|
13
|
|
|
|
5
|
|
|
|
|
151
|
|
|
18
|
5
|
|
|
5
|
|
2138
|
use AtteanX::Query::Cache::Retriever; |
|
|
5
|
|
|
|
|
8
|
|
|
|
5
|
|
|
|
|
142
|
|
|
19
|
|
|
|
|
|
|
|
|
20
|
5
|
|
|
5
|
|
27
|
use Carp; |
|
|
5
|
|
|
|
|
6
|
|
|
|
5
|
|
|
|
|
2503
|
|
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
has 'query' => (is => 'ro', required => 1, isa => Str); |
|
23
|
|
|
|
|
|
|
has 'algebra' => (is => 'ro', isa => ConsumerOf['Attean::API::Algebra'], builder => '_parse_query', lazy => 1); |
|
24
|
|
|
|
|
|
|
has 'base_uri' => (is => 'ro', default => 'http://default.invalid/'); |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
has 'model' => (is => 'ro', isa => InstanceOf['AtteanX::Query::Cache::Analyzer::Model'], required => 1); |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
has 'graph' => (is => 'ro', isa => InstanceOf['Attean::IRI'], default => sub { return iri('http://example.invalid')}); |
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
has 'improvement_threshold' => (is => 'ro', isa => Int, default => '10'); |
|
31
|
|
|
|
|
|
|
has 'improvement_top' => (is => 'ro', isa => Int, default => '3'); |
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
has 'count_threshold' => (is => 'ro', isa => Int, default => '3'); |
|
34
|
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
has 'max_triples' => (is => 'ro', isa => Int, default => sub { return $ENV{'LDF_MAX_TRIPLES'} || 100000 }); |
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
with 'MooX::Log::Any'; |
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
=pod |
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
=over |
|
42
|
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
=item C<< store >> |
|
44
|
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
A L<Redis> object. This has two purposes: First, to store any |
|
46
|
|
|
|
|
|
|
data the analyzer needs to persist to decide when to prefetch. Second, |
|
47
|
|
|
|
|
|
|
it uses Redis' publish-subscribe system to publish the URLs containing |
|
48
|
|
|
|
|
|
|
queries that the prefetcher should fetch. |
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
=cut |
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
has store => (is => 'ro', |
|
53
|
|
|
|
|
|
|
isa => InstanceOf['Redis'], |
|
54
|
|
|
|
|
|
|
required => 1 |
|
55
|
|
|
|
|
|
|
); |
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
sub _parse_query { |
|
58
|
0
|
|
|
0
|
|
|
my $self = shift; |
|
59
|
0
|
|
|
|
|
|
my $parser = AtteanX::Parser::SPARQL->new(); |
|
60
|
0
|
|
|
|
|
|
my ($algebra) = $parser->parse_list_from_bytes($self->query, $self->base_uri); # TODO: this is a bit of cargocult |
|
61
|
0
|
|
|
|
|
|
return $algebra; |
|
62
|
|
|
|
|
|
|
} |
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
sub best_cost_improvement { |
|
65
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
|
66
|
|
|
|
|
|
|
# First, we find the cost of the plan with the current cache: |
|
67
|
0
|
|
|
|
|
|
my $algebra = $self->algebra; |
|
68
|
0
|
|
|
|
|
|
my $curplanner = AtteanX::QueryPlanner::Cache::LDF->new; |
|
69
|
0
|
|
|
|
|
|
my $curplan = $curplanner->plan_for_algebra($algebra, $self->model, [$self->graph]); |
|
70
|
0
|
|
|
|
|
|
my $curcost = $curplanner->cost_for_plan($curplan, $self->model); |
|
71
|
0
|
|
|
|
|
|
$self->log->trace("Cost of incumbent plan: $curcost"); |
|
72
|
0
|
|
|
|
|
|
my %costs; |
|
73
|
|
|
|
|
|
|
my %triples; |
|
74
|
0
|
|
|
|
|
|
my $percentage = 1-($self->improvement_threshold/100); |
|
75
|
0
|
|
|
|
|
|
my $planner = AtteanX::Query::Cache::Analyzer::QueryPlanner->new; |
|
76
|
0
|
|
|
|
|
|
foreach my $bgp ($algebra->subpatterns_of_type('Attean::Algebra::BGP')) { # TODO: Parallelize |
|
77
|
0
|
|
|
|
|
|
foreach my $triple (@{ $bgp->triples }) { # TODO: May need quads |
|
|
0
|
|
|
|
|
|
|
|
78
|
0
|
|
|
|
|
|
my $key = $triple->canonicalize->tuples_string; |
|
79
|
0
|
0
|
|
|
|
|
next if ($self->model->is_cached($key)); |
|
80
|
0
|
0
|
|
|
|
|
next if ($self->model->ldf_store->count_triples_estimate($triple->values) > $self->max_triples); |
|
81
|
0
|
|
|
|
|
|
$self->model->try($key); |
|
82
|
0
|
0
|
|
|
|
|
if ($self->log->is_trace) { |
|
83
|
0
|
|
|
|
|
|
foreach my $plan ($planner->plans_for_algebra($algebra, $self->model, [$self->graph])) { |
|
84
|
0
|
|
|
|
|
|
my $cost = $planner->cost_for_plan($plan, $self->model); |
|
85
|
0
|
|
|
|
|
|
$self->log->trace("Cost $cost for:\n" . $plan->as_string); |
|
86
|
|
|
|
|
|
|
} |
|
87
|
|
|
|
|
|
|
} |
|
88
|
0
|
|
|
|
|
|
my $plan = $planner->plan_for_algebra($algebra, $self->model, [$self->graph]); |
|
89
|
0
|
|
|
|
|
|
$self->log->debug("Alternative plan after fetching $key:\n" . $plan->as_string); |
|
90
|
0
|
|
|
|
|
|
$costs{$key} = $planner->cost_for_plan($plan, $self->model); |
|
91
|
0
|
|
|
|
|
|
$self->log->info("Triple $key has cost $costs{$key}, current $curcost"); |
|
92
|
0
|
0
|
|
|
|
|
if ($costs{$key} < $curcost * $percentage) { |
|
93
|
0
|
|
|
|
|
|
$triples{$key} = $triple; |
|
94
|
|
|
|
|
|
|
} |
|
95
|
|
|
|
|
|
|
} |
|
96
|
|
|
|
|
|
|
} |
|
97
|
5
|
|
|
5
|
|
28
|
no sort 'stable'; |
|
|
5
|
|
|
|
|
5
|
|
|
|
5
|
|
|
|
|
41
|
|
|
98
|
0
|
|
|
|
|
|
my @worthy = map { $triples{$_} } sort {$costs{$a} <=> $costs{$b}} keys(%triples); |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
99
|
0
|
|
|
|
|
|
return splice(@worthy,0, $self->improvement_top-1); |
|
100
|
|
|
|
|
|
|
} |
|
101
|
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
=item C<< count_patterns >> |
|
104
|
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
Loops the triple patterns, checks if any of them have a cached result |
|
106
|
|
|
|
|
|
|
(TODO) and increments the number of times a certain predicate has been |
|
107
|
|
|
|
|
|
|
seen in the store. When that number exceeds the C<count_threshold>, a |
|
108
|
|
|
|
|
|
|
single-element array of L<Attean::TriplePattern>s will be returned. |
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
=back |
|
112
|
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
=cut |
|
114
|
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
sub count_patterns { |
|
116
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
|
117
|
0
|
|
|
|
|
|
my $algebra = $self->algebra; |
|
118
|
0
|
|
|
|
|
|
my @worthy = (); |
|
119
|
|
|
|
|
|
|
# TODO: Return undef if we can't process the query |
|
120
|
0
|
|
|
|
|
|
foreach my $bgp ($algebra->subpatterns_of_type('Attean::Algebra::BGP')) { |
|
121
|
0
|
|
|
|
|
|
foreach my $triple (@{ $bgp->triples }) { # TODO: May need quads |
|
|
0
|
|
|
|
|
|
|
|
122
|
0
|
|
|
|
|
|
my $patternkey = $triple->canonicalize->tuples_string; # This is the key for the triple we process |
|
123
|
0
|
0
|
|
|
|
|
next if ($self->model->is_cached($patternkey)); |
|
124
|
0
|
0
|
|
|
|
|
next if ($self->model->ldf_store->count_triples_estimate($triple->values) > $self->max_triples); |
|
125
|
0
|
|
|
|
|
|
my $key = $triple->predicate->as_string; # This is the key for the predicate we count |
|
126
|
|
|
|
|
|
|
# Update the storage and return the triple pattern |
|
127
|
0
|
|
|
|
|
|
$self->store->incr($key); |
|
128
|
0
|
|
|
|
|
|
my $count = $self->store->get($key); |
|
129
|
0
|
|
|
|
|
|
$self->log->debug("Count for key '$key' in database is $count"); |
|
130
|
0
|
0
|
|
|
|
|
if ($count >= $self->count_threshold) { # TODO: A way to expire counts |
|
131
|
0
|
|
|
|
|
|
$self->log->info("Triple '$patternkey' has predicate with $count counts"); |
|
132
|
0
|
|
|
|
|
|
push(@worthy, $triple); |
|
133
|
|
|
|
|
|
|
} |
|
134
|
|
|
|
|
|
|
} |
|
135
|
|
|
|
|
|
|
} |
|
136
|
0
|
|
|
|
|
|
return @worthy; |
|
137
|
|
|
|
|
|
|
} |
|
138
|
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
sub analyze_and_cache { |
|
140
|
0
|
|
|
0
|
0
|
|
my ($self, @analyzers) = @_; |
|
141
|
0
|
0
|
|
|
|
|
croak 'No analyzers given to analyze and cache' unless @analyzers; |
|
142
|
0
|
0
|
|
|
|
|
if ($analyzers[0] eq 'all') { |
|
143
|
0
|
|
|
|
|
|
@analyzers = ('count_patterns', 'best_cost_improvement'); |
|
144
|
|
|
|
|
|
|
} |
|
145
|
0
|
|
|
|
|
|
foreach my $analyzer (@analyzers) { |
|
146
|
0
|
0
|
|
|
|
|
croak "Could not find analyzer method $analyzer" unless $self->can($analyzer); |
|
147
|
|
|
|
|
|
|
} |
|
148
|
0
|
|
|
|
|
|
$self->log->info('Running analyzers named ' . join(', ', @analyzers)); |
|
149
|
0
|
|
|
|
|
|
my $retriever = AtteanX::Query::Cache::Retriever->new(model => $self->model); # TODO: Only OK if we don't do query planning |
|
150
|
0
|
|
|
|
|
|
my $i = 0; |
|
151
|
0
|
|
|
|
|
|
my %done; |
|
152
|
0
|
|
|
|
|
|
foreach my $analyzer (@analyzers) { |
|
153
|
0
|
|
|
|
|
|
foreach my $triple ($self->$analyzer) { |
|
154
|
0
|
|
|
|
|
|
my $key = $triple->canonicalize->tuples_string; |
|
155
|
0
|
0
|
|
|
|
|
next if $done{$key}; # Skip if some other analyzer already did fetch |
|
156
|
0
|
|
|
|
|
|
$self->log->debug('Fetching triple pattern ' . $triple->as_string); |
|
157
|
0
|
|
|
|
|
|
my $data = $retriever->fetch($triple); |
|
158
|
0
|
0
|
|
|
|
|
if (defined($data)) { |
|
159
|
0
|
|
|
|
|
|
$done{$key} = 1; |
|
160
|
0
|
|
|
|
|
|
$i++; |
|
161
|
|
|
|
|
|
|
} |
|
162
|
0
|
|
|
|
|
|
$self->model->cache->set($key, $data); |
|
163
|
|
|
|
|
|
|
} |
|
164
|
|
|
|
|
|
|
} |
|
165
|
0
|
|
|
|
|
|
$self->log->info("Got results from prefetching $i triple patterns"); |
|
166
|
0
|
|
|
|
|
|
return $i; |
|
167
|
|
|
|
|
|
|
} |
|
168
|
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
1; |