line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package RDF::Generator::Void; |
2
|
|
|
|
|
|
|
|
3
|
5
|
|
|
5
|
|
53503
|
use 5.006; |
|
5
|
|
|
|
|
21
|
|
4
|
5
|
|
|
5
|
|
17
|
use strict; |
|
5
|
|
|
|
|
5
|
|
|
5
|
|
|
|
|
78
|
|
5
|
5
|
|
|
5
|
|
14
|
use warnings; |
|
5
|
|
|
|
|
13
|
|
|
5
|
|
|
|
|
97
|
|
6
|
5
|
|
|
5
|
|
501
|
use Moose; |
|
5
|
|
|
|
|
298575
|
|
|
5
|
|
|
|
|
39
|
|
7
|
5
|
|
|
5
|
|
24092
|
use Moose::Util::TypeConstraints; |
|
5
|
|
|
|
|
7
|
|
|
5
|
|
|
|
|
41
|
|
8
|
5
|
|
|
5
|
|
6495
|
use Data::UUID; |
|
5
|
|
|
|
|
604
|
|
|
5
|
|
|
|
|
304
|
|
9
|
5
|
|
|
5
|
|
489
|
use RDF::Trine qw[iri literal blank variable statement]; |
|
5
|
|
|
|
|
132069
|
|
|
5
|
|
|
|
|
284
|
|
10
|
5
|
|
|
5
|
|
2017
|
use RDF::Generator::Void::Stats; |
|
5
|
|
|
|
|
13
|
|
|
5
|
|
|
|
|
198
|
|
11
|
|
|
|
|
|
|
# use less (); |
12
|
5
|
|
|
5
|
|
34
|
use utf8; |
|
5
|
|
|
|
|
6
|
|
|
5
|
|
|
|
|
39
|
|
13
|
5
|
|
|
5
|
|
2642
|
use URI::Split qw(uri_split uri_join); |
|
5
|
|
|
|
|
2744
|
|
|
5
|
|
|
|
|
312
|
|
14
|
5
|
|
|
5
|
|
25
|
use Progress::Any; |
|
5
|
|
|
|
|
6
|
|
|
5
|
|
|
|
|
42
|
|
15
|
|
|
|
|
|
|
|
16
|
5
|
|
|
5
|
|
2113
|
use aliased 'RDF::Generator::Void::Meta::Attribute::ObjectList'; |
|
5
|
|
|
|
|
2545
|
|
|
5
|
|
|
|
|
21
|
|
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
# Namespace objects for the vocabularies used when building the
# description: VoID, RDF, XML Schema datatypes, Dublin Core terms and PROV.
my ($void, $rdf, $xsd, $dct, $prov) = map { RDF::Trine::Namespace->new($_) } (
    'http://rdfs.org/ns/void#',
    'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
    'http://www.w3.org/2001/XMLSchema#',
    'http://purl.org/dc/terms/',
    'http://www.w3.org/ns/prov#',
);
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
=head1 NAME |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
RDF::Generator::Void - Generate VoID descriptions based on data in an RDF model |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
=head1 VERSION |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
Version 0.13_1 |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
=cut |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
our $VERSION = '0.13_1'; |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
=head1 SYNOPSIS |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
use RDF::Generator::Void; |
40
|
|
|
|
|
|
|
use RDF::Trine::Model; |
41
|
|
|
|
|
|
|
my $mymodel = RDF::Trine::Model->temporary_model; |
42
|
|
|
|
|
|
|
[add some data to $mymodel here] |
43
|
|
|
|
|
|
|
my $generator = RDF::Generator::Void->new(inmodel => $mymodel); |
44
|
|
|
|
|
|
|
$generator->urispace('http://example.org'); |
45
|
|
|
|
|
|
|
$generator->add_endpoints('http://example.org/sparql'); |
46
|
|
|
|
|
|
|
my $voidmodel = $generator->generate; |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
=head1 DESCRIPTION |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
This module takes a L<RDF::Trine::Model> object as input to the |
51
|
|
|
|
|
|
|
constructor, and based on the data in that model as well as data |
52
|
|
|
|
|
|
|
supplied by the user, it creates a new model with a VoID description |
53
|
|
|
|
|
|
|
of the data in the model. |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
For a description of VoID, see L<http://www.w3.org/TR/void/>. |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
=head1 METHODS |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=head2 new(inmodel => $mymodel, dataset_uri => URI->new($dataset_uri), level => 1); |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
The constructor. It can be called with two parameters, namely, |
62
|
|
|
|
|
|
|
C<inmodel> which is a model we want to describe and C<dataset_uri>, |
63
|
|
|
|
|
|
|
which is the URI we want to use for the description. Users should make |
64
|
|
|
|
|
|
|
sure it is possible to get this with HTTP. If this is not possible, |
65
|
|
|
|
|
|
|
you may leave this field empty so that a simple URN can be created for |
66
|
|
|
|
|
|
|
you as a default. |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
=head2 C<inmodel> |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
Read-only accessor for the model used in description creation. |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
=head2 C<dataset_uri> |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
Read-only accessor for the URI to the dataset. |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
=cut |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
# The model to describe. It must be passed to the constructor and
# cannot be replaced afterwards.
has inmodel => (
    isa      => 'RDF::Trine::Model',
    is       => 'ro',
    required => 1,
);
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
# Type plumbing for the dataset_uri attribute: a DatasetURI is an
# RDF::Trine resource or blank node, and both URI objects and plain
# strings are coerced into a resource node.
class_type 'URI';

subtype 'DatasetURI',
    as 'Object',
    where {
        my $node = $_;
        $node->isa('RDF::Trine::Node::Resource')
            || $node->isa('RDF::Trine::Node::Blank');
    };

coerce 'DatasetURI',
    from 'URI',
        via { iri("$_") },    # stringify the URI object first
    from 'Str',
        via { iri($_) };
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
# Node identifying the dataset being described. When none is supplied,
# _build_dataset_uri provides a default on first access.
has dataset_uri => (
    isa     => 'DatasetURI',
    is      => 'ro',
    coerce  => 1,
    lazy    => 1,
    builder => '_build_dataset_uri',
);

# Progress indicator shared by the methods of this package; generate()
# installs one with "local" for the duration of a run.
our $progress;
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
# Default builder for dataset_uri: mints a resource node of the form
# urn:uuid:<freshly generated UUID string>.
sub _build_dataset_uri {
    my $self = shift;
    my $uuid = Data::UUID->new->create_str;
    return iri("urn:uuid:$uuid");
}
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
=head2 Property Attributes |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
The below attributes concern some essential properties in the VoID |
116
|
|
|
|
|
|
|
vocabulary. They are mostly arrays, and can be manipulated using array |
117
|
|
|
|
|
|
|
methods. Methods starting with C<all_> will return an array of unique |
118
|
|
|
|
|
|
|
values. Methods starting with C<add_> take a list of values to add, |
119
|
|
|
|
|
|
|
and those starting with C<has_no_> return a boolean value, true if |
120
|
|
|
|
|
|
|
the array is empty. |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=head3 C<all_vocabularies>, C<add_vocabularies>, C<has_no_vocabularies> |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
Methods to manipulate a list of vocabularies used in the dataset. The |
125
|
|
|
|
|
|
|
values should be a string that represents the URI of a vocabulary. |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
=cut |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
# The list-valued attributes from here on declare nothing but the
# ObjectList trait.
# NOTE(review): the all_*, add_* and has_no_* methods described in the
# POD are presumably installed by that trait -- confirm in
# RDF::Generator::Void::Meta::Attribute::ObjectList.
has _vocabularies => (
    traits => [ObjectList],
);
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=head3 C<all_endpoints>, C<add_endpoints>, C<has_no_endpoints> |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
Methods to manipulate a list of SPARQL endpoints that can be used to |
137
|
|
|
|
|
|
|
query the dataset. The values should be a string that represents the |
138
|
|
|
|
|
|
|
URI of a SPARQL endpoint. |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
=cut |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
# SPARQL endpoint URIs; generate() emits one void:sparqlEndpoint
# statement per value returned by all_endpoints.
has _endpoints => (
    traits => [ObjectList],
);
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
=head3 C<all_titles>, C<add_titles>, C<has_no_titles> |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
Methods to manipulate the titles of the datasets. The values should be |
148
|
|
|
|
|
|
|
L<RDF::Trine::Node::Literal> objects, and should be set with |
149
|
|
|
|
|
|
|
language. Typically, you would have a value per language. |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
=cut |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
# Dataset titles, restricted to literal nodes; generate() emits one
# dct:title statement per value returned by all_titles.
has _titles => (
    isa    => 'ArrayRef[RDF::Trine::Node::Literal]',
    traits => [ObjectList],
);
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
=head3 C<all_licenses>, C<add_licenses>, C<has_no_licenses> |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
Methods to manipulate a list of licenses that regulate the use of the |
163
|
|
|
|
|
|
|
dataset. The values should be a string that represents the URI of a |
164
|
|
|
|
|
|
|
license. |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
=cut |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
# License URIs; generate() emits one dct:license statement per value
# returned by all_licenses.
has _licenses => (
    traits => [ObjectList],
);
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
=head3 C<urispace>, C<has_urispace> |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
This method is used to set the URI prefix string that will match the |
173
|
|
|
|
|
|
|
entities in your dataset. The computation of the number of entities |
174
|
|
|
|
|
|
|
depends on this being set. C<has_urispace> can be used to check if it |
175
|
|
|
|
|
|
|
is set. |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
=cut |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
# A dataset has at most one uriSpace (subsets may have their own), so
# a plain read-write string with a predicate is enough here.
has urispace => (
    isa       => 'Str',
    is        => 'rw',
    predicate => 'has_urispace',
);
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
=head2 Running this stuff |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
=head3 C<level>, C<has_level> |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
Set the level of detail. 0 doesn't do any statistics or heuristics, 1 |
192
|
|
|
|
|
|
|
has some statistics for the dataset as a whole, 2 will give some |
193
|
|
|
|
|
|
|
partition statistics and 3 will give subject and object counts for |
194
|
|
|
|
|
|
|
property partitions. Setting no level will give everything. |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
=cut |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
# Level of detail for generate(); has_level tells whether it was set.
has level => (
    isa       => 'Int',
    is        => 'rw',
    predicate => 'has_level',
);
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
=head3 C<stats>, C<clear_stats>, C<has_stats> |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
Method to compute a statistical summary for the data in the dataset, |
204
|
|
|
|
|
|
|
such as the number of entities, predicates, etc. C<clear_stats> will |
205
|
|
|
|
|
|
|
clear the statistics and C<has_stats> will return true if they have been computed. |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
=cut |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
# The statistics themselves are delegated to RDF::Generator::Void::Stats.
# The object is built lazily by _build_stats on first access, can be
# dropped with clear_stats and probed with has_stats.
has stats => (
    isa       => 'RDF::Generator::Void::Stats',
    is        => 'rw',
    lazy      => 1,
    builder   => '_build_stats',
    predicate => 'has_stats',
    clearer   => 'clear_stats',
);
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
# Builder for the stats attribute: creates the Stats object, handing it
# this generator.
sub _build_stats {
    my $self  = shift;
    my $stats = RDF::Generator::Void::Stats->new(generator => $self);
    return $stats;
}
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
=head3 generate( [ $model ] ) |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
Returns the VoID as an RDF::Trine::Model. You may pass a model with |
229
|
|
|
|
|
|
|
statements as argument to this method. This model may then contain |
230
|
|
|
|
|
|
|
arbitrary RDF that will be added to the RDF model. If you do not send |
231
|
|
|
|
|
|
|
a model, one will be created for you. |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
=cut |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
# generate( [ $model ] )
#
# Builds the VoID description of inmodel and returns it as a model.
#
# Parameters:
#   $model - optional model to add the statements to; a temporary model
#            is created when none is given.
#
# Returns the model holding the description. How much is generated
# depends on the level attribute: level 0 stops after the user-set and
# base statements, level 1 stops after the dataset-wide counts and
# vocabularies, and anything else (including no level) also adds the
# property and class partitions.
sub generate {
    my $self       = shift;
    my $void_model = shift || RDF::Trine::Model->temporary_model;

    # level is constrained to Int, so undef can only mean "not set".
    my $level = $self->has_level ? $self->level : undef;

    local $progress = Progress::Any->get_indicator(task => "compute");
    $progress->pos(0);
    my $target_size = 11;
    # NOTE(review): the model size is only added to the target for an
    # explicit level > 0, although the statistics are computed when no
    # level is set as well -- confirm that this is intended.
    if (defined $level && $level > 0) {
        $target_size += $self->inmodel->size;
    }
    $progress->target($target_size);
    $progress->update(message => "Adding base statements");

    # The private _generate_* helpers find the output model here.
    local $self->{void_model} = $void_model;

    my $dataset = $self->dataset_uri;

    # Start generating the actual VoID statements
    $void_model->add_statement(statement(
        $dataset,
        $rdf->type,
        $void->Dataset,
    ));
    $progress->update(message => "Adding base statements");

    # The DatasetURI type also admits blank nodes; only nodes that
    # actually have a URI value can be split into document and fragment.
    my ($scheme, $auth, $path, $query, $frag);
    if ($dataset->can('uri_value')) {
        ($scheme, $auth, $path, $query, $frag) = uri_split($dataset->uri_value);
    }

    # A non-empty fragment means the dataset URI lives inside a document
    # that can be described with provenance. Test with defined/length
    # rather than truth so that a fragment of "0" is not taken as absent.
    if (defined $frag && length $frag) {
        my $document = iri(uri_join($scheme, $auth, $path, $query, undef));
        my $activity = blank();
        $void_model->add_statement(statement(
            $document,
            $prov->wasGeneratedBy,
            $activity,
        ));

        # The release URI uses a dash where the version has its dot.
        (my $ver = $VERSION) =~ s/\./-/;
        my $release_uri = iri("http://purl.org/NET/cpan-uri/dist/RDF-Generator-Void/v_$ver");
        $void_model->add_statement(statement(
            $activity,
            $prov->wasAssociatedWith,
            $release_uri,
        ));
        $void_model->add_statement(statement(
            $release_uri,
            $rdf->type,
            $prov->SoftwareAgent,
        ));
        $void_model->add_statement(statement(
            $release_uri,
            iri('http://www.w3.org/2000/01/rdf-schema#label'),
            literal("RDF::Generator::Void, Version $VERSION", 'en'),
        ));
        $progress->update(message => "Adding provenance statements");
    }

    # Statements for the values the user has set on the generator.
    foreach my $endpoint ($self->all_endpoints) {
        $void_model->add_statement(statement(
            $dataset,
            $void->sparqlEndpoint,
            iri($endpoint),
        ));
    }

    foreach my $title ($self->all_titles) {
        $void_model->add_statement(statement(
            $dataset,
            $dct->title,
            $title,
        ));
    }

    foreach my $license ($self->all_licenses) {
        $void_model->add_statement(statement(
            $dataset,
            $dct->license,
            iri($license),
        ));
    }

    $progress->update(message => "Adding user-set statements");

    $void_model->add_statement(statement(
        $dataset,
        $void->triples,
        literal($self->inmodel->size, undef, $xsd->integer),
    ));

    $progress->update(message => "Adding base statements");

    my $has_urispace = $self->has_urispace;
    if ($has_urispace) {
        $void_model->add_statement(statement(
            $dataset,
            $void->uriSpace,
            literal($self->urispace),
        ));
    }

    # Level 0: no statistics at all.
    return $void_model if defined $level && $level == 0;

    # The entity count is only emitted when a urispace is set.
    if ($has_urispace) {
        $self->_generate_counts($void->entities, $self->stats->entities);
    }
    $self->_generate_counts($void->distinctSubjects, $self->stats->subjects);
    $self->_generate_counts($void->properties, $self->stats->properties);
    $self->_generate_counts($void->distinctObjects, $self->stats->objects);

    $self->_generate_most_common_vocabs($self->stats) if $self->has_stats;

    # Level 1: dataset-wide statistics only, no partitions.
    return $void_model if defined $level && $level <= 1;

    # One extra progress step per partition.
    $target_size += scalar(keys(%{$self->stats->propertyPartitions}));
    $target_size += scalar(keys(%{$self->stats->classPartitions}));
    $progress->target($target_size);

    $self->_generate_propertypartitions;
    $self->_generate_classpartitions;
    $progress->update(message => "Finishing");

    return $void_model;
}
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
# Adds one integer-typed count statement about the dataset to the model
# currently being generated.
#
# Parameters:
#   $predicate - predicate node for the statement
#   $count     - value, emitted as an xsd:integer literal
#
# Does nothing when no statistics object is present. Returns nothing.
sub _generate_counts {
    my ($self, $predicate, $count) = @_;

    # Bare return: "return undef" would yield a one-element list in
    # list context.
    return unless $self->has_stats;

    $self->{void_model}->add_statement(statement(
        $self->dataset_uri,
        $predicate,
        literal($count, undef, $xsd->integer),
    ));
    $progress->update(message => "Adding counts statements");
    return;
}
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
# Adds a void:propertyPartition for every entry of the statistics'
# propertyPartitions hash. Each partition is a blank node carrying
# void:property and void:triples; distinct subject and object counts
# are added when the entry has a true 'countsubjects' value.
#
# Does nothing when no statistics object is present. Returns nothing.
sub _generate_propertypartitions {
    my ($self) = @_;

    # Bare return: "return undef" would yield a one-element list in
    # list context.
    return unless $self->has_stats;

    my $properties = $self->stats->propertyPartitions;
    my $model      = $self->{void_model};

    # Iterate over keys rather than with each(): each() keeps its
    # position inside the hash, so a loop abandoned by an exception
    # would make the next run start in the middle.
    for my $uri (keys %{$properties}) {
        my $counts = $properties->{$uri};
        my $blank  = blank();

        $model->add_statement(statement(
            $self->dataset_uri,
            $void->propertyPartition,
            $blank,
        ));
        $model->add_statement(statement(
            $blank,
            $void->property,
            iri($uri),
        ));
        $model->add_statement(statement(
            $blank,
            $void->triples,
            literal($counts->{'triples'}, undef, $xsd->integer),
        ));

        # OK, so sometimes, one has to balance elegance and performance...
        if ($counts->{'countsubjects'}) {
            $model->add_statement(statement(
                $blank,
                $void->distinctSubjects,
                literal(scalar keys %{$counts->{'countsubjects'}}, undef, $xsd->integer),
            ));
            $model->add_statement(statement(
                $blank,
                $void->distinctObjects,
                literal(scalar keys %{$counts->{'countobjects'}}, undef, $xsd->integer),
            ));
        }

        $progress->update(message => "Adding property partition statements");
    }
    return;
}
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
# Adds a void:classPartition for every entry of the statistics'
# classPartitions hash. Each partition is a blank node carrying
# void:class (the hash key as an IRI) and void:triples (the hash value
# as an xsd:integer literal).
#
# Does nothing when no statistics object is present. Returns nothing.
sub _generate_classpartitions {
    my ($self) = @_;

    # Bare return: "return undef" would yield a one-element list in
    # list context.
    return unless $self->has_stats;

    my $classes = $self->stats->classPartitions;
    my $model   = $self->{void_model};

    # Iterate over keys rather than with each(): each() keeps its
    # position inside the hash, so a loop abandoned by an exception
    # would make the next run start in the middle.
    for my $uri (keys %{$classes}) {
        my $count = $classes->{$uri};
        my $blank = blank();

        $model->add_statement(statement(
            $self->dataset_uri,
            $void->classPartition,
            $blank,
        ));
        $model->add_statement(statement(
            $blank,
            $void->class,
            iri($uri),
        ));
        $model->add_statement(statement(
            $blank,
            $void->triples,
            literal($count, undef, $xsd->integer),
        ));

        $progress->update(message => "Adding class partition statements");
    }
    return;
}
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
# Registers the vocabularies that dominate the dataset and emits one
# void:vocabulary statement for every vocabulary known to the generator.
sub _generate_most_common_vocabs {
    my $self = shift;

    # A vocabulary qualifies when its count in the statistics exceeds
    # one percent of the number of triples in the input model.
    my $cutoff   = $self->inmodel->size / 100;
    my %uses_of  = %{ $self->stats->vocabularies };
    my @frequent = grep { $uses_of{$_} > $cutoff } keys %uses_of;
    $self->add_vocabularies(@frequent);

    my $model = $self->{void_model};
    for my $vocab ($self->all_vocabularies) {
        my $triple = statement(
            $self->dataset_uri,
            $void->vocabulary,
            iri($vocab),
        );
        $model->add_statement($triple);
    }

    $progress->update(message => "Adding vocabulary statements");
}
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
=head1 AUTHORS |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
Kjetil Kjernsmo C<< <kjetilk@cpan.org> >> |
428
|
|
|
|
|
|
|
Toby Inkster C<< <tobyink@cpan.org> >> |
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
=head1 TODO |
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
=over |
433
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
=item * URI regexps support. |
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
=item * Technical features (esp. serializations). |
437
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
=item * Example resources and root resources. |
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
=item * Data dumps. |
441
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
=item * Subject classification. |
443
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
=item * Method to disable heuristics. |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
=item * More heuristics. |
447
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
=item * Linkset descriptions. |
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
=item * Set URI space on partitions. |
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
=item * Use L<CHI> to cache? |
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
=item * Use schema introspection to generate property attributes with L<MooseX::Semantics>. |
455
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
|
458
|
|
|
|
|
|
|
=back |
459
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
=head1 BUGS |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
Please report any bugs you find to L<https://github.com/kjetilk/RDF-Generator-Void/issues> |
464
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
Note that any claim that this module will generate a void in |
466
|
|
|
|
|
|
|
spacetime, a wormhole, black hole, or funny philosophy is totally |
467
|
|
|
|
|
|
|
bogus and without any scientific merit whatsoever. The lead author has |
468
|
|
|
|
|
|
|
taken elaborate precautions to avoid any such issues, and expects |
469
|
|
|
|
|
|
|
everyone to take his word for it. Oh, BTW, should it just happen |
470
|
|
|
|
|
|
|
anyway, it won't L<hurt much|http://news.sciencemag.org/sciencenow/2012/03/scienceshot-one-black-hole-wont-.html>. |
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
=head1 SUPPORT |
474
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
You can find documentation for this module with the perldoc command. |
476
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
perldoc RDF::Generator::Void |
478
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
The Perl and RDF community website is at L<http://www.perlrdf.org/> |
480
|
|
|
|
|
|
|
where you can also find a mailing list to direct questions to. |
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
You can also look for information at: |
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
=over 4 |
485
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
=item * AnnoCPAN: Annotated CPAN documentation |
487
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
L<http://annocpan.org/dist/RDF-Generator-Void> |
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
=item * CPAN Ratings |
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
L<http://cpanratings.perl.org/d/RDF-Generator-Void> |
493
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
=item * MetaCPAN |
495
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
L<https://metacpan.org/module/RDF::Generator::Void> |
497
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
=back |
499
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
=head1 ACKNOWLEDGEMENTS |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
Many thanks to Konstantin Baierer for help with L<RDF::Generator::Void::Meta::Attribute::ObjectList>. |
504
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
=head1 LICENSE AND COPYRIGHT |
506
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
Copyright 2012 Toby Inkster. |
508
|
|
|
|
|
|
|
Copyright 2012-2013 Kjetil Kjernsmo. |
509
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it |
511
|
|
|
|
|
|
|
under the terms of either: the GNU General Public License as published |
512
|
|
|
|
|
|
|
by the Free Software Foundation; or the Artistic License. |
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
See http://dev.perl.org/licenses/ for more information. |
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
=cut |
518
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
1; # End of RDF::Generator::Void |