line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Catmandu::Importer::RDF; |
2
|
|
|
|
|
|
|
|
3
|
15
|
|
|
15
|
|
2089626
|
use open ':std', ':encoding(utf8)'; |
|
15
|
|
|
|
|
9591
|
|
|
15
|
|
|
|
|
94
|
|
4
|
15
|
|
|
15
|
|
128702
|
use namespace::clean; |
|
15
|
|
|
|
|
44
|
|
|
15
|
|
|
|
|
142
|
|
5
|
15
|
|
|
15
|
|
2879
|
use Catmandu::Sane; |
|
15
|
|
|
|
|
30
|
|
|
15
|
|
|
|
|
117
|
|
6
|
15
|
|
|
15
|
|
3097
|
use Moo; |
|
15
|
|
|
|
|
30
|
|
|
15
|
|
|
|
|
106
|
|
7
|
15
|
|
|
15
|
|
19332
|
use RDF::Trine::Parser; |
|
15
|
|
|
|
|
10261113
|
|
|
15
|
|
|
|
|
446
|
|
8
|
15
|
|
|
15
|
|
108
|
use RDF::Trine::Model; |
|
15
|
|
|
|
|
108
|
|
|
15
|
|
|
|
|
291
|
|
9
|
15
|
|
|
15
|
|
82
|
use RDF::Trine::Store::SPARQL; |
|
15
|
|
|
|
|
28
|
|
|
15
|
|
|
|
|
328
|
|
10
|
15
|
|
|
15
|
|
4583
|
use RDF::Trine::Store::LDF; |
|
15
|
|
|
|
|
9419445
|
|
|
15
|
|
|
|
|
570
|
|
11
|
15
|
|
|
15
|
|
130
|
use RDF::Trine::Store; |
|
15
|
|
|
|
|
36
|
|
|
15
|
|
|
|
|
299
|
|
12
|
15
|
|
|
15
|
|
114
|
use RDF::Query; |
|
15
|
|
|
|
|
45
|
|
|
15
|
|
|
|
|
358
|
|
13
|
15
|
|
|
15
|
|
74
|
use RDF::LDF; |
|
15
|
|
|
|
|
34
|
|
|
15
|
|
|
|
|
250
|
|
14
|
15
|
|
|
15
|
|
4890
|
use RDF::aREF; |
|
15
|
|
|
|
|
109881
|
|
|
15
|
|
|
|
|
985
|
|
15
|
15
|
|
|
15
|
|
109
|
use RDF::aREF::Encoder; |
|
15
|
|
|
|
|
37
|
|
|
15
|
|
|
|
|
256
|
|
16
|
15
|
|
|
15
|
|
73
|
use RDF::NS; |
|
15
|
|
|
|
|
41
|
|
|
15
|
|
|
|
|
227
|
|
17
|
15
|
|
|
15
|
|
4372
|
use IO::Pipe; |
|
15
|
|
|
|
|
14272
|
|
|
15
|
|
|
|
|
427
|
|
18
|
15
|
|
|
15
|
|
93
|
use JSON; |
|
15
|
|
|
|
|
32
|
|
|
15
|
|
|
|
|
156
|
|
19
|
15
|
|
|
15
|
|
5382
|
use LWP::UserAgent::CHICaching; |
|
15
|
|
|
|
|
1531050
|
|
|
15
|
|
|
|
|
30078
|
|
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
our $VERSION = '0.32'; |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
with 'Catmandu::RDF'; |
24
|
|
|
|
|
|
|
with 'Catmandu::Importer'; |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
has url => ( |
27
|
|
|
|
|
|
|
is => 'ro' |
28
|
|
|
|
|
|
|
); |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
has base => ( |
31
|
|
|
|
|
|
|
is => 'ro', |
32
|
|
|
|
|
|
|
lazy => 1, |
33
|
|
|
|
|
|
|
builder => sub { |
34
|
8
|
50
|
|
8
|
|
541
|
defined $_[0]->file ? "file://".$_[0]->file : "http://example.org/"; |
35
|
|
|
|
|
|
|
} |
36
|
|
|
|
|
|
|
); |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
has encoder => ( |
39
|
|
|
|
|
|
|
is => 'ro', |
40
|
|
|
|
|
|
|
lazy => 1, |
41
|
|
|
|
|
|
|
builder => sub { |
42
|
30
|
|
|
30
|
|
920
|
my $ns = $_[0]->ns; |
43
|
30
|
100
|
50
|
|
|
1749
|
RDF::aREF::Encoder->new( |
44
|
|
|
|
|
|
|
ns => (($ns // 1) ? $ns : { }), |
45
|
|
|
|
|
|
|
subject_map => !$_[0]->predicate_map, |
46
|
|
|
|
|
|
|
); |
47
|
|
|
|
|
|
|
} |
48
|
|
|
|
|
|
|
); |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
has sparql => ( |
51
|
|
|
|
|
|
|
is => 'ro', |
52
|
|
|
|
|
|
|
lazy => 1, |
53
|
|
|
|
|
|
|
trigger => sub { |
54
|
|
|
|
|
|
|
my ($sparql, $ns) = ($_[1], $_[0]->ns); |
55
|
|
|
|
|
|
|
$sparql = do { local (@ARGV,$/) = $sparql; <> } if $sparql =~ /^\S+$/ && -r $sparql; |
56
|
|
|
|
|
|
|
my %prefix; |
57
|
|
|
|
|
|
|
# guess requires prefixes (don't override existing). Don't mind false positives |
58
|
|
|
|
|
|
|
$prefix{$_} = 1 for ($sparql =~ /\s([a-z][a-z0-0_-]*):/mig); |
59
|
|
|
|
|
|
|
delete $prefix{$_} for ($sparql =~ /PREFIX\s+([^:]+):/mg); |
60
|
|
|
|
|
|
|
$_[0]->{sparql} = join "\n", (map { $ns->SPARQL($_) } keys %prefix), $sparql; |
61
|
|
|
|
|
|
|
} |
62
|
|
|
|
|
|
|
); |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
has sparql_result => ( |
65
|
|
|
|
|
|
|
is => 'ro', |
66
|
|
|
|
|
|
|
default => sub { 'simple' } |
67
|
|
|
|
|
|
|
); |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
has predicate_map => ( |
70
|
|
|
|
|
|
|
is => 'ro', |
71
|
|
|
|
|
|
|
); |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
has triples => ( |
74
|
|
|
|
|
|
|
is => 'ro', |
75
|
|
|
|
|
|
|
); |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
has cache => ( |
78
|
|
|
|
|
|
|
is => 'ro', |
79
|
|
|
|
|
|
|
default => sub { 0 } |
80
|
|
|
|
|
|
|
); |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
has cache_options => ( |
83
|
|
|
|
|
|
|
is => 'ro', |
84
|
|
|
|
|
|
|
default => sub { +{ |
85
|
|
|
|
|
|
|
driver => 'Memory', |
86
|
|
|
|
|
|
|
global => 1 , |
87
|
|
|
|
|
|
|
max_size => 1024*1024 |
88
|
|
|
|
|
|
|
} } |
89
|
|
|
|
|
|
|
); |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
has speed => ( |
92
|
|
|
|
|
|
|
is => 'ro', |
93
|
|
|
|
|
|
|
); |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
sub BUILD { |
96
|
52
|
|
|
52
|
0
|
1040
|
my ($self) = @_; |
97
|
|
|
|
|
|
|
|
98
|
52
|
50
|
|
|
|
1179
|
if ($self->cache) { |
99
|
0
|
|
0
|
|
|
0
|
my $options = $self->cache_options // {}; |
100
|
0
|
|
|
|
|
0
|
my $cache = CHI->new( %$options ); |
101
|
0
|
|
|
|
|
0
|
my $ua = LWP::UserAgent::CHICaching->new(cache => $cache); |
102
|
0
|
|
|
|
|
0
|
RDF::Trine->default_useragent($ua); |
103
|
|
|
|
|
|
|
} |
104
|
|
|
|
|
|
|
} |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
sub generator { |
107
|
|
|
|
|
|
|
my ($self) = @_; |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
if ($self->sparql) { |
110
|
|
|
|
|
|
|
return $self->sparql_generator; |
111
|
|
|
|
|
|
|
} else { |
112
|
|
|
|
|
|
|
return $self->rdf_generator; |
113
|
|
|
|
|
|
|
} |
114
|
|
|
|
|
|
|
} |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
sub sparql_generator { |
117
|
3
|
|
|
3
|
0
|
10
|
my ($self) = @_; |
118
|
|
|
|
|
|
|
|
119
|
3
|
50
|
|
|
|
22
|
warn "--triples not active for sparql queries" if ($self->triples); |
120
|
3
|
50
|
|
|
|
11
|
warn "--predicate_map not active for sparql queries" if ($self->predicate_map); |
121
|
|
|
|
|
|
|
|
122
|
3
|
|
|
|
|
47
|
my $encoder = RDF::aREF::Encoder->new( ns => {} ); # never return qnames |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
sub { |
125
|
3
|
|
|
3
|
|
46
|
state $stream = $self->_sparql_stream; |
126
|
3
|
50
|
33
|
|
|
2713
|
if (defined($stream) && defined(my $row = $stream->next)) { |
127
|
3
|
50
|
66
|
|
|
703709
|
if (ref $row eq 'RDF::Query::VariableBindings' || ref $row eq 'RDF::Trine::VariableBindings') { |
128
|
3
|
|
|
|
|
11
|
my $ref = {}; |
129
|
3
|
|
|
|
|
50
|
for (keys %$row) { |
130
|
4
|
|
|
|
|
58
|
my $val = $row->{$_}; |
131
|
|
|
|
|
|
|
$ref->{$_} = $self->sparql_result eq 'aref' |
132
|
4
|
50
|
|
|
|
33
|
? $encoder->object($val) : do { # TODO: clean up |
133
|
4
|
100
|
|
|
|
38
|
if ( $val->is_resource ) { |
|
|
50
|
|
|
|
|
|
134
|
2
|
|
|
|
|
26
|
$val->uri_value; |
135
|
|
|
|
|
|
|
} elsif ( $val->is_literal) { |
136
|
2
|
|
|
|
|
56
|
$val->literal_value; |
137
|
|
|
|
|
|
|
} else { |
138
|
0
|
|
|
|
|
0
|
$val->as_string |
139
|
|
|
|
|
|
|
} |
140
|
|
|
|
|
|
|
}; |
141
|
|
|
|
|
|
|
} |
142
|
3
|
|
|
|
|
91
|
return $ref; |
143
|
|
|
|
|
|
|
} else { |
144
|
0
|
|
|
|
|
0
|
die "Expected a RDF::Query::VariableBindings or RDF::Trine::VariableBindings but got a " . ref($row); |
145
|
|
|
|
|
|
|
} |
146
|
|
|
|
|
|
|
} else { |
147
|
0
|
|
|
|
|
0
|
return ($stream = undef); |
148
|
|
|
|
|
|
|
} |
149
|
3
|
|
|
|
|
271
|
}; |
150
|
|
|
|
|
|
|
} |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
sub rdf_generator { |
153
|
41
|
|
|
41
|
0
|
147
|
my ($self) = @_; |
154
|
|
|
|
|
|
|
sub { |
155
|
49
|
|
|
49
|
|
679
|
state $stream = $self->_hashref_stream; |
156
|
38
|
50
|
|
|
|
415
|
return unless $stream; |
157
|
|
|
|
|
|
|
|
158
|
38
|
|
|
|
|
352
|
my $aref = { }; |
159
|
|
|
|
|
|
|
|
160
|
38
|
100
|
|
|
|
436
|
if ($self->triples) { |
161
|
10
|
100
|
|
|
|
80
|
if (my $hashref = $stream->()) { |
162
|
8
|
|
|
|
|
350
|
$self->encoder->add_hashref($hashref, $aref); |
163
|
|
|
|
|
|
|
} |
164
|
|
|
|
|
|
|
else { |
165
|
2
|
|
|
|
|
194
|
return ($stream = undef); |
166
|
|
|
|
|
|
|
} |
167
|
|
|
|
|
|
|
} |
168
|
|
|
|
|
|
|
else { |
169
|
|
|
|
|
|
|
# TODO: include namespace mappings if requested |
170
|
28
|
|
|
|
|
304
|
while (my $hashref = $stream->()) { |
171
|
103
|
|
|
|
|
4265
|
$self->encoder->add_hashref( |
172
|
|
|
|
|
|
|
$hashref, |
173
|
|
|
|
|
|
|
$aref |
174
|
|
|
|
|
|
|
); |
175
|
|
|
|
|
|
|
} |
176
|
|
|
|
|
|
|
|
177
|
28
|
100
|
|
|
|
574
|
if ($self->url) { |
178
|
6
|
|
|
|
|
150
|
$aref->{_url} = $self->url; |
179
|
|
|
|
|
|
|
} |
180
|
|
|
|
|
|
|
|
181
|
28
|
|
|
|
|
2031
|
$stream = undef; |
182
|
|
|
|
|
|
|
} |
183
|
|
|
|
|
|
|
|
184
|
36
|
100
|
|
|
|
19294
|
if ($self->url) { |
185
|
|
|
|
|
|
|
# RDF::Trine::Parser parses data from URL to UTF-8 |
186
|
|
|
|
|
|
|
# but we want internal character sequences |
187
|
6
|
|
|
|
|
61
|
_utf8_decode($aref); |
188
|
|
|
|
|
|
|
} |
189
|
|
|
|
|
|
|
|
190
|
36
|
|
|
|
|
1206
|
return $aref; |
191
|
41
|
|
|
|
|
491
|
}; |
192
|
|
|
|
|
|
|
} |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
sub _utf8_decode { |
195
|
18
|
50
|
|
18
|
|
108
|
if (ref $_[0] eq 'HASH') { |
196
|
|
|
|
|
|
|
# FIXME: UTF-8 in property values |
197
|
18
|
|
|
|
|
40
|
foreach (values %{$_[0]}) { |
|
18
|
|
|
|
|
140
|
|
198
|
42
|
100
|
|
|
|
237
|
ref($_) ? _utf8_decode($_) : utf8::decode($_); |
199
|
|
|
|
|
|
|
} |
200
|
|
|
|
|
|
|
} else { |
201
|
0
|
|
|
|
|
0
|
foreach (@{$_[0]}) { |
|
0
|
|
|
|
|
0
|
|
202
|
0
|
0
|
|
|
|
0
|
ref($_) ? _utf8_decode($_) : utf8::decode($_); |
203
|
|
|
|
|
|
|
} |
204
|
|
|
|
|
|
|
} |
205
|
|
|
|
|
|
|
} |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
sub _sparql_stream { |
208
|
3
|
|
|
3
|
|
10
|
my ($self) = @_; |
209
|
|
|
|
|
|
|
|
210
|
3
|
50
|
|
|
|
15
|
die "need an url" unless $self->url; |
211
|
|
|
|
|
|
|
|
212
|
3
|
|
|
|
|
68
|
$self->log->info("parsing: " . $self->sparql); |
213
|
|
|
|
|
|
|
|
214
|
3
|
|
|
|
|
343
|
my $store; |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
# Check if this server is an LDF server |
217
|
3
|
|
|
|
|
61
|
my $ldf_client = RDF::LDF->new(url => $self->url); |
218
|
|
|
|
|
|
|
|
219
|
3
|
100
|
|
|
|
1366
|
if ($ldf_client->is_fragment_server) { |
220
|
2
|
|
|
|
|
604658
|
$store = RDF::Trine::Store->new_with_config({ |
221
|
|
|
|
|
|
|
storetype => 'LDF', |
222
|
|
|
|
|
|
|
url => $self->url |
223
|
|
|
|
|
|
|
}); |
224
|
|
|
|
|
|
|
} |
225
|
|
|
|
|
|
|
else { |
226
|
1
|
|
|
|
|
8942
|
$store = RDF::Trine::Store->new_with_config({ |
227
|
|
|
|
|
|
|
storetype => 'SPARQL', |
228
|
|
|
|
|
|
|
url => $self->url |
229
|
|
|
|
|
|
|
}); |
230
|
|
|
|
|
|
|
} |
231
|
|
|
|
|
|
|
|
232
|
3
|
50
|
|
|
|
598836
|
unless ($store) { |
233
|
0
|
|
|
|
|
0
|
$self->log->error("failed to connect to " . $self->url); |
234
|
0
|
|
|
|
|
0
|
return; |
235
|
|
|
|
|
|
|
} |
236
|
|
|
|
|
|
|
|
237
|
3
|
|
|
|
|
38
|
my $model = RDF::Trine::Model->new($store); |
238
|
|
|
|
|
|
|
|
239
|
3
|
|
|
|
|
127
|
my $rdf_query = RDF::Query->new($self->sparql); |
240
|
|
|
|
|
|
|
|
241
|
3
|
50
|
|
|
|
16588
|
unless ($rdf_query) { |
242
|
0
|
|
|
|
|
0
|
$self->log->error("failed to parse " . $self->sparql); |
243
|
0
|
|
|
|
|
0
|
return; |
244
|
|
|
|
|
|
|
} |
245
|
|
|
|
|
|
|
|
246
|
3
|
|
|
|
|
22
|
my $iterator = $rdf_query->execute($model); |
247
|
|
|
|
|
|
|
|
248
|
3
|
50
|
|
|
|
44406
|
unless ($iterator) { |
249
|
0
|
|
|
|
|
0
|
$self->log->error("failed to execute " . $self->sparql . " at " . $self->url); |
250
|
0
|
|
|
|
|
0
|
return; |
251
|
|
|
|
|
|
|
} |
252
|
|
|
|
|
|
|
} |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
sub _hashref_stream { |
255
|
41
|
|
|
41
|
|
112
|
my ($self) = @_; |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
# Create a pipe stream to convert a callback handler into an iterator |
258
|
41
|
|
|
|
|
445
|
my $pipe = IO::Pipe->new(); |
259
|
|
|
|
|
|
|
|
260
|
41
|
100
|
|
|
|
143716
|
if (my $pid = fork()) { |
261
|
|
|
|
|
|
|
# parent |
262
|
30
|
|
|
|
|
1802
|
$pipe->reader(); |
263
|
|
|
|
|
|
|
|
264
|
30
|
|
|
|
|
7396
|
binmode($pipe,':encoding(UTF-8)'); |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
return sub { |
267
|
141
|
|
|
141
|
|
1577066
|
state $line = <$pipe>; |
268
|
|
|
|
|
|
|
|
269
|
141
|
100
|
|
|
|
4427
|
return decode_json($line) if defined($line); |
270
|
|
|
|
|
|
|
|
271
|
30
|
|
|
|
|
22072373
|
waitpid($pid,0); |
272
|
|
|
|
|
|
|
|
273
|
30
|
|
|
|
|
1045
|
return undef; |
274
|
30
|
|
|
|
|
11196
|
}; |
275
|
|
|
|
|
|
|
} |
276
|
|
|
|
|
|
|
else { |
277
|
|
|
|
|
|
|
# child |
278
|
11
|
|
|
|
|
1322
|
$pipe->writer(); |
279
|
|
|
|
|
|
|
|
280
|
11
|
|
|
|
|
2864
|
binmode($pipe,':encoding(UTF-8)'); |
281
|
|
|
|
|
|
|
|
282
|
11
|
100
|
|
|
|
4495
|
my $parser = $self->type |
283
|
|
|
|
|
|
|
? RDF::Trine::Parser->new( $self->type ) : 'RDF::Trine::Parser'; |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
my $handler = sub { |
286
|
41
|
|
|
41
|
|
366562
|
my $triple = shift; |
287
|
41
|
|
|
|
|
162
|
state $start = time; |
288
|
41
|
|
|
|
|
130
|
state $count = 0; |
289
|
|
|
|
|
|
|
|
290
|
41
|
100
|
|
|
|
361
|
my $subject = $triple->subject->is_blank ? |
291
|
|
|
|
|
|
|
'_:' . $triple->subject->blank_identifier : |
292
|
|
|
|
|
|
|
$triple->subject->uri_value; |
293
|
41
|
50
|
|
|
|
2455
|
my $predicate = $triple->predicate->is_blank ? |
294
|
|
|
|
|
|
|
'_:' . $triple->predicate->blank_identifier : |
295
|
|
|
|
|
|
|
$triple->predicate->value; |
296
|
41
|
100
|
|
|
|
1428
|
my $value = $triple->object->is_literal ? |
|
|
100
|
|
|
|
|
|
297
|
|
|
|
|
|
|
$triple->object->literal_value : |
298
|
|
|
|
|
|
|
$triple->object->is_blank ? |
299
|
|
|
|
|
|
|
'_:' . $triple->object->blank_identifier : |
300
|
|
|
|
|
|
|
$triple->object->uri_value; |
301
|
41
|
|
|
|
|
2506
|
my $type = lc $triple->object->type; |
302
|
41
|
100
|
|
|
|
824
|
$type = 'bnode' if $type eq 'blank'; |
303
|
41
|
100
|
|
|
|
200
|
my $lang = $triple->object->is_literal ? $triple->object->literal_value_language : undef; |
304
|
41
|
100
|
|
|
|
1055
|
my $datatype = $triple->object->is_literal ? $triple->object->literal_datatype : undef; |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
# Create the RDF::Trine type RDF/JSON RDF::aREF can parse |
307
|
41
|
|
|
|
|
1033
|
my $hashref = {}; |
308
|
|
|
|
|
|
|
|
309
|
41
|
|
|
|
|
531
|
$hashref->{$subject}->{$predicate}->[0]->{type} = $type; |
310
|
41
|
100
|
|
|
|
328
|
$hashref->{$subject}->{$predicate}->[0]->{datatype} = $datatype if $datatype; |
311
|
41
|
100
|
|
|
|
254
|
$hashref->{$subject}->{$predicate}->[0]->{lang} = $lang if $lang; |
312
|
41
|
|
|
|
|
249
|
$hashref->{$subject}->{$predicate}->[0]->{value} = $value; |
313
|
|
|
|
|
|
|
|
314
|
41
|
|
|
|
|
1182
|
print $pipe encode_json($hashref) , "\n"; |
315
|
|
|
|
|
|
|
|
316
|
41
|
|
|
|
|
158
|
$count++; |
317
|
|
|
|
|
|
|
|
318
|
41
|
0
|
33
|
|
|
725
|
if ($self->speed && ($count % 100 == 0) && (my $elapsed = time - $start) ) { |
|
|
|
33
|
|
|
|
|
319
|
0
|
|
|
|
|
0
|
printf STDERR "triples %9d (%d/sec)\n" , $count , $count/$elapsed; |
320
|
|
|
|
|
|
|
} |
321
|
11
|
|
|
|
|
26719
|
}; |
322
|
|
|
|
|
|
|
|
323
|
11
|
100
|
|
|
|
321
|
if ($self->url) { |
324
|
3
|
|
|
|
|
170
|
$parser->parse_url( $self->url, $handler); |
325
|
|
|
|
|
|
|
} |
326
|
|
|
|
|
|
|
else { |
327
|
8
|
|
50
|
|
|
1683
|
my $from_scalar = (ref $self->file // '') eq 'SCALAR'; |
328
|
|
|
|
|
|
|
|
329
|
8
|
50
|
66
|
|
|
1883
|
if (!$self->type and $self->file and !$from_scalar) { |
|
|
|
66
|
|
|
|
|
330
|
6
|
|
|
|
|
448
|
$parser = $parser->guess_parser_by_filename($self->file)->new; |
331
|
|
|
|
|
|
|
} |
332
|
|
|
|
|
|
|
|
333
|
8
|
100
|
|
|
|
27145
|
if ($from_scalar) { |
334
|
2
|
|
|
|
|
131
|
$parser->parse( $self->base, ${$self->file}, $handler ); |
|
2
|
|
|
|
|
278
|
|
335
|
|
|
|
|
|
|
} |
336
|
|
|
|
|
|
|
else { |
337
|
6
|
|
33
|
|
|
323
|
$parser->parse_file( $self->base, $self->file // $self->fh, $handler ); |
338
|
|
|
|
|
|
|
} |
339
|
|
|
|
|
|
|
} |
340
|
|
|
|
|
|
|
|
341
|
11
|
|
|
|
|
15554
|
exit(0); |
342
|
|
|
|
|
|
|
} |
343
|
|
|
|
|
|
|
} |
344
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
1; |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
__END__ |
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
=head1 NAME |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
Catmandu::Importer::RDF - parse RDF data |
353
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
=head1 SYNOPSIS |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
Command line client C<catmandu>: |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
catmandu convert RDF --url http://d-nb.info/gnd/4151473-7 to YAML |
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
catmandu convert RDF --file rdfdump.ttl to JSON |
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
# Parse the input into on JSON document per triplet. This is the |
363
|
|
|
|
|
|
|
# most memory efficient (and fastest) way to parse RDF input. |
364
|
|
|
|
|
|
|
catmandu convert RDF --triples 1 --file rdfdump.ttl to JSON |
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
# Transform back into NTriples (conversions to and from triples is the |
367
|
|
|
|
|
|
|
# most efficient way to process RDF) |
368
|
|
|
|
|
|
|
catmandu convert RDF --triples 1 --file rdfdump.ttl to RDF --type NTriples |
369
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
# Query a SPARQL endpoint |
371
|
|
|
|
|
|
|
catmandu convert RDF --url http://dbpedia.org/sparql |
372
|
|
|
|
|
|
|
--sparql "SELECT ?film WHERE { ?film dct:subject <http://dbpedia.org/resource/Category:French_films> }" |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
catmandu convert RDF --url http://example.org/sparql --sparql query.rq |
375
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
# Query a Linked Data Fragment endpoint |
377
|
|
|
|
|
|
|
catmandu convert RDF --url http://fragments.dbpedia.org/2014/en |
378
|
|
|
|
|
|
|
--sparql "SELECT ?film WHERE { ?film dct:subject <http://dbpedia.org/resource/Category:French_films> }" |
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
In Perl code: |
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
use Catmandu::Importer::RDF; |
383
|
|
|
|
|
|
|
my $url = "http://dx.doi.org/10.2474/trol.7.147"; |
384
|
|
|
|
|
|
|
my $rdf = Catmandu::Importer::RDF->new( url => $url )->first; |
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
=head1 DESCRIPTION |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
This L<Catmandu::Importer> can be use to import RDF data from URLs, files or |
389
|
|
|
|
|
|
|
input streams, SPARQL endpoints, and Linked Data Fragment endpoints. |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
By default an RDF graph is imported as single item in aREF format (see |
392
|
|
|
|
|
|
|
L<RDF::aREF>). |
393
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
=head1 CONFIGURATION |
395
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
=over |
397
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
=item url |
399
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
URL to retrieve RDF from. |
401
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
=item type |
403
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
RDF serialization type (e.g. C<ttl> for RDF/Turtle). |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
=item base |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
Base URL. By default derived from the URL or file name. |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
=item ns |
411
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
Use default namespace prefixes as provided by L<RDF::NS> to abbreviate |
413
|
|
|
|
|
|
|
predicate and datatype URIs. Set to C<0> to disable abbreviating URIs. |
414
|
|
|
|
|
|
|
Set to a specific date to get stable namespace prefix mappings. |
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
=item triples |
417
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
Import each RDF triple as one aREF subject map (default) or predicate map |
419
|
|
|
|
|
|
|
(option C<predicate_map>), if enabled. This is the most efficient way to |
420
|
|
|
|
|
|
|
process large input files. All the processing can be streamed. |
421
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
=item predicate_map |
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
Import RDF as aREF predicate map, if possible. |
425
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
=item file |
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
=item fh |
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
=item encoding |
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
=item fix |
433
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
Default configuration options of L<Catmandu::Importer>. |
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
=item sparql |
437
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
The SPARQL query to be executed on the URL endpoint (currectly only SELECT is |
439
|
|
|
|
|
|
|
supported). The query can be supplied as string or as filename. The importer |
440
|
|
|
|
|
|
|
tries to automatically add missing PREFIX statements from the default namespace |
441
|
|
|
|
|
|
|
prefixes. |
442
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
=item sparql_result |
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
Encoding of SPARQL result values. With C<aref>, query results are encoded in |
446
|
|
|
|
|
|
|
aREF format, with URIs in C<E<lt>> and C<E<gt>> (no qNames) and literal nodes |
447
|
|
|
|
|
|
|
appended by C<@> and optional language code. By default (value C<simple>), all |
448
|
|
|
|
|
|
|
RDF nodes are simplfied to their literal form. |
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
=item cache |
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
Set to a true value to cache repeated URL responses in a L<CHI> based backend. |
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
=item cache_options |
455
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
Provide the L<CHI> based options for caching result sets. By default a memory store of |
457
|
|
|
|
|
|
|
1MB size is used. This is equal to: |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
Catamandu::Importer::RDF->new( ..., |
460
|
|
|
|
|
|
|
cache => 1, |
461
|
|
|
|
|
|
|
cache_options => { |
462
|
|
|
|
|
|
|
driver => 'Memory', |
463
|
|
|
|
|
|
|
global => 1, |
464
|
|
|
|
|
|
|
max_size => 1024*1024 |
465
|
|
|
|
|
|
|
}); |
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
=item speed |
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
If set to a true value, then write RDF file processing speed on the STDERR as |
470
|
|
|
|
|
|
|
number of triples parsed per second. |
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
=back |
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
=head1 METHODS |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
See L<Catmandu::Importer>. |
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
=head1 SEE ALSO |
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
L<RDF::Trine::Store>, L<RDF::Trine::Parser> |
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
=encoding utf8 |
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
=cut |