line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# RDF::Trine::Parser |
2
|
|
|
|
|
|
|
# ----------------------------------------------------------------------------- |
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
=head1 NAME |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
RDF::Trine::Parser - RDF Parser class |
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
=head1 VERSION |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
This document describes RDF::Trine::Parser version 1.018 |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
=head1 SYNOPSIS |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
use RDF::Trine::Parser; |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
RDF::Trine::Parser->parse_url_into_model( $url, $model ); |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
my $parser = RDF::Trine::Parser->new( 'turtle' ); |
19
|
|
|
|
|
|
|
$parser->parse_into_model( $base_uri, $rdf, $model ); |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
$parser->parse_file_into_model( $base_uri, 'data.ttl', $model ); |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
=head1 DESCRIPTION |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
RDF::Trine::Parser is a base class for RDF parsers. It may be used as a factory |
26
|
|
|
|
|
|
|
class for constructing parser objects by name or media type with the C<< new >> |
27
|
|
|
|
|
|
|
method, or used to abstract away the logic of choosing a parser based on the |
28
|
|
|
|
|
|
|
media type of RDF content retrieved over the network with the |
29
|
|
|
|
|
|
|
C<< parse_url_into_model >> method. |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=head1 METHODS |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
=over 4 |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
=cut |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
package RDF::Trine::Parser; |
38
|
|
|
|
|
|
|
|
39
|
68
|
|
|
68
|
|
488
|
use strict; |
|
68
|
|
|
|
|
168
|
|
|
68
|
|
|
|
|
1936
|
|
40
|
68
|
|
|
68
|
|
351
|
use warnings; |
|
68
|
|
|
|
|
151
|
|
|
68
|
|
|
|
|
1927
|
|
41
|
68
|
|
|
68
|
|
368
|
no warnings 'redefine'; |
|
68
|
|
|
|
|
146
|
|
|
68
|
|
|
|
|
1881
|
|
42
|
68
|
|
|
68
|
|
361
|
use Data::Dumper; |
|
68
|
|
|
|
|
140
|
|
|
68
|
|
|
|
|
3381
|
|
43
|
68
|
|
|
68
|
|
403
|
use Encode qw(decode); |
|
68
|
|
|
|
|
176
|
|
|
68
|
|
|
|
|
5269
|
|
44
|
68
|
|
|
68
|
|
29213
|
use LWP::MediaTypes; |
|
68
|
|
|
|
|
1003176
|
|
|
68
|
|
|
|
|
6536
|
|
45
|
68
|
|
|
68
|
|
695
|
use Module::Load::Conditional qw[can_load]; |
|
68
|
|
|
|
|
165
|
|
|
68
|
|
|
|
|
7327
|
|
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
our ($VERSION); |
48
|
|
|
|
|
|
|
our %file_extensions; |
49
|
|
|
|
|
|
|
our %parser_names; |
50
|
|
|
|
|
|
|
our %canonical_media_types; |
51
|
|
|
|
|
|
|
our %media_types; |
52
|
|
|
|
|
|
|
our %format_uris; |
53
|
|
|
|
|
|
|
our %encodings; |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
BEGIN { |
56
|
68
|
|
|
68
|
|
241
|
$VERSION = '1.018'; |
57
|
68
|
|
|
|
|
478
|
can_load( modules => { |
58
|
|
|
|
|
|
|
'Data::UUID' => undef, |
59
|
|
|
|
|
|
|
'UUID::Tiny' => undef, |
60
|
|
|
|
|
|
|
} ); |
61
|
|
|
|
|
|
|
} |
62
|
|
|
|
|
|
|
|
63
|
68
|
|
|
68
|
|
116608
|
use Scalar::Util qw(blessed); |
|
68
|
|
|
|
|
160
|
|
|
68
|
|
|
|
|
3316
|
|
64
|
|
|
|
|
|
|
|
65
|
68
|
|
|
68
|
|
394
|
use RDF::Trine::Error qw(:try); |
|
68
|
|
|
|
|
162
|
|
|
68
|
|
|
|
|
512
|
|
66
|
68
|
|
|
68
|
|
36609
|
use RDF::Trine::Parser::NTriples; |
|
68
|
|
|
|
|
203
|
|
|
68
|
|
|
|
|
1858
|
|
67
|
68
|
|
|
68
|
|
24624
|
use RDF::Trine::Parser::NQuads; |
|
68
|
|
|
|
|
190
|
|
|
68
|
|
|
|
|
1799
|
|
68
|
68
|
|
|
68
|
|
26759
|
use RDF::Trine::Parser::RDFXML; |
|
68
|
|
|
|
|
235
|
|
|
68
|
|
|
|
|
2398
|
|
69
|
68
|
|
|
68
|
|
26883
|
use RDF::Trine::Parser::RDFJSON; |
|
68
|
|
|
|
|
189
|
|
|
68
|
|
|
|
|
2317
|
|
70
|
68
|
|
|
68
|
|
24300
|
use RDF::Trine::Parser::RDFa; |
|
68
|
|
|
|
|
186
|
|
|
68
|
|
|
|
|
182590
|
|
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
=item C<< media_type >> |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
Returns the canonical media type associated with this parser. |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
=cut |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
sub media_type { |
79
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
80
|
0
|
|
0
|
|
|
0
|
my $class = ref($self) || $self; |
81
|
0
|
|
|
|
|
0
|
return $canonical_media_types{ $class }; |
82
|
|
|
|
|
|
|
} |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
=item C<< media_types >> |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
Returns the media types associated with this parser. |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
=cut |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
sub media_types { |
91
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
92
|
0
|
|
|
|
|
0
|
my @types; |
93
|
0
|
|
|
|
|
0
|
foreach my $type (keys %media_types) { |
94
|
0
|
|
|
|
|
0
|
my $class = $media_types{ $type }; |
95
|
0
|
0
|
|
|
|
0
|
push(@types, $type) if ($self->isa($class)); |
96
|
|
|
|
|
|
|
} |
97
|
0
|
|
|
|
|
0
|
return @types; |
98
|
|
|
|
|
|
|
} |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
=item C<< parser_by_media_type ( $media_type ) >> |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
Returns the parser class appropriate for parsing content of the specified media type. |
103
|
|
|
|
|
|
|
Returns undef if not appropriate parser is found. |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
=cut |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
sub parser_by_media_type { |
108
|
0
|
|
|
0
|
1
|
0
|
my $proto = shift; |
109
|
0
|
|
|
|
|
0
|
my $type = shift; |
110
|
0
|
|
|
|
|
0
|
my $class = $media_types{ $type }; |
111
|
0
|
|
|
|
|
0
|
return $class; |
112
|
|
|
|
|
|
|
} |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=item C<< guess_parser_by_filename ( $filename ) >> |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
Returns the best-guess parser class to parse a file with the given filename. |
117
|
|
|
|
|
|
|
Defaults to L<RDF::Trine::Parser::RDFXML> if not appropriate parser is found. |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
=cut |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
sub guess_parser_by_filename { |
122
|
2
|
|
|
2
|
1
|
5
|
my $class = shift; |
123
|
2
|
|
|
|
|
4
|
my $file = shift; |
124
|
2
|
50
|
|
|
|
13
|
if ($file =~ m/[.](\w+)$/) { |
125
|
2
|
|
|
|
|
6
|
my $ext = $1; |
126
|
2
|
50
|
|
|
|
11
|
return $file_extensions{ $ext } if exists $file_extensions{ $ext }; |
127
|
|
|
|
|
|
|
} |
128
|
0
|
|
0
|
|
|
0
|
return $class->parser_by_media_type( 'application/rdf+xml' ) || 'RDF::Trine::Parser::RDFXML'; |
129
|
|
|
|
|
|
|
} |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
=item C<< new ( $parser_name, @args ) >> |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
Returns a new RDF::Trine::Parser object for the parser with the specified name |
134
|
|
|
|
|
|
|
(e.g. "rdfxml" or "turtle"). If no parser with the specified name is found, |
135
|
|
|
|
|
|
|
throws a RDF::Trine::Error::ParserError exception. |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
Any C<< @args >> will be passed through to the format-specific parser |
138
|
|
|
|
|
|
|
constructor. |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
If C<< @args >> contains the key-value pair C<< (canonicalize => 1) >>, literal |
141
|
|
|
|
|
|
|
value canonicalization will be attempted during parsing with warnings being |
142
|
|
|
|
|
|
|
emitted for invalid lexical forms for recognized datatypes. |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
=cut |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
sub new { |
147
|
50
|
|
|
50
|
1
|
2354
|
my $class = shift; |
148
|
50
|
|
|
|
|
105
|
my $name = shift; |
149
|
50
|
|
|
|
|
131
|
my $key = lc($name); |
150
|
50
|
|
|
|
|
176
|
$key =~ s/[^a-z]//g; |
151
|
|
|
|
|
|
|
|
152
|
50
|
100
|
|
|
|
299
|
if ($name eq 'guess') { |
|
|
100
|
|
|
|
|
|
153
|
1
|
|
|
|
|
13
|
throw RDF::Trine::Error::UnimplementedError -text => "guess parser heuristics are not implemented yet"; |
154
|
|
|
|
|
|
|
} elsif (my $class = $parser_names{ $key }) { |
155
|
|
|
|
|
|
|
# re-add name for multiformat (e.g. Redland) parsers |
156
|
48
|
|
|
|
|
362
|
return $class->new( name => $key, @_ ); |
157
|
|
|
|
|
|
|
} else { |
158
|
1
|
|
|
|
|
12
|
throw RDF::Trine::Error::ParserError -text => "No parser known named $name"; |
159
|
|
|
|
|
|
|
} |
160
|
|
|
|
|
|
|
} |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
=item C<< parse_url_into_model ( $url, $model [, %args] ) >> |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
Retrieves the content from C<< $url >> and attempts to parse the resulting RDF |
165
|
|
|
|
|
|
|
into C<< $model >> using a parser chosen by the associated content media type. |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
If C<< %args >> contains a C<< 'content_cb' >> key with a CODE reference value, |
168
|
|
|
|
|
|
|
that callback function will be called after a successful response as: |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
$content_cb->( $url, $content, $http_response_object ) |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
If C<< %args >> contains a C<< 'useragent' >> key with a LWP::UserAgent object value, |
173
|
|
|
|
|
|
|
that object is used to retrieve the requested URL without any configuration (such as |
174
|
|
|
|
|
|
|
setting the Accept: header) which would ordinarily take place. Otherwise, the default |
175
|
|
|
|
|
|
|
user agent (L<RDF::Trine/default_useragent>) is cloned and configured to retrieve |
176
|
|
|
|
|
|
|
content that will be acceptable to any available parser. |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
=cut |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
sub parse_url_into_model { |
181
|
1
|
|
|
1
|
1
|
2138
|
my $class = shift; |
182
|
1
|
|
|
|
|
3
|
my $url = shift; |
183
|
1
|
|
|
|
|
2
|
my $model = shift; |
184
|
1
|
|
|
|
|
5
|
my %args = @_; |
185
|
|
|
|
|
|
|
|
186
|
1
|
|
|
|
|
3
|
my $base = $url; |
187
|
1
|
50
|
|
|
|
4
|
if (defined($args{base})) { |
188
|
0
|
|
|
|
|
0
|
$base = $args{base}; |
189
|
|
|
|
|
|
|
} |
190
|
|
|
|
|
|
|
|
191
|
1
|
|
|
|
|
3
|
my $ua; |
192
|
1
|
50
|
|
|
|
3
|
if (defined($args{useragent})) { |
193
|
0
|
|
|
|
|
0
|
$ua = $args{useragent}; |
194
|
|
|
|
|
|
|
} else { |
195
|
1
|
|
|
|
|
7
|
$ua = RDF::Trine->default_useragent->clone; |
196
|
1
|
|
|
|
|
401
|
my $accept = $class->default_accept_header; |
197
|
1
|
|
|
|
|
5
|
$ua->default_headers->push_header( 'Accept' => $accept ); |
198
|
|
|
|
|
|
|
} |
199
|
|
|
|
|
|
|
|
200
|
1
|
|
|
|
|
39
|
my $resp = $ua->get( $url ); |
201
|
1
|
50
|
|
|
|
4671
|
if ($url =~ /^file:/) { |
202
|
1
|
|
|
|
|
5
|
my $type = guess_media_type($url); |
203
|
1
|
|
|
|
|
60
|
$resp->header('Content-Type', $type); |
204
|
|
|
|
|
|
|
} |
205
|
|
|
|
|
|
|
|
206
|
1
|
50
|
|
|
|
55
|
unless ($resp->is_success) { |
207
|
0
|
|
|
|
|
0
|
throw RDF::Trine::Error::ParserError -text => $resp->status_line; |
208
|
|
|
|
|
|
|
} |
209
|
|
|
|
|
|
|
|
210
|
1
|
|
|
|
|
17
|
my $content = $resp->content; |
211
|
1
|
50
|
|
|
|
88
|
if (my $cb = $args{content_cb}) { |
212
|
0
|
|
|
|
|
0
|
$cb->( $url, $content, $resp ); |
213
|
|
|
|
|
|
|
} |
214
|
|
|
|
|
|
|
|
215
|
1
|
|
|
|
|
4
|
my $type = $resp->header('content-type'); |
216
|
1
|
|
|
|
|
48
|
$type =~ s/^([^\s;]+).*/$1/; |
217
|
1
|
|
|
|
|
4
|
my $pclass = $media_types{ $type }; |
218
|
1
|
50
|
33
|
|
|
18
|
if ($pclass and $pclass->can('new')) { |
219
|
1
|
|
|
|
|
2
|
my $data = $content; |
220
|
1
|
50
|
|
|
|
5
|
if (my $e = $encodings{ $pclass }) { |
221
|
1
|
|
|
|
|
8
|
$data = decode( $e, $content ); |
222
|
|
|
|
|
|
|
} |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
# pass %args in here too so the constructor can take its pick |
225
|
1
|
|
|
|
|
260
|
my $parser = $pclass->new(%args); |
226
|
1
|
|
|
|
|
3
|
my $ok = 0; |
227
|
|
|
|
|
|
|
try { |
228
|
1
|
|
|
1
|
|
55
|
$parser->parse_into_model( $base, $data, $model, %args ); |
229
|
1
|
|
|
|
|
5
|
$ok = 1; |
230
|
1
|
|
|
0
|
|
10
|
} catch RDF::Trine::Error with {}; |
231
|
1
|
50
|
|
|
|
46
|
return 1 if ($ok); |
232
|
|
|
|
|
|
|
} |
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
### FALLBACK |
235
|
0
|
|
|
|
|
0
|
my %options; |
236
|
0
|
0
|
|
|
|
0
|
if (defined $args{canonicalize}) { |
237
|
0
|
|
|
|
|
0
|
$options{ canonicalize } = $args{canonicalize}; |
238
|
|
|
|
|
|
|
} |
239
|
|
|
|
|
|
|
|
240
|
0
|
|
|
|
|
0
|
my $ok = 0; |
241
|
|
|
|
|
|
|
try { |
242
|
0
|
0
|
0
|
0
|
|
0
|
if ($url =~ /[.](x?rdf|owl)$/ or $content =~ m/\x{FEFF}?<[?]xml /smo) { |
|
|
0
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
243
|
0
|
|
|
|
|
0
|
my $parser = RDF::Trine::Parser::RDFXML->new(%options); |
244
|
0
|
|
|
|
|
0
|
$parser->parse_into_model( $base, $content, $model, %args ); |
245
|
0
|
|
|
|
|
0
|
$ok = 1;; |
246
|
|
|
|
|
|
|
} elsif ($url =~ /[.]ttl$/ or $content =~ m/@(prefix|base)/smo) { |
247
|
0
|
|
|
|
|
0
|
my $parser = RDF::Trine::Parser::Turtle->new(%options); |
248
|
0
|
|
|
|
|
0
|
my $data = decode('utf8', $content); |
249
|
0
|
|
|
|
|
0
|
$parser->parse_into_model( $base, $data, $model, %args ); |
250
|
0
|
|
|
|
|
0
|
$ok = 1;; |
251
|
|
|
|
|
|
|
} elsif ($url =~ /[.]trig$/) { |
252
|
0
|
|
|
|
|
0
|
my $parser = RDF::Trine::Parser::Trig->new(%options); |
253
|
0
|
|
|
|
|
0
|
my $data = decode('utf8', $content); |
254
|
0
|
|
|
|
|
0
|
$parser->parse_into_model( $base, $data, $model, %args ); |
255
|
0
|
|
|
|
|
0
|
$ok = 1;; |
256
|
|
|
|
|
|
|
} elsif ($url =~ /[.]nt$/) { |
257
|
0
|
|
|
|
|
0
|
my $parser = RDF::Trine::Parser::NTriples->new(%options); |
258
|
0
|
|
|
|
|
0
|
$parser->parse_into_model( $base, $content, $model, %args ); |
259
|
0
|
|
|
|
|
0
|
$ok = 1;; |
260
|
|
|
|
|
|
|
} elsif ($url =~ /[.]nq$/) { |
261
|
0
|
|
|
|
|
0
|
my $parser = RDF::Trine::Parser::NQuads->new(%options); |
262
|
0
|
|
|
|
|
0
|
$parser->parse_into_model( $base, $content, $model, %args ); |
263
|
0
|
|
|
|
|
0
|
$ok = 1;; |
264
|
|
|
|
|
|
|
} elsif ($url =~ /[.]js(?:on)?$/) { |
265
|
0
|
|
|
|
|
0
|
my $parser = RDF::Trine::Parser::RDFJSON->new(%options); |
266
|
0
|
|
|
|
|
0
|
$parser->parse_into_model( $base, $content, $model, %args ); |
267
|
0
|
|
|
|
|
0
|
$ok = 1;; |
268
|
|
|
|
|
|
|
} elsif ($url =~ /[.]x?html?$/) { |
269
|
0
|
|
|
|
|
0
|
my $parser = RDF::Trine::Parser::RDFa->new(%options); |
270
|
0
|
|
|
|
|
0
|
$parser->parse_into_model( $base, $content, $model, %args ); |
271
|
0
|
|
|
|
|
0
|
$ok = 1;; |
272
|
|
|
|
|
|
|
} else { |
273
|
0
|
|
|
|
|
0
|
my @types = keys %{ { map { $_ => 1 } values %media_types } }; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
274
|
0
|
|
|
|
|
0
|
foreach my $pclass (@types) { |
275
|
0
|
|
|
|
|
0
|
my $data = $content; |
276
|
0
|
0
|
|
|
|
0
|
if (my $e = $encodings{ $pclass }) { |
277
|
0
|
|
|
|
|
0
|
$data = decode( $e, $content ); |
278
|
|
|
|
|
|
|
} |
279
|
0
|
|
|
|
|
0
|
my $parser = $pclass->new(%options); |
280
|
0
|
|
|
|
|
0
|
my $ok = 0; |
281
|
|
|
|
|
|
|
try { |
282
|
0
|
|
|
|
|
0
|
$parser->parse_into_model( $base, $data, $model, %args ); |
283
|
0
|
|
|
|
|
0
|
$ok = 1; |
284
|
0
|
|
|
|
|
0
|
} catch RDF::Trine::Error::ParserError with {}; |
285
|
0
|
0
|
|
|
|
0
|
last if ($ok); |
286
|
|
|
|
|
|
|
} |
287
|
|
|
|
|
|
|
} |
288
|
|
|
|
|
|
|
} catch RDF::Trine::Error with { |
289
|
0
|
|
|
0
|
|
0
|
my $e = shift; |
290
|
0
|
|
|
|
|
0
|
}; |
291
|
0
|
0
|
|
|
|
0
|
return 1 if ($ok); |
292
|
|
|
|
|
|
|
|
293
|
0
|
0
|
|
|
|
0
|
if ($pclass) { |
294
|
0
|
|
|
|
|
0
|
throw RDF::Trine::Error::ParserError -text => "Failed to parse data of type $type from $url"; |
295
|
|
|
|
|
|
|
} else { |
296
|
0
|
|
|
|
|
0
|
throw RDF::Trine::Error::ParserError -text => "Failed to parse data from $url"; |
297
|
|
|
|
|
|
|
} |
298
|
|
|
|
|
|
|
} |
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
=item C<< parse_url ( $url, \&handler [, %args] ) >> |
301
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
Retrieves the content from C<< $url >> and attempts to parse the resulting RDF. |
303
|
|
|
|
|
|
|
For each parsed RDF triple that is parsed, C<&handler> will be called with the |
304
|
|
|
|
|
|
|
triple as an argument. Otherwise, this method acts just like |
305
|
|
|
|
|
|
|
C<parse_url_into_model>. |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
=cut |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
sub parse_url { |
310
|
0
|
|
|
0
|
1
|
0
|
my $class = shift; |
311
|
0
|
|
|
|
|
0
|
my $url = shift; |
312
|
0
|
|
|
|
|
0
|
my $handler = shift; |
313
|
0
|
|
|
|
|
0
|
my %args = @_; |
314
|
|
|
|
|
|
|
|
315
|
0
|
|
|
|
|
0
|
my $base = $url; |
316
|
0
|
0
|
|
|
|
0
|
if (defined($args{base})) { |
317
|
0
|
|
|
|
|
0
|
$base = $args{base}; |
318
|
|
|
|
|
|
|
} |
319
|
|
|
|
|
|
|
|
320
|
0
|
|
|
|
|
0
|
my $ua; |
321
|
0
|
0
|
|
|
|
0
|
if (defined($args{useragent})) { |
322
|
0
|
|
|
|
|
0
|
$ua = $args{useragent}; |
323
|
|
|
|
|
|
|
} else { |
324
|
0
|
|
|
|
|
0
|
$ua = RDF::Trine->default_useragent->clone; |
325
|
0
|
|
|
|
|
0
|
my $accept = $class->default_accept_header; |
326
|
0
|
|
|
|
|
0
|
$ua->default_headers->push_header( 'Accept' => $accept ); |
327
|
|
|
|
|
|
|
} |
328
|
|
|
|
|
|
|
|
329
|
0
|
|
|
|
|
0
|
my $resp = $ua->get( $url ); |
330
|
0
|
0
|
|
|
|
0
|
if ($url =~ /^file:/) { |
331
|
0
|
|
|
|
|
0
|
my $type = guess_media_type($url); |
332
|
0
|
|
|
|
|
0
|
$resp->header('Content-Type', $type); |
333
|
|
|
|
|
|
|
} |
334
|
|
|
|
|
|
|
|
335
|
0
|
0
|
|
|
|
0
|
unless ($resp->is_success) { |
336
|
0
|
|
|
|
|
0
|
throw RDF::Trine::Error::ParserError -text => $resp->status_line; |
337
|
|
|
|
|
|
|
} |
338
|
|
|
|
|
|
|
|
339
|
0
|
|
|
|
|
0
|
my $content = $resp->content; |
340
|
0
|
0
|
|
|
|
0
|
if (my $cb = $args{content_cb}) { |
341
|
0
|
|
|
|
|
0
|
$cb->( $url, $content, $resp ); |
342
|
|
|
|
|
|
|
} |
343
|
|
|
|
|
|
|
|
344
|
0
|
|
|
|
|
0
|
my $type = $resp->header('content-type'); |
345
|
0
|
|
|
|
|
0
|
$type =~ s/^([^\s;]+).*/$1/; |
346
|
0
|
|
|
|
|
0
|
my $pclass = $media_types{ $type }; |
347
|
0
|
0
|
0
|
|
|
0
|
if ($pclass and $pclass->can('new')) { |
348
|
0
|
|
|
|
|
0
|
my $data = $content; |
349
|
0
|
0
|
|
|
|
0
|
if (my $e = $encodings{ $pclass }) { |
350
|
0
|
|
|
|
|
0
|
$data = decode( $e, $content ); |
351
|
|
|
|
|
|
|
} |
352
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
# pass %args in here too so the constructor can take its pick |
354
|
0
|
|
|
|
|
0
|
my $parser = $pclass->new(%args); |
355
|
0
|
|
|
|
|
0
|
my $ok = 0; |
356
|
|
|
|
|
|
|
try { |
357
|
0
|
|
|
0
|
|
0
|
$parser->parse( $base, $data, $handler ); |
358
|
0
|
|
|
|
|
0
|
$ok = 1; |
359
|
0
|
|
|
0
|
|
0
|
} catch RDF::Trine::Error with {}; |
360
|
0
|
0
|
|
|
|
0
|
return 1 if ($ok); |
361
|
|
|
|
|
|
|
} |
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
### FALLBACK |
364
|
0
|
|
|
|
|
0
|
my %options; |
365
|
0
|
0
|
|
|
|
0
|
if (defined $args{canonicalize}) { |
366
|
0
|
|
|
|
|
0
|
$options{ canonicalize } = $args{canonicalize}; |
367
|
|
|
|
|
|
|
} |
368
|
|
|
|
|
|
|
|
369
|
0
|
|
|
|
|
0
|
my $ok = 0; |
370
|
|
|
|
|
|
|
try { |
371
|
0
|
0
|
0
|
0
|
|
0
|
if ($url =~ /[.](x?rdf|owl)$/ or $content =~ m/\x{FEFF}?<[?]xml /smo) { |
|
|
0
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
372
|
0
|
|
|
|
|
0
|
my $parser = RDF::Trine::Parser::RDFXML->new(%options); |
373
|
0
|
|
|
|
|
0
|
$parser->parse( $base, $content, $handler, %args ); |
374
|
0
|
|
|
|
|
0
|
$ok = 1;; |
375
|
|
|
|
|
|
|
} elsif ($url =~ /[.]ttl$/ or $content =~ m/@(prefix|base)/smo) { |
376
|
0
|
|
|
|
|
0
|
my $parser = RDF::Trine::Parser::Turtle->new(%options); |
377
|
0
|
|
|
|
|
0
|
my $data = decode('utf8', $content); |
378
|
0
|
|
|
|
|
0
|
$parser->parse( $base, $data, $handler, %args ); |
379
|
0
|
|
|
|
|
0
|
$ok = 1;; |
380
|
|
|
|
|
|
|
} elsif ($url =~ /[.]trig$/) { |
381
|
0
|
|
|
|
|
0
|
my $parser = RDF::Trine::Parser::Trig->new(%options); |
382
|
0
|
|
|
|
|
0
|
my $data = decode('utf8', $content); |
383
|
0
|
|
|
|
|
0
|
$parser->parse( $base, $data, $handler, %args ); |
384
|
0
|
|
|
|
|
0
|
$ok = 1;; |
385
|
|
|
|
|
|
|
} elsif ($url =~ /[.]nt$/) { |
386
|
0
|
|
|
|
|
0
|
my $parser = RDF::Trine::Parser::NTriples->new(%options); |
387
|
0
|
|
|
|
|
0
|
$parser->parse( $base, $content, $handler, %args ); |
388
|
0
|
|
|
|
|
0
|
$ok = 1;; |
389
|
|
|
|
|
|
|
} elsif ($url =~ /[.]nq$/) { |
390
|
0
|
|
|
|
|
0
|
my $parser = RDF::Trine::Parser::NQuads->new(%options); |
391
|
0
|
|
|
|
|
0
|
$parser->parse( $base, $content, $handler, %args ); |
392
|
0
|
|
|
|
|
0
|
$ok = 1;; |
393
|
|
|
|
|
|
|
} elsif ($url =~ /[.]js(?:on)?$/) { |
394
|
0
|
|
|
|
|
0
|
my $parser = RDF::Trine::Parser::RDFJSON->new(%options); |
395
|
0
|
|
|
|
|
0
|
$parser->parse( $base, $content, $handler, %args ); |
396
|
0
|
|
|
|
|
0
|
$ok = 1;; |
397
|
|
|
|
|
|
|
} elsif ($url =~ /[.]x?html?$/) { |
398
|
0
|
|
|
|
|
0
|
my $parser = RDF::Trine::Parser::RDFa->new(%options); |
399
|
0
|
|
|
|
|
0
|
$parser->parse( $base, $content, $handler, %args ); |
400
|
0
|
|
|
|
|
0
|
$ok = 1;; |
401
|
|
|
|
|
|
|
} else { |
402
|
0
|
|
|
|
|
0
|
my @types = keys %{ { map { $_ => 1 } values %media_types } }; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
403
|
0
|
|
|
|
|
0
|
foreach my $pclass (@types) { |
404
|
0
|
|
|
|
|
0
|
my $data = $content; |
405
|
0
|
0
|
|
|
|
0
|
if (my $e = $encodings{ $pclass }) { |
406
|
0
|
|
|
|
|
0
|
$data = decode( $e, $content ); |
407
|
|
|
|
|
|
|
} |
408
|
0
|
|
|
|
|
0
|
my $parser = $pclass->new(%options); |
409
|
0
|
|
|
|
|
0
|
my $ok = 0; |
410
|
|
|
|
|
|
|
try { |
411
|
0
|
|
|
|
|
0
|
$parser->parse( $base, $data, $handler, %args ); |
412
|
0
|
|
|
|
|
0
|
$ok = 1; |
413
|
0
|
|
|
|
|
0
|
} catch RDF::Trine::Error::ParserError with {}; |
414
|
0
|
0
|
|
|
|
0
|
last if ($ok); |
415
|
|
|
|
|
|
|
} |
416
|
|
|
|
|
|
|
} |
417
|
|
|
|
|
|
|
} catch RDF::Trine::Error with { |
418
|
0
|
|
|
0
|
|
0
|
my $e = shift; |
419
|
0
|
|
|
|
|
0
|
}; |
420
|
0
|
0
|
|
|
|
0
|
return 1 if ($ok); |
421
|
|
|
|
|
|
|
|
422
|
0
|
0
|
|
|
|
0
|
if ($pclass) { |
423
|
0
|
|
|
|
|
0
|
throw RDF::Trine::Error::ParserError -text => "Failed to parse data of type $type from $url"; |
424
|
|
|
|
|
|
|
} else { |
425
|
0
|
|
|
|
|
0
|
throw RDF::Trine::Error::ParserError -text => "Failed to parse data from $url"; |
426
|
|
|
|
|
|
|
} |
427
|
|
|
|
|
|
|
} |
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
=item C<< parse_into_model ( $base_uri, $data, $model [, context => $context] ) >> |
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
Parses the bytes in C<< $data >>, using the given C<< $base_uri >>. For each RDF |
432
|
|
|
|
|
|
|
statement parsed, will call C<< $model->add_statement( $statement ) >>. |
433
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
=cut |
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
sub parse_into_model { |
437
|
68
|
|
|
68
|
1
|
452
|
my $proto = shift; |
438
|
68
|
50
|
|
|
|
286
|
my $self = blessed($proto) ? $proto : $proto->new(); |
439
|
68
|
|
|
|
|
163
|
my $uri = shift; |
440
|
68
|
50
|
33
|
|
|
292
|
if (blessed($uri) and $uri->isa('RDF::Trine::Node::Resource')) { |
441
|
0
|
|
|
|
|
0
|
$uri = $uri->uri_value; |
442
|
|
|
|
|
|
|
} |
443
|
68
|
|
|
|
|
137
|
my $input = shift; |
444
|
68
|
|
|
|
|
123
|
my $model = shift; |
445
|
68
|
|
|
|
|
170
|
my %args = @_; |
446
|
68
|
|
|
|
|
156
|
my $context = $args{'context'}; |
447
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
my $handler = sub { |
449
|
2447
|
|
|
2447
|
|
4244
|
my $st = shift; |
450
|
2447
|
50
|
|
|
|
5491
|
if ($context) { |
451
|
0
|
|
|
|
|
0
|
my $quad = RDF::Trine::Statement::Quad->new( $st->nodes, $context ); |
452
|
0
|
|
|
|
|
0
|
$model->add_statement( $quad ); |
453
|
|
|
|
|
|
|
} else { |
454
|
2447
|
|
|
|
|
9130
|
$model->add_statement( $st ); |
455
|
|
|
|
|
|
|
} |
456
|
68
|
|
|
|
|
333
|
}; |
457
|
|
|
|
|
|
|
|
458
|
68
|
|
|
|
|
288
|
$model->begin_bulk_ops(); |
459
|
68
|
|
|
|
|
325
|
my $s = $self->parse( $uri, $input, $handler ); |
460
|
68
|
|
|
|
|
395
|
$model->end_bulk_ops(); |
461
|
68
|
|
|
|
|
533
|
return $s; |
462
|
|
|
|
|
|
|
} |
463
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
=item C<< parse_file_into_model ( $base_uri, $fh, $model [, context => $context] ) >> |
465
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
Parses all data read from the filehandle or file C<< $fh >>, using the |
467
|
|
|
|
|
|
|
given C<< $base_uri >>. For each RDF statement parsed, will call |
468
|
|
|
|
|
|
|
C<< $model->add_statement( $statement ) >>. |
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
=cut |
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
sub parse_file_into_model { |
473
|
656
|
|
|
656
|
1
|
151360
|
my $proto = shift; |
474
|
656
|
50
|
66
|
|
|
3802
|
my $self = (blessed($proto) or $proto eq __PACKAGE__) |
475
|
|
|
|
|
|
|
? $proto : $proto->new(); |
476
|
656
|
|
|
|
|
1549
|
my $uri = shift; |
477
|
656
|
50
|
33
|
|
|
2507
|
if (blessed($uri) and $uri->isa('RDF::Trine::Node::Resource')) { |
478
|
0
|
|
|
|
|
0
|
$uri = $uri->uri_value; |
479
|
|
|
|
|
|
|
} |
480
|
656
|
|
|
|
|
1252
|
my $fh = shift; |
481
|
656
|
|
|
|
|
1138
|
my $model = shift; |
482
|
656
|
|
|
|
|
1530
|
my %args = @_; |
483
|
656
|
|
|
|
|
1296
|
my $context = $args{'context'}; |
484
|
|
|
|
|
|
|
|
485
|
|
|
|
|
|
|
my $handler = sub { |
486
|
1168
|
|
|
1168
|
|
2150
|
my $st = shift; |
487
|
1168
|
100
|
|
|
|
2619
|
if ($context) { |
488
|
1
|
|
|
|
|
4
|
my $quad = RDF::Trine::Statement::Quad->new( $st->nodes, $context ); |
489
|
1
|
|
|
|
|
4
|
$model->add_statement( $quad ); |
490
|
|
|
|
|
|
|
} else { |
491
|
1167
|
|
|
|
|
4460
|
$model->add_statement( $st ); |
492
|
|
|
|
|
|
|
} |
493
|
656
|
|
|
|
|
3016
|
}; |
494
|
|
|
|
|
|
|
|
495
|
656
|
|
|
|
|
2790
|
$model->begin_bulk_ops(); |
496
|
656
|
|
|
|
|
2711
|
my $s = $self->parse_file( $uri, $fh, $handler ); |
497
|
655
|
|
|
|
|
3123
|
$model->end_bulk_ops(); |
498
|
655
|
|
|
|
|
7063
|
return $s; |
499
|
|
|
|
|
|
|
} |
500
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
=item C<< parse_file ( $base_uri, $fh, $handler ) >> |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
Parses all data read from the filehandle or file C<< $fh >>, using the given |
504
|
|
|
|
|
|
|
C<< $base_uri >>. If C<< $fh >> is a filename, this method can guess the |
505
|
|
|
|
|
|
|
associated parse. For each RDF statement parsed, C<< $handler->( $st ) >> is called. |
506
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
=cut |
508
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
sub parse_file { |
510
|
2
|
|
|
2
|
1
|
6
|
my $self = shift; |
511
|
2
|
|
|
|
|
5
|
my $base = shift; |
512
|
2
|
|
|
|
|
3
|
my $fh = shift; |
513
|
2
|
|
|
|
|
5
|
my $handler = shift; |
514
|
|
|
|
|
|
|
|
515
|
2
|
50
|
|
|
|
6
|
unless (ref($fh)) { |
516
|
2
|
|
|
|
|
4
|
my $filename = $fh; |
517
|
2
|
|
|
|
|
4
|
undef $fh; |
518
|
2
|
50
|
|
|
|
14
|
unless ($self->can('parse')) { |
519
|
2
|
|
|
|
|
8
|
my $pclass = $self->guess_parser_by_filename( $filename ); |
520
|
2
|
50
|
33
|
|
|
28
|
$self = $pclass->new() if ($pclass and $pclass->can('new')); |
521
|
|
|
|
|
|
|
} |
522
|
2
|
50
|
|
1
|
|
102
|
open( $fh, '<:encoding(UTF-8)', $filename ) or throw RDF::Trine::Error::ParserError -text => $!; |
|
1
|
|
|
|
|
5
|
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
6
|
|
523
|
|
|
|
|
|
|
} |
524
|
|
|
|
|
|
|
|
525
|
2
|
50
|
33
|
|
|
1066
|
if ($self and $self->can('parse')) { |
526
|
2
|
|
|
|
|
4
|
my $content = do { local($/) = undef; <$fh> }; |
|
2
|
|
|
|
|
8
|
|
|
2
|
|
|
|
|
40
|
|
527
|
2
|
|
|
|
|
36
|
return $self->parse( $base, $content, $handler, @_ ); |
528
|
|
|
|
|
|
|
} else { |
529
|
0
|
|
|
|
|
0
|
throw RDF::Trine::Error::ParserError -text => "Cannot parse unknown serialization"; |
530
|
|
|
|
|
|
|
} |
531
|
|
|
|
|
|
|
} |
532
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
=item C<< parse ( $base_uri, $rdf, \&handler ) >> |
534
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
=cut |
536
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
=item C<< new_bnode_prefix () >> |
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
Returns a new prefix to be used in the construction of blank node identifiers. |
541
|
|
|
|
|
|
|
If either Data::UUID or UUID::Tiny are available, they are used to construct |
542
|
|
|
|
|
|
|
a globally unique bnode prefix. Otherwise, an empty string is returned. |
543
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
=cut |
545
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
sub new_bnode_prefix { |
547
|
9
|
|
|
9
|
1
|
22
|
my $class = shift; |
548
|
9
|
50
|
33
|
|
|
60
|
if (defined($Data::UUID::VERSION)) { |
|
|
50
|
|
|
|
|
|
549
|
0
|
|
|
|
|
0
|
my $ug = new Data::UUID; |
550
|
0
|
|
|
|
|
0
|
my $uuid = $ug->to_string( $ug->create() ); |
551
|
0
|
|
|
|
|
0
|
$uuid =~ s/-//g; |
552
|
0
|
|
|
|
|
0
|
return 'b' . $uuid; |
553
|
|
|
|
|
|
|
} elsif (defined($UUID::Tiny::VERSION) && ($] < 5.010000)) { # UUID::Tiny 1.03 isn't working nice with thread support in Perl 5.14. When this is fixed, this may be removed and dep added. |
554
|
0
|
|
|
|
|
0
|
my $uuid = UUID::Tiny::create_UUID_as_string(UUID::Tiny::UUID_V1()); |
555
|
0
|
|
|
|
|
0
|
$uuid =~ s/-//g; |
556
|
0
|
|
|
|
|
0
|
return 'b' . $uuid; |
557
|
|
|
|
|
|
|
} else { |
558
|
9
|
|
|
|
|
36
|
return ''; |
559
|
|
|
|
|
|
|
} |
560
|
|
|
|
|
|
|
} |
561
|
|
|
|
|
|
|
|
562
|
|
|
|
|
|
|
=item C<< default_accept_header >> |
563
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
Returns the default HTTP Accept header value used in requesting RDF content (e.g. in |
565
|
|
|
|
|
|
|
L</parse_url_into_model>) that may be parsed by one of the available RDF::Trine::Parser |
566
|
|
|
|
|
|
|
subclasses. |
567
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
By default, RDF/XML and Turtle are preferred over other media types. |
569
|
|
|
|
|
|
|
|
570
|
|
|
|
|
|
|
=cut |
571
|
|
|
|
|
|
|
|
572
|
|
|
|
|
|
|
sub default_accept_header { |
573
|
|
|
|
|
|
|
# prefer RDF/XML or Turtle, then anything else that we've got a parser for. |
574
|
1
|
100
|
|
1
|
1
|
8
|
my $accept = join(',', map { /(turtle|rdf[+]xml)/ ? "$_;q=1.0" : "$_;q=0.9" } keys %media_types); |
|
9
|
|
|
|
|
42
|
|
575
|
1
|
|
|
|
|
4
|
return $accept; |
576
|
|
|
|
|
|
|
} |
577
|
|
|
|
|
|
|
|
578
|
|
|
|
|
|
|
1; |
579
|
|
|
|
|
|
|
|
580
|
|
|
|
|
|
|
__END__ |
581
|
|
|
|
|
|
|
|
582
|
|
|
|
|
|
|
=back |
583
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
=head1 BUGS |
585
|
|
|
|
|
|
|
|
586
|
|
|
|
|
|
|
Please report any bugs or feature requests to through the GitHub web interface |
587
|
|
|
|
|
|
|
at L<https://github.com/kasei/perlrdf/issues>. |
588
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
=head1 AUTHOR |
590
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
Gregory Todd Williams C<< <gwilliams@cpan.org> >> |
592
|
|
|
|
|
|
|
|
593
|
|
|
|
|
|
|
=head1 COPYRIGHT |
594
|
|
|
|
|
|
|
|
595
|
|
|
|
|
|
|
Copyright (c) 2006-2012 Gregory Todd Williams. This |
596
|
|
|
|
|
|
|
program is free software; you can redistribute it and/or modify it under |
597
|
|
|
|
|
|
|
the same terms as Perl itself. |
598
|
|
|
|
|
|
|
|
599
|
|
|
|
|
|
|
=cut |