line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package HTTP::LRDD; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
19002
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
27
|
|
4
|
1
|
|
|
1
|
|
16
|
use 5.010; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
29
|
|
5
|
|
|
|
|
|
|
|
6
|
1
|
|
|
1
|
|
366
|
use HTML::HTML5::Parser 0.107; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
use HTML::HTML5::Sanity 0.102; |
8
|
|
|
|
|
|
|
use HTTP::Link::Parser 0.102 qw(:all); |
9
|
|
|
|
|
|
|
use HTTP::Status 0 qw(:constants); |
10
|
|
|
|
|
|
|
use Object::AUTHORITY 0; |
11
|
|
|
|
|
|
|
use RDF::TrineX::Functions 0 -shortcuts; |
12
|
|
|
|
|
|
|
use RDF::RDFa::Parser 1.096; |
13
|
|
|
|
|
|
|
use RDF::Query 2.900; |
14
|
|
|
|
|
|
|
use Scalar::Util 0 qw(blessed); |
15
|
|
|
|
|
|
|
use URI 0; |
16
|
|
|
|
|
|
|
use URI::Escape 0; |
17
|
|
|
|
|
|
|
use XML::Atom::OWL 0.100; |
18
|
|
|
|
|
|
|
use XRD::Parser 0.101; |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
my (@Predicates, @_Predicates, @MediaTypes); |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
BEGIN |
23
|
|
|
|
|
|
|
{ |
24
|
|
|
|
|
|
|
$HTTP::LRDD::AUTHORITY = 'cpan:TOBYINK'; |
25
|
|
|
|
|
|
|
$HTTP::LRDD::VERSION = '0.106'; |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
@Predicates = ( |
28
|
|
|
|
|
|
|
'describedby', |
29
|
|
|
|
|
|
|
'lrdd', |
30
|
|
|
|
|
|
|
'http://www.w3.org/2007/05/powder-s#describedby', |
31
|
|
|
|
|
|
|
'http://www.w3.org/1999/xhtml/vocab#meta', |
32
|
|
|
|
|
|
|
'http://www.w3.org/2000/01/rdf-schema#seeAlso', |
33
|
|
|
|
|
|
|
); |
34
|
|
|
|
|
|
|
@_Predicates = @Predicates; |
35
|
|
|
|
|
|
|
@MediaTypes = ( |
36
|
|
|
|
|
|
|
'application/xrd+xml', |
37
|
|
|
|
|
|
|
'application/rdf+xml', |
38
|
|
|
|
|
|
|
'text/turtle', |
39
|
|
|
|
|
|
|
'application/atom+xml;q=0.9', |
40
|
|
|
|
|
|
|
'application/xhtml+xml;q=0.9', |
41
|
|
|
|
|
|
|
'text/html;q=0.9', |
42
|
|
|
|
|
|
|
'*/*;q=0.1', |
43
|
|
|
|
|
|
|
); |
44
|
|
|
|
|
|
|
} |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
sub __rdf_query |
47
|
|
|
|
|
|
|
{ |
48
|
|
|
|
|
|
|
my ($sparql, $model) = @_; |
49
|
|
|
|
|
|
|
my $result = RDF::Query->new($sparql)->execute($model); |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
if ($result->is_boolean) |
52
|
|
|
|
|
|
|
{ return $result->get_boolean } |
53
|
|
|
|
|
|
|
elsif ($result->is_bindings) |
54
|
|
|
|
|
|
|
{ return $result } |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
$result->is_graph or die; |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
my $return = RDF::Trine::Model->new; |
59
|
|
|
|
|
|
|
$return->add_hashref( $result->as_hashref ); |
60
|
|
|
|
|
|
|
return $return; |
61
|
|
|
|
|
|
|
} |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
sub import |
64
|
|
|
|
|
|
|
{ |
65
|
|
|
|
|
|
|
my $class = shift; |
66
|
|
|
|
|
|
|
@Predicates = @_ if @_; |
67
|
|
|
|
|
|
|
} |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
sub new |
70
|
|
|
|
|
|
|
{ |
71
|
|
|
|
|
|
|
my $class = shift; |
72
|
|
|
|
|
|
|
my $self = bless { }, $class; |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
$self->{predicates} = @_ ? \@_ : \@Predicates; |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
return $self; |
77
|
|
|
|
|
|
|
} |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
sub new_strict |
80
|
|
|
|
|
|
|
{ |
81
|
|
|
|
|
|
|
my $class = shift; |
82
|
|
|
|
|
|
|
return $class->new(qw(describedby lrdd)); |
83
|
|
|
|
|
|
|
} |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
sub new_default |
86
|
|
|
|
|
|
|
{ |
87
|
|
|
|
|
|
|
my $class = shift; |
88
|
|
|
|
|
|
|
return $class->new(@_Predicates); |
89
|
|
|
|
|
|
|
} |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
sub discover |
92
|
|
|
|
|
|
|
{ |
93
|
|
|
|
|
|
|
my $self = shift; |
94
|
|
|
|
|
|
|
my $uri = shift; |
95
|
|
|
|
|
|
|
my $list = wantarray; |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
$self = $self->new |
98
|
|
|
|
|
|
|
unless blessed($self) && $self->isa(__PACKAGE__); |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
my (@results, $rdfa, $rdfx, $response); |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
# STEP 1: check the HTTP headers for a descriptor link |
103
|
|
|
|
|
|
|
if ($uri =~ /^https?:/i) |
104
|
|
|
|
|
|
|
{ |
105
|
|
|
|
|
|
|
$response = $self->_ua->head($uri); |
106
|
|
|
|
|
|
|
my $model = rdf_parse(); |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
# Parse HTTP 'Link' headers. |
109
|
|
|
|
|
|
|
parse_links_into_model($response => $model); |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
if ($response->code eq HTTP_SEE_OTHER) # 303 Redirect |
112
|
|
|
|
|
|
|
{ |
113
|
|
|
|
|
|
|
my $seeother = URI->new_abs( |
114
|
|
|
|
|
|
|
$response->header('Location'), |
115
|
|
|
|
|
|
|
URI->new($uri)); |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
$model->add_hashref({ |
118
|
|
|
|
|
|
|
$uri => { |
119
|
|
|
|
|
|
|
'http://www.w3.org/2000/01/rdf-schema#seeAlso' => [ |
120
|
|
|
|
|
|
|
{ 'value' => "$seeother" , 'type' => 'uri' }, |
121
|
|
|
|
|
|
|
], |
122
|
|
|
|
|
|
|
}, |
123
|
|
|
|
|
|
|
}); |
124
|
|
|
|
|
|
|
} |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
my $iterator = __rdf_query($self->_make_sparql($uri, $list), $model); |
127
|
|
|
|
|
|
|
while (my $row = $iterator->next) |
128
|
|
|
|
|
|
|
{ |
129
|
|
|
|
|
|
|
push @results, $row->{'descriptor'}->uri |
130
|
|
|
|
|
|
|
if defined $row->{'descriptor'} |
131
|
|
|
|
|
|
|
&& $row->{'descriptor'}->is_resource; |
132
|
|
|
|
|
|
|
} |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
# Bypass further processing if we've got a result and we only wanted one! |
135
|
|
|
|
|
|
|
return $results[0] if @results && !$list; |
136
|
|
|
|
|
|
|
} |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
# STEP 2: check the HTTP body (RDF) for a descriptor link |
139
|
|
|
|
|
|
|
if ($uri =~ /^https?:/i) |
140
|
|
|
|
|
|
|
{ |
141
|
|
|
|
|
|
|
my $model = rdf_parse(); |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
# Parse as RDFa, if the response is RDFa. |
144
|
|
|
|
|
|
|
($response, $rdfa) = $self->_cond_parse_rdfa($response, $model, $uri); |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
# If the response was not RDFa, try parsing as RDF. |
147
|
|
|
|
|
|
|
($response, $rdfx) = $self->_cond_parse_rdf($response, $model, $uri) |
148
|
|
|
|
|
|
|
unless defined $rdfa; |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
my $iterator = __rdf_query($self->_make_sparql($uri, $list), $model); |
151
|
|
|
|
|
|
|
while (my $row = $iterator->next) |
152
|
|
|
|
|
|
|
{ |
153
|
|
|
|
|
|
|
push @results, $row->{'descriptor'}->uri |
154
|
|
|
|
|
|
|
if defined $row->{'descriptor'} |
155
|
|
|
|
|
|
|
&& $row->{'descriptor'}->is_resource; |
156
|
|
|
|
|
|
|
} |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
# Bypass further processing if we've got a result and we only wanted one! |
159
|
|
|
|
|
|
|
return $results[0] if @results && !$list; |
160
|
|
|
|
|
|
|
} |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
# STEP 2a: AtomOWL doesn't use <id> as a subject URI. |
163
|
|
|
|
|
|
|
if (defined $rdfa && $rdfa->{'atom_parser'} && blessed($self->{'cache'}->{$uri})) |
164
|
|
|
|
|
|
|
{ |
165
|
|
|
|
|
|
|
my $iterator = __rdf_query($self->_make_sparql_atomowl($uri, $list), $self->{'cache'}->{$uri}); |
166
|
|
|
|
|
|
|
while (my $row = $iterator->next) |
167
|
|
|
|
|
|
|
{ |
168
|
|
|
|
|
|
|
push @results, $row->{'descriptor'}->uri |
169
|
|
|
|
|
|
|
if defined $row->{'descriptor'} |
170
|
|
|
|
|
|
|
&& $row->{'descriptor'}->is_resource; |
171
|
|
|
|
|
|
|
} |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
# Bypass further processing if we've got a result and we only wanted one! |
174
|
|
|
|
|
|
|
return $results[0] if @results && !$list; |
175
|
|
|
|
|
|
|
} |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
# STEP 3: try host-meta. |
178
|
|
|
|
|
|
|
my $hostmeta_location = XRD::Parser::hostmeta_location($uri); |
179
|
|
|
|
|
|
|
unless (blessed($self->{'cache'}->{$hostmeta_location})) |
180
|
|
|
|
|
|
|
{ |
181
|
|
|
|
|
|
|
eval |
182
|
|
|
|
|
|
|
{ |
183
|
|
|
|
|
|
|
my $hm = XRD::Parser->hostmeta($uri); |
184
|
|
|
|
|
|
|
$hm->consume; |
185
|
|
|
|
|
|
|
$self->{'cache'}->{$hostmeta_location} = $hm->graph; |
186
|
|
|
|
|
|
|
}; |
187
|
|
|
|
|
|
|
} |
188
|
|
|
|
|
|
|
if (blessed( $self->{'cache'}->{$hostmeta_location} )) |
189
|
|
|
|
|
|
|
{ |
190
|
|
|
|
|
|
|
my $hm_graph = $self->{'cache'}->{$hostmeta_location}; |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
# First try original query. |
193
|
|
|
|
|
|
|
my $iterator = __rdf_query($self->_make_sparql($uri, $list), $hm_graph); |
194
|
|
|
|
|
|
|
while (my $row = $iterator->next) |
195
|
|
|
|
|
|
|
{ |
196
|
|
|
|
|
|
|
push @results, $row->{'descriptor'}->uri |
197
|
|
|
|
|
|
|
if defined $row->{'descriptor'} |
198
|
|
|
|
|
|
|
&& $row->{'descriptor'}->is_resource; |
199
|
|
|
|
|
|
|
} |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
# Then try using host-meta URI templates. |
202
|
|
|
|
|
|
|
$iterator = __rdf_query($self->_make_sparql_template($uri, $list), $hm_graph); |
203
|
|
|
|
|
|
|
while (my $row = $iterator->next) |
204
|
|
|
|
|
|
|
{ |
205
|
|
|
|
|
|
|
if (defined $row->{'descriptor'} |
206
|
|
|
|
|
|
|
&& $row->{'descriptor'}->is_literal |
207
|
|
|
|
|
|
|
&& $row->{'descriptor'}->literal_datatype eq (XRD::Parser->URI_XRD.'URITemplate')) |
208
|
|
|
|
|
|
|
{ |
209
|
|
|
|
|
|
|
my $u = $row->{'descriptor'}->literal_value; |
210
|
|
|
|
|
|
|
$u =~ s/\{uri\}/uri_escape($uri)/ie; |
211
|
|
|
|
|
|
|
push @results, $u; |
212
|
|
|
|
|
|
|
} |
213
|
|
|
|
|
|
|
} |
214
|
|
|
|
|
|
|
} |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
# STEP 4: the resource may be self-describing |
217
|
|
|
|
|
|
|
if ($rdfa || $rdfx) |
218
|
|
|
|
|
|
|
{ |
219
|
|
|
|
|
|
|
my $data = $self->parse($uri); |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
# only add $uri to @results |
222
|
|
|
|
|
|
|
# if we're completely desparate, |
223
|
|
|
|
|
|
|
# or it seems to provide something useful. |
224
|
|
|
|
|
|
|
push @results, $uri |
225
|
|
|
|
|
|
|
if !@results |
226
|
|
|
|
|
|
|
|| $data->count_statements(RDF::Trine::Node::Resource->new($uri), undef, undef); |
227
|
|
|
|
|
|
|
} |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
if (@results) |
230
|
|
|
|
|
|
|
{ |
231
|
|
|
|
|
|
|
return $list ? @results : $results[0]; |
232
|
|
|
|
|
|
|
} |
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
return; |
235
|
|
|
|
|
|
|
} |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
sub parse |
238
|
|
|
|
|
|
|
{ |
239
|
|
|
|
|
|
|
my $self = shift; |
240
|
|
|
|
|
|
|
my $uri = shift or return undef; |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
$self = $self->new |
243
|
|
|
|
|
|
|
unless blessed($self) && $self->isa(__PACKAGE__); |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
unless (blessed($self->{'cache'}{$uri}) |
246
|
|
|
|
|
|
|
and $self->{'cache'}{$uri}->isa('RDF::Trine::Model')) |
247
|
|
|
|
|
|
|
{ |
248
|
|
|
|
|
|
|
my $response = $self->_ua->get($uri); |
249
|
|
|
|
|
|
|
my $model = rdf_parse(); |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
# Parse as RDFa, if the response is RDFa. |
252
|
|
|
|
|
|
|
($response, my $rdfa) = $self->_cond_parse_rdfa($response, $model, $uri); |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
# If the response was not RDFa, try parsing as RDF. |
255
|
|
|
|
|
|
|
($response, my $rdfx) = $self->_cond_parse_rdf($response, $model, $uri) |
256
|
|
|
|
|
|
|
unless defined $rdfa; |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
# If the response was not RDFa or another type of RDF, try parsing as XRD. |
259
|
|
|
|
|
|
|
($response, my $xrd) = $self->_cond_parse_xrd($response, $model, $uri) |
260
|
|
|
|
|
|
|
unless defined $rdfa || defined $rdfx; |
261
|
|
|
|
|
|
|
} |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
return $self->{'cache'}{$uri}; |
264
|
|
|
|
|
|
|
} |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
sub process |
267
|
|
|
|
|
|
|
{ |
268
|
|
|
|
|
|
|
my $self = shift; |
269
|
|
|
|
|
|
|
my $uri = shift; |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
$self = $self->new |
272
|
|
|
|
|
|
|
unless blessed($self) && $self->isa(__PACKAGE__); |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
my $descriptor = $self->discover($uri); |
275
|
|
|
|
|
|
|
return $self->parse($descriptor) // rdf_parse(); |
276
|
|
|
|
|
|
|
} |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
sub process_all |
279
|
|
|
|
|
|
|
{ |
280
|
|
|
|
|
|
|
my $self = shift; |
281
|
|
|
|
|
|
|
my $uri = shift; |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
$self = $self->new |
284
|
|
|
|
|
|
|
unless blessed($self) && $self->isa(__PACKAGE__); |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
my @descriptors = $self->discover($uri); |
287
|
|
|
|
|
|
|
my $model = $self->parse($uri) // rdf_parse(); |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
foreach my $descriptor (@descriptors) |
290
|
|
|
|
|
|
|
{ |
291
|
|
|
|
|
|
|
my $description = $self->parse($descriptor); |
292
|
|
|
|
|
|
|
rdf_parse($description, model=>$model); # merge |
293
|
|
|
|
|
|
|
} |
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
return $model; |
296
|
|
|
|
|
|
|
} |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
sub _make_sparql |
299
|
|
|
|
|
|
|
{ |
300
|
|
|
|
|
|
|
my ($self, $uri, $list) = @_; |
301
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
my @p; |
303
|
|
|
|
|
|
|
foreach my $p (@{ $self->{'predicates'} }) |
304
|
|
|
|
|
|
|
{ |
305
|
|
|
|
|
|
|
push @p, sprintf('{ <%s> <%s> ?descriptor . }', |
306
|
|
|
|
|
|
|
$uri, HTTP::Link::Parser::relationship_uri($p)); |
307
|
|
|
|
|
|
|
} |
308
|
|
|
|
|
|
|
return $list ? |
309
|
|
|
|
|
|
|
'SELECT DISTINCT ?descriptor WHERE { '.(join ' UNION ', @p).' }' : |
310
|
|
|
|
|
|
|
'SELECT DISTINCT ?descriptor WHERE { OPTIONAL '.(join ' OPTIONAL ', @p).' }'; |
311
|
|
|
|
|
|
|
} |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
sub _make_sparql_atomowl |
314
|
|
|
|
|
|
|
{ |
315
|
|
|
|
|
|
|
my ($self, $uri, $list) = @_; |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
my @p; |
318
|
|
|
|
|
|
|
foreach my $p (@{ $self->{'predicates'} }) |
319
|
|
|
|
|
|
|
{ |
320
|
|
|
|
|
|
|
push @p, sprintf('{ ?feed <%s> ?descriptor . }', |
321
|
|
|
|
|
|
|
HTTP::Link::Parser::relationship_uri($p)); |
322
|
|
|
|
|
|
|
} |
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
# this can be ambiguous in the face of atom:source. |
325
|
|
|
|
|
|
|
return $list ? |
326
|
|
|
|
|
|
|
'SELECT DISTINCT ?descriptor WHERE { ?feed a <http://bblfish.net/work/atom-owl/2006-06-06/#Feed> . { '.(join ' UNION ', @p).'} }' : |
327
|
|
|
|
|
|
|
'SELECT DISTINCT ?descriptor WHERE { ?feed a <http://bblfish.net/work/atom-owl/2006-06-06/#Feed> . OPTIONAL '.(join ' OPTIONAL ', @p).' }'; |
328
|
|
|
|
|
|
|
} |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
sub _make_sparql_template |
331
|
|
|
|
|
|
|
{ |
332
|
|
|
|
|
|
|
my ($self, $uri, $list) = @_; |
333
|
|
|
|
|
|
|
my $hosturi = XRD::Parser::host_uri( $uri ); |
334
|
|
|
|
|
|
|
my @p; |
335
|
|
|
|
|
|
|
foreach my $p (@{ $self->{'predicates'} }) |
336
|
|
|
|
|
|
|
{ |
337
|
|
|
|
|
|
|
push @p, sprintf('{ <%s> <%s> ?descriptor . }', |
338
|
|
|
|
|
|
|
$hosturi, XRD::Parser::template_uri(HTTP::Link::Parser::relationship_uri($p))); |
339
|
|
|
|
|
|
|
} |
340
|
|
|
|
|
|
|
return $list ? |
341
|
|
|
|
|
|
|
'SELECT DISTINCT ?descriptor WHERE { '.(join ' UNION ', @p).' }' : |
342
|
|
|
|
|
|
|
'SELECT DISTINCT ?descriptor WHERE { OPTIONAL '.(join ' OPTIONAL ', @p).' }'; |
343
|
|
|
|
|
|
|
} |
344
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
sub _cond_parse_rdfa |
346
|
|
|
|
|
|
|
{ |
347
|
|
|
|
|
|
|
my ($self, $response, $model, $uri) = @_; |
348
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
my $rdfa_options; |
350
|
|
|
|
|
|
|
my $rdfa_input; |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
if ($response->content_type =~ m'^(application/atom\+xml|image/svg\+xml|application/xhtml\+xml|text/html)'i) |
353
|
|
|
|
|
|
|
{ |
354
|
|
|
|
|
|
|
if (uc $response->request->method ne 'GET') |
355
|
|
|
|
|
|
|
{ |
356
|
|
|
|
|
|
|
$self->_ua->max_redirect(3); |
357
|
|
|
|
|
|
|
$response = $self->_ua->get($uri); |
358
|
|
|
|
|
|
|
$self->_ua->max_redirect(0); |
359
|
|
|
|
|
|
|
} |
360
|
|
|
|
|
|
|
} |
361
|
|
|
|
|
|
|
else |
362
|
|
|
|
|
|
|
{ |
363
|
|
|
|
|
|
|
return ($response, undef); |
364
|
|
|
|
|
|
|
} |
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
$response->is_success or return ($response, undef); |
367
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
my $hostlang = RDF::RDFa::Parser::Config->host_from_media_type($response->content_type); |
369
|
|
|
|
|
|
|
$rdfa_options = RDF::RDFa::Parser::Config->new($hostlang, RDF::RDFa::Parser::Config->RDFA_GUESS, |
370
|
|
|
|
|
|
|
atom_parser => ($response->content_type =~ m'^application/atom\+xml'i ? 1 : 0), |
371
|
|
|
|
|
|
|
); |
372
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
if (defined $rdfa_options) |
374
|
|
|
|
|
|
|
{ |
375
|
|
|
|
|
|
|
# Make sure any predicate keywords are recognised in @rel/@rev. |
376
|
|
|
|
|
|
|
# This can override some normal RDFa keywords in some cases. |
377
|
|
|
|
|
|
|
foreach my $attr (qw(rel rev)) |
378
|
|
|
|
|
|
|
{ |
379
|
|
|
|
|
|
|
foreach my $p (@{ $self->{'predicates'} }) |
380
|
|
|
|
|
|
|
{ |
381
|
|
|
|
|
|
|
$rdfa_options->{'keywords'}->{$attr}->{$p} |
382
|
|
|
|
|
|
|
= HTTP::Link::Parser::relationship_uri($p) |
383
|
|
|
|
|
|
|
unless $p =~ /:/; |
384
|
|
|
|
|
|
|
$rdfa_options->{'keywords'}->{'insensitive'}->{$attr}->{$p} |
385
|
|
|
|
|
|
|
= HTTP::Link::Parser::relationship_uri($p) |
386
|
|
|
|
|
|
|
unless $p =~ /:/; |
387
|
|
|
|
|
|
|
} |
388
|
|
|
|
|
|
|
} |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
$rdfa_input = $response->decoded_content |
391
|
|
|
|
|
|
|
unless defined $rdfa_input; |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
my $parser = RDF::RDFa::Parser->new($rdfa_input, $response->base, $rdfa_options, $model->_store); |
394
|
|
|
|
|
|
|
$parser->consume; |
395
|
|
|
|
|
|
|
$self->{'cache'}->{$uri} = $model; |
396
|
|
|
|
|
|
|
return ($response, $rdfa_options); |
397
|
|
|
|
|
|
|
} |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
return ($response, undef); |
400
|
|
|
|
|
|
|
} |
401
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
sub _cond_parse_rdf |
403
|
|
|
|
|
|
|
{ |
404
|
|
|
|
|
|
|
my ($self, $response, $model, $uri) = @_; |
405
|
|
|
|
|
|
|
my $type; |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
if ($response->content_type =~ m'^(application/rdf\+xml|(application|text)/(x-)?(rdf\+)?(turtle|n3|json))'i) |
408
|
|
|
|
|
|
|
{ |
409
|
|
|
|
|
|
|
if (uc $response->request->method ne 'GET') |
410
|
|
|
|
|
|
|
{ |
411
|
|
|
|
|
|
|
$self->_ua->max_redirect(3); |
412
|
|
|
|
|
|
|
$response = $self->_ua->get($uri); |
413
|
|
|
|
|
|
|
$self->_ua->max_redirect(0); |
414
|
|
|
|
|
|
|
} |
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
$type = 'Turtle'; |
417
|
|
|
|
|
|
|
$type = 'RDFXML' if $response->content_type =~ /rdf.xml/; |
418
|
|
|
|
|
|
|
$type = 'RDFJSON' if $response->content_type =~ /json/; |
419
|
|
|
|
|
|
|
} |
420
|
|
|
|
|
|
|
else |
421
|
|
|
|
|
|
|
{ |
422
|
|
|
|
|
|
|
return ($response, undef); |
423
|
|
|
|
|
|
|
} |
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
$response->is_success or return ($response, undef); |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
rdf_parse($response->decoded_content, type=>$type, model=>$model, base=>$response->base); |
428
|
|
|
|
|
|
|
$self->{'cache'}->{$uri} = $model; |
429
|
|
|
|
|
|
|
return ($response, 1); |
430
|
|
|
|
|
|
|
} |
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
sub _cond_parse_xrd |
433
|
|
|
|
|
|
|
{ |
434
|
|
|
|
|
|
|
my ($self, $response, $model, $uri) = @_; |
435
|
|
|
|
|
|
|
my $type; |
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
if ($response->content_type =~ m'^(text/plain|application/octet-stream|application/xrd\+xml|(application|text)/xml)'i) |
438
|
|
|
|
|
|
|
{ |
439
|
|
|
|
|
|
|
if (uc $response->request->method ne 'GET') |
440
|
|
|
|
|
|
|
{ |
441
|
|
|
|
|
|
|
$self->_ua->max_redirect(3); |
442
|
|
|
|
|
|
|
$response = $self->_ua->get($uri); |
443
|
|
|
|
|
|
|
$self->_ua->max_redirect(0); |
444
|
|
|
|
|
|
|
} |
445
|
|
|
|
|
|
|
} |
446
|
|
|
|
|
|
|
else |
447
|
|
|
|
|
|
|
{ |
448
|
|
|
|
|
|
|
return ($response, undef); |
449
|
|
|
|
|
|
|
} |
450
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
$response->is_success or return ($response, undef); |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
my $xrd = XRD::Parser->new($response->decoded_content, $response->base, {loose_mime=>1}, $model->_store); |
454
|
|
|
|
|
|
|
$xrd->consume; |
455
|
|
|
|
|
|
|
$self->{'cache'}->{$uri} = $model; |
456
|
|
|
|
|
|
|
return ($response, $xrd); |
457
|
|
|
|
|
|
|
} |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
sub _ua |
460
|
|
|
|
|
|
|
{ |
461
|
|
|
|
|
|
|
my $self = shift; |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
unless (defined $self->{ua}) |
464
|
|
|
|
|
|
|
{ |
465
|
|
|
|
|
|
|
$self->{ua} = LWP::UserAgent->new; |
466
|
|
|
|
|
|
|
$self->{ua}->agent(sprintf('%s/%s (%s) ', __PACKAGE__, __PACKAGE__->VERSION, __PACKAGE__->AUTHORITY)); |
467
|
|
|
|
|
|
|
$self->{ua}->default_header(Accept => (join ', ', @MediaTypes)); |
468
|
|
|
|
|
|
|
$self->{ua}->max_redirect(0); |
469
|
|
|
|
|
|
|
} |
470
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
return $self->{ua}; |
472
|
|
|
|
|
|
|
} |
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
1; |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
=head1 NAME |
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
HTTP::LRDD - link-based resource descriptor discovery |
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
=head1 SYNOPSIS |
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
use HTTP::LRDD; |
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
my $lrdd = HTTP::LRDD->new; |
485
|
|
|
|
|
|
|
my @descriptors = $lrdd->discover($resource); |
486
|
|
|
|
|
|
|
foreach my $descriptor (@descriptors) |
487
|
|
|
|
|
|
|
{ |
488
|
|
|
|
|
|
|
my $description = $lrdd->parse($descriptor); |
489
|
|
|
|
|
|
|
# $description is an RDF::Trine::Model |
490
|
|
|
|
|
|
|
} |
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
=head1 DESCRIPTION |
493
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
Note: the LRDD specification has ceased to be, with some parts being merged into |
495
|
|
|
|
|
|
|
the host-meta Internet Draft. This CPAN module will go in its own direction, |
496
|
|
|
|
|
|
|
bundling up best-practice techniques for discovering links and descriptors for a |
497
|
|
|
|
|
|
|
given URI. |
498
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
=head2 Import Routine |
500
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
=over 4 |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
=item C<< use HTTP::LRDD (@predicates) >> |
504
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
When importing HTTP::LRDD, you can optionally provide a list of |
506
|
|
|
|
|
|
|
predicate URIs (i.e. the URIs which rel values expand to). This |
507
|
|
|
|
|
|
|
may also include IANA-registered link types, which are short tokens |
508
|
|
|
|
|
|
|
rather than full URIs. If you do not provide a list of predicate |
509
|
|
|
|
|
|
|
URIs, then a sensible default set is used. |
510
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
Because this configuration is global in nature, it is not recommended. |
512
|
|
|
|
|
|
|
It is better to supply a list of predicates to the constructor |
513
|
|
|
|
|
|
|
instead, or rely on the defaults. This feature should be regarded |
514
|
|
|
|
|
|
|
as deprecated. |
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
=back |
517
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
=head2 Constructors |
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
=over 4 |
521
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
=item C<< HTTP::LRDD->new(@predicates) >> |
523
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
Create a new LRDD discovery object using the given predicate URIs. |
525
|
|
|
|
|
|
|
If @predicates is omitted, then the predicates passed to the import |
526
|
|
|
|
|
|
|
routine are used instead. |
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
=item C<< HTTP::LRDD->new_strict >> |
529
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
Create a new LRDD discovery object using the 'describedby' and |
531
|
|
|
|
|
|
|
'lrdd' IANA-registered predicates. |
532
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
=item C<< HTTP::LRDD->new_default >> |
534
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
Create a new LRDD discovery object using the default set of |
536
|
|
|
|
|
|
|
predicates ('describedby', 'lrdd', 'wdrs:describedby', 'xhv:meta' |
537
|
|
|
|
|
|
|
and 'rdfs:seeAlso'). |
538
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
=back |
540
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
=head2 Public Methods |
542
|
|
|
|
|
|
|
|
543
|
|
|
|
|
|
|
=over 4 |
544
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
=item C<< $lrdd->discover($resource_uri) >> |
546
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
Discovers a descriptor for the given resource; or if called in a list |
548
|
|
|
|
|
|
|
context, a list of descriptors. |
549
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
A descriptor is a resource that provides a description for something. |
551
|
|
|
|
|
|
|
So, if the given resource URI was the web address for an image, then |
552
|
|
|
|
|
|
|
the descriptor might be the web address for a metadata file about the |
553
|
|
|
|
|
|
|
image. If the given URI was an e-mail address, then the descriptor |
554
|
|
|
|
|
|
|
might be a profile document for the person to whom the address belongs. |
555
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
The following sources are checked (in order) to find links to |
557
|
|
|
|
|
|
|
descriptors. |
558
|
|
|
|
|
|
|
|
559
|
|
|
|
|
|
|
=over 4 |
560
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
=item * HTTP response headers ("Link" header; "303 See Other" status) |
562
|
|
|
|
|
|
|
|
563
|
|
|
|
|
|
|
=item * HTTP response message (RDF or RDFa) |
564
|
|
|
|
|
|
|
|
565
|
|
|
|
|
|
|
=item * https://HOSTNAME/.well-known/host-meta |
566
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
=item * http://HOSTNAME/.well-known/host-meta |
568
|
|
|
|
|
|
|
|
569
|
|
|
|
|
|
|
=back |
570
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
If none of the above is able to yield a link to a descriptor, then |
572
|
|
|
|
|
|
|
the resource URI itself may be returned if it is in a self-describing |
573
|
|
|
|
|
|
|
format (e.g. RDF). |
574
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
There is no guaranteed file format for the descriptor, but it is |
576
|
|
|
|
|
|
|
usually RDF, POWDER XML or XRD. |
577
|
|
|
|
|
|
|
|
578
|
|
|
|
|
|
|
This method can also be called without an object (as a class method) |
579
|
|
|
|
|
|
|
in which case, a temporary object is created automatically using |
580
|
|
|
|
|
|
|
C<< new >>. |
581
|
|
|
|
|
|
|
|
582
|
|
|
|
|
|
|
=item C<< $lrdd->parse($descriptor_uri) >> |
583
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
Parses a descriptor in XRD or RDF (RDF/XML, RDFa, Turtle, etc). |
585
|
|
|
|
|
|
|
|
586
|
|
|
|
|
|
|
Returns an RDF::Trine::Model or undef if unable to process. |
587
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
This method can also be called without an object (as a class method) |
589
|
|
|
|
|
|
|
in which case, a temporary object is created automatically using |
590
|
|
|
|
|
|
|
C<< new >>. |
591
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
=item C<< $lrdd->process($resource_uri) >> |
593
|
|
|
|
|
|
|
|
594
|
|
|
|
|
|
|
Performs the equivalent of C<discover> and C<parse> in one easy step. |
595
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
Calls C<discover> in a non-list context, so only the first descriptor |
597
|
|
|
|
|
|
|
is used. |
598
|
|
|
|
|
|
|
|
599
|
|
|
|
|
|
|
=item C<< $lrdd->process_all($resource_uri) >> |
600
|
|
|
|
|
|
|
|
601
|
|
|
|
|
|
|
Performs the equivalent of C<discover> and C<parse> in one easy step. |
602
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
Calls C<discover> in a list context, so multiple descriptors are |
604
|
|
|
|
|
|
|
combined into the resulting graph. |
605
|
|
|
|
|
|
|
|
606
|
|
|
|
|
|
|
=back |
607
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
=head1 EXAMPLES |
609
|
|
|
|
|
|
|
|
610
|
|
|
|
|
|
|
Discover the hub address (PubSubHubub) for a feed: |
611
|
|
|
|
|
|
|
|
612
|
|
|
|
|
|
|
my $lrdd = HTTP::LRDD->new('hub'); |
613
|
|
|
|
|
|
|
my $hub = $lrdd->discover('http://example.net/feed.atom'); |
614
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
Discover an author link (rel="author") from an HTML page: |
616
|
|
|
|
|
|
|
|
617
|
|
|
|
|
|
|
my $lrdd = HTTP::LRDD->new('author'); |
618
|
|
|
|
|
|
|
my $author = $lrdd->discover('http://example.com/page.html'); |
619
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
(For RDF people, you should note that rel="author" is not semantically |
621
|
|
|
|
|
|
|
equivalent to the "foaf:maker" property but closer to the |
622
|
|
|
|
|
|
|
"foaf:maker/foaf:homepage" SPARQL 1.1 property path - i.e. the rel="author" |
623
|
|
|
|
|
|
|
link destination is not a URI for the author themselves, but a page about |
624
|
|
|
|
|
|
|
the author.) |
625
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
If that author resource is in a machine-readable format (e.g. RDF), then |
627
|
|
|
|
|
|
|
parse the data: |
628
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
my $author_data = $lrdd->parse($author); |
630
|
|
|
|
|
|
|
|
631
|
|
|
|
|
|
|
Or, you can combine C<discover> and C<parse>: |
632
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
my $lrdd = HTTP::LRDD->new('author'); |
634
|
|
|
|
|
|
|
my $author_data = $lrdd->process('http://example.com/page.html'); |
635
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
Get metadata for an image: |
637
|
|
|
|
|
|
|
|
638
|
|
|
|
|
|
|
my $lrdd = HTTP::LRDD->new; |
639
|
|
|
|
|
|
|
my $data = $lrdd->process_all('http://example.org/flower.jpeg'); |
640
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
As we're not passing any arguments to the constructor, we can use a shortcut: |
642
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
my $data = HTTP::LRDD->process_all('http://example.org/flower.jpeg'); |
644
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
Find the title of the image: |
646
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
use RDF::QueryX::Lazy; |
648
|
|
|
|
|
|
|
|
649
|
|
|
|
|
|
|
my $image = q<http://example.org/flower.jpeg>; |
650
|
|
|
|
|
|
|
my $results = RDF::QueryX::Lazy |
651
|
|
|
|
|
|
|
-> new("SELECT * WHERE { <$image> dc:title ?t }") |
652
|
|
|
|
|
|
|
-> execute( HTTP::LRDD->process_all($image) ); |
653
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
=head1 BUGS |
655
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
Please report any bugs to L<http://rt.cpan.org/>. |
657
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
B<Note>: many problems can stem from servers that send incorrect |
659
|
|
|
|
|
|
|
C<Content-Type> headers. If you send an XRD file as "text/html", |
660
|
|
|
|
|
|
|
then this module will not guess what you're doing - it will assume |
661
|
|
|
|
|
|
|
the file is really HTML, and inspect it for RDFa. For host-meta |
662
|
|
|
|
|
|
|
files, this module is slightly more relaxed, as there's a strong |
663
|
|
|
|
|
|
|
assumption that they are XRD... but YOU SHOULD NOT RELY ON THIS. |
664
|
|
|
|
|
|
|
If you're running a server, use the correct media type. |
665
|
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
=head1 SEE ALSO |
667
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
L<HTTP::Link::Parser>, L<XRD::Parser>, L<XML::Atom::OWL> |
669
|
|
|
|
|
|
|
L<WWW::Finger>. |
670
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
L<http://www.perlrdf.org/>. |
672
|
|
|
|
|
|
|
|
673
|
|
|
|
|
|
|
=head1 AUTHOR |
674
|
|
|
|
|
|
|
|
675
|
|
|
|
|
|
|
Toby Inkster E<lt>tobyink@cpan.orgE<gt>. |
676
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENCE |
678
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
Copyright 2010-2012 Toby Inkster |
680
|
|
|
|
|
|
|
|
681
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify it |
682
|
|
|
|
|
|
|
under the same terms as Perl itself. |
683
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
=head1 DISCLAIMER OF WARRANTIES |
685
|
|
|
|
|
|
|
|
686
|
|
|
|
|
|
|
THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED |
687
|
|
|
|
|
|
|
WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF |
688
|
|
|
|
|
|
|
MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. |
689
|
|
|
|
|
|
|
|
690
|
|
|
|
|
|
|
=cut |