line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package RDF::RDFa::Parser; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
BEGIN { |
4
|
9
|
|
|
9
|
|
306502
|
$RDF::RDFa::Parser::AUTHORITY = 'cpan:TOBYINK'; |
5
|
9
|
|
|
|
|
241
|
$RDF::RDFa::Parser::VERSION = '1.097'; |
6
|
|
|
|
|
|
|
} |
7
|
|
|
|
|
|
|
|
8
|
9
|
|
|
9
|
|
85
|
use Carp qw(); |
|
9
|
|
|
|
|
18
|
|
|
9
|
|
|
|
|
137
|
|
9
|
9
|
|
|
9
|
|
15595
|
use Data::UUID; |
|
9
|
|
|
|
|
11973
|
|
|
9
|
|
|
|
|
688
|
|
10
|
9
|
|
|
9
|
|
6018
|
use File::ShareDir qw(dist_file); |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
use HTML::HTML5::Parser; |
12
|
|
|
|
|
|
|
use HTML::HTML5::Sanity qw(fix_document); |
13
|
|
|
|
|
|
|
use LWP::UserAgent; |
14
|
|
|
|
|
|
|
use RDF::RDFa::Parser::Config; |
15
|
|
|
|
|
|
|
use RDF::RDFa::Parser::InitialContext; |
16
|
|
|
|
|
|
|
use RDF::RDFa::Parser::OpenDocumentObjectModel; |
17
|
|
|
|
|
|
|
use RDF::Trine 0.130; |
18
|
|
|
|
|
|
|
use Scalar::Util qw(blessed); |
19
|
|
|
|
|
|
|
use Storable qw(dclone); |
20
|
|
|
|
|
|
|
use URI::Escape; |
21
|
|
|
|
|
|
|
use URI; |
22
|
|
|
|
|
|
|
use XML::LibXML qw(:all); |
23
|
|
|
|
|
|
|
use XML::RegExp; |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
use constant { |
26
|
|
|
|
|
|
|
ERR_WARNING => 'w', |
27
|
|
|
|
|
|
|
ERR_ERROR => 'e', |
28
|
|
|
|
|
|
|
}; |
29
|
|
|
|
|
|
|
use constant { |
30
|
|
|
|
|
|
|
ERR_CODE_HOST => 'HOST01', |
31
|
|
|
|
|
|
|
ERR_CODE_RDFXML_MUDDLE => 'RDFX01', |
32
|
|
|
|
|
|
|
ERR_CODE_RDFXML_MESS => 'RDFX02', |
33
|
|
|
|
|
|
|
ERR_CODE_PREFIX_BUILTIN => 'PRFX01', |
34
|
|
|
|
|
|
|
ERR_CODE_PREFIX_ILLEGAL => 'PRFX02', |
35
|
|
|
|
|
|
|
ERR_CODE_PREFIX_DISABLED => 'PRFX03', |
36
|
|
|
|
|
|
|
ERR_CODE_INSTANCEOF_USED => 'INST01', |
37
|
|
|
|
|
|
|
ERR_CODE_INSTANCEOF_OVERRULED => 'INST02', |
38
|
|
|
|
|
|
|
ERR_CODE_CURIE_FELLTHROUGH => 'CURI01', |
39
|
|
|
|
|
|
|
ERR_CODE_CURIE_UNDEFINED => 'CURI02', |
40
|
|
|
|
|
|
|
ERR_CODE_BNODE_WRONGPLACE => 'BNOD01', |
41
|
|
|
|
|
|
|
ERR_CODE_VOCAB_DISABLED => 'VOCA01', |
42
|
|
|
|
|
|
|
ERR_CODE_LANG_INVALID => 'LANG01', |
43
|
|
|
|
|
|
|
}; |
44
|
|
|
|
|
|
|
use constant { |
45
|
|
|
|
|
|
|
RDF_XMLLIT => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral', |
46
|
|
|
|
|
|
|
RDF_TYPE => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type', |
47
|
|
|
|
|
|
|
RDF_FIRST => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#first', |
48
|
|
|
|
|
|
|
RDF_REST => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#rest', |
49
|
|
|
|
|
|
|
RDF_NIL => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#nil', |
50
|
|
|
|
|
|
|
}; |
51
|
|
|
|
|
|
|
use common::sense; |
52
|
|
|
|
|
|
|
use 5.010; |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
our $HAS_AWOL; |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
BEGIN |
57
|
|
|
|
|
|
|
{ |
58
|
|
|
|
|
|
|
local $@; |
59
|
|
|
|
|
|
|
eval "use XML::Atom::OWL;"; |
60
|
|
|
|
|
|
|
$HAS_AWOL = $@ ? 0 : 1; |
61
|
|
|
|
|
|
|
} |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
sub new |
64
|
|
|
|
|
|
|
{ |
65
|
|
|
|
|
|
|
my ($class, $markup, $base_uri, $config, $store)= @_; |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
# Rationalise $config |
68
|
|
|
|
|
|
|
# =================== |
69
|
|
|
|
|
|
|
# If $config is undefined, then use the default configuration |
70
|
|
|
|
|
|
|
if (!defined $config) |
71
|
|
|
|
|
|
|
{ $config = RDF::RDFa::Parser::Config->new; } |
72
|
|
|
|
|
|
|
# If $config is something sensible, then use it. |
73
|
|
|
|
|
|
|
elsif (blessed($config) && $config->isa('RDF::RDFa::Parser::Config')) |
74
|
|
|
|
|
|
|
{ 1; } |
75
|
|
|
|
|
|
|
# If it's a hashref (for backcompat), then use default plus those options |
76
|
|
|
|
|
|
|
elsif ('HASH' eq ref $config) |
77
|
|
|
|
|
|
|
{ $config = RDF::RDFa::Parser::Config->new(undef, undef, %$config); } |
78
|
|
|
|
|
|
|
# If it's something odd, then bail. |
79
|
|
|
|
|
|
|
else |
80
|
|
|
|
|
|
|
{ die "Unrecognised configuration\n"; } |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
# Rationalise $base_uri |
83
|
|
|
|
|
|
|
# ===================== |
84
|
|
|
|
|
|
|
unless ($base_uri =~ /^[a-z][a-z0-9\+\-\.]*:/i) |
85
|
|
|
|
|
|
|
{ die "Need a valid base URI.\n"; } |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
# Rationalise $markup and set $dom |
88
|
|
|
|
|
|
|
# ================================ |
89
|
|
|
|
|
|
|
Carp::croak("Need to provide markup to parse.") unless defined $markup; |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
my $dom; |
92
|
|
|
|
|
|
|
eval { |
93
|
|
|
|
|
|
|
if (blessed($markup) && $markup->isa('XML::LibXML::Document')) |
94
|
|
|
|
|
|
|
{ |
95
|
|
|
|
|
|
|
$dom = $markup; |
96
|
|
|
|
|
|
|
$markup = $dom->toString; |
97
|
|
|
|
|
|
|
} |
98
|
|
|
|
|
|
|
elsif ($config->{'dom_parser'} =~ /^(opendocument|opendoc|odf|od|odt)$/i) |
99
|
|
|
|
|
|
|
{ |
100
|
|
|
|
|
|
|
my $parser = RDF::RDFa::Parser::OpenDocumentObjectModel->new; |
101
|
|
|
|
|
|
|
$dom = $parser->parse_string($markup, $base_uri); |
102
|
|
|
|
|
|
|
} |
103
|
|
|
|
|
|
|
elsif ($config->{'dom_parser'} =~ /^(html|tagsoup|soup)$/i) |
104
|
|
|
|
|
|
|
{ |
105
|
|
|
|
|
|
|
my $parser = HTML::HTML5::Parser->new; |
106
|
|
|
|
|
|
|
$dom = fix_document( $parser->parse_string($markup) ); |
107
|
|
|
|
|
|
|
} |
108
|
|
|
|
|
|
|
else |
109
|
|
|
|
|
|
|
{ |
110
|
|
|
|
|
|
|
my $parser = XML::LibXML->new; |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
my $catalogue = dist_file('RDF-RDFa-Parser', 'catalogue/index.xml'); |
113
|
|
|
|
|
|
|
$parser->load_catalog($catalogue) |
114
|
|
|
|
|
|
|
if -r $catalogue; |
115
|
|
|
|
|
|
|
$parser->validation(0); |
116
|
|
|
|
|
|
|
#$parser->recover(1); |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
$dom = $parser->parse_string($markup); |
119
|
|
|
|
|
|
|
} |
120
|
|
|
|
|
|
|
}; |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
# Rationalise $store |
123
|
|
|
|
|
|
|
# ================== |
124
|
|
|
|
|
|
|
$store = RDF::Trine::Store::Memory->temporary_store |
125
|
|
|
|
|
|
|
unless defined $store; |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
my $self = bless { |
128
|
|
|
|
|
|
|
baseuri => $base_uri, |
129
|
|
|
|
|
|
|
origbase => $base_uri, |
130
|
|
|
|
|
|
|
dom => $dom, |
131
|
|
|
|
|
|
|
model => RDF::Trine::Model->new($store), |
132
|
|
|
|
|
|
|
bnodes => 0, |
133
|
|
|
|
|
|
|
sub => {}, |
134
|
|
|
|
|
|
|
options => $config, |
135
|
|
|
|
|
|
|
Graphs => {}, |
136
|
|
|
|
|
|
|
errors => [], |
137
|
|
|
|
|
|
|
consumed => 0, |
138
|
|
|
|
|
|
|
}, $class; |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
$config->auto_config($self); |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
$self->{options} = $config = $config->guess_rdfa_version($self) |
143
|
|
|
|
|
|
|
if $config->{guess_rdfa_version}; |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
# HTML <base> element. |
146
|
|
|
|
|
|
|
if ($dom and $self->{options}{xhtml_base}) |
147
|
|
|
|
|
|
|
{ |
148
|
|
|
|
|
|
|
my @bases = $self->dom->getElementsByTagName('base'); |
149
|
|
|
|
|
|
|
my $base; |
150
|
|
|
|
|
|
|
foreach my $b (@bases) |
151
|
|
|
|
|
|
|
{ |
152
|
|
|
|
|
|
|
if ($b->hasAttribute('href')) |
153
|
|
|
|
|
|
|
{ |
154
|
|
|
|
|
|
|
$base = $b->getAttribute('href'); |
155
|
|
|
|
|
|
|
$base =~ s/#.*$//g; |
156
|
|
|
|
|
|
|
} |
157
|
|
|
|
|
|
|
} |
158
|
|
|
|
|
|
|
$self->{baseuri} = $self->uri($base) |
159
|
|
|
|
|
|
|
if defined $base && length $base; |
160
|
|
|
|
|
|
|
} |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
return $self; |
163
|
|
|
|
|
|
|
} |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
sub new_from_url |
166
|
|
|
|
|
|
|
{ |
167
|
|
|
|
|
|
|
my ($class, $url, $config, $store)= @_; |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
my $response = do |
170
|
|
|
|
|
|
|
{ |
171
|
|
|
|
|
|
|
if (blessed($url) && $url->isa('HTTP::Message')) |
172
|
|
|
|
|
|
|
{ |
173
|
|
|
|
|
|
|
$url; |
174
|
|
|
|
|
|
|
} |
175
|
|
|
|
|
|
|
else |
176
|
|
|
|
|
|
|
{ |
177
|
|
|
|
|
|
|
my $ua; |
178
|
|
|
|
|
|
|
if (blessed($config) and $config->isa('RDF::RDFa::Parser::Config')) |
179
|
|
|
|
|
|
|
{ $ua = $config->lwp_ua; } |
180
|
|
|
|
|
|
|
elsif (ref $config eq 'HASH') |
181
|
|
|
|
|
|
|
{ $ua = RDF::RDFa::Parser::Config->new('xml', undef, %$config)->lwp_ua; } |
182
|
|
|
|
|
|
|
else |
183
|
|
|
|
|
|
|
{ $ua = RDF::RDFa::Parser::Config->new('xml', undef)->lwp_ua; } |
184
|
|
|
|
|
|
|
$ua->get($url); |
185
|
|
|
|
|
|
|
} |
186
|
|
|
|
|
|
|
}; |
187
|
|
|
|
|
|
|
my $host = $response->content_type; |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
if (blessed($config) and $config->isa('RDF::RDFa::Parser::Config')) |
190
|
|
|
|
|
|
|
{ $config = $config->rehost($host); } |
191
|
|
|
|
|
|
|
elsif (ref $config eq 'HASH') |
192
|
|
|
|
|
|
|
{ $config = RDF::RDFa::Parser::Config->new($host, undef, %$config); } |
193
|
|
|
|
|
|
|
else |
194
|
|
|
|
|
|
|
{ $config = RDF::RDFa::Parser::Config->new($host, undef); } |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
return $class->new( |
197
|
|
|
|
|
|
|
$response->decoded_content, |
198
|
|
|
|
|
|
|
($response->base || $url).'', |
199
|
|
|
|
|
|
|
$config, |
200
|
|
|
|
|
|
|
$store, |
201
|
|
|
|
|
|
|
); |
202
|
|
|
|
|
|
|
} |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
*new_from_uri = \&new_from_url; |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
*new_from_response = \&new_from_url; |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
sub graph |
209
|
|
|
|
|
|
|
{ |
210
|
|
|
|
|
|
|
my $self = shift; |
211
|
|
|
|
|
|
|
my $graph = shift; |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
$self->consume; |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
if (defined($graph)) |
216
|
|
|
|
|
|
|
{ |
217
|
|
|
|
|
|
|
my $tg; |
218
|
|
|
|
|
|
|
if ($graph =~ m/^_:(.*)/) |
219
|
|
|
|
|
|
|
{ |
220
|
|
|
|
|
|
|
$tg = RDF::Trine::Node::Blank->new($1); |
221
|
|
|
|
|
|
|
} |
222
|
|
|
|
|
|
|
else |
223
|
|
|
|
|
|
|
{ |
224
|
|
|
|
|
|
|
$tg = RDF::Trine::Node::Resource->new($graph, $self->{baseuri}); |
225
|
|
|
|
|
|
|
} |
226
|
|
|
|
|
|
|
my $m = RDF::Trine::Model->temporary_model; |
227
|
|
|
|
|
|
|
my $i = $self->{model}->get_statements(undef, undef, undef, $tg); |
228
|
|
|
|
|
|
|
while (my $statement = $i->next) |
229
|
|
|
|
|
|
|
{ |
230
|
|
|
|
|
|
|
$m->add_statement($statement); |
231
|
|
|
|
|
|
|
} |
232
|
|
|
|
|
|
|
return $m; |
233
|
|
|
|
|
|
|
} |
234
|
|
|
|
|
|
|
else |
235
|
|
|
|
|
|
|
{ |
236
|
|
|
|
|
|
|
return $self->{model}; |
237
|
|
|
|
|
|
|
} |
238
|
|
|
|
|
|
|
} |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
sub output_graph |
241
|
|
|
|
|
|
|
{ |
242
|
|
|
|
|
|
|
shift->graph; |
243
|
|
|
|
|
|
|
} |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
sub graphs |
246
|
|
|
|
|
|
|
{ |
247
|
|
|
|
|
|
|
my $self = shift; |
248
|
|
|
|
|
|
|
$self->consume; |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
my @graphs = keys(%{$self->{Graphs}}); |
251
|
|
|
|
|
|
|
my %result; |
252
|
|
|
|
|
|
|
foreach my $graph (@graphs) |
253
|
|
|
|
|
|
|
{ |
254
|
|
|
|
|
|
|
$result{$graph} = $self->graph($graph); |
255
|
|
|
|
|
|
|
} |
256
|
|
|
|
|
|
|
return \%result; |
257
|
|
|
|
|
|
|
} |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
sub opengraph |
260
|
|
|
|
|
|
|
{ |
261
|
|
|
|
|
|
|
my ($self, $property, %opts) = @_; |
262
|
|
|
|
|
|
|
$self->consume; |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
$property = $1 |
265
|
|
|
|
|
|
|
if defined $property && $property =~ m'^http://opengraphprotocol\.org/schema/(.*)$'; |
266
|
|
|
|
|
|
|
$property = $1 |
267
|
|
|
|
|
|
|
if defined $property && $property =~ m'^http://ogp\.me/ns#(.*)$'; |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
my $rtp; |
270
|
|
|
|
|
|
|
if (defined $property && $property =~ /^[a-z][a-z0-9\-\.\+]*:/i) |
271
|
|
|
|
|
|
|
{ |
272
|
|
|
|
|
|
|
$rtp = [ RDF::Trine::Node::Resource->new($property) ]; |
273
|
|
|
|
|
|
|
} |
274
|
|
|
|
|
|
|
elsif (defined $property) |
275
|
|
|
|
|
|
|
{ |
276
|
|
|
|
|
|
|
$rtp = [ |
277
|
|
|
|
|
|
|
RDF::Trine::Node::Resource->new('http://ogp.me/ns#'.$property), |
278
|
|
|
|
|
|
|
RDF::Trine::Node::Resource->new('http://opengraphprotocol.org/schema/'.$property), |
279
|
|
|
|
|
|
|
]; |
280
|
|
|
|
|
|
|
} |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
my $data = {}; |
283
|
|
|
|
|
|
|
if ($rtp) |
284
|
|
|
|
|
|
|
{ |
285
|
|
|
|
|
|
|
foreach my $rtp2 (@$rtp) |
286
|
|
|
|
|
|
|
{ |
287
|
|
|
|
|
|
|
my $iter = $self->graph->get_statements( |
288
|
|
|
|
|
|
|
RDF::Trine::Node::Resource->new($self->uri), $rtp2, undef); |
289
|
|
|
|
|
|
|
while (my $st = $iter->next) |
290
|
|
|
|
|
|
|
{ |
291
|
|
|
|
|
|
|
my $propkey = $st->predicate->uri; |
292
|
|
|
|
|
|
|
$propkey = $1 |
293
|
|
|
|
|
|
|
if $propkey =~ m'^http://ogp\.me/ns#(.*)$' |
294
|
|
|
|
|
|
|
|| $propkey =~ m'^http://opengraphprotocol\.org/schema/(.*)$'; |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
if ($st->object->is_resource) |
297
|
|
|
|
|
|
|
{ push @{ $data->{$propkey} }, $st->object->uri; } |
298
|
|
|
|
|
|
|
elsif ($st->object->is_literal) |
299
|
|
|
|
|
|
|
{ push @{ $data->{$propkey} }, $st->object->literal_value; } |
300
|
|
|
|
|
|
|
} |
301
|
|
|
|
|
|
|
} |
302
|
|
|
|
|
|
|
} |
303
|
|
|
|
|
|
|
else |
304
|
|
|
|
|
|
|
{ |
305
|
|
|
|
|
|
|
my $iter = $self->graph->get_statements( |
306
|
|
|
|
|
|
|
RDF::Trine::Node::Resource->new($self->uri), undef, undef); |
307
|
|
|
|
|
|
|
while (my $st = $iter->next) |
308
|
|
|
|
|
|
|
{ |
309
|
|
|
|
|
|
|
my $propkey = $st->predicate->uri; |
310
|
|
|
|
|
|
|
$propkey = $1 |
311
|
|
|
|
|
|
|
if $propkey =~ m'^http://ogp\.me/ns#(.*)$' |
312
|
|
|
|
|
|
|
|| $propkey =~ m'^http://opengraphprotocol\.org/schema/(.*)$'; |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
if ($st->object->is_resource) |
315
|
|
|
|
|
|
|
{ push @{ $data->{$propkey} }, $st->object->uri; } |
316
|
|
|
|
|
|
|
elsif ($st->object->is_literal) |
317
|
|
|
|
|
|
|
{ push @{ $data->{$propkey} }, $st->object->literal_value; } |
318
|
|
|
|
|
|
|
} |
319
|
|
|
|
|
|
|
} |
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
my @return; |
322
|
|
|
|
|
|
|
if (defined $property) |
323
|
|
|
|
|
|
|
{ @return = @{$data->{$property}} if defined $data->{$property}; } |
324
|
|
|
|
|
|
|
else |
325
|
|
|
|
|
|
|
{ @return = keys %$data; } |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
return wantarray ? @return : $return[0]; |
328
|
|
|
|
|
|
|
} |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
sub dom |
331
|
|
|
|
|
|
|
{ |
332
|
|
|
|
|
|
|
my $self = shift; |
333
|
|
|
|
|
|
|
return $self->{dom}; |
334
|
|
|
|
|
|
|
} |
335
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
sub uri |
337
|
|
|
|
|
|
|
{ |
338
|
|
|
|
|
|
|
my $self = shift; |
339
|
|
|
|
|
|
|
my $param = shift || ''; |
340
|
|
|
|
|
|
|
my $opts = shift || {}; |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
if ((ref $opts) =~ /^XML::LibXML/) |
343
|
|
|
|
|
|
|
{ |
344
|
|
|
|
|
|
|
my $x = {'element' => $opts}; |
345
|
|
|
|
|
|
|
$opts = $x; |
346
|
|
|
|
|
|
|
} |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
if ($param =~ /^([a-z][a-z0-9\+\.\-]*)\:/i) |
349
|
|
|
|
|
|
|
{ |
350
|
|
|
|
|
|
|
# seems to be an absolute URI, so can safely return "as is". |
351
|
|
|
|
|
|
|
return $param; |
352
|
|
|
|
|
|
|
} |
353
|
|
|
|
|
|
|
elsif ($opts->{'require-absolute'}) |
354
|
|
|
|
|
|
|
{ |
355
|
|
|
|
|
|
|
return undef; |
356
|
|
|
|
|
|
|
} |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
my $base = $self->{baseuri}; |
359
|
|
|
|
|
|
|
if ($self->{'options'}->{'xml_base'}) |
360
|
|
|
|
|
|
|
{ |
361
|
|
|
|
|
|
|
$base = $opts->{'xml_base'} || $self->{baseuri}; |
362
|
|
|
|
|
|
|
} |
363
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
my $rv = $self->{options}{uri_class}->new_abs($param, $base); |
365
|
|
|
|
|
|
|
return "$rv"; |
366
|
|
|
|
|
|
|
} |
367
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
sub errors |
369
|
|
|
|
|
|
|
{ |
370
|
|
|
|
|
|
|
my $self = shift; |
371
|
|
|
|
|
|
|
return @{$self->{errors}}; |
372
|
|
|
|
|
|
|
} |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
sub processor_graph |
375
|
|
|
|
|
|
|
{ |
376
|
|
|
|
|
|
|
my ($self, $model, $context) = @_; |
377
|
|
|
|
|
|
|
$model ||= RDF::Trine::Model->new( RDF::Trine::Store->temporary_store ); |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
my $RDF = RDF::Trine::Namespace->new('http://www.w3.org/1999/02/22-rdf-syntax-ns#'); |
380
|
|
|
|
|
|
|
my $RDFA = RDF::Trine::Namespace->new('http://www.w3.org/ns/rdfa#'); |
381
|
|
|
|
|
|
|
my $CNT = RDF::Trine::Namespace->new('http://www.w3.org/2011/content#'); |
382
|
|
|
|
|
|
|
my $PTR = RDF::Trine::Namespace->new('http://www.w3.org/2009/pointers#'); |
383
|
|
|
|
|
|
|
my $DC = RDF::Trine::Namespace->new('http://purl.org/dc/terms/'); |
384
|
|
|
|
|
|
|
my $ERR = RDF::Trine::Namespace->new('tag:buzzword.org.uk,2010:RDF-RDFa-Parser:error:'); |
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
my $uuid = Data::UUID->new; |
387
|
|
|
|
|
|
|
my $mkuri = sub |
388
|
|
|
|
|
|
|
{ |
389
|
|
|
|
|
|
|
my $id = $uuid->create_str; |
390
|
|
|
|
|
|
|
return $ERR->$id; |
391
|
|
|
|
|
|
|
}; |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
my $st = sub |
394
|
|
|
|
|
|
|
{ |
395
|
|
|
|
|
|
|
my @n = map |
396
|
|
|
|
|
|
|
{ blessed($_) ? $_ : RDF::Trine::Node::Literal->new($_); } |
397
|
|
|
|
|
|
|
@_; |
398
|
|
|
|
|
|
|
if ($context) |
399
|
|
|
|
|
|
|
{ |
400
|
|
|
|
|
|
|
$model->add_statement( |
401
|
|
|
|
|
|
|
RDF::Trine::Statement::Quad->new(@n, $context) |
402
|
|
|
|
|
|
|
); |
403
|
|
|
|
|
|
|
} |
404
|
|
|
|
|
|
|
else |
405
|
|
|
|
|
|
|
{ |
406
|
|
|
|
|
|
|
$model->add_statement( |
407
|
|
|
|
|
|
|
RDF::Trine::Statement->new(@n) |
408
|
|
|
|
|
|
|
); |
409
|
|
|
|
|
|
|
} |
410
|
|
|
|
|
|
|
}; |
411
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
my $typemap = {( |
413
|
|
|
|
|
|
|
ERR_CODE_HOST , 'DocumentError', |
414
|
|
|
|
|
|
|
ERR_CODE_RDFXML_MUDDLE , '', |
415
|
|
|
|
|
|
|
ERR_CODE_RDFXML_MESS , 'DocumentError', |
416
|
|
|
|
|
|
|
ERR_CODE_PREFIX_BUILTIN , 'DocumentError', |
417
|
|
|
|
|
|
|
ERR_CODE_PREFIX_ILLEGAL , 'DocumentError', |
418
|
|
|
|
|
|
|
ERR_CODE_PREFIX_DISABLED , '', |
419
|
|
|
|
|
|
|
ERR_CODE_INSTANCEOF_USED , '', |
420
|
|
|
|
|
|
|
ERR_CODE_INSTANCEOF_OVERRULED , '', |
421
|
|
|
|
|
|
|
ERR_CODE_CURIE_FELLTHROUGH , '', |
422
|
|
|
|
|
|
|
ERR_CODE_CURIE_UNDEFINED , 'UnresolvedCURIE', |
423
|
|
|
|
|
|
|
ERR_CODE_BNODE_WRONGPLACE , '', |
424
|
|
|
|
|
|
|
ERR_CODE_VOCAB_DISABLED , '', |
425
|
|
|
|
|
|
|
ERR_CODE_LANG_INVALID , 'DocumentError', |
426
|
|
|
|
|
|
|
)}; |
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
foreach my $err ($self->errors) |
429
|
|
|
|
|
|
|
{ |
430
|
|
|
|
|
|
|
my $iri = $mkuri->(); |
431
|
|
|
|
|
|
|
my ($level, $code, $message, $args) = @$err; |
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
if ($level eq ERR_WARNING) |
434
|
|
|
|
|
|
|
{ |
435
|
|
|
|
|
|
|
$st->($iri, $RDF->type, $RDFA->Warning); |
436
|
|
|
|
|
|
|
} |
437
|
|
|
|
|
|
|
elsif ($level eq ERR_ERROR) |
438
|
|
|
|
|
|
|
{ |
439
|
|
|
|
|
|
|
$st->($iri, $RDF->type, $RDFA->Error); |
440
|
|
|
|
|
|
|
} |
441
|
|
|
|
|
|
|
if (my $class = $typemap->{$code}) |
442
|
|
|
|
|
|
|
{ |
443
|
|
|
|
|
|
|
$st->($iri, $RDF->type, $RDFA->$class); |
444
|
|
|
|
|
|
|
} |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
$st->($iri, $DC->description, $message); |
447
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
if (blessed($args->{element}) and $args->{element}->can('nodePath')) |
449
|
|
|
|
|
|
|
{ |
450
|
|
|
|
|
|
|
my $p_iri = $mkuri->(); |
451
|
|
|
|
|
|
|
$st->($iri, $RDFA->context, $p_iri); |
452
|
|
|
|
|
|
|
$st->($p_iri, $RDF->type, $PTR->XPathPointer); |
453
|
|
|
|
|
|
|
$st->($p_iri, $PTR->expression, $args->{element}->nodePath); |
454
|
|
|
|
|
|
|
} |
455
|
|
|
|
|
|
|
} |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
return $model; |
458
|
|
|
|
|
|
|
} |
459
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
sub processor_and_output_graph |
461
|
|
|
|
|
|
|
{ |
462
|
|
|
|
|
|
|
my $self = shift; |
463
|
|
|
|
|
|
|
my $model = RDF::Trine::Model->new; |
464
|
|
|
|
|
|
|
$self->$_->get_statements->each(sub { $model->add_statement(+shift) }) |
465
|
|
|
|
|
|
|
foreach qw( processor_graph graph ); |
466
|
|
|
|
|
|
|
return $model; |
467
|
|
|
|
|
|
|
} |
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
sub _log_error |
470
|
|
|
|
|
|
|
{ |
471
|
|
|
|
|
|
|
my ($self, $level, $code, $message, %args) = @_; |
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
if (defined $self->{'sub'}->{'onerror'}) |
474
|
|
|
|
|
|
|
{ |
475
|
|
|
|
|
|
|
$self->{'sub'}->{'onerror'}(@_); |
476
|
|
|
|
|
|
|
} |
477
|
|
|
|
|
|
|
elsif ($level eq ERR_ERROR) |
478
|
|
|
|
|
|
|
{ |
479
|
|
|
|
|
|
|
Carp::carp(sprintf("%04X: %s\n", $code, $message)); |
480
|
|
|
|
|
|
|
Carp::carp(sprintf("... with URI <%s>\n", $args{'uri'})) |
481
|
|
|
|
|
|
|
if defined $args{'uri'}; |
482
|
|
|
|
|
|
|
Carp::carp(sprintf("... on element '%s' with path '%s'\n", $args{'element'}->localname, $args{'element'}->nodePath)) |
483
|
|
|
|
|
|
|
if blessed($args{'element'}) && $args{'element'}->isa('XML::LibXML::Node'); |
484
|
|
|
|
|
|
|
} |
485
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
push @{$self->{errors}}, [$level, $code, $message, \%args]; |
487
|
|
|
|
|
|
|
} |
488
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
sub consume |
490
|
|
|
|
|
|
|
{ |
491
|
|
|
|
|
|
|
my ($self, %args) = @_; |
492
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
return if $self->{'consumed'}; |
494
|
|
|
|
|
|
|
$self->{'consumed'}++; |
495
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
if (!$self->{dom}) |
497
|
|
|
|
|
|
|
{ |
498
|
|
|
|
|
|
|
if ($args{survive}) |
499
|
|
|
|
|
|
|
{ |
500
|
|
|
|
|
|
|
$self->_log_error( |
501
|
|
|
|
|
|
|
ERR_ERROR, |
502
|
|
|
|
|
|
|
ERR_CODE_HOST, |
503
|
|
|
|
|
|
|
'Input could not be parsed into a DOM!', |
504
|
|
|
|
|
|
|
); |
505
|
|
|
|
|
|
|
} |
506
|
|
|
|
|
|
|
else |
507
|
|
|
|
|
|
|
{ |
508
|
|
|
|
|
|
|
Carp::croak("Input could not be parsed into a DOM!"); |
509
|
|
|
|
|
|
|
} |
510
|
|
|
|
|
|
|
return $self; |
511
|
|
|
|
|
|
|
} |
512
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
if ($self->{options}{graph}) |
514
|
|
|
|
|
|
|
{ |
515
|
|
|
|
|
|
|
$self->{options}{graph_attr} = 'graph' |
516
|
|
|
|
|
|
|
unless defined $self->{options}{graph_attr}; |
517
|
|
|
|
|
|
|
$self->{options}{graph_type} = 'about' |
518
|
|
|
|
|
|
|
unless defined $self->{options}{graph_type}; |
519
|
|
|
|
|
|
|
$self->{options}{graph_default} = $self->bnode |
520
|
|
|
|
|
|
|
unless defined $self->{options}{graph_default}; |
521
|
|
|
|
|
|
|
} |
522
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
local *XML::LibXML::Element::getAttributeNsSafe = sub |
524
|
|
|
|
|
|
|
{ |
525
|
|
|
|
|
|
|
my ($element, $nsuri, $attribute) = @_; |
526
|
|
|
|
|
|
|
return defined $nsuri ? $element->getAttributeNS($nsuri, $attribute) : $element->getAttribute($attribute); |
527
|
|
|
|
|
|
|
}; |
528
|
|
|
|
|
|
|
local *XML::LibXML::Element::hasAttributeNsSafe = sub |
529
|
|
|
|
|
|
|
{ |
530
|
|
|
|
|
|
|
my ($element, $nsuri, $attribute) = @_; |
531
|
|
|
|
|
|
|
return defined $nsuri ? $element->hasAttributeNS($nsuri, $attribute) : $element->hasAttribute($attribute); |
532
|
|
|
|
|
|
|
}; |
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
$self->_consume_element($self->dom->documentElement, { init => 1}); |
535
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
if ($self->{options}{atom_parser} && $HAS_AWOL) |
537
|
|
|
|
|
|
|
{ |
538
|
|
|
|
|
|
|
my $awol = XML::Atom::OWL->new( $self->dom , $self->uri , undef, $self->{'model'} ); |
539
|
|
|
|
|
|
|
$awol->{'bnode_generator'} = $self; |
540
|
|
|
|
|
|
|
$awol->set_callbacks( $self->{'sub'} ); |
541
|
|
|
|
|
|
|
$awol->consume; |
542
|
|
|
|
|
|
|
} |
543
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
return $self; |
545
|
|
|
|
|
|
|
} |
546
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
sub _consume_element |
548
|
|
|
|
|
|
|
# http://www.w3.org/TR/rdfa-syntax/#sec_5.5. |
549
|
|
|
|
|
|
|
{ |
550
|
|
|
|
|
|
|
my $self = shift; |
551
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
# Processing begins by applying the processing rules below to the document |
553
|
|
|
|
|
|
|
# object, in the context of this initial [evaluation context]. All elements |
554
|
|
|
|
|
|
|
# in the tree are also processed according to the rules described below, |
555
|
|
|
|
|
|
|
# depth-first, although the [evaluation context] used for each set of rules |
556
|
|
|
|
|
|
|
# will be based on previous rules that may have been applied. |
557
|
|
|
|
|
|
|
my $current_element = shift; |
558
|
|
|
|
|
|
|
|
559
|
|
|
|
|
|
|
# shouldn't happen, but return 0 if it does. |
560
|
|
|
|
|
|
|
return 0 unless $current_element->nodeType == XML_ELEMENT_NODE; |
561
|
|
|
|
|
|
|
|
562
|
|
|
|
|
|
|
# The evaluation context. |
563
|
|
|
|
|
|
|
my $args = shift; |
564
|
|
|
|
|
|
|
my ($base, $parent_subject, $parent_subject_elem, $parent_object, $parent_object_elem, |
565
|
|
|
|
|
|
|
$list_mappings, $uri_mappings, $term_mappings, $incomplete_triples, $language, |
566
|
|
|
|
|
|
|
$graph, $graph_elem, $xml_base); |
567
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
if ($args->{'init'}) |
569
|
|
|
|
|
|
|
{ |
570
|
|
|
|
|
|
|
my $init = RDF::RDFa::Parser::InitialContext->new( |
571
|
|
|
|
|
|
|
$self->{options}{initial_context}, |
572
|
|
|
|
|
|
|
); |
573
|
|
|
|
|
|
|
# At the beginning of processing, an initial [evaluation context] is created |
574
|
|
|
|
|
|
|
$base = $self->uri; |
575
|
|
|
|
|
|
|
$parent_subject = $base; |
576
|
|
|
|
|
|
|
$parent_subject_elem = $self->dom->documentElement; |
577
|
|
|
|
|
|
|
$parent_object = undef; |
578
|
|
|
|
|
|
|
$parent_object_elem = undef; |
579
|
|
|
|
|
|
|
$uri_mappings = +{ insensitive => $init->uri_mappings }; |
580
|
|
|
|
|
|
|
$term_mappings = +{ insensitive => $init->term_mappings }; |
581
|
|
|
|
|
|
|
$incomplete_triples = []; |
582
|
|
|
|
|
|
|
$list_mappings = {}; |
583
|
|
|
|
|
|
|
$language = undef; |
584
|
|
|
|
|
|
|
$graph = $self->{options}{graph} ? $self->{options}{graph_default} : undef; |
585
|
|
|
|
|
|
|
$graph_elem = undef; |
586
|
|
|
|
|
|
|
$xml_base = undef; |
587
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
if ($self->{options}{vocab_default}) |
589
|
|
|
|
|
|
|
{ |
590
|
|
|
|
|
|
|
$uri_mappings->{'(VOCAB)'} = $self->{options}{vocab_default}; |
591
|
|
|
|
|
|
|
} |
592
|
|
|
|
|
|
|
|
593
|
|
|
|
|
|
|
if ($self->{options}{prefix_default}) |
594
|
|
|
|
|
|
|
{ |
595
|
|
|
|
|
|
|
$uri_mappings->{'(DEFAULT PREFIX)'} = $self->{options}{prefix_default}; |
596
|
|
|
|
|
|
|
} |
597
|
|
|
|
|
|
|
} |
598
|
|
|
|
|
|
|
else |
599
|
|
|
|
|
|
|
{ |
600
|
|
|
|
|
|
|
$base = $args->{'base'}; |
601
|
|
|
|
|
|
|
$parent_subject = $args->{'parent_subject'}; |
602
|
|
|
|
|
|
|
$parent_subject_elem = $args->{'parent_subject_elem'}; |
603
|
|
|
|
|
|
|
$parent_object = $args->{'parent_object'}; |
604
|
|
|
|
|
|
|
$parent_object_elem = $args->{'parent_object_elem'}; |
605
|
|
|
|
|
|
|
$uri_mappings = dclone($args->{'uri_mappings'}); |
606
|
|
|
|
|
|
|
$term_mappings = dclone($args->{'term_mappings'}); |
607
|
|
|
|
|
|
|
$incomplete_triples = $args->{'incomplete_triples'}; |
608
|
|
|
|
|
|
|
$list_mappings = $args->{'list_mappings'}; |
609
|
|
|
|
|
|
|
$language = $args->{'language'}; |
610
|
|
|
|
|
|
|
$graph = $args->{'graph'}; |
611
|
|
|
|
|
|
|
$graph_elem = $args->{'graph_elem'}; |
612
|
|
|
|
|
|
|
$xml_base = $args->{'xml_base'}; |
613
|
|
|
|
|
|
|
} |
614
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
# Used by OpenDocument, otherwise usually undef. |
616
|
|
|
|
|
|
|
my $rdfans = $self->{options}{ns} || undef; |
617
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
# First, the local values are initialized |
619
|
|
|
|
|
|
|
my $recurse = 1; |
620
|
|
|
|
|
|
|
my $skip_element = 0; |
621
|
|
|
|
|
|
|
my $new_subject = undef; |
622
|
|
|
|
|
|
|
my $new_subject_elem = undef; |
623
|
|
|
|
|
|
|
my $current_object_resource = undef; |
624
|
|
|
|
|
|
|
my $current_object_resource_elem = undef; |
625
|
|
|
|
|
|
|
my $typed_resource = undef; |
626
|
|
|
|
|
|
|
my $typed_resource_elem = undef; |
627
|
|
|
|
|
|
|
my $local_uri_mappings = $uri_mappings; |
628
|
|
|
|
|
|
|
my $local_term_mappings = $term_mappings; |
629
|
|
|
|
|
|
|
my $local_incomplete_triples = []; |
630
|
|
|
|
|
|
|
my $current_language = $language; |
631
|
|
|
|
|
|
|
|
632
|
|
|
|
|
|
|
my $activity = 0; |
633
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
# MOVED THIS SLIGHTLY EARLIER IN THE PROCESSING so that it can apply |
635
|
|
|
|
|
|
|
# to RDF/XML chunks. |
636
|
|
|
|
|
|
|
# |
637
|
|
|
|
|
|
|
# The [current element] is also parsed for any language information, and |
638
|
|
|
|
|
|
|
# if present, [current language] is set accordingly. |
639
|
|
|
|
|
|
|
# Language information can be provided using the general-purpose XML |
640
|
|
|
|
|
|
|
# attribute @xml:lang . |
641
|
|
|
|
|
|
|
if ($self->{options}{xhtml_lang} |
642
|
|
|
|
|
|
|
&& $current_element->hasAttribute('lang')) |
643
|
|
|
|
|
|
|
{ |
644
|
|
|
|
|
|
|
if ($self->_valid_lang( $current_element->getAttribute('lang') )) |
645
|
|
|
|
|
|
|
{ |
646
|
|
|
|
|
|
|
$current_language = $current_element->getAttribute('lang'); |
647
|
|
|
|
|
|
|
} |
648
|
|
|
|
|
|
|
else |
649
|
|
|
|
|
|
|
{ |
650
|
|
|
|
|
|
|
$self->_log_error( |
651
|
|
|
|
|
|
|
ERR_WARNING, |
652
|
|
|
|
|
|
|
ERR_CODE_LANG_INVALID, |
653
|
|
|
|
|
|
|
sprintf('Language code "%s" is not valid.', $current_element->getAtrribute('lang')), |
654
|
|
|
|
|
|
|
element => $current_element, |
655
|
|
|
|
|
|
|
lang => $current_element->getAttribute('lang'), |
656
|
|
|
|
|
|
|
) if $@; |
657
|
|
|
|
|
|
|
} |
658
|
|
|
|
|
|
|
} |
659
|
|
|
|
|
|
|
if ($self->{options}{xml_lang} |
660
|
|
|
|
|
|
|
&& $current_element->hasAttributeNsSafe(XML_XML_NS, 'lang')) |
661
|
|
|
|
|
|
|
{ |
662
|
|
|
|
|
|
|
if ($self->_valid_lang( $current_element->getAttributeNsSafe(XML_XML_NS, 'lang') )) |
663
|
|
|
|
|
|
|
{ |
664
|
|
|
|
|
|
|
$current_language = $current_element->getAttributeNsSafe(XML_XML_NS, 'lang'); |
665
|
|
|
|
|
|
|
} |
666
|
|
|
|
|
|
|
else |
667
|
|
|
|
|
|
|
{ |
668
|
|
|
|
|
|
|
$self->_log_error( |
669
|
|
|
|
|
|
|
ERR_WARNING, |
670
|
|
|
|
|
|
|
ERR_CODE_LANG_INVALID, |
671
|
|
|
|
|
|
|
sprintf('Language code "%s" is not valid.', $current_element->getAttributeNsSafe(XML_XML_NS, 'lang')), |
672
|
|
|
|
|
|
|
element => $current_element, |
673
|
|
|
|
|
|
|
lang => $current_element->getAttributeNsSafe(XML_XML_NS, 'lang'), |
674
|
|
|
|
|
|
|
) if $@; |
675
|
|
|
|
|
|
|
} |
676
|
|
|
|
|
|
|
} |
677
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
# EXTENSION |
679
|
|
|
|
|
|
|
# xml:base - important for RDF/XML extension |
680
|
|
|
|
|
|
|
if ($current_element->hasAttributeNsSafe(XML_XML_NS, 'base')) |
681
|
|
|
|
|
|
|
{ |
682
|
|
|
|
|
|
|
my $old_base = $xml_base; |
683
|
|
|
|
|
|
|
$xml_base = $current_element->getAttributeNsSafe(XML_XML_NS, 'base'); |
684
|
|
|
|
|
|
|
$xml_base =~ s/#.*$//g; |
685
|
|
|
|
|
|
|
$xml_base = $self->uri($xml_base, |
686
|
|
|
|
|
|
|
{'element'=>$current_element,'xml_base'=>$old_base}); |
687
|
|
|
|
|
|
|
} |
688
|
|
|
|
|
|
|
my $hrefsrc_base = $base; |
689
|
|
|
|
|
|
|
if ($self->{options}{xml_base}==2 && defined $xml_base) |
690
|
|
|
|
|
|
|
{ |
691
|
|
|
|
|
|
|
$hrefsrc_base = $xml_base; |
692
|
|
|
|
|
|
|
} |
693
|
|
|
|
|
|
|
|
694
|
|
|
|
|
|
|
# EXTENSION |
695
|
|
|
|
|
|
|
# Parses embedded RDF/XML - mostly useful for non-XHTML documents, e.g. SVG. |
696
|
|
|
|
|
|
|
if ($self->{options}{embedded_rdfxml} |
697
|
|
|
|
|
|
|
&& $current_element->localname eq 'RDF' |
698
|
|
|
|
|
|
|
&& $current_element->namespaceURI eq 'http://www.w3.org/1999/02/22-rdf-syntax-ns#') |
699
|
|
|
|
|
|
|
{ |
700
|
|
|
|
|
|
|
return 1 if $self->{options}{embedded_rdfxml}==2; |
701
|
|
|
|
|
|
|
|
702
|
|
|
|
|
|
|
my $g = $graph; |
703
|
|
|
|
|
|
|
unless ($self->{options}{embedded_rdfxml} == 3) |
704
|
|
|
|
|
|
|
{ |
705
|
|
|
|
|
|
|
$g = $self->bnode; |
706
|
|
|
|
|
|
|
} |
707
|
|
|
|
|
|
|
|
708
|
|
|
|
|
|
|
my $fake_lang = 0; |
709
|
|
|
|
|
|
|
unless ($current_element->hasAttributeNsSafe(XML_XML_NS, 'lang')) |
710
|
|
|
|
|
|
|
{ |
711
|
|
|
|
|
|
|
$current_element->setAttributeNS(XML_XML_NS, 'lang', $current_language); |
712
|
|
|
|
|
|
|
$fake_lang = 1; |
713
|
|
|
|
|
|
|
} |
714
|
|
|
|
|
|
|
|
715
|
|
|
|
|
|
|
my $rdfxml_base = $self->{'origbase'}; |
716
|
|
|
|
|
|
|
$rdfxml_base = $base |
717
|
|
|
|
|
|
|
if $self->{options}{xhtml_base}==2; |
718
|
|
|
|
|
|
|
$rdfxml_base = $xml_base |
719
|
|
|
|
|
|
|
if defined $xml_base; |
720
|
|
|
|
|
|
|
|
721
|
|
|
|
|
|
|
eval { |
722
|
|
|
|
|
|
|
my $_map; |
723
|
|
|
|
|
|
|
my $bnode_mapper = sub { |
724
|
|
|
|
|
|
|
my $orig = shift; |
725
|
|
|
|
|
|
|
$_map->{$orig} = $self->bnode |
726
|
|
|
|
|
|
|
unless defined $_map->{$orig}; |
727
|
|
|
|
|
|
|
return $_map->{$orig}; |
728
|
|
|
|
|
|
|
}; |
729
|
|
|
|
|
|
|
my $parser = RDF::Trine::Parser->new('rdfxml'); |
730
|
|
|
|
|
|
|
my $r = $parser->parse( |
731
|
|
|
|
|
|
|
$rdfxml_base, |
732
|
|
|
|
|
|
|
$current_element->toStringEC14N, |
733
|
|
|
|
|
|
|
sub { |
734
|
|
|
|
|
|
|
my $st = shift; |
735
|
|
|
|
|
|
|
my ($s, $p, @o); |
736
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
$s = $st->subject->is_blank ? |
738
|
|
|
|
|
|
|
$bnode_mapper->($st->subject->blank_identifier) : |
739
|
|
|
|
|
|
|
$st->subject->uri_value ; |
740
|
|
|
|
|
|
|
$p = $st->predicate->uri_value ; |
741
|
|
|
|
|
|
|
if ($st->object->is_literal) |
742
|
|
|
|
|
|
|
{ |
743
|
|
|
|
|
|
|
@o = ( |
744
|
|
|
|
|
|
|
$st->object->literal_value, |
745
|
|
|
|
|
|
|
$st->object->literal_datatype, |
746
|
|
|
|
|
|
|
$st->object->literal_value_language, |
747
|
|
|
|
|
|
|
); |
748
|
|
|
|
|
|
|
$self->_insert_triple_literal({current=>$current_element}, |
749
|
|
|
|
|
|
|
$s, $p, @o, |
750
|
|
|
|
|
|
|
($self->{options}{graph} ? $g : undef)); |
751
|
|
|
|
|
|
|
} |
752
|
|
|
|
|
|
|
else |
753
|
|
|
|
|
|
|
{ |
754
|
|
|
|
|
|
|
push @o, $st->object->is_blank ? |
755
|
|
|
|
|
|
|
$bnode_mapper->($st->object->blank_identifier) : |
756
|
|
|
|
|
|
|
$st->object->uri_value; |
757
|
|
|
|
|
|
|
$self->_insert_triple_resource({current=>$current_element}, |
758
|
|
|
|
|
|
|
$s, $p, @o, |
759
|
|
|
|
|
|
|
($self->{options}{graph} ? $g : undef)); |
760
|
|
|
|
|
|
|
} |
761
|
|
|
|
|
|
|
}); |
762
|
|
|
|
|
|
|
}; |
763
|
|
|
|
|
|
|
|
764
|
|
|
|
|
|
|
$self->_log_error( |
765
|
|
|
|
|
|
|
ERR_ERROR, |
766
|
|
|
|
|
|
|
ERR_CODE_RDFXML_MESS, |
767
|
|
|
|
|
|
|
"Could not parse embedded RDF/XML content: ${@}", |
768
|
|
|
|
|
|
|
element => $current_element, |
769
|
|
|
|
|
|
|
) if $@; |
770
|
|
|
|
|
|
|
|
771
|
|
|
|
|
|
|
$current_element->removeAttributeNS(XML_XML_NS, 'lang') |
772
|
|
|
|
|
|
|
if ($fake_lang); |
773
|
|
|
|
|
|
|
|
774
|
|
|
|
|
|
|
return 1; |
775
|
|
|
|
|
|
|
} |
776
|
|
|
|
|
|
|
elsif ($current_element->localname eq 'RDF' |
777
|
|
|
|
|
|
|
and $current_element->namespaceURI eq 'http://www.w3.org/1999/02/22-rdf-syntax-ns#') |
778
|
|
|
|
|
|
|
{ |
779
|
|
|
|
|
|
|
$self->_log_error( |
780
|
|
|
|
|
|
|
ERR_WARNING, |
781
|
|
|
|
|
|
|
ERR_CODE_RDFXML_MUDDLE, |
782
|
|
|
|
|
|
|
'Encountered embedded RDF/XML content, but not configured to parse or skip it.', |
783
|
|
|
|
|
|
|
element => $current_element, |
784
|
|
|
|
|
|
|
); |
785
|
|
|
|
|
|
|
} |
786
|
|
|
|
|
|
|
|
787
|
|
|
|
|
|
|
# Next the [current element] is parsed for [URI mapping]s and these are |
788
|
|
|
|
|
|
|
# added to the [local list of URI mappings]. Note that a [URI mapping] |
789
|
|
|
|
|
|
|
# will simply overwrite any current mapping in the list that has the same |
790
|
|
|
|
|
|
|
# name |
791
|
|
|
|
|
|
|
# |
792
|
|
|
|
|
|
|
# Mappings are provided by @xmlns. The value to be mapped is set by |
793
|
|
|
|
|
|
|
# the XML namespace prefix, and the value to map is the value of the |
794
|
|
|
|
|
|
|
# attribute - a URI. Note that the URI is not processed in any way; |
795
|
|
|
|
|
|
|
# in particular if it is a relative path it is not resolved against |
796
|
|
|
|
|
|
|
# the current [base]. Authors are advised to follow best practice |
797
|
|
|
|
|
|
|
# for using namespaces, which includes not using relative paths. |
798
|
|
|
|
|
|
|
if ($self->{'options'}->{'xmlns_attr'}) |
799
|
|
|
|
|
|
|
{ |
800
|
|
|
|
|
|
|
foreach my $A ($current_element->getAttributes) |
801
|
|
|
|
|
|
|
{ |
802
|
|
|
|
|
|
|
my $attr = $A->getName; |
803
|
|
|
|
|
|
|
|
804
|
|
|
|
|
|
|
if ($attr =~ /^xmlns\:(.+)$/i) |
805
|
|
|
|
|
|
|
{ |
806
|
|
|
|
|
|
|
my $pfx = $self->{'options'}->{'prefix_nocase_xmlns'} ? (lc $1) : $1; |
807
|
|
|
|
|
|
|
my $cls = $self->{'options'}->{'prefix_nocase_xmlns'} ? 'insensitive' : 'sensitive'; |
808
|
|
|
|
|
|
|
my $uri = $A->getValue; |
809
|
|
|
|
|
|
|
|
810
|
|
|
|
|
|
|
if ($pfx =~ /^(xml|xmlns|_)$/i) |
811
|
|
|
|
|
|
|
{ |
812
|
|
|
|
|
|
|
$self->_log_error( |
813
|
|
|
|
|
|
|
ERR_ERROR, |
814
|
|
|
|
|
|
|
ERR_CODE_PREFIX_BUILTIN, |
815
|
|
|
|
|
|
|
"Attempt to redefine built-in CURIE prefix '$pfx' not allowed.", |
816
|
|
|
|
|
|
|
element => $current_element, |
817
|
|
|
|
|
|
|
prefix => $pfx, |
818
|
|
|
|
|
|
|
uri => $uri, |
819
|
|
|
|
|
|
|
); |
820
|
|
|
|
|
|
|
} |
821
|
|
|
|
|
|
|
elsif ($pfx !~ /^($XML::RegExp::NCName)$/) |
822
|
|
|
|
|
|
|
{ |
823
|
|
|
|
|
|
|
$self->_log_error( |
824
|
|
|
|
|
|
|
ERR_ERROR, |
825
|
|
|
|
|
|
|
ERR_CODE_PREFIX_ILLEGAL, |
826
|
|
|
|
|
|
|
"Attempt to define non-NCName CURIE prefix '$pfx' not allowed.", |
827
|
|
|
|
|
|
|
element => $current_element, |
828
|
|
|
|
|
|
|
prefix => $pfx, |
829
|
|
|
|
|
|
|
uri => $uri, |
830
|
|
|
|
|
|
|
); |
831
|
|
|
|
|
|
|
} |
832
|
|
|
|
|
|
|
elsif ($uri eq XML_XML_NS || $uri eq XML_XMLNS_NS) |
833
|
|
|
|
|
|
|
{ |
834
|
|
|
|
|
|
|
$self->_log_error( |
835
|
|
|
|
|
|
|
ERR_ERROR, |
836
|
|
|
|
|
|
|
ERR_CODE_PREFIX_BUILTIN, |
837
|
|
|
|
|
|
|
"Attempt to define any CURIE prefix for '$uri' not allowed using \@xmlns.", |
838
|
|
|
|
|
|
|
element => $current_element, |
839
|
|
|
|
|
|
|
prefix => $pfx, |
840
|
|
|
|
|
|
|
uri => $uri, |
841
|
|
|
|
|
|
|
); |
842
|
|
|
|
|
|
|
} |
843
|
|
|
|
|
|
|
else |
844
|
|
|
|
|
|
|
{ |
845
|
|
|
|
|
|
|
$self->{'sub'}->{'onprefix'}($self, $current_element, $pfx, $uri, $cls) |
846
|
|
|
|
|
|
|
if defined $self->{'sub'}->{'onprefix'}; |
847
|
|
|
|
|
|
|
|
848
|
|
|
|
|
|
|
$local_uri_mappings->{$cls}->{$pfx} = $uri; |
849
|
|
|
|
|
|
|
} |
850
|
|
|
|
|
|
|
} |
851
|
|
|
|
|
|
|
} |
852
|
|
|
|
|
|
|
} |
853
|
|
|
|
|
|
|
|
854
|
|
|
|
|
|
|
# RDFa 1.1 - @prefix support. |
855
|
|
|
|
|
|
|
# Note that this overwrites @xmlns:foo. |
856
|
|
|
|
|
|
|
if ($self->{'options'}->{'prefix_attr'} |
857
|
|
|
|
|
|
|
&& $current_element->hasAttributeNsSafe($rdfans, 'prefix')) |
858
|
|
|
|
|
|
|
{ |
859
|
|
|
|
|
|
|
my $pfx_attr = $current_element->getAttributeNsSafe($rdfans, 'prefix') . ' '; |
860
|
|
|
|
|
|
|
my @bits = split /[\s\r\n]+/, $pfx_attr; |
861
|
|
|
|
|
|
|
while (@bits) |
862
|
|
|
|
|
|
|
{ |
863
|
|
|
|
|
|
|
my ($bit1, $bit2, @rest) = @bits; |
864
|
|
|
|
|
|
|
@bits = @rest; |
865
|
|
|
|
|
|
|
$bit1 =~ s/:$//; |
866
|
|
|
|
|
|
|
|
867
|
|
|
|
|
|
|
my $pfx = $self->{'options'}->{'prefix_nocase_attr'} ? (lc $bit1) : $bit1; |
868
|
|
|
|
|
|
|
my $cls = $self->{'options'}->{'prefix_nocase_attr'} ? 'insensitive' : 'sensitive'; |
869
|
|
|
|
|
|
|
my $uri = $bit2; |
870
|
|
|
|
|
|
|
|
871
|
|
|
|
|
|
|
unless ($pfx =~ /^$XML::RegExp::NCName$/) |
872
|
|
|
|
|
|
|
{ |
873
|
|
|
|
|
|
|
$self->_log_error( |
874
|
|
|
|
|
|
|
ERR_ERROR, |
875
|
|
|
|
|
|
|
ERR_CODE_PREFIX_ILLEGAL, |
876
|
|
|
|
|
|
|
"Attempt to define non-NCName CURIE prefix '$pfx' not allowed.", |
877
|
|
|
|
|
|
|
element => $current_element, |
878
|
|
|
|
|
|
|
prefix => $pfx, |
879
|
|
|
|
|
|
|
uri => $uri, |
880
|
|
|
|
|
|
|
); |
881
|
|
|
|
|
|
|
next; |
882
|
|
|
|
|
|
|
} |
883
|
|
|
|
|
|
|
|
884
|
|
|
|
|
|
|
$self->{'sub'}->{'onprefix'}($self, $current_element, $pfx, $uri, $cls) |
885
|
|
|
|
|
|
|
if defined $self->{'sub'}->{'onprefix'}; |
886
|
|
|
|
|
|
|
$local_uri_mappings->{$cls}->{$pfx} = $uri; |
887
|
|
|
|
|
|
|
} |
888
|
|
|
|
|
|
|
} |
889
|
|
|
|
|
|
|
elsif ($current_element->hasAttributeNsSafe($rdfans, 'prefix')) |
890
|
|
|
|
|
|
|
{ |
891
|
|
|
|
|
|
|
$self->_log_error( |
892
|
|
|
|
|
|
|
ERR_WARNING, |
893
|
|
|
|
|
|
|
ERR_CODE_PREFIX_DISABLED, |
894
|
|
|
|
|
|
|
"\@prefix found, but support disabled.", |
895
|
|
|
|
|
|
|
element => $current_element, |
896
|
|
|
|
|
|
|
); |
897
|
|
|
|
|
|
|
} |
898
|
|
|
|
|
|
|
|
899
|
|
|
|
|
|
|
# RDFa 1.1 - @vocab support |
900
|
|
|
|
|
|
|
if ($self->{options}{vocab_attr} |
901
|
|
|
|
|
|
|
&& $current_element->hasAttributeNsSafe($rdfans, 'vocab')) |
902
|
|
|
|
|
|
|
{ |
903
|
|
|
|
|
|
|
if ($current_element->getAttributeNsSafe($rdfans, 'vocab') eq '') |
904
|
|
|
|
|
|
|
{ |
905
|
|
|
|
|
|
|
$local_uri_mappings->{'(VOCAB)'} = $self->{options}{vocab_default}; |
906
|
|
|
|
|
|
|
} |
907
|
|
|
|
|
|
|
else |
908
|
|
|
|
|
|
|
{ |
909
|
|
|
|
|
|
|
$local_uri_mappings->{'(VOCAB)'} = $self->uri( |
910
|
|
|
|
|
|
|
$current_element->getAttributeNsSafe($rdfans, 'vocab'), |
911
|
|
|
|
|
|
|
{'element'=>$current_element,'xml_base'=>$xml_base}); |
912
|
|
|
|
|
|
|
} |
913
|
|
|
|
|
|
|
} |
914
|
|
|
|
|
|
|
elsif ($current_element->hasAttributeNsSafe($rdfans, 'vocab')) |
915
|
|
|
|
|
|
|
{ |
916
|
|
|
|
|
|
|
$self->_log_error( |
917
|
|
|
|
|
|
|
ERR_WARNING, |
918
|
|
|
|
|
|
|
ERR_CODE_VOCAB_DISABLED, |
919
|
|
|
|
|
|
|
"\@vocab found, but support disabled.", |
920
|
|
|
|
|
|
|
element => $current_element, |
921
|
|
|
|
|
|
|
uri => $self->uri( |
922
|
|
|
|
|
|
|
$current_element->getAttributeNsSafe($rdfans, 'vocab'), |
923
|
|
|
|
|
|
|
{'element'=>$current_element,'xml_base'=>$xml_base}), |
924
|
|
|
|
|
|
|
); |
925
|
|
|
|
|
|
|
} |
926
|
|
|
|
|
|
|
|
927
|
|
|
|
|
|
|
# EXTENSION |
928
|
|
|
|
|
|
|
# KjetilK's named graphs. |
929
|
|
|
|
|
|
|
if ($self->{'options'}->{'graph'}) |
930
|
|
|
|
|
|
|
{ |
931
|
|
|
|
|
|
|
my ($xmlns, $attr) = ($self->{'options'}->{'graph_attr'} =~ /^(?:\{(.+)\})?(.+)$/); |
932
|
|
|
|
|
|
|
unless ($attr) |
933
|
|
|
|
|
|
|
{ |
934
|
|
|
|
|
|
|
$xmlns = $rdfans; |
935
|
|
|
|
|
|
|
$attr = 'graph'; |
936
|
|
|
|
|
|
|
} |
937
|
|
|
|
|
|
|
|
938
|
|
|
|
|
|
|
if ($self->{'options'}->{'graph_type'} eq 'id' |
939
|
|
|
|
|
|
|
&& $current_element->hasAttributeNsSafe($xmlns, $attr)) |
940
|
|
|
|
|
|
|
{ |
941
|
|
|
|
|
|
|
$graph = $self->uri('#' . $current_element->getAttributeNsSafe($xmlns, $attr), |
942
|
|
|
|
|
|
|
{'element'=>$current_element,'xml_base'=>$hrefsrc_base}); |
943
|
|
|
|
|
|
|
} |
944
|
|
|
|
|
|
|
elsif ($self->{'options'}->{'graph_type'} eq 'about' |
945
|
|
|
|
|
|
|
&& $current_element->hasAttributeNsSafe($xmlns, $attr)) |
946
|
|
|
|
|
|
|
{ |
947
|
|
|
|
|
|
|
$graph = $self->_expand_curie( |
948
|
|
|
|
|
|
|
$current_element->getAttributeNsSafe($xmlns, $attr), |
949
|
|
|
|
|
|
|
element => $current_element, |
950
|
|
|
|
|
|
|
attribute => 'graph', |
951
|
|
|
|
|
|
|
prefixes => $local_uri_mappings, |
952
|
|
|
|
|
|
|
terms => $local_term_mappings, |
953
|
|
|
|
|
|
|
xml_base => $xml_base, |
954
|
|
|
|
|
|
|
); |
955
|
|
|
|
|
|
|
$graph = $self->{'options'}->{'graph_default'} |
956
|
|
|
|
|
|
|
unless defined $graph; |
957
|
|
|
|
|
|
|
} |
958
|
|
|
|
|
|
|
} |
959
|
|
|
|
|
|
|
|
960
|
|
|
|
|
|
|
if ($self->{options}{vocab_triple} |
961
|
|
|
|
|
|
|
and $self->{options}{vocab_attr} |
962
|
|
|
|
|
|
|
and $current_element->hasAttributeNsSafe($rdfans, 'vocab') |
963
|
|
|
|
|
|
|
and defined $local_uri_mappings->{'(VOCAB)'}) |
964
|
|
|
|
|
|
|
{ |
965
|
|
|
|
|
|
|
$self->_insert_triple_resource({ |
966
|
|
|
|
|
|
|
current => $current_element, |
967
|
|
|
|
|
|
|
subject => $current_element->ownerDocument->documentElement, |
968
|
|
|
|
|
|
|
predicate => $current_element, |
969
|
|
|
|
|
|
|
object => $current_element, |
970
|
|
|
|
|
|
|
graph => $graph_elem, |
971
|
|
|
|
|
|
|
}, |
972
|
|
|
|
|
|
|
$base, |
973
|
|
|
|
|
|
|
'http://www.w3.org/ns/rdfa#usesVocabulary', |
974
|
|
|
|
|
|
|
$local_uri_mappings->{'(VOCAB)'}, |
975
|
|
|
|
|
|
|
$graph); |
976
|
|
|
|
|
|
|
} |
977
|
|
|
|
|
|
|
|
978
|
|
|
|
|
|
|
# EXTENSION: @role |
979
|
|
|
|
|
|
|
if ($self->{'options'}->{'role_attr'} |
980
|
|
|
|
|
|
|
&& $current_element->hasAttributeNsSafe($rdfans, 'role')) |
981
|
|
|
|
|
|
|
{ |
982
|
|
|
|
|
|
|
my @role = $self->_split_tokens( $current_element->getAttributeNsSafe($rdfans, 'role') ); |
983
|
|
|
|
|
|
|
my @ROLE = map { |
984
|
|
|
|
|
|
|
my $x = $self->_expand_curie( |
985
|
|
|
|
|
|
|
$_, |
986
|
|
|
|
|
|
|
element => $current_element, |
987
|
|
|
|
|
|
|
attribute => 'role', |
988
|
|
|
|
|
|
|
prefixes => $local_uri_mappings, |
989
|
|
|
|
|
|
|
terms => $local_term_mappings, |
990
|
|
|
|
|
|
|
xml_base => $xml_base, |
991
|
|
|
|
|
|
|
); |
992
|
|
|
|
|
|
|
defined $x ? ($x) : (); |
993
|
|
|
|
|
|
|
} @role; |
994
|
|
|
|
|
|
|
if (@ROLE) |
995
|
|
|
|
|
|
|
{ |
996
|
|
|
|
|
|
|
if ($current_element->hasAttribute('id') |
997
|
|
|
|
|
|
|
and !defined $self->{element_subjects}->{$current_element->nodePath}) |
998
|
|
|
|
|
|
|
{ |
999
|
|
|
|
|
|
|
$self->{element_subjects}->{$current_element->nodePath} = $self->uri(sprintf('#%s', |
1000
|
|
|
|
|
|
|
$current_element->getAttribute('id')), |
1001
|
|
|
|
|
|
|
{'element'=>$current_element,'xml_base'=>$hrefsrc_base}); |
1002
|
|
|
|
|
|
|
} |
1003
|
|
|
|
|
|
|
elsif (!defined $self->{element_subjects}->{$current_element->nodePath}) |
1004
|
|
|
|
|
|
|
{ |
1005
|
|
|
|
|
|
|
$self->{element_subjects}->{$current_element->nodePath} = $self->bnode; |
1006
|
|
|
|
|
|
|
} |
1007
|
|
|
|
|
|
|
|
1008
|
|
|
|
|
|
|
foreach my $r (@ROLE) |
1009
|
|
|
|
|
|
|
{ |
1010
|
|
|
|
|
|
|
my $E = { |
1011
|
|
|
|
|
|
|
current => $current_element, |
1012
|
|
|
|
|
|
|
subject => $current_element, |
1013
|
|
|
|
|
|
|
predicate => $current_element, |
1014
|
|
|
|
|
|
|
object => $current_element, |
1015
|
|
|
|
|
|
|
graph => $graph_elem, |
1016
|
|
|
|
|
|
|
}; |
1017
|
|
|
|
|
|
|
$self->_insert_triple_resource($E, $self->{element_subjects}->{$current_element->nodePath}, 'http://www.w3.org/1999/xhtml/vocab#role', $r, $graph); |
1018
|
|
|
|
|
|
|
} |
1019
|
|
|
|
|
|
|
} |
1020
|
|
|
|
|
|
|
} |
1021
|
|
|
|
|
|
|
|
1022
|
|
|
|
|
|
|
# EXTENSION: @cite |
1023
|
|
|
|
|
|
|
if ($self->{'options'}->{'cite_attr'} |
1024
|
|
|
|
|
|
|
&& $current_element->hasAttributeNsSafe($rdfans, 'cite')) |
1025
|
|
|
|
|
|
|
{ |
1026
|
|
|
|
|
|
|
my $citation = $self->uri( |
1027
|
|
|
|
|
|
|
$current_element->getAttributeNsSafe($rdfans, 'cite'), |
1028
|
|
|
|
|
|
|
{'element'=>$current_element,'xml_base'=>$hrefsrc_base} |
1029
|
|
|
|
|
|
|
); |
1030
|
|
|
|
|
|
|
if (defined $citation) |
1031
|
|
|
|
|
|
|
{ |
1032
|
|
|
|
|
|
|
if ($current_element->hasAttribute('id') |
1033
|
|
|
|
|
|
|
and !defined $self->{element_subjects}->{$current_element->nodePath}) |
1034
|
|
|
|
|
|
|
{ |
1035
|
|
|
|
|
|
|
$self->{element_subjects}->{$current_element->nodePath} = $self->uri(sprintf('#%s', |
1036
|
|
|
|
|
|
|
$current_element->getAttribute('id')), |
1037
|
|
|
|
|
|
|
{'element'=>$current_element,'xml_base'=>$hrefsrc_base}); |
1038
|
|
|
|
|
|
|
} |
1039
|
|
|
|
|
|
|
elsif (!defined $self->{element_subjects}->{$current_element->nodePath}) |
1040
|
|
|
|
|
|
|
{ |
1041
|
|
|
|
|
|
|
$self->{element_subjects}->{$current_element->nodePath} = $self->bnode; |
1042
|
|
|
|
|
|
|
} |
1043
|
|
|
|
|
|
|
|
1044
|
|
|
|
|
|
|
my $E = { |
1045
|
|
|
|
|
|
|
current => $current_element, |
1046
|
|
|
|
|
|
|
subject => $current_element, |
1047
|
|
|
|
|
|
|
predicate => $current_element, |
1048
|
|
|
|
|
|
|
object => $current_element, |
1049
|
|
|
|
|
|
|
graph => $graph_elem, |
1050
|
|
|
|
|
|
|
}; |
1051
|
|
|
|
|
|
|
$self->_insert_triple_resource($E, $self->{element_subjects}->{$current_element->nodePath}, 'http://www.w3.org/1999/xhtml/vocab#cite', $citation, $graph); |
1052
|
|
|
|
|
|
|
} |
1053
|
|
|
|
|
|
|
} |
1054
|
|
|
|
|
|
|
|
1055
|
|
|
|
|
|
|
my @rel = $self->_split_tokens( $current_element->getAttributeNsSafe($rdfans, 'rel') ); |
1056
|
|
|
|
|
|
|
my @rev = $self->_split_tokens( $current_element->getAttributeNsSafe($rdfans, 'rev') ); |
1057
|
|
|
|
|
|
|
|
1058
|
|
|
|
|
|
|
# EXTENSION: rel="alternate stylesheet" |
1059
|
|
|
|
|
|
|
if ($self->{options}{alt_stylesheet} |
1060
|
|
|
|
|
|
|
&& (grep /^alternate$/i, @rel) |
1061
|
|
|
|
|
|
|
&& (grep /^stylesheet$/i, @rel)) |
1062
|
|
|
|
|
|
|
{ |
1063
|
|
|
|
|
|
|
@rel = grep !/^(alternate|stylesheet)$/i, @rel; |
1064
|
|
|
|
|
|
|
push @rel, ':ALTERNATE-STYLESHEET'; |
1065
|
|
|
|
|
|
|
} |
1066
|
|
|
|
|
|
|
|
1067
|
|
|
|
|
|
|
my @REL = map { |
1068
|
|
|
|
|
|
|
my $x = $self->_expand_curie( |
1069
|
|
|
|
|
|
|
$_, |
1070
|
|
|
|
|
|
|
element => $current_element, |
1071
|
|
|
|
|
|
|
attribute => 'rel', |
1072
|
|
|
|
|
|
|
prefixes => $local_uri_mappings, |
1073
|
|
|
|
|
|
|
terms => $local_term_mappings, |
1074
|
|
|
|
|
|
|
xml_base => $xml_base, |
1075
|
|
|
|
|
|
|
); |
1076
|
|
|
|
|
|
|
defined $x ? ($x) : (); |
1077
|
|
|
|
|
|
|
} @rel; |
1078
|
|
|
|
|
|
|
my @REV = map { |
1079
|
|
|
|
|
|
|
my $x = $self->_expand_curie( |
1080
|
|
|
|
|
|
|
$_, |
1081
|
|
|
|
|
|
|
element => $current_element, |
1082
|
|
|
|
|
|
|
attribute => 'rev', |
1083
|
|
|
|
|
|
|
prefixes => $local_uri_mappings, |
1084
|
|
|
|
|
|
|
terms => $local_term_mappings, |
1085
|
|
|
|
|
|
|
xml_base => $xml_base, |
1086
|
|
|
|
|
|
|
); |
1087
|
|
|
|
|
|
|
defined $x ? ($x) : (); |
1088
|
|
|
|
|
|
|
} @rev; |
1089
|
|
|
|
|
|
|
|
1090
|
|
|
|
|
|
|
my $NEW_SUBJECT_ATTR_ABOUT = sub |
1091
|
|
|
|
|
|
|
{ |
1092
|
|
|
|
|
|
|
if ($current_element->hasAttributeNsSafe($rdfans, 'about')) |
1093
|
|
|
|
|
|
|
{ |
1094
|
|
|
|
|
|
|
my $s = $self->_expand_curie( |
1095
|
|
|
|
|
|
|
$current_element->getAttributeNsSafe($rdfans, 'about'), |
1096
|
|
|
|
|
|
|
element => $current_element, |
1097
|
|
|
|
|
|
|
attribute => 'about', |
1098
|
|
|
|
|
|
|
prefixes => $local_uri_mappings, |
1099
|
|
|
|
|
|
|
terms => $local_term_mappings, |
1100
|
|
|
|
|
|
|
xml_base => $xml_base, |
1101
|
|
|
|
|
|
|
); |
1102
|
|
|
|
|
|
|
my $e = $current_element; |
1103
|
|
|
|
|
|
|
return ($s, $e); |
1104
|
|
|
|
|
|
|
} |
1105
|
|
|
|
|
|
|
return; |
1106
|
|
|
|
|
|
|
}; |
1107
|
|
|
|
|
|
|
|
1108
|
|
|
|
|
|
|
my $NEW_SUBJECT_ATTR_SRC = sub |
1109
|
|
|
|
|
|
|
{ |
1110
|
|
|
|
|
|
|
if ($current_element->hasAttributeNsSafe($rdfans, 'src')) |
1111
|
|
|
|
|
|
|
{ |
1112
|
|
|
|
|
|
|
my $s = $self->uri( |
1113
|
|
|
|
|
|
|
$current_element->getAttributeNsSafe($rdfans, 'src'), |
1114
|
|
|
|
|
|
|
{'element'=>$current_element,'xml_base'=>$hrefsrc_base} |
1115
|
|
|
|
|
|
|
); |
1116
|
|
|
|
|
|
|
my $e = $current_element; |
1117
|
|
|
|
|
|
|
return ($s, $e); |
1118
|
|
|
|
|
|
|
} |
1119
|
|
|
|
|
|
|
return; |
1120
|
|
|
|
|
|
|
}; |
1121
|
|
|
|
|
|
|
|
1122
|
|
|
|
|
|
|
my $NEW_SUBJECT_DEFAULTS = sub |
1123
|
|
|
|
|
|
|
{ |
1124
|
|
|
|
|
|
|
if ($current_element == $current_element->ownerDocument->documentElement) |
1125
|
|
|
|
|
|
|
{ |
1126
|
|
|
|
|
|
|
return ($self->uri(undef, {'element'=>$current_element,'xml_base'=>$hrefsrc_base}), $current_element); |
1127
|
|
|
|
|
|
|
} |
1128
|
|
|
|
|
|
|
|
1129
|
|
|
|
|
|
|
# if the element is the head or body element then act as if |
1130
|
|
|
|
|
|
|
# there is an empty @about present, and process it according to |
1131
|
|
|
|
|
|
|
# the rule for @about, above; |
1132
|
|
|
|
|
|
|
if ($self->{options}{xhtml_elements} |
1133
|
|
|
|
|
|
|
&& ($current_element->namespaceURI eq 'http://www.w3.org/1999/xhtml') |
1134
|
|
|
|
|
|
|
&& ($current_element->tagName eq 'head' || $current_element->tagName eq 'body')) |
1135
|
|
|
|
|
|
|
{ |
1136
|
|
|
|
|
|
|
return ($parent_object, $parent_object_elem) |
1137
|
|
|
|
|
|
|
if $self->{options}{xhtml_elements}==2; |
1138
|
|
|
|
|
|
|
return ($self->uri(undef, {'element'=>$current_element,'xml_base'=>$hrefsrc_base}), $current_element); |
1139
|
|
|
|
|
|
|
} |
1140
|
|
|
|
|
|
|
|
1141
|
|
|
|
|
|
|
# EXTENSION: atom elements |
1142
|
|
|
|
|
|
|
if ($self->{options}{atom_elements} |
1143
|
|
|
|
|
|
|
&& ($current_element->namespaceURI eq 'http://www.w3.org/2005/Atom') |
1144
|
|
|
|
|
|
|
&& ($current_element->tagName eq 'feed' || $current_element->tagName eq 'entry')) |
1145
|
|
|
|
|
|
|
{ |
1146
|
|
|
|
|
|
|
return ($self->_atom_magic($current_element), $current_element); |
1147
|
|
|
|
|
|
|
} |
1148
|
|
|
|
|
|
|
|
1149
|
|
|
|
|
|
|
return; |
1150
|
|
|
|
|
|
|
}; |
1151
|
|
|
|
|
|
|
|
1152
|
|
|
|
|
|
|
my $NEW_SUBJECT_INHERIT = sub |
1153
|
|
|
|
|
|
|
{ |
1154
|
|
|
|
|
|
|
$skip_element = 1 |
1155
|
|
|
|
|
|
|
if shift |
1156
|
|
|
|
|
|
|
&& not $current_element->hasAttributeNsSafe($rdfans, 'property'); |
1157
|
|
|
|
|
|
|
|
1158
|
|
|
|
|
|
|
return ($parent_object, $parent_object_elem) if $parent_object; |
1159
|
|
|
|
|
|
|
return; |
1160
|
|
|
|
|
|
|
}; |
1161
|
|
|
|
|
|
|
|
1162
|
|
|
|
|
|
|
my $NEW_SUBJECT_ATTR_RESOURCE = sub |
1163
|
|
|
|
|
|
|
{ |
1164
|
|
|
|
|
|
|
if ($current_element->hasAttributeNsSafe($rdfans, 'resource')) |
1165
|
|
|
|
|
|
|
{ |
1166
|
|
|
|
|
|
|
my $s = $self->_expand_curie( |
1167
|
|
|
|
|
|
|
$current_element->getAttributeNsSafe($rdfans, 'resource'), |
1168
|
|
|
|
|
|
|
element => $current_element, |
1169
|
|
|
|
|
|
|
attribute => 'resource', |
1170
|
|
|
|
|
|
|
prefixes => $local_uri_mappings, |
1171
|
|
|
|
|
|
|
terms => $local_term_mappings, |
1172
|
|
|
|
|
|
|
xml_base => $xml_base, |
1173
|
|
|
|
|
|
|
); |
1174
|
|
|
|
|
|
|
return ($s, $current_element); |
1175
|
|
|
|
|
|
|
} |
1176
|
|
|
|
|
|
|
return; |
1177
|
|
|
|
|
|
|
}; |
1178
|
|
|
|
|
|
|
|
1179
|
|
|
|
|
|
|
my $NEW_SUBJECT_ATTR_HREF = sub |
1180
|
|
|
|
|
|
|
{ |
1181
|
|
|
|
|
|
|
if ($current_element->hasAttributeNsSafe($rdfans, 'href')) |
1182
|
|
|
|
|
|
|
{ |
1183
|
|
|
|
|
|
|
my $s = $self->uri( |
1184
|
|
|
|
|
|
|
$current_element->getAttributeNsSafe($rdfans, 'href'), |
1185
|
|
|
|
|
|
|
{'element'=>$current_element,'xml_base'=>$hrefsrc_base} |
1186
|
|
|
|
|
|
|
); |
1187
|
|
|
|
|
|
|
return ($s, $current_element); |
1188
|
|
|
|
|
|
|
} |
1189
|
|
|
|
|
|
|
return; |
1190
|
|
|
|
|
|
|
}; |
1191
|
|
|
|
|
|
|
|
1192
|
|
|
|
|
|
|
my $NEW_SUBJECT_ATTR_TYPEOF = sub |
1193
|
|
|
|
|
|
|
{ |
1194
|
|
|
|
|
|
|
if ($current_element->hasAttributeNsSafe($rdfans, 'typeof') |
1195
|
|
|
|
|
|
|
or $current_element->hasAttributeNsSafe($rdfans, 'instanceof')) |
1196
|
|
|
|
|
|
|
{ |
1197
|
|
|
|
|
|
|
if ($current_element->hasAttributeNsSafe($rdfans, 'instanceof') |
1198
|
|
|
|
|
|
|
and not $current_element->hasAttributeNsSafe($rdfans, 'typeof')) |
1199
|
|
|
|
|
|
|
{ |
1200
|
|
|
|
|
|
|
$self->_log_error( |
1201
|
|
|
|
|
|
|
ERR_WARNING, |
1202
|
|
|
|
|
|
|
ERR_CODE_INSTANCEOF_USED, |
1203
|
|
|
|
|
|
|
"Deprecated \@instanceof found; using it anyway.", |
1204
|
|
|
|
|
|
|
element => $current_element, |
1205
|
|
|
|
|
|
|
); |
1206
|
|
|
|
|
|
|
} |
1207
|
|
|
|
|
|
|
|
1208
|
|
|
|
|
|
|
return ($self->bnode($current_element), $current_element); |
1209
|
|
|
|
|
|
|
} |
1210
|
|
|
|
|
|
|
return; |
1211
|
|
|
|
|
|
|
}; |
1212
|
|
|
|
|
|
|
|
1213
|
|
|
|
|
|
|
# If the current element contains no @rel or @rev attribute, then the |
1214
|
|
|
|
|
|
|
# next step is to establish a value for new subject. This step has two |
1215
|
|
|
|
|
|
|
# possible alternatives. |
1216
|
|
|
|
|
|
|
# |
1217
|
|
|
|
|
|
|
# If the current element contains the @property attribute, but does not |
1218
|
|
|
|
|
|
|
# contain either the @content or @datatype attributes, then |
1219
|
|
|
|
|
|
|
# |
1220
|
|
|
|
|
|
|
if (!$current_element->hasAttributeNsSafe($rdfans, 'rel') |
1221
|
|
|
|
|
|
|
and !$current_element->hasAttributeNsSafe($rdfans, 'rev') |
1222
|
|
|
|
|
|
|
and $current_element->hasAttributeNsSafe($rdfans, 'property') |
1223
|
|
|
|
|
|
|
and !$current_element->hasAttributeNsSafe($rdfans, 'datatype') |
1224
|
|
|
|
|
|
|
and !$current_element->hasAttributeNsSafe($rdfans, 'content') |
1225
|
|
|
|
|
|
|
and $self->{options}{property_resources}) |
1226
|
|
|
|
|
|
|
{ |
1227
|
|
|
|
|
|
|
# new subject is set to the resource obtained from the first match |
1228
|
|
|
|
|
|
|
# from the following rule: |
1229
|
|
|
|
|
|
|
# |
1230
|
|
|
|
|
|
|
# - by using the resource from @about, if present, obtained according |
1231
|
|
|
|
|
|
|
# to the section on CURIE and IRI Processing; |
1232
|
|
|
|
|
|
|
# - otherwise, if the element is the root element of the document, then |
1233
|
|
|
|
|
|
|
# act as if there is an empty @about present, and process it according |
1234
|
|
|
|
|
|
|
# to the rule for @about, above; |
1235
|
|
|
|
|
|
|
# - otherwise, if parent object is present, new subject is set to the |
1236
|
|
|
|
|
|
|
# value of parent object. |
1237
|
|
|
|
|
|
|
# |
1238
|
|
|
|
|
|
|
# TOBYINK: we add @src to that for RDFa 1.0/1.1 mish-mashes. |
1239
|
|
|
|
|
|
|
# |
1240
|
|
|
|
|
|
|
foreach my $code ( |
1241
|
|
|
|
|
|
|
$NEW_SUBJECT_ATTR_ABOUT, |
1242
|
|
|
|
|
|
|
($NEW_SUBJECT_ATTR_SRC) x!$self->{options}{src_sets_object}, |
1243
|
|
|
|
|
|
|
$NEW_SUBJECT_DEFAULTS, |
1244
|
|
|
|
|
|
|
$NEW_SUBJECT_INHERIT, |
1245
|
|
|
|
|
|
|
) { |
1246
|
|
|
|
|
|
|
($new_subject, $new_subject_elem) = $code->() unless $new_subject; |
1247
|
|
|
|
|
|
|
} |
1248
|
|
|
|
|
|
|
|
1249
|
|
|
|
|
|
|
# If @typeof is present then typed resource is set to the resource |
1250
|
|
|
|
|
|
|
# obtained from the first match from the following rules: |
1251
|
|
|
|
|
|
|
# |
1252
|
|
|
|
|
|
|
if ($current_element->hasAttributeNsSafe($rdfans, 'typeof') |
1253
|
|
|
|
|
|
|
or $current_element->hasAttributeNsSafe($rdfans, 'instanceof')) |
1254
|
|
|
|
|
|
|
{ |
1255
|
|
|
|
|
|
|
# - by using the resource from @about, if present, obtained |
1256
|
|
|
|
|
|
|
# according to the section on CURIE and IRI Processing; |
1257
|
|
|
|
|
|
|
# - otherwise, if the element is the root element of the |
1258
|
|
|
|
|
|
|
# document, then act as if there is an empty @about present |
1259
|
|
|
|
|
|
|
# and process it according to the previous rule; |
1260
|
|
|
|
|
|
|
# |
1261
|
|
|
|
|
|
|
foreach my $code ( |
1262
|
|
|
|
|
|
|
$NEW_SUBJECT_ATTR_ABOUT, |
1263
|
|
|
|
|
|
|
($NEW_SUBJECT_ATTR_SRC) x!$self->{options}{src_sets_object}, |
1264
|
|
|
|
|
|
|
$NEW_SUBJECT_DEFAULTS, |
1265
|
|
|
|
|
|
|
) { |
1266
|
|
|
|
|
|
|
($typed_resource, $typed_resource_elem) = $code->() unless $typed_resource; |
1267
|
|
|
|
|
|
|
} |
1268
|
|
|
|
|
|
|
|
1269
|
|
|
|
|
|
|
# - otherwise, |
1270
|
|
|
|
|
|
|
unless ($typed_resource) |
1271
|
|
|
|
|
|
|
{ |
1272
|
|
|
|
|
|
|
# + by using the resource from @resource, if present, |
1273
|
|
|
|
|
|
|
# obtained according to the section on CURIE and IRI |
1274
|
|
|
|
|
|
|
# Processing; |
1275
|
|
|
|
|
|
|
# + otherwise, by using the IRI from @href, if present, |
1276
|
|
|
|
|
|
|
# obtained according to the section on CURIE and IRI |
1277
|
|
|
|
|
|
|
# Processing; |
1278
|
|
|
|
|
|
|
# + otherwise, by using the IRI from @src, if present, |
1279
|
|
|
|
|
|
|
# obtained according to the section on CURIE and IRI |
1280
|
|
|
|
|
|
|
# Processing; |
1281
|
|
|
|
|
|
|
# |
1282
|
|
|
|
|
|
|
foreach my $code ( |
1283
|
|
|
|
|
|
|
$NEW_SUBJECT_ATTR_RESOURCE, |
1284
|
|
|
|
|
|
|
$NEW_SUBJECT_ATTR_HREF, |
1285
|
|
|
|
|
|
|
($NEW_SUBJECT_ATTR_SRC) x!!$self->{options}{src_sets_object}, |
1286
|
|
|
|
|
|
|
) { |
1287
|
|
|
|
|
|
|
($typed_resource, $typed_resource_elem) = $code->() unless $typed_resource; |
1288
|
|
|
|
|
|
|
} |
1289
|
|
|
|
|
|
|
|
1290
|
|
|
|
|
|
|
# + otherwise, the value of typed resource is set to a |
1291
|
|
|
|
|
|
|
# newly created bnode. |
1292
|
|
|
|
|
|
|
# |
1293
|
|
|
|
|
|
|
unless ($typed_resource) |
1294
|
|
|
|
|
|
|
{ |
1295
|
|
|
|
|
|
|
($typed_resource, $typed_resource_elem) = |
1296
|
|
|
|
|
|
|
($self->bnode($current_element), $current_element); |
1297
|
|
|
|
|
|
|
} |
1298
|
|
|
|
|
|
|
|
1299
|
|
|
|
|
|
|
# + The value of the current object resource is then set |
1300
|
|
|
|
|
|
|
# to the value of typed resource. |
1301
|
|
|
|
|
|
|
# |
1302
|
|
|
|
|
|
|
($current_object_resource, $current_object_resource_elem) = |
1303
|
|
|
|
|
|
|
($typed_resource, $typed_resource_elem); |
1304
|
|
|
|
|
|
|
} |
1305
|
|
|
|
|
|
|
} |
1306
|
|
|
|
|
|
|
} |
1307
|
|
|
|
|
|
|
|
1308
|
|
|
|
|
|
|
# otherwise |
1309
|
|
|
|
|
|
|
elsif (!$current_element->hasAttributeNsSafe($rdfans, 'rel') |
1310
|
|
|
|
|
|
|
and !$current_element->hasAttributeNsSafe($rdfans, 'rev')) |
1311
|
|
|
|
|
|
|
{ |
1312
|
|
|
|
|
|
|
# - If the element contains an @about, @href, @src, or @resource |
1313
|
|
|
|
|
|
|
# attribute, new subject is set to the resource obtained as |
1314
|
|
|
|
|
|
|
# follows: |
1315
|
|
|
|
|
|
|
# + by using the resource from @about, if present, obtained |
1316
|
|
|
|
|
|
|
# according to the section on CURIE and IRI Processing; |
1317
|
|
|
|
|
|
|
# + otherwise, by using the resource from @resource, if |
1318
|
|
|
|
|
|
|
# present, obtained according to the section on CURIE and |
1319
|
|
|
|
|
|
|
# IRI Processing; |
1320
|
|
|
|
|
|
|
# + otherwise, by using the IRI from @href, if present, |
1321
|
|
|
|
|
|
|
# obtained according to the section on CURIE and IRI |
1322
|
|
|
|
|
|
|
# Processing; |
1323
|
|
|
|
|
|
|
# + otherwise, by using the IRI from @src, if present, |
1324
|
|
|
|
|
|
|
# obtained according to the section on CURIE and IRI |
1325
|
|
|
|
|
|
|
# Processing. |
1326
|
|
|
|
|
|
|
# - otherwise, if no resource is provided by a resource |
1327
|
|
|
|
|
|
|
# attribute, then the first match from the following rules |
1328
|
|
|
|
|
|
|
# will apply: |
1329
|
|
|
|
|
|
|
# + if the element is the root element of the document, |
1330
|
|
|
|
|
|
|
# then act as if there is an empty @about present, and |
1331
|
|
|
|
|
|
|
# process it according to the rule for @about, above; |
1332
|
|
|
|
|
|
|
# + otherwise, if @typeof is present, then new subject is |
1333
|
|
|
|
|
|
|
# set to be a newly created bnode; |
1334
|
|
|
|
|
|
|
# + otherwise, if parent object is present, new subject is |
1335
|
|
|
|
|
|
|
# set to the value of parent object. Additionally, if |
1336
|
|
|
|
|
|
|
# @property is not present then the skip element flag is |
1337
|
|
|
|
|
|
|
# set to 'true'. |
1338
|
|
|
|
|
|
|
# |
1339
|
|
|
|
|
|
|
my $i; |
1340
|
|
|
|
|
|
|
foreach my $code ( |
1341
|
|
|
|
|
|
|
$NEW_SUBJECT_ATTR_ABOUT, |
1342
|
|
|
|
|
|
|
($NEW_SUBJECT_ATTR_SRC) x!$self->{options}{src_sets_object}, |
1343
|
|
|
|
|
|
|
$NEW_SUBJECT_ATTR_RESOURCE, |
1344
|
|
|
|
|
|
|
$NEW_SUBJECT_ATTR_HREF, |
1345
|
|
|
|
|
|
|
($NEW_SUBJECT_ATTR_SRC) x!!$self->{options}{src_sets_object}, |
1346
|
|
|
|
|
|
|
$NEW_SUBJECT_DEFAULTS, |
1347
|
|
|
|
|
|
|
$NEW_SUBJECT_ATTR_TYPEOF, |
1348
|
|
|
|
|
|
|
sub { $NEW_SUBJECT_INHERIT->(1) }, |
1349
|
|
|
|
|
|
|
) { |
1350
|
|
|
|
|
|
|
last if $new_subject; |
1351
|
|
|
|
|
|
|
($new_subject, $new_subject_elem) = $code->(); |
1352
|
|
|
|
|
|
|
} |
1353
|
|
|
|
|
|
|
|
1354
|
|
|
|
|
|
|
# if ($current_element->{'x-foo'}) |
1355
|
|
|
|
|
|
|
# { |
1356
|
|
|
|
|
|
|
# use Data::Dumper; |
1357
|
|
|
|
|
|
|
# print Dumper \%args; |
1358
|
|
|
|
|
|
|
# } |
1359
|
|
|
|
|
|
|
|
1360
|
|
|
|
|
|
|
# - Finally, if @typeof is present, set the typed resource |
1361
|
|
|
|
|
|
|
# to the value of new subject. |
1362
|
|
|
|
|
|
|
# |
1363
|
|
|
|
|
|
|
if ($current_element->hasAttributeNsSafe($rdfans, 'typeof') |
1364
|
|
|
|
|
|
|
or $current_element->hasAttributeNsSafe($rdfans, 'instanceof')) |
1365
|
|
|
|
|
|
|
{ |
1366
|
|
|
|
|
|
|
($typed_resource, $typed_resource_elem) = ($new_subject, $new_subject_elem); |
1367
|
|
|
|
|
|
|
} |
1368
|
|
|
|
|
|
|
} |
1369
|
|
|
|
|
|
|
|
1370
|
|
|
|
|
|
|
# If the [current element] does contain a valid @rel or @rev URI, obtained |
1371
|
|
|
|
|
|
|
# according to the section on CURIE and URI Processing, then the next step |
1372
|
|
|
|
|
|
|
# is to establish both a value for [new subject] and a value for [current |
1373
|
|
|
|
|
|
|
# object resource]: |
1374
|
|
|
|
|
|
|
else |
1375
|
|
|
|
|
|
|
{ |
1376
|
|
|
|
|
|
|
foreach my $code ( |
1377
|
|
|
|
|
|
|
$NEW_SUBJECT_ATTR_ABOUT, |
1378
|
|
|
|
|
|
|
($NEW_SUBJECT_ATTR_SRC) x!$self->{options}{src_sets_object}, |
1379
|
|
|
|
|
|
|
($NEW_SUBJECT_ATTR_TYPEOF) x!$self->{options}{typeof_resources}, |
1380
|
|
|
|
|
|
|
$NEW_SUBJECT_DEFAULTS, |
1381
|
|
|
|
|
|
|
$NEW_SUBJECT_INHERIT, |
1382
|
|
|
|
|
|
|
) { |
1383
|
|
|
|
|
|
|
($new_subject, $new_subject_elem) = $code->() unless $new_subject; |
1384
|
|
|
|
|
|
|
} |
1385
|
|
|
|
|
|
|
|
1386
|
|
|
|
|
|
|
foreach my $code ( |
1387
|
|
|
|
|
|
|
$NEW_SUBJECT_ATTR_RESOURCE, |
1388
|
|
|
|
|
|
|
$NEW_SUBJECT_ATTR_HREF, |
1389
|
|
|
|
|
|
|
($NEW_SUBJECT_ATTR_SRC) x!!$self->{options}{src_sets_object}, |
1390
|
|
|
|
|
|
|
) { |
1391
|
|
|
|
|
|
|
($current_object_resource, $current_object_resource_elem) = $code->() unless $current_object_resource; |
1392
|
|
|
|
|
|
|
} |
1393
|
|
|
|
|
|
|
|
1394
|
|
|
|
|
|
|
if ($current_element->hasAttributeNsSafe($rdfans, 'typeof') |
1395
|
|
|
|
|
|
|
or $current_element->hasAttributeNsSafe($rdfans, 'instanceof')) |
1396
|
|
|
|
|
|
|
{ |
1397
|
|
|
|
|
|
|
if ($current_element->hasAttributeNsSafe($rdfans, 'about')) |
1398
|
|
|
|
|
|
|
{ |
1399
|
|
|
|
|
|
|
($typed_resource, $typed_resource_elem) = ($new_subject, $new_subject_elem); |
1400
|
|
|
|
|
|
|
} |
1401
|
|
|
|
|
|
|
elsif ($self->{options}{typeof_resources}) |
1402
|
|
|
|
|
|
|
{ |
1403
|
|
|
|
|
|
|
($current_object_resource, $current_object_resource_elem) = |
1404
|
|
|
|
|
|
|
($self->bnode($current_element), $current_element) |
1405
|
|
|
|
|
|
|
unless $current_object_resource; |
1406
|
|
|
|
|
|
|
|
1407
|
|
|
|
|
|
|
($typed_resource, $typed_resource_elem) = ($current_object_resource, $current_object_resource_elem); |
1408
|
|
|
|
|
|
|
} |
1409
|
|
|
|
|
|
|
else |
1410
|
|
|
|
|
|
|
{ |
1411
|
|
|
|
|
|
|
($typed_resource, $typed_resource_elem) = ($new_subject, $new_subject_elem); |
1412
|
|
|
|
|
|
|
} |
1413
|
|
|
|
|
|
|
} |
1414
|
|
|
|
|
|
|
} |
1415
|
|
|
|
|
|
|
|
1416
|
|
|
|
|
|
|
# # NOTE: x876587 |
1417
|
|
|
|
|
|
|
# if (!defined $new_subject |
1418
|
|
|
|
|
|
|
# and $current_element->nodePath eq $self->dom->documentElement->nodePath) |
1419
|
|
|
|
|
|
|
# { |
1420
|
|
|
|
|
|
|
# $new_subject = $self->uri(''); |
1421
|
|
|
|
|
|
|
# $new_subject_elem = $self->dom->documentElement; |
1422
|
|
|
|
|
|
|
# $skip_element = 1 |
1423
|
|
|
|
|
|
|
# unless $current_element->hasAttributeNsSafe($rdfans, 'property'); |
1424
|
|
|
|
|
|
|
# } |
1425
|
|
|
|
|
|
|
|
1426
|
|
|
|
|
|
|
# If in any of the previous steps a [typed resource] was set to a non-null |
1427
|
|
|
|
|
|
|
# value, it is now used to provide a subject for type values |
1428
|
|
|
|
|
|
|
if ($typed_resource |
1429
|
|
|
|
|
|
|
&& ( $current_element->hasAttributeNsSafe($rdfans, 'instanceof') |
1430
|
|
|
|
|
|
|
|| $current_element->hasAttributeNsSafe($rdfans, 'typeof'))) |
1431
|
|
|
|
|
|
|
{ |
1432
|
|
|
|
|
|
|
|
1433
|
|
|
|
|
|
|
if ($current_element->hasAttributeNsSafe($rdfans, 'instanceof') |
1434
|
|
|
|
|
|
|
&& $current_element->hasAttributeNsSafe($rdfans, 'typeof')) |
1435
|
|
|
|
|
|
|
{ |
1436
|
|
|
|
|
|
|
$self->_log_error( |
1437
|
|
|
|
|
|
|
ERR_WARNING, |
1438
|
|
|
|
|
|
|
ERR_CODE_INSTANCEOF_OVERRULED, |
1439
|
|
|
|
|
|
|
"Deprecated \@instanceof found; ignored because \@typeof also present.", |
1440
|
|
|
|
|
|
|
element => $current_element, |
1441
|
|
|
|
|
|
|
); |
1442
|
|
|
|
|
|
|
} |
1443
|
|
|
|
|
|
|
elsif ($current_element->hasAttributeNsSafe($rdfans, 'instanceof')) |
1444
|
|
|
|
|
|
|
{ |
1445
|
|
|
|
|
|
|
$self->_log_error( |
1446
|
|
|
|
|
|
|
ERR_WARNING, |
1447
|
|
|
|
|
|
|
ERR_CODE_INSTANCEOF_USED, |
1448
|
|
|
|
|
|
|
"Deprecated \@instanceof found; using it anyway.", |
1449
|
|
|
|
|
|
|
element => $current_element, |
1450
|
|
|
|
|
|
|
); |
1451
|
|
|
|
|
|
|
} |
1452
|
|
|
|
|
|
|
|
1453
|
|
|
|
|
|
|
# One or more 'types' for the [ new subject ] can be set by using |
1454
|
|
|
|
|
|
|
# @instanceof. If present, the attribute must contain one or more |
1455
|
|
|
|
|
|
|
# URIs, obtained according to the section on URI and CURIE Processing... |
1456
|
|
|
|
|
|
|
|
1457
|
|
|
|
|
|
|
my @instanceof = $self->_split_tokens( $current_element->getAttributeNsSafe($rdfans, 'typeof') |
1458
|
|
|
|
|
|
|
|| $current_element->getAttributeNsSafe($rdfans, 'instanceof') ); |
1459
|
|
|
|
|
|
|
|
1460
|
|
|
|
|
|
|
foreach my $curie (@instanceof) |
1461
|
|
|
|
|
|
|
{ |
1462
|
|
|
|
|
|
|
my $rdftype = $self->_expand_curie( |
1463
|
|
|
|
|
|
|
$curie, |
1464
|
|
|
|
|
|
|
element => $current_element, |
1465
|
|
|
|
|
|
|
attribute => 'typeof', |
1466
|
|
|
|
|
|
|
prefixes => $local_uri_mappings, |
1467
|
|
|
|
|
|
|
terms => $local_term_mappings, |
1468
|
|
|
|
|
|
|
xml_base => $xml_base, |
1469
|
|
|
|
|
|
|
); |
1470
|
|
|
|
|
|
|
next unless defined $rdftype; |
1471
|
|
|
|
|
|
|
|
1472
|
|
|
|
|
|
|
# ... each of which is used to generate a triple as follows: |
1473
|
|
|
|
|
|
|
# |
1474
|
|
|
|
|
|
|
# subject |
1475
|
|
|
|
|
|
|
# [new subject] |
1476
|
|
|
|
|
|
|
# predicate |
1477
|
|
|
|
|
|
|
# http://www.w3.org/1999/02/22-rdf-syntax-ns#type |
1478
|
|
|
|
|
|
|
# object |
1479
|
|
|
|
|
|
|
# full URI of 'type' |
1480
|
|
|
|
|
|
|
|
1481
|
|
|
|
|
|
|
my $E = { # provenance tracking |
1482
|
|
|
|
|
|
|
current => $current_element, |
1483
|
|
|
|
|
|
|
subject => $typed_resource_elem, |
1484
|
|
|
|
|
|
|
predicate => $current_element, |
1485
|
|
|
|
|
|
|
object => $current_element, |
1486
|
|
|
|
|
|
|
graph => $graph_elem, |
1487
|
|
|
|
|
|
|
}; |
1488
|
|
|
|
|
|
|
$self->_insert_triple_resource($E, $typed_resource, RDF_TYPE, $rdftype, $graph); |
1489
|
|
|
|
|
|
|
$activity++; |
1490
|
|
|
|
|
|
|
} |
1491
|
|
|
|
|
|
|
} |
1492
|
|
|
|
|
|
|
|
1493
|
|
|
|
|
|
|
# EXTENSION: @longdesc |
1494
|
|
|
|
|
|
|
if ($self->{'options'}->{'longdesc_attr'} |
1495
|
|
|
|
|
|
|
&& $current_element->hasAttributeNsSafe($rdfans, 'longdesc')) |
1496
|
|
|
|
|
|
|
{ |
1497
|
|
|
|
|
|
|
my $longdesc = $self->uri( |
1498
|
|
|
|
|
|
|
$current_element->getAttributeNsSafe($rdfans, 'longdesc'), |
1499
|
|
|
|
|
|
|
{'element'=>$current_element,'xml_base'=>$hrefsrc_base} |
1500
|
|
|
|
|
|
|
); |
1501
|
|
|
|
|
|
|
if (defined $longdesc) |
1502
|
|
|
|
|
|
|
{ |
1503
|
|
|
|
|
|
|
my $E = { |
1504
|
|
|
|
|
|
|
current => $new_subject_elem, |
1505
|
|
|
|
|
|
|
subject => $current_element, |
1506
|
|
|
|
|
|
|
predicate => $current_element, |
1507
|
|
|
|
|
|
|
object => $current_element, |
1508
|
|
|
|
|
|
|
graph => $graph_elem, |
1509
|
|
|
|
|
|
|
}; |
1510
|
|
|
|
|
|
|
$self->_insert_triple_resource($E, $new_subject, 'http://www.w3.org/2007/05/powder-s#describedby', $longdesc, $graph); |
1511
|
|
|
|
|
|
|
} |
1512
|
|
|
|
|
|
|
} |
1513
|
|
|
|
|
|
|
|
1514
|
|
|
|
|
|
|
# If in any of the previous steps a new subject was set to a non-null value |
1515
|
|
|
|
|
|
|
# different from the parent object; The list mapping taken from the |
1516
|
|
|
|
|
|
|
# evaluation context is set to a new, empty mapping. |
1517
|
|
|
|
|
|
|
if (defined $new_subject |
1518
|
|
|
|
|
|
|
and $new_subject ne $parent_subject || !%$list_mappings) |
1519
|
|
|
|
|
|
|
{ |
1520
|
|
|
|
|
|
|
$list_mappings = { |
1521
|
|
|
|
|
|
|
'::meta' => { |
1522
|
|
|
|
|
|
|
id => Data::UUID->new->create_str, |
1523
|
|
|
|
|
|
|
owner => $current_element, |
1524
|
|
|
|
|
|
|
}, |
1525
|
|
|
|
|
|
|
}; |
1526
|
|
|
|
|
|
|
} |
1527
|
|
|
|
|
|
|
|
1528
|
|
|
|
|
|
|
# If in any of the previous steps a [current object resource] was set to |
1529
|
|
|
|
|
|
|
# a non-null value, it is now used to generate triples and add entries to |
1530
|
|
|
|
|
|
|
# the local list mapping |
1531
|
|
|
|
|
|
|
if ($current_object_resource) |
1532
|
|
|
|
|
|
|
{ |
1533
|
|
|
|
|
|
|
# If the element contains both the inlist and the rel attributes: the |
1534
|
|
|
|
|
|
|
# rel may contain one or more IRIs, obtained according to the section |
1535
|
|
|
|
|
|
|
# on CURIE and IRI Processing each of which is used to add an entry to |
1536
|
|
|
|
|
|
|
# the list mapping as follows: |
1537
|
|
|
|
|
|
|
if ($current_element->hasAttributeNsSafe($rdfans, 'inlist') |
1538
|
|
|
|
|
|
|
and $current_element->hasAttributeNsSafe($rdfans, 'rel')) |
1539
|
|
|
|
|
|
|
{ |
1540
|
|
|
|
|
|
|
foreach my $r (@REL) |
1541
|
|
|
|
|
|
|
{ |
1542
|
|
|
|
|
|
|
# if the local list mapping does not contain a list associated with |
1543
|
|
|
|
|
|
|
# the IRI, instantiate a new list and add to local list mappings |
1544
|
|
|
|
|
|
|
$list_mappings->{$r} = [] unless defined $list_mappings->{$r}; |
1545
|
|
|
|
|
|
|
|
1546
|
|
|
|
|
|
|
# add the current object resource to the list associated with the IRI |
1547
|
|
|
|
|
|
|
# in the local list mapping |
1548
|
|
|
|
|
|
|
push @{ $list_mappings->{$r} }, [resource => $current_object_resource]; |
1549
|
|
|
|
|
|
|
$activity++; |
1550
|
|
|
|
|
|
|
} |
1551
|
|
|
|
|
|
|
} |
1552
|
|
|
|
|
|
|
|
1553
|
|
|
|
|
|
|
# XXX:@inlist doesn't support @rev? |
1554
|
|
|
|
|
|
|
# |
1555
|
|
|
|
|
|
|
# if ($current_element->hasAttributeNsSafe($rdfans, 'inlist') |
1556
|
|
|
|
|
|
|
# and $current_element->hasAttributeNsSafe($rdfans, 'rev')) |
1557
|
|
|
|
|
|
|
# { |
1558
|
|
|
|
|
|
|
# foreach my $r (@REV) |
1559
|
|
|
|
|
|
|
# { |
1560
|
|
|
|
|
|
|
# # if the local list mapping does not contain a list associated with |
1561
|
|
|
|
|
|
|
# # the IRI, instantiate a new list and add to local list mappings |
1562
|
|
|
|
|
|
|
# $list_mappings->{'REV:'.$r} = [] unless defined $list_mappings->{'REV:'.$r}; |
1563
|
|
|
|
|
|
|
# |
1564
|
|
|
|
|
|
|
# # add the current object resource to the list associated with the IRI |
1565
|
|
|
|
|
|
|
# # in the local list mapping |
1566
|
|
|
|
|
|
|
# push @{ $list_mappings->{'REV:'.$r} }, [resource => $current_object_resource]; |
1567
|
|
|
|
|
|
|
# } |
1568
|
|
|
|
|
|
|
# } |
1569
|
|
|
|
|
|
|
|
1570
|
|
|
|
|
|
|
my $E = { # provenance tracking |
1571
|
|
|
|
|
|
|
current => $current_element, |
1572
|
|
|
|
|
|
|
subject => $new_subject_elem, |
1573
|
|
|
|
|
|
|
predicate => $current_element, |
1574
|
|
|
|
|
|
|
object => $current_object_resource_elem, |
1575
|
|
|
|
|
|
|
graph => $graph_elem, |
1576
|
|
|
|
|
|
|
}; |
1577
|
|
|
|
|
|
|
|
1578
|
|
|
|
|
|
|
# Predicates for the [ current object resource ] can be set by |
1579
|
|
|
|
|
|
|
# using one or both of the @rel and @rev attributes, but, in |
1580
|
|
|
|
|
|
|
# case of the @rel attribute, only if the @inlist is not present: |
1581
|
|
|
|
|
|
|
# |
1582
|
|
|
|
|
|
|
# * If present, @rel will contain one or more URIs, obtained |
1583
|
|
|
|
|
|
|
# according to the section on CURIE and URI Processing each |
1584
|
|
|
|
|
|
|
# of which is used to generate a triple as follows: |
1585
|
|
|
|
|
|
|
# |
1586
|
|
|
|
|
|
|
# subject |
1587
|
|
|
|
|
|
|
# [new subject] |
1588
|
|
|
|
|
|
|
# predicate |
1589
|
|
|
|
|
|
|
# full URI |
1590
|
|
|
|
|
|
|
# object |
1591
|
|
|
|
|
|
|
# [current object resource] |
1592
|
|
|
|
|
|
|
|
1593
|
|
|
|
|
|
|
unless ($current_element->hasAttributeNsSafe($rdfans, 'inlist')) |
1594
|
|
|
|
|
|
|
{ |
1595
|
|
|
|
|
|
|
foreach my $r (@REL) |
1596
|
|
|
|
|
|
|
{ |
1597
|
|
|
|
|
|
|
$self->_insert_triple_resource($E, $new_subject, $r, $current_object_resource, $graph); |
1598
|
|
|
|
|
|
|
$activity++; |
1599
|
|
|
|
|
|
|
} |
1600
|
|
|
|
|
|
|
} |
1601
|
|
|
|
|
|
|
|
1602
|
|
|
|
|
|
|
# * If present, @rev will contain one or more URIs, obtained |
1603
|
|
|
|
|
|
|
# according to the section on CURIE and URI Processing each |
1604
|
|
|
|
|
|
|
# of which is used to generate a triple as follows: |
1605
|
|
|
|
|
|
|
# |
1606
|
|
|
|
|
|
|
# subject |
1607
|
|
|
|
|
|
|
# [current object resource] |
1608
|
|
|
|
|
|
|
# predicate |
1609
|
|
|
|
|
|
|
# full URI |
1610
|
|
|
|
|
|
|
# object |
1611
|
|
|
|
|
|
|
# [new subject] |
1612
|
|
|
|
|
|
|
|
1613
|
|
|
|
|
|
|
$E = { # provenance tracking |
1614
|
|
|
|
|
|
|
current => $current_element, |
1615
|
|
|
|
|
|
|
subject => $current_object_resource_elem, |
1616
|
|
|
|
|
|
|
predicate => $current_element, |
1617
|
|
|
|
|
|
|
object => $new_subject_elem, |
1618
|
|
|
|
|
|
|
graph => $graph_elem, |
1619
|
|
|
|
|
|
|
}; |
1620
|
|
|
|
|
|
|
foreach my $r (@REV) |
1621
|
|
|
|
|
|
|
{ |
1622
|
|
|
|
|
|
|
$self->_insert_triple_resource($E, $current_object_resource, $r, $new_subject, $graph); |
1623
|
|
|
|
|
|
|
$activity++; |
1624
|
|
|
|
|
|
|
} |
1625
|
|
|
|
|
|
|
} |
1626
|
|
|
|
|
|
|
|
1627
|
|
|
|
|
|
|
# If however [current object resource] was set to null, but there are |
1628
|
|
|
|
|
|
|
# predicates present, then they must be stored as [incomplete triple]s, |
1629
|
|
|
|
|
|
|
# pending the discovery of a subject that can be used as the object. Also, |
1630
|
|
|
|
|
|
|
# [current object resource] should be set to a newly created [bnode] |
1631
|
|
|
|
|
|
|
elsif ((scalar @REL) || (scalar @REV)) |
1632
|
|
|
|
|
|
|
{ |
1633
|
|
|
|
|
|
|
# Predicates for [incomplete triple]s can be set by using one or |
1634
|
|
|
|
|
|
|
# both of the @rel and @rev attributes: |
1635
|
|
|
|
|
|
|
# |
1636
|
|
|
|
|
|
|
# * If present, @rel must contain one or more URIs, obtained |
1637
|
|
|
|
|
|
|
# according to the section on CURIE and URI Processing each |
1638
|
|
|
|
|
|
|
# of which is added to the [local list of incomplete triples] |
1639
|
|
|
|
|
|
|
# as follows: |
1640
|
|
|
|
|
|
|
# |
1641
|
|
|
|
|
|
|
# predicate |
1642
|
|
|
|
|
|
|
# full URI |
1643
|
|
|
|
|
|
|
# direction |
1644
|
|
|
|
|
|
|
# forward |
1645
|
|
|
|
|
|
|
|
1646
|
|
|
|
|
|
|
push @$local_incomplete_triples, |
1647
|
|
|
|
|
|
|
map { |
1648
|
|
|
|
|
|
|
$current_element->hasAttributeNsSafe($rdfans, 'inlist') |
1649
|
|
|
|
|
|
|
?{ |
1650
|
|
|
|
|
|
|
list => do { $list_mappings->{$_} = [] unless defined $list_mappings->{$_}; $list_mappings->{$_} }, |
1651
|
|
|
|
|
|
|
direction => 'none', |
1652
|
|
|
|
|
|
|
} |
1653
|
|
|
|
|
|
|
:{ |
1654
|
|
|
|
|
|
|
predicate => $_, |
1655
|
|
|
|
|
|
|
direction => 'forward', |
1656
|
|
|
|
|
|
|
graph => $graph, |
1657
|
|
|
|
|
|
|
predicate_element => $current_element, |
1658
|
|
|
|
|
|
|
graph_element => $graph_elem, |
1659
|
|
|
|
|
|
|
} |
1660
|
|
|
|
|
|
|
} @REL; |
1661
|
|
|
|
|
|
|
|
1662
|
|
|
|
|
|
|
# * If present, @rev must contain one or more URIs, obtained |
1663
|
|
|
|
|
|
|
# according to the section on CURIE and URI Processing, each |
1664
|
|
|
|
|
|
|
# of which is added to the [local list of incomplete triples] |
1665
|
|
|
|
|
|
|
# as follows: |
1666
|
|
|
|
|
|
|
# |
1667
|
|
|
|
|
|
|
# predicate |
1668
|
|
|
|
|
|
|
# full URI |
1669
|
|
|
|
|
|
|
# direction |
1670
|
|
|
|
|
|
|
# reverse |
1671
|
|
|
|
|
|
|
|
1672
|
|
|
|
|
|
|
push @$local_incomplete_triples, |
1673
|
|
|
|
|
|
|
map { |
1674
|
|
|
|
|
|
|
# $current_element->hasAttributeNsSafe($rdfans, 'inlist') |
1675
|
|
|
|
|
|
|
# ?{ |
1676
|
|
|
|
|
|
|
# list => do { $list_mappings->{'REV:'.$_} = [] unless defined $list_mappings->{'REV:'.$_}; $list_mappings->{'REV:'.$_}; }, |
1677
|
|
|
|
|
|
|
# direction => 'none', |
1678
|
|
|
|
|
|
|
# } |
1679
|
|
|
|
|
|
|
# :{ |
1680
|
|
|
|
|
|
|
+{ |
1681
|
|
|
|
|
|
|
predicate => $_, |
1682
|
|
|
|
|
|
|
direction => 'reverse', |
1683
|
|
|
|
|
|
|
graph => $graph, |
1684
|
|
|
|
|
|
|
predicate_element => $current_element, |
1685
|
|
|
|
|
|
|
graph_element => $graph_elem, |
1686
|
|
|
|
|
|
|
} |
1687
|
|
|
|
|
|
|
} @REV; |
1688
|
|
|
|
|
|
|
|
1689
|
|
|
|
|
|
|
$current_object_resource = $self->bnode; |
1690
|
|
|
|
|
|
|
$current_object_resource_elem = $current_element; |
1691
|
|
|
|
|
|
|
} |
1692
|
|
|
|
|
|
|
|
1693
|
|
|
|
|
|
|
# The next step of the iteration is to establish any [current |
1694
|
|
|
|
|
|
|
# property value] |
1695
|
|
|
|
|
|
|
my @current_property_value; |
1696
|
|
|
|
|
|
|
|
1697
|
|
|
|
|
|
|
my @prop = $self->_split_tokens( $current_element->getAttributeNsSafe($rdfans, 'property') ); |
1698
|
|
|
|
|
|
|
|
1699
|
|
|
|
|
|
|
my $has_datatype = 0; |
1700
|
|
|
|
|
|
|
my $datatype = undef; |
1701
|
|
|
|
|
|
|
if ($current_element->hasAttributeNsSafe($rdfans, 'datatype')) |
1702
|
|
|
|
|
|
|
{ |
1703
|
|
|
|
|
|
|
$has_datatype = 1; |
1704
|
|
|
|
|
|
|
$datatype = $self->_expand_curie( |
1705
|
|
|
|
|
|
|
$current_element->getAttributeNsSafe($rdfans, 'datatype'), |
1706
|
|
|
|
|
|
|
element => $current_element, |
1707
|
|
|
|
|
|
|
attribute => 'datatype', |
1708
|
|
|
|
|
|
|
prefixes => $local_uri_mappings, |
1709
|
|
|
|
|
|
|
terms => $local_term_mappings, |
1710
|
|
|
|
|
|
|
xml_base => $xml_base, |
1711
|
|
|
|
|
|
|
); |
1712
|
|
|
|
|
|
|
} |
1713
|
|
|
|
|
|
|
|
1714
|
|
|
|
|
|
|
if (@prop) |
1715
|
|
|
|
|
|
|
{ |
1716
|
|
|
|
|
|
|
# Predicates for the [current object literal] can be set by using |
1717
|
|
|
|
|
|
|
# @property. If present, one or more URIs are obtained according |
1718
|
|
|
|
|
|
|
# to the section on CURIE and URI Processing and then the actual |
1719
|
|
|
|
|
|
|
# literal value is obtained as follows: |
1720
|
|
|
|
|
|
|
|
1721
|
|
|
|
|
|
|
# HTML+RDFa |
1722
|
|
|
|
|
|
|
if ($self->{options}{datetime_attr} |
1723
|
|
|
|
|
|
|
and ( |
1724
|
|
|
|
|
|
|
$current_element->hasAttributeNsSafe($rdfans, 'datetime') |
1725
|
|
|
|
|
|
|
or $current_element->namespaceURI eq 'http://www.w3.org/1999/xhtml' |
1726
|
|
|
|
|
|
|
&& lc($current_element->tagName) eq 'time' |
1727
|
|
|
|
|
|
|
)) { |
1728
|
|
|
|
|
|
|
@current_property_value = ( |
1729
|
|
|
|
|
|
|
$current_element->hasAttributeNsSafe($rdfans, 'datetime') |
1730
|
|
|
|
|
|
|
? $current_element->getAttributeNsSafe($rdfans, 'datetime') |
1731
|
|
|
|
|
|
|
: $self->_element_to_string($current_element) |
1732
|
|
|
|
|
|
|
); |
1733
|
|
|
|
|
|
|
|
1734
|
|
|
|
|
|
|
push @current_property_value, do |
1735
|
|
|
|
|
|
|
{ |
1736
|
|
|
|
|
|
|
local $_ = $current_property_value[0]; |
1737
|
|
|
|
|
|
|
|
1738
|
|
|
|
|
|
|
if (!!$has_datatype == !!1) |
1739
|
|
|
|
|
|
|
{ $datatype } |
1740
|
|
|
|
|
|
|
elsif (/^(\-?\d{4,})-(\d{2})-(\d{2})T(\d{2}):(\d{2})(:(\d{2})(?:\.\d+)?)?(Z|(?:[\+\-]\d{2}:?\d{2}))?$/i) |
1741
|
|
|
|
|
|
|
{ 'http://www.w3.org/2001/XMLSchema#dateTime' } |
1742
|
|
|
|
|
|
|
elsif (/^(\d{2}):(\d{2})(:(\d{2})(?:\.\d+)?)?(Z|(?:[\+\-]\d{2}:?\d{2}))?$/i) |
1743
|
|
|
|
|
|
|
{ 'http://www.w3.org/2001/XMLSchema#time' } |
1744
|
|
|
|
|
|
|
elsif (/^(\-?\d{4,})-(\d{2})-(\d{2})(Z|(?:[\+\-]\d{2}:?\d{2}))?$/i) |
1745
|
|
|
|
|
|
|
{ 'http://www.w3.org/2001/XMLSchema#date' } |
1746
|
|
|
|
|
|
|
elsif (/^(\-?\d{4,})-(\d{2})(Z|(?:[\+\-]\d{2}:?\d{2}))?$/i) |
1747
|
|
|
|
|
|
|
{ 'http://www.w3.org/2001/XMLSchema#gYearMonth' } # XXX: not in spec! |
1748
|
|
|
|
|
|
|
elsif (/^(\-?\d{4,})(Z|(?:[\+\-]\d{2}:?\d{2}))?$/i) |
1749
|
|
|
|
|
|
|
{ 'http://www.w3.org/2001/XMLSchema#gYear' } # XXX: not in spec! |
1750
|
|
|
|
|
|
|
elsif (/^--(\d{2})-(\d{2})(Z|(?:[\+\-]\d{2}:?\d{2}))?$/i) |
1751
|
|
|
|
|
|
|
{ 'http://www.w3.org/2001/XMLSchema#gMonthDay' } # XXX: not in spec! |
1752
|
|
|
|
|
|
|
elsif (/^---(\d{2})(Z|(?:[\+\-]\d{2}:?\d{2}))?$/i) |
1753
|
|
|
|
|
|
|
{ 'http://www.w3.org/2001/XMLSchema#gDay' } # XXX: not in spec! |
1754
|
|
|
|
|
|
|
elsif (/^--(\d{2})(Z|(?:[\+\-]\d{2}:?\d{2}))?$/i) |
1755
|
|
|
|
|
|
|
{ 'http://www.w3.org/2001/XMLSchema#gMonth' } # XXX: not in spec! |
1756
|
|
|
|
|
|
|
elsif (/^P([\d\.]+Y)?([\d\.]+M)?([\d\.]+D)?(T([\d\.]+H)?([\d\.]+M)?([\d\.]+S)?)?$/i) |
1757
|
|
|
|
|
|
|
{ 'http://www.w3.org/2001/XMLSchema#duration' } |
1758
|
|
|
|
|
|
|
else |
1759
|
|
|
|
|
|
|
{ undef } |
1760
|
|
|
|
|
|
|
}, $current_language; |
1761
|
|
|
|
|
|
|
} |
1762
|
|
|
|
|
|
|
|
1763
|
|
|
|
|
|
|
# HTML+RDFa |
1764
|
|
|
|
|
|
|
elsif ($self->{options}{value_attr} |
1765
|
|
|
|
|
|
|
and $current_element->hasAttributeNsSafe($rdfans, 'value')) |
1766
|
|
|
|
|
|
|
{ |
1767
|
|
|
|
|
|
|
@current_property_value = ( |
1768
|
|
|
|
|
|
|
$current_element->getAttributeNsSafe($rdfans, 'value'), |
1769
|
|
|
|
|
|
|
($has_datatype ? $datatype : undef), |
1770
|
|
|
|
|
|
|
$current_language, |
1771
|
|
|
|
|
|
|
); |
1772
|
|
|
|
|
|
|
} |
1773
|
|
|
|
|
|
|
|
1774
|
|
|
|
|
|
|
# as a [ plain literal ] if: |
1775
|
|
|
|
|
|
|
# |
1776
|
|
|
|
|
|
|
# @content is present; |
1777
|
|
|
|
|
|
|
elsif ($current_element->hasAttributeNsSafe($rdfans, 'content')) |
1778
|
|
|
|
|
|
|
{ |
1779
|
|
|
|
|
|
|
@current_property_value = ( |
1780
|
|
|
|
|
|
|
$current_element->getAttributeNsSafe($rdfans, 'content'), |
1781
|
|
|
|
|
|
|
($has_datatype ? $datatype : undef), |
1782
|
|
|
|
|
|
|
$current_language, |
1783
|
|
|
|
|
|
|
); |
1784
|
|
|
|
|
|
|
} |
1785
|
|
|
|
|
|
|
|
1786
|
|
|
|
|
|
|
# OpenDocument 1.2 extension |
1787
|
|
|
|
|
|
|
elsif (defined $self->{options}{bookmark_end} |
1788
|
|
|
|
|
|
|
and defined $self->{options}{bookmark_name} |
1789
|
|
|
|
|
|
|
and sprintf('{%s}%s', $current_element->namespaceURI, $current_element->localname) |
1790
|
|
|
|
|
|
|
~~ ['{}'.$self->{options}{bookmark_start}, $self->{options}{bookmark_start}] |
1791
|
|
|
|
|
|
|
) { |
1792
|
|
|
|
|
|
|
@current_property_value = ( |
1793
|
|
|
|
|
|
|
$self->_element_to_bookmarked_string($current_element), |
1794
|
|
|
|
|
|
|
($has_datatype ? $datatype: undef), |
1795
|
|
|
|
|
|
|
$current_language, |
1796
|
|
|
|
|
|
|
); |
1797
|
|
|
|
|
|
|
} |
1798
|
|
|
|
|
|
|
|
1799
|
|
|
|
|
|
|
# Additionally, if there is a value for [current language] then |
1800
|
|
|
|
|
|
|
# the value of the [plain literal] should include this language |
1801
|
|
|
|
|
|
|
# information, as described in [RDF-CONCEPTS]. The actual literal |
1802
|
|
|
|
|
|
|
# is either the value of @content (if present) or a string created |
1803
|
|
|
|
|
|
|
# by concatenating the text content of each of the descendant |
1804
|
|
|
|
|
|
|
# elements of the [current element] in document order. |
1805
|
|
|
|
|
|
|
|
1806
|
|
|
|
|
|
|
# or all children of the [current element] are text nodes; |
1807
|
|
|
|
|
|
|
# or there are no child nodes; |
1808
|
|
|
|
|
|
|
# or the body of the [ current element ] does have non-text |
1809
|
|
|
|
|
|
|
# child nodes but @datatype is present, with an empty value. |
1810
|
|
|
|
|
|
|
elsif ($has_datatype and $datatype eq '') |
1811
|
|
|
|
|
|
|
{ |
1812
|
|
|
|
|
|
|
@current_property_value = ( |
1813
|
|
|
|
|
|
|
$self->_element_to_string($current_element), |
1814
|
|
|
|
|
|
|
($has_datatype ? $datatype: undef), |
1815
|
|
|
|
|
|
|
$current_language, |
1816
|
|
|
|
|
|
|
); |
1817
|
|
|
|
|
|
|
} |
1818
|
|
|
|
|
|
|
|
1819
|
|
|
|
|
|
|
# as an [XML literal] if: explicitly rdf:XMLLiteral. |
1820
|
|
|
|
|
|
|
elsif ($datatype eq RDF_XMLLIT) |
1821
|
|
|
|
|
|
|
{ |
1822
|
|
|
|
|
|
|
@current_property_value = ( |
1823
|
|
|
|
|
|
|
$self->_element_to_xml($current_element, $current_language), |
1824
|
|
|
|
|
|
|
RDF_XMLLIT, |
1825
|
|
|
|
|
|
|
$current_language, |
1826
|
|
|
|
|
|
|
); |
1827
|
|
|
|
|
|
|
$recurse = $self->{options}{xmllit_recurse}; |
1828
|
|
|
|
|
|
|
} |
1829
|
|
|
|
|
|
|
|
1830
|
|
|
|
|
|
|
# as a [typed literal] if: |
1831
|
|
|
|
|
|
|
# |
1832
|
|
|
|
|
|
|
# * @datatype is present, and does not have an empty value. |
1833
|
|
|
|
|
|
|
# |
1834
|
|
|
|
|
|
|
# The actual literal is either the value of @content (if present) |
1835
|
|
|
|
|
|
|
# or a string created by concatenating the value of all descendant |
1836
|
|
|
|
|
|
|
# text nodes, of the [current element] in turn. The final string |
1837
|
|
|
|
|
|
|
# includes the datatype URI, as described in [RDF-CONCEPTS], which |
1838
|
|
|
|
|
|
|
# will have been obtained according to the section on CURIE and URI |
1839
|
|
|
|
|
|
|
# Processing. |
1840
|
|
|
|
|
|
|
elsif ($has_datatype) |
1841
|
|
|
|
|
|
|
{ |
1842
|
|
|
|
|
|
|
if ($current_element->hasAttributeNsSafe($rdfans, 'content')) |
1843
|
|
|
|
|
|
|
{ |
1844
|
|
|
|
|
|
|
@current_property_value = ( |
1845
|
|
|
|
|
|
|
$current_element->getAttributeNsSafe($rdfans, 'content'), |
1846
|
|
|
|
|
|
|
$datatype, |
1847
|
|
|
|
|
|
|
$current_language, |
1848
|
|
|
|
|
|
|
); |
1849
|
|
|
|
|
|
|
} |
1850
|
|
|
|
|
|
|
else |
1851
|
|
|
|
|
|
|
{ |
1852
|
|
|
|
|
|
|
@current_property_value = ( |
1853
|
|
|
|
|
|
|
$self->_element_to_string($current_element), |
1854
|
|
|
|
|
|
|
$datatype, |
1855
|
|
|
|
|
|
|
$current_language, |
1856
|
|
|
|
|
|
|
); |
1857
|
|
|
|
|
|
|
} |
1858
|
|
|
|
|
|
|
} |
1859
|
|
|
|
|
|
|
|
1860
|
|
|
|
|
|
|
elsif ($self->{options}{property_resources} |
1861
|
|
|
|
|
|
|
and !$current_element->hasAttributeNsSafe($rdfans, 'datatype') |
1862
|
|
|
|
|
|
|
and !$current_element->hasAttributeNsSafe($rdfans, 'content') |
1863
|
|
|
|
|
|
|
and !$current_element->hasAttributeNsSafe($rdfans, 'rel') |
1864
|
|
|
|
|
|
|
and !$current_element->hasAttributeNsSafe($rdfans, 'rev') |
1865
|
|
|
|
|
|
|
and ( |
1866
|
|
|
|
|
|
|
$current_element->hasAttributeNsSafe($rdfans, 'resource') |
1867
|
|
|
|
|
|
|
or $current_element->hasAttributeNsSafe($rdfans, 'href') |
1868
|
|
|
|
|
|
|
or $current_element->hasAttributeNsSafe($rdfans, 'src') |
1869
|
|
|
|
|
|
|
&& $self->{options}{src_sets_object} |
1870
|
|
|
|
|
|
|
)) |
1871
|
|
|
|
|
|
|
{ |
1872
|
|
|
|
|
|
|
my $resource; |
1873
|
|
|
|
|
|
|
foreach my $attr (qw(resource href src)) |
1874
|
|
|
|
|
|
|
{ |
1875
|
|
|
|
|
|
|
next unless $current_element->hasAttributeNsSafe($rdfans, $attr); |
1876
|
|
|
|
|
|
|
$resource = $self->_expand_curie( |
1877
|
|
|
|
|
|
|
$current_element->getAttributeNsSafe($rdfans, $attr), |
1878
|
|
|
|
|
|
|
element => $current_element, |
1879
|
|
|
|
|
|
|
attribute => $attr, |
1880
|
|
|
|
|
|
|
prefixes => $local_uri_mappings, |
1881
|
|
|
|
|
|
|
terms => $local_term_mappings, |
1882
|
|
|
|
|
|
|
xml_base => $xml_base, |
1883
|
|
|
|
|
|
|
); |
1884
|
|
|
|
|
|
|
last if defined $resource; |
1885
|
|
|
|
|
|
|
} |
1886
|
|
|
|
|
|
|
@current_property_value = ([ $resource ]) if defined $resource; |
1887
|
|
|
|
|
|
|
} |
1888
|
|
|
|
|
|
|
|
1889
|
|
|
|
|
|
|
elsif ($self->{options}{property_resources} |
1890
|
|
|
|
|
|
|
and defined $typed_resource |
1891
|
|
|
|
|
|
|
and $current_element->hasAttributeNsSafe($rdfans, 'typeof') |
1892
|
|
|
|
|
|
|
and !$current_element->hasAttributeNsSafe($rdfans, 'about')) |
1893
|
|
|
|
|
|
|
{ |
1894
|
|
|
|
|
|
|
@current_property_value = ([ $typed_resource ]); |
1895
|
|
|
|
|
|
|
} |
1896
|
|
|
|
|
|
|
|
1897
|
|
|
|
|
|
|
# or all children of the [current element] are text nodes; |
1898
|
|
|
|
|
|
|
# or there are no child nodes; |
1899
|
|
|
|
|
|
|
# or the body of the [ current element ] does have non-text |
1900
|
|
|
|
|
|
|
# child nodes but @datatype is present, with an empty value. |
1901
|
|
|
|
|
|
|
elsif (not $current_element->getElementsByTagName('*')) |
1902
|
|
|
|
|
|
|
{ |
1903
|
|
|
|
|
|
|
@current_property_value = ( |
1904
|
|
|
|
|
|
|
$self->_element_to_string($current_element), |
1905
|
|
|
|
|
|
|
($has_datatype ? $datatype: undef), |
1906
|
|
|
|
|
|
|
$current_language, |
1907
|
|
|
|
|
|
|
); |
1908
|
|
|
|
|
|
|
} |
1909
|
|
|
|
|
|
|
|
1910
|
|
|
|
|
|
|
# In RDFa 1.0 by default generate an XML Literal; |
1911
|
|
|
|
|
|
|
# in RDFa 1.1 by default generate a plain literal. |
1912
|
|
|
|
|
|
|
elsif (!$has_datatype and $current_element->getElementsByTagName('*')) |
1913
|
|
|
|
|
|
|
{ |
1914
|
|
|
|
|
|
|
if ($self->{options}{xmllit_default}) |
1915
|
|
|
|
|
|
|
{ |
1916
|
|
|
|
|
|
|
@current_property_value = ($self->_element_to_xml($current_element, $current_language), |
1917
|
|
|
|
|
|
|
RDF_XMLLIT, |
1918
|
|
|
|
|
|
|
$current_language); |
1919
|
|
|
|
|
|
|
$recurse = $self->{options}{xmllit_recurse}; |
1920
|
|
|
|
|
|
|
} |
1921
|
|
|
|
|
|
|
else |
1922
|
|
|
|
|
|
|
{ |
1923
|
|
|
|
|
|
|
@current_property_value = ($self->_element_to_string($current_element), |
1924
|
|
|
|
|
|
|
undef, |
1925
|
|
|
|
|
|
|
$current_language); |
1926
|
|
|
|
|
|
|
} |
1927
|
|
|
|
|
|
|
} |
1928
|
|
|
|
|
|
|
|
1929
|
|
|
|
|
|
|
else |
1930
|
|
|
|
|
|
|
{ |
1931
|
|
|
|
|
|
|
die("How did we get here??\n"); |
1932
|
|
|
|
|
|
|
} |
1933
|
|
|
|
|
|
|
} |
1934
|
|
|
|
|
|
|
|
1935
|
|
|
|
|
|
|
my $E = { # provenance tracking |
1936
|
|
|
|
|
|
|
current => $current_element, |
1937
|
|
|
|
|
|
|
subject => $new_subject_elem, |
1938
|
|
|
|
|
|
|
predicate => $current_element, |
1939
|
|
|
|
|
|
|
object => $current_element, |
1940
|
|
|
|
|
|
|
graph => $graph_elem, |
1941
|
|
|
|
|
|
|
}; |
1942
|
|
|
|
|
|
|
foreach my $property (@prop) |
1943
|
|
|
|
|
|
|
{ |
1944
|
|
|
|
|
|
|
next unless defined $current_property_value[0]; |
1945
|
|
|
|
|
|
|
|
1946
|
|
|
|
|
|
|
# The [current property value] is then used with each predicate to |
1947
|
|
|
|
|
|
|
# generate a triple as follows: |
1948
|
|
|
|
|
|
|
# |
1949
|
|
|
|
|
|
|
# subject |
1950
|
|
|
|
|
|
|
# [new subject] |
1951
|
|
|
|
|
|
|
# predicate |
1952
|
|
|
|
|
|
|
# full URI |
1953
|
|
|
|
|
|
|
# object |
1954
|
|
|
|
|
|
|
# [current object literal] |
1955
|
|
|
|
|
|
|
|
1956
|
|
|
|
|
|
|
my $p = $self->_expand_curie( |
1957
|
|
|
|
|
|
|
$property, |
1958
|
|
|
|
|
|
|
element => $current_element, |
1959
|
|
|
|
|
|
|
attribute => 'property', |
1960
|
|
|
|
|
|
|
prefixes => $local_uri_mappings, |
1961
|
|
|
|
|
|
|
terms => $local_term_mappings, |
1962
|
|
|
|
|
|
|
xml_base => $xml_base, |
1963
|
|
|
|
|
|
|
); |
1964
|
|
|
|
|
|
|
next unless defined $p; |
1965
|
|
|
|
|
|
|
|
1966
|
|
|
|
|
|
|
if (ref $current_property_value[0] eq 'ARRAY') |
1967
|
|
|
|
|
|
|
{ |
1968
|
|
|
|
|
|
|
if ($current_element->hasAttributeNsSafe($rdfans, 'inlist')) |
1969
|
|
|
|
|
|
|
{ |
1970
|
|
|
|
|
|
|
$list_mappings->{$p} = [] unless defined $list_mappings->{$p}; |
1971
|
|
|
|
|
|
|
push @{ $list_mappings->{$p} }, [resource => $current_property_value[0][0]]; |
1972
|
|
|
|
|
|
|
} |
1973
|
|
|
|
|
|
|
else |
1974
|
|
|
|
|
|
|
{ |
1975
|
|
|
|
|
|
|
$self->_insert_triple_resource($E, $new_subject, $p, $current_property_value[0][0], $graph); |
1976
|
|
|
|
|
|
|
$activity++; |
1977
|
|
|
|
|
|
|
} |
1978
|
|
|
|
|
|
|
} |
1979
|
|
|
|
|
|
|
else |
1980
|
|
|
|
|
|
|
{ |
1981
|
|
|
|
|
|
|
if ($current_element->hasAttributeNsSafe($rdfans, 'inlist')) |
1982
|
|
|
|
|
|
|
{ |
1983
|
|
|
|
|
|
|
$list_mappings->{$p} = [] unless defined $list_mappings->{$p}; |
1984
|
|
|
|
|
|
|
push @{ $list_mappings->{$p} }, [literal => @current_property_value]; |
1985
|
|
|
|
|
|
|
} |
1986
|
|
|
|
|
|
|
else |
1987
|
|
|
|
|
|
|
{ |
1988
|
|
|
|
|
|
|
$self->_insert_triple_literal($E, $new_subject, $p, @current_property_value, $graph); |
1989
|
|
|
|
|
|
|
$activity++; |
1990
|
|
|
|
|
|
|
} |
1991
|
|
|
|
|
|
|
} |
1992
|
|
|
|
|
|
|
# Once the triple has been created, if the [datatype] of the |
1993
|
|
|
|
|
|
|
# [current object literal] is rdf:XMLLiteral, then the [recurse] |
1994
|
|
|
|
|
|
|
# flag is set to false. |
1995
|
|
|
|
|
|
|
# $recurse = 0 |
1996
|
|
|
|
|
|
|
# if $datatype eq RDF_XMLLIT; |
1997
|
|
|
|
|
|
|
} |
1998
|
|
|
|
|
|
|
|
1999
|
|
|
|
|
|
|
# # If the [skip element] flag is 'false', and either: the previous step |
2000
|
|
|
|
|
|
|
# # resulted in a 'true' flag, or [new subject] was set to a non-null and |
2001
|
|
|
|
|
|
|
# # non-bnode value, then any [incomplete triple]s within the current context |
2002
|
|
|
|
|
|
|
# # should be completed: |
2003
|
|
|
|
|
|
|
# if (!$skip_element && ($flag || ((defined $new_subject) && ($new_subject !~ /^bnodeXXX:/)))) |
2004
|
|
|
|
|
|
|
# { |
2005
|
|
|
|
|
|
|
|
2006
|
|
|
|
|
|
|
if (!$skip_element && defined $new_subject) |
2007
|
|
|
|
|
|
|
{ |
2008
|
|
|
|
|
|
|
# Loop through list of incomplete triples... |
2009
|
|
|
|
|
|
|
foreach my $it (@$incomplete_triples) |
2010
|
|
|
|
|
|
|
{ |
2011
|
|
|
|
|
|
|
my $direction = $it->{direction}; |
2012
|
|
|
|
|
|
|
my $predicate = $it->{predicate}; |
2013
|
|
|
|
|
|
|
my $parent_graph = $it->{graph}; |
2014
|
|
|
|
|
|
|
|
2015
|
|
|
|
|
|
|
if ($direction eq 'none' and defined $it->{list}) |
2016
|
|
|
|
|
|
|
{ |
2017
|
|
|
|
|
|
|
push @{$it->{list}}, [resource => $new_subject]; |
2018
|
|
|
|
|
|
|
} |
2019
|
|
|
|
|
|
|
elsif ($direction eq 'forward') |
2020
|
|
|
|
|
|
|
{ |
2021
|
|
|
|
|
|
|
my $E = { # provenance tracking |
2022
|
|
|
|
|
|
|
current => $current_element, |
2023
|
|
|
|
|
|
|
subject => $parent_subject_elem, |
2024
|
|
|
|
|
|
|
predicate => $it->{predicate_element}, |
2025
|
|
|
|
|
|
|
object => $new_subject_elem, |
2026
|
|
|
|
|
|
|
graph => $it->{graph_element}, |
2027
|
|
|
|
|
|
|
}; |
2028
|
|
|
|
|
|
|
|
2029
|
|
|
|
|
|
|
$self->_insert_triple_resource($E, $parent_subject, $predicate, $new_subject, $parent_graph); |
2030
|
|
|
|
|
|
|
$activity++; |
2031
|
|
|
|
|
|
|
} |
2032
|
|
|
|
|
|
|
elsif ($direction eq 'reverse') |
2033
|
|
|
|
|
|
|
{ |
2034
|
|
|
|
|
|
|
my $E = { # provenance tracking |
2035
|
|
|
|
|
|
|
current => $current_element, |
2036
|
|
|
|
|
|
|
subject => $new_subject_elem, |
2037
|
|
|
|
|
|
|
predicate => $it->{predicate_element}, |
2038
|
|
|
|
|
|
|
object => $parent_subject_elem, |
2039
|
|
|
|
|
|
|
graph => $it->{graph_element}, |
2040
|
|
|
|
|
|
|
}; |
2041
|
|
|
|
|
|
|
|
2042
|
|
|
|
|
|
|
$self->_insert_triple_resource($E, $new_subject, $predicate, $parent_subject, $parent_graph); |
2043
|
|
|
|
|
|
|
$activity++; |
2044
|
|
|
|
|
|
|
} |
2045
|
|
|
|
|
|
|
else |
2046
|
|
|
|
|
|
|
{ |
2047
|
|
|
|
|
|
|
die "Direction is '$direction'??"; |
2048
|
|
|
|
|
|
|
} |
2049
|
|
|
|
|
|
|
} |
2050
|
|
|
|
|
|
|
} |
2051
|
|
|
|
|
|
|
|
2052
|
|
|
|
|
|
|
# If the [recurse] flag is 'true', all elements that are children of the |
2053
|
|
|
|
|
|
|
# [current element] are processed using the rules described here, using a |
2054
|
|
|
|
|
|
|
# new [evaluation context], initialized as follows |
2055
|
|
|
|
|
|
|
my $flag = 0; |
2056
|
|
|
|
|
|
|
if ($recurse) |
2057
|
|
|
|
|
|
|
{ |
2058
|
|
|
|
|
|
|
my $evaluation_context; |
2059
|
|
|
|
|
|
|
|
2060
|
|
|
|
|
|
|
# If the [skip element] flag is 'true' then the new [evaluation context] |
2061
|
|
|
|
|
|
|
# is a copy of the current context that was passed in to this level of |
2062
|
|
|
|
|
|
|
# processing, with the [language] and [list of URI mappings] values |
2063
|
|
|
|
|
|
|
# replaced with the local values; |
2064
|
|
|
|
|
|
|
if ($skip_element) |
2065
|
|
|
|
|
|
|
{ |
2066
|
|
|
|
|
|
|
$evaluation_context = { |
2067
|
|
|
|
|
|
|
%$args, |
2068
|
|
|
|
|
|
|
base => $base, |
2069
|
|
|
|
|
|
|
language => $current_language, |
2070
|
|
|
|
|
|
|
uri_mappings => $uri_mappings, |
2071
|
|
|
|
|
|
|
term_mappings => $term_mappings, |
2072
|
|
|
|
|
|
|
list_mappings => $list_mappings, |
2073
|
|
|
|
|
|
|
# parent_subject => $parent_subject, |
2074
|
|
|
|
|
|
|
# parent_subject_elem => $parent_subject_elem, |
2075
|
|
|
|
|
|
|
# parent_object => $parent_object, |
2076
|
|
|
|
|
|
|
# parent_object_elem => $parent_object_elem, |
2077
|
|
|
|
|
|
|
# incomplete_triples => $incomplete_triples, |
2078
|
|
|
|
|
|
|
graph => $graph, |
2079
|
|
|
|
|
|
|
graph_elem => $graph_elem, |
2080
|
|
|
|
|
|
|
xml_base => $xml_base, |
2081
|
|
|
|
|
|
|
parent => $args, |
2082
|
|
|
|
|
|
|
}; |
2083
|
|
|
|
|
|
|
} |
2084
|
|
|
|
|
|
|
|
2085
|
|
|
|
|
|
|
# Otherwise, the values are: |
2086
|
|
|
|
|
|
|
else |
2087
|
|
|
|
|
|
|
{ |
2088
|
|
|
|
|
|
|
$evaluation_context = { |
2089
|
|
|
|
|
|
|
base => $base, |
2090
|
|
|
|
|
|
|
parent_subject => $new_subject, |
2091
|
|
|
|
|
|
|
parent_subject_elem => $new_subject_elem, |
2092
|
|
|
|
|
|
|
parent_object => (defined $current_object_resource ? $current_object_resource : (defined $new_subject ? $new_subject : $parent_subject)), |
2093
|
|
|
|
|
|
|
parent_object_elem => (defined $current_object_resource_elem ? $current_object_resource_elem : (defined $new_subject_elem ? $new_subject_elem : $parent_subject_elem)), |
2094
|
|
|
|
|
|
|
uri_mappings => $local_uri_mappings, |
2095
|
|
|
|
|
|
|
term_mappings => $local_term_mappings, |
2096
|
|
|
|
|
|
|
incomplete_triples => $local_incomplete_triples, |
2097
|
|
|
|
|
|
|
list_mappings => $list_mappings, |
2098
|
|
|
|
|
|
|
language => $current_language, |
2099
|
|
|
|
|
|
|
graph => $graph, |
2100
|
|
|
|
|
|
|
graph_elem => $graph_elem, |
2101
|
|
|
|
|
|
|
xml_base => $xml_base, |
2102
|
|
|
|
|
|
|
parent => $args, |
2103
|
|
|
|
|
|
|
}; |
2104
|
|
|
|
|
|
|
} |
2105
|
|
|
|
|
|
|
|
2106
|
|
|
|
|
|
|
foreach my $kid ($current_element->getChildrenByTagName('*')) |
2107
|
|
|
|
|
|
|
{ |
2108
|
|
|
|
|
|
|
$flag = $self->_consume_element($kid, $evaluation_context) || $flag; |
2109
|
|
|
|
|
|
|
} |
2110
|
|
|
|
|
|
|
} |
2111
|
|
|
|
|
|
|
|
2112
|
|
|
|
|
|
|
# Once all the child elements have been traversed, list triples are |
2113
|
|
|
|
|
|
|
# generated, if necessary. |
2114
|
|
|
|
|
|
|
if ($list_mappings->{'::meta'}{owner} == $current_element) |
2115
|
|
|
|
|
|
|
{ |
2116
|
|
|
|
|
|
|
foreach my $iri (keys %$list_mappings) |
2117
|
|
|
|
|
|
|
{ |
2118
|
|
|
|
|
|
|
next if $iri eq '::meta'; |
2119
|
|
|
|
|
|
|
|
2120
|
|
|
|
|
|
|
# For each IRI in the local list mapping, if the equivalent list does |
2121
|
|
|
|
|
|
|
# not exist in the evaluation context, indicating that the list was |
2122
|
|
|
|
|
|
|
# originally defined on the current element, use the list as follows: |
2123
|
|
|
|
|
|
|
if ($args->{list_mappings}{$iri} == $list_mappings->{$iri} |
2124
|
|
|
|
|
|
|
and ref $args->{list_mappings}{$iri} eq 'HASH' |
2125
|
|
|
|
|
|
|
and %{ $args->{list_mappings}{$iri} }) |
2126
|
|
|
|
|
|
|
{ |
2127
|
|
|
|
|
|
|
next; |
2128
|
|
|
|
|
|
|
} |
2129
|
|
|
|
|
|
|
|
2130
|
|
|
|
|
|
|
# Create a new 'bnode' array containing newly created bnodes, one for |
2131
|
|
|
|
|
|
|
# each element in the list |
2132
|
|
|
|
|
|
|
my @bnode = map { $self->bnode; } @{ $list_mappings->{$iri} }; |
2133
|
|
|
|
|
|
|
my $first = @bnode ? $bnode[0] : undef; |
2134
|
|
|
|
|
|
|
|
2135
|
|
|
|
|
|
|
while (my $bnode = shift @bnode) |
2136
|
|
|
|
|
|
|
{ |
2137
|
|
|
|
|
|
|
my $value = shift @{ $list_mappings->{$iri} }; |
2138
|
|
|
|
|
|
|
my $type = shift @$value; |
2139
|
|
|
|
|
|
|
|
2140
|
|
|
|
|
|
|
my $E = { # provenance tracking |
2141
|
|
|
|
|
|
|
current => $current_element, |
2142
|
|
|
|
|
|
|
graph => $graph_elem, |
2143
|
|
|
|
|
|
|
}; |
2144
|
|
|
|
|
|
|
if ($type eq 'literal') |
2145
|
|
|
|
|
|
|
{ |
2146
|
|
|
|
|
|
|
$self->_insert_triple_literal($E, $bnode, RDF_FIRST, @$value, $graph); |
2147
|
|
|
|
|
|
|
} |
2148
|
|
|
|
|
|
|
else |
2149
|
|
|
|
|
|
|
{ |
2150
|
|
|
|
|
|
|
$self->_insert_triple_resource($E, $bnode, RDF_FIRST, @$value, $graph); |
2151
|
|
|
|
|
|
|
} |
2152
|
|
|
|
|
|
|
|
2153
|
|
|
|
|
|
|
if (exists $bnode[0]) |
2154
|
|
|
|
|
|
|
{ |
2155
|
|
|
|
|
|
|
$self->_insert_triple_resource($E, $bnode, RDF_REST, $bnode[0], $graph); |
2156
|
|
|
|
|
|
|
} |
2157
|
|
|
|
|
|
|
else |
2158
|
|
|
|
|
|
|
{ |
2159
|
|
|
|
|
|
|
$self->_insert_triple_resource($E, $bnode, RDF_REST, RDF_NIL, $graph); |
2160
|
|
|
|
|
|
|
} |
2161
|
|
|
|
|
|
|
} |
2162
|
|
|
|
|
|
|
|
2163
|
|
|
|
|
|
|
my $E = { # provenance tracking |
2164
|
|
|
|
|
|
|
current => $current_element, |
2165
|
|
|
|
|
|
|
subject => $new_subject_elem, |
2166
|
|
|
|
|
|
|
predicate => $current_element, |
2167
|
|
|
|
|
|
|
graph => $graph_elem, |
2168
|
|
|
|
|
|
|
}; |
2169
|
|
|
|
|
|
|
|
2170
|
|
|
|
|
|
|
#my ($attr, $iri) = split /:/, $iri, 2; |
2171
|
|
|
|
|
|
|
my $attr = 'REL'; |
2172
|
|
|
|
|
|
|
|
2173
|
|
|
|
|
|
|
if (defined $first) |
2174
|
|
|
|
|
|
|
{ |
2175
|
|
|
|
|
|
|
$attr eq 'REV' |
2176
|
|
|
|
|
|
|
? $self->_insert_triple_resource($E, $first, $iri, $new_subject, $graph) |
2177
|
|
|
|
|
|
|
: $self->_insert_triple_resource($E, $new_subject, $iri, $first, $graph); |
2178
|
|
|
|
|
|
|
} |
2179
|
|
|
|
|
|
|
else |
2180
|
|
|
|
|
|
|
{ |
2181
|
|
|
|
|
|
|
$attr eq 'REV' |
2182
|
|
|
|
|
|
|
? $self->_insert_triple_resource($E, RDF_NIL, $iri, $new_subject, $graph) |
2183
|
|
|
|
|
|
|
: $self->_insert_triple_resource($E, $new_subject, $iri, RDF_NIL, $graph); |
2184
|
|
|
|
|
|
|
} |
2185
|
|
|
|
|
|
|
|
2186
|
|
|
|
|
|
|
$activity++; |
2187
|
|
|
|
|
|
|
} |
2188
|
|
|
|
|
|
|
} |
2189
|
|
|
|
|
|
|
|
2190
|
|
|
|
|
|
|
return 1 if $activity || $new_subject || $flag; |
2191
|
|
|
|
|
|
|
return 0; |
2192
|
|
|
|
|
|
|
} |
2193
|
|
|
|
|
|
|
|
2194
|
|
|
|
|
|
|
sub set_callbacks |
2195
|
|
|
|
|
|
|
# Set callback functions for handling RDF triples. |
2196
|
|
|
|
|
|
|
{ |
2197
|
|
|
|
|
|
|
my $self = shift; |
2198
|
|
|
|
|
|
|
|
2199
|
|
|
|
|
|
|
if ('HASH' eq ref $_[0]) |
2200
|
|
|
|
|
|
|
{ |
2201
|
|
|
|
|
|
|
$self->{'sub'} = $_[0]; |
2202
|
|
|
|
|
|
|
$self->{'sub'}->{'pretriple_resource'} = \&_print0 |
2203
|
|
|
|
|
|
|
if lc ($self->{'sub'}->{'pretriple_resource'}||'') eq 'print'; |
2204
|
|
|
|
|
|
|
$self->{'sub'}->{'pretriple_literal'} = \&_print1 |
2205
|
|
|
|
|
|
|
if lc ($self->{'sub'}->{'pretriple_literal'}||'') eq 'print'; |
2206
|
|
|
|
|
|
|
} |
2207
|
|
|
|
|
|
|
else |
2208
|
|
|
|
|
|
|
{ |
2209
|
|
|
|
|
|
|
die "Unsupported set_callbacks call.\n"; |
2210
|
|
|
|
|
|
|
} |
2211
|
|
|
|
|
|
|
|
2212
|
|
|
|
|
|
|
return $self; |
2213
|
|
|
|
|
|
|
} |
2214
|
|
|
|
|
|
|
|
2215
|
|
|
|
|
|
|
sub _print0 |
2216
|
|
|
|
|
|
|
# Prints a Turtle triple. |
2217
|
|
|
|
|
|
|
{ |
2218
|
|
|
|
|
|
|
my $self = shift; |
2219
|
|
|
|
|
|
|
my $element = shift; |
2220
|
|
|
|
|
|
|
my $subject = shift; |
2221
|
|
|
|
|
|
|
my $pred = shift; |
2222
|
|
|
|
|
|
|
my $object = shift; |
2223
|
|
|
|
|
|
|
my $graph = shift; |
2224
|
|
|
|
|
|
|
|
2225
|
|
|
|
|
|
|
if ($graph) |
2226
|
|
|
|
|
|
|
{ |
2227
|
|
|
|
|
|
|
print "# GRAPH $graph\n"; |
2228
|
|
|
|
|
|
|
} |
2229
|
|
|
|
|
|
|
if ($element) |
2230
|
|
|
|
|
|
|
{ |
2231
|
|
|
|
|
|
|
printf("# Triple on element %s.\n", $element->nodePath); |
2232
|
|
|
|
|
|
|
} |
2233
|
|
|
|
|
|
|
else |
2234
|
|
|
|
|
|
|
{ |
2235
|
|
|
|
|
|
|
printf("# Triple.\n"); |
2236
|
|
|
|
|
|
|
} |
2237
|
|
|
|
|
|
|
|
2238
|
|
|
|
|
|
|
printf("%s %s %s .\n", |
2239
|
|
|
|
|
|
|
($subject =~ /^_:/ ? $subject : "<$subject>"), |
2240
|
|
|
|
|
|
|
"<$pred>", |
2241
|
|
|
|
|
|
|
($object =~ /^_:/ ? $object : "<$object>")); |
2242
|
|
|
|
|
|
|
|
2243
|
|
|
|
|
|
|
return; |
2244
|
|
|
|
|
|
|
} |
2245
|
|
|
|
|
|
|
|
2246
|
|
|
|
|
|
|
sub _print1 |
2247
|
|
|
|
|
|
|
# Prints a Turtle triple. |
2248
|
|
|
|
|
|
|
{ |
2249
|
|
|
|
|
|
|
my $self = shift; |
2250
|
|
|
|
|
|
|
my $element = shift; |
2251
|
|
|
|
|
|
|
my $subject = shift; |
2252
|
|
|
|
|
|
|
my $pred = shift; |
2253
|
|
|
|
|
|
|
my $object = shift; |
2254
|
|
|
|
|
|
|
my $dt = shift; |
2255
|
|
|
|
|
|
|
my $lang = shift; |
2256
|
|
|
|
|
|
|
my $graph = shift; |
2257
|
|
|
|
|
|
|
|
2258
|
|
|
|
|
|
|
# Clumsy, but probably works. |
2259
|
|
|
|
|
|
|
$object =~ s/\\/\\\\/g; |
2260
|
|
|
|
|
|
|
$object =~ s/\n/\\n/g; |
2261
|
|
|
|
|
|
|
$object =~ s/\r/\\r/g; |
2262
|
|
|
|
|
|
|
$object =~ s/\t/\\t/g; |
2263
|
|
|
|
|
|
|
$object =~ s/\"/\\\"/g; |
2264
|
|
|
|
|
|
|
|
2265
|
|
|
|
|
|
|
if ($graph) |
2266
|
|
|
|
|
|
|
{ |
2267
|
|
|
|
|
|
|
print "# GRAPH $graph\n"; |
2268
|
|
|
|
|
|
|
} |
2269
|
|
|
|
|
|
|
if ($element) |
2270
|
|
|
|
|
|
|
{ |
2271
|
|
|
|
|
|
|
printf("# Triple on element %s.\n", $element->nodePath); |
2272
|
|
|
|
|
|
|
} |
2273
|
|
|
|
|
|
|
else |
2274
|
|
|
|
|
|
|
{ |
2275
|
|
|
|
|
|
|
printf("# Triple.\n"); |
2276
|
|
|
|
|
|
|
} |
2277
|
|
|
|
|
|
|
|
2278
|
|
|
|
|
|
|
printf("%s %s %s%s%s .\n", |
2279
|
|
|
|
|
|
|
($subject =~ /^_:/ ? $subject : "<$subject>"), |
2280
|
|
|
|
|
|
|
"<$pred>", |
2281
|
|
|
|
|
|
|
"\"$object\"", |
2282
|
|
|
|
|
|
|
(length $dt ? "^^<$dt>" : ''), |
2283
|
|
|
|
|
|
|
((length $lang && !length $dt) ? "\@$lang" : '') |
2284
|
|
|
|
|
|
|
); |
2285
|
|
|
|
|
|
|
|
2286
|
|
|
|
|
|
|
return; |
2287
|
|
|
|
|
|
|
} |
2288
|
|
|
|
|
|
|
|
2289
|
|
|
|
|
|
|
sub element_subjects |
2290
|
|
|
|
|
|
|
{ |
2291
|
|
|
|
|
|
|
my ($self) = shift; |
2292
|
|
|
|
|
|
|
$self->consume; |
2293
|
|
|
|
|
|
|
$self->{element_subjects} = shift if @_; |
2294
|
|
|
|
|
|
|
return $self->{element_subjects}; |
2295
|
|
|
|
|
|
|
} |
2296
|
|
|
|
|
|
|
|
2297
|
|
|
|
|
|
|
sub _insert_triple_resource |
2298
|
|
|
|
|
|
|
{ |
2299
|
|
|
|
|
|
|
my $self = shift; |
2300
|
|
|
|
|
|
|
|
2301
|
|
|
|
|
|
|
my $element = shift; # A reference to the XML::LibXML element being parsed |
2302
|
|
|
|
|
|
|
my $subject = shift; # Subject URI or bnode |
2303
|
|
|
|
|
|
|
my $predicate = shift; # Predicate URI |
2304
|
|
|
|
|
|
|
my $object = shift; # Resource URI or bnode |
2305
|
|
|
|
|
|
|
my $graph = shift; # Graph URI or bnode (if named graphs feature is enabled) |
2306
|
|
|
|
|
|
|
|
2307
|
|
|
|
|
|
|
my $suppress_triple = 0; |
2308
|
|
|
|
|
|
|
$suppress_triple = $self->{'sub'}->{'pretriple_resource'}( |
2309
|
|
|
|
|
|
|
$self, |
2310
|
|
|
|
|
|
|
ref $element ? $element->{current} : undef, |
2311
|
|
|
|
|
|
|
$subject, |
2312
|
|
|
|
|
|
|
$predicate, |
2313
|
|
|
|
|
|
|
$object, |
2314
|
|
|
|
|
|
|
$graph, |
2315
|
|
|
|
|
|
|
) |
2316
|
|
|
|
|
|
|
if defined $self->{'sub'}->{'pretriple_resource'}; |
2317
|
|
|
|
|
|
|
return if $suppress_triple; |
2318
|
|
|
|
|
|
|
|
2319
|
|
|
|
|
|
|
# First make sure the object node type is ok. |
2320
|
|
|
|
|
|
|
my $to; |
2321
|
|
|
|
|
|
|
if ($object =~ m/^_:(.*)/) |
2322
|
|
|
|
|
|
|
{ |
2323
|
|
|
|
|
|
|
$to = RDF::Trine::Node::Blank->new($1); |
2324
|
|
|
|
|
|
|
} |
2325
|
|
|
|
|
|
|
else |
2326
|
|
|
|
|
|
|
{ |
2327
|
|
|
|
|
|
|
$to = RDF::Trine::Node::Resource->new($object); |
2328
|
|
|
|
|
|
|
} |
2329
|
|
|
|
|
|
|
|
2330
|
|
|
|
|
|
|
# Run the common function |
2331
|
|
|
|
|
|
|
return $self->_insert_triple_common($element, $subject, $predicate, $to, $graph); |
2332
|
|
|
|
|
|
|
} |
2333
|
|
|
|
|
|
|
|
2334
|
|
|
|
|
|
|
sub _insert_triple_literal |
2335
|
|
|
|
|
|
|
{ |
2336
|
|
|
|
|
|
|
my $self = shift; |
2337
|
|
|
|
|
|
|
|
2338
|
|
|
|
|
|
|
my $element = shift; # A reference to the XML::LibXML element being parsed |
2339
|
|
|
|
|
|
|
my $subject = shift; # Subject URI or bnode |
2340
|
|
|
|
|
|
|
my $predicate = shift; # Predicate URI |
2341
|
|
|
|
|
|
|
my $object = shift; # Resource Literal |
2342
|
|
|
|
|
|
|
my $datatype = shift; # Datatype URI (possibly undef or '') |
2343
|
|
|
|
|
|
|
my $language = shift; # Language (possibly undef or '') |
2344
|
|
|
|
|
|
|
my $graph = shift; # Graph URI or bnode (if named graphs feature is enabled) |
2345
|
|
|
|
|
|
|
|
2346
|
|
|
|
|
|
|
my $suppress_triple = 0; |
2347
|
|
|
|
|
|
|
$suppress_triple = $self->{'sub'}->{'pretriple_literal'}( |
2348
|
|
|
|
|
|
|
$self, |
2349
|
|
|
|
|
|
|
ref $element ? $element->{current} : undef, |
2350
|
|
|
|
|
|
|
$subject, |
2351
|
|
|
|
|
|
|
$predicate, |
2352
|
|
|
|
|
|
|
$object, |
2353
|
|
|
|
|
|
|
$datatype, |
2354
|
|
|
|
|
|
|
$language, |
2355
|
|
|
|
|
|
|
$graph, |
2356
|
|
|
|
|
|
|
) |
2357
|
|
|
|
|
|
|
if defined $self->{'sub'}->{'pretriple_literal'}; |
2358
|
|
|
|
|
|
|
return if $suppress_triple; |
2359
|
|
|
|
|
|
|
|
2360
|
|
|
|
|
|
|
# Now we know there's a literal |
2361
|
|
|
|
|
|
|
my $to; |
2362
|
|
|
|
|
|
|
|
2363
|
|
|
|
|
|
|
# Work around bad Unicode handling in RDF::Trine. |
2364
|
|
|
|
|
|
|
# $object = encode_utf8($object); |
2365
|
|
|
|
|
|
|
|
2366
|
|
|
|
|
|
|
if (defined $datatype) |
2367
|
|
|
|
|
|
|
{ |
2368
|
|
|
|
|
|
|
if ($datatype eq RDF_XMLLIT) |
2369
|
|
|
|
|
|
|
{ |
2370
|
|
|
|
|
|
|
if ($self->{options}{use_rtnlx}) |
2371
|
|
|
|
|
|
|
{ |
2372
|
|
|
|
|
|
|
eval |
2373
|
|
|
|
|
|
|
{ |
2374
|
|
|
|
|
|
|
require RDF::Trine::Node::Literal::XML; |
2375
|
|
|
|
|
|
|
$to = RDF::Trine::Node::Literal::XML->new($element->childNodes); |
2376
|
|
|
|
|
|
|
}; |
2377
|
|
|
|
|
|
|
} |
2378
|
|
|
|
|
|
|
|
2379
|
|
|
|
|
|
|
if ( $@ || !defined $to) |
2380
|
|
|
|
|
|
|
{ |
2381
|
|
|
|
|
|
|
my $orig = $RDF::Trine::Node::Literal::USE_XMLLITERALS; |
2382
|
|
|
|
|
|
|
$RDF::Trine::Node::Literal::USE_XMLLITERALS = 0; |
2383
|
|
|
|
|
|
|
$to = RDF::Trine::Node::Literal->new($object, undef, $datatype); |
2384
|
|
|
|
|
|
|
$RDF::Trine::Node::Literal::USE_XMLLITERALS = $orig; |
2385
|
|
|
|
|
|
|
} |
2386
|
|
|
|
|
|
|
} |
2387
|
|
|
|
|
|
|
else |
2388
|
|
|
|
|
|
|
{ |
2389
|
|
|
|
|
|
|
$to = RDF::Trine::Node::Literal->new($object, undef, $datatype); |
2390
|
|
|
|
|
|
|
} |
2391
|
|
|
|
|
|
|
} |
2392
|
|
|
|
|
|
|
else |
2393
|
|
|
|
|
|
|
{ |
2394
|
|
|
|
|
|
|
$to = RDF::Trine::Node::Literal->new($object, $language, undef); |
2395
|
|
|
|
|
|
|
} |
2396
|
|
|
|
|
|
|
|
2397
|
|
|
|
|
|
|
# Run the common function |
2398
|
|
|
|
|
|
|
$self->_insert_triple_common($element, $subject, $predicate, $to, $graph); |
2399
|
|
|
|
|
|
|
} |
2400
|
|
|
|
|
|
|
|
2401
|
|
|
|
|
|
|
sub _insert_triple_common |
2402
|
|
|
|
|
|
|
{ |
2403
|
|
|
|
|
|
|
my $self = shift; # A reference to the RDF::RDFa::Parser object |
2404
|
|
|
|
|
|
|
my $element = shift; # A reference to the XML::LibXML element being parsed |
2405
|
|
|
|
|
|
|
my $subject = shift; # Subject URI or bnode |
2406
|
|
|
|
|
|
|
my $predicate = shift; # Predicate URI |
2407
|
|
|
|
|
|
|
my $to = shift; # RDF::Trine::Node Resource URI or bnode |
2408
|
|
|
|
|
|
|
my $graph = shift; # Graph URI or bnode (if named graphs feature is enabled) |
2409
|
|
|
|
|
|
|
|
2410
|
|
|
|
|
|
|
# First, make sure subject and predicates are the right kind of nodes |
2411
|
|
|
|
|
|
|
my $tp = RDF::Trine::Node::Resource->new($predicate); |
2412
|
|
|
|
|
|
|
my $ts; |
2413
|
|
|
|
|
|
|
if ($subject =~ m/^_:(.*)/) |
2414
|
|
|
|
|
|
|
{ |
2415
|
|
|
|
|
|
|
$ts = RDF::Trine::Node::Blank->new($1); |
2416
|
|
|
|
|
|
|
} |
2417
|
|
|
|
|
|
|
else |
2418
|
|
|
|
|
|
|
{ |
2419
|
|
|
|
|
|
|
$ts = RDF::Trine::Node::Resource->new($subject); |
2420
|
|
|
|
|
|
|
} |
2421
|
|
|
|
|
|
|
|
2422
|
|
|
|
|
|
|
my $statement; |
2423
|
|
|
|
|
|
|
|
2424
|
|
|
|
|
|
|
# If we are configured for it, and graph name can be found, add it. |
2425
|
|
|
|
|
|
|
if ($self->{'options'}->{'graph'} && $graph) |
2426
|
|
|
|
|
|
|
{ |
2427
|
|
|
|
|
|
|
$self->{Graphs}->{$graph}++; |
2428
|
|
|
|
|
|
|
|
2429
|
|
|
|
|
|
|
my $tg; |
2430
|
|
|
|
|
|
|
if ($graph =~ m/^_:(.*)/) |
2431
|
|
|
|
|
|
|
{ |
2432
|
|
|
|
|
|
|
$tg = RDF::Trine::Node::Blank->new($1); |
2433
|
|
|
|
|
|
|
} |
2434
|
|
|
|
|
|
|
else |
2435
|
|
|
|
|
|
|
{ |
2436
|
|
|
|
|
|
|
$tg = RDF::Trine::Node::Resource->new($graph); |
2437
|
|
|
|
|
|
|
} |
2438
|
|
|
|
|
|
|
|
2439
|
|
|
|
|
|
|
$statement = RDF::Trine::Statement::Quad->new($ts, $tp, $to, $tg); |
2440
|
|
|
|
|
|
|
} |
2441
|
|
|
|
|
|
|
# If no graph name, just add triples |
2442
|
|
|
|
|
|
|
else |
2443
|
|
|
|
|
|
|
{ |
2444
|
|
|
|
|
|
|
$statement = RDF::Trine::Statement->new($ts, $tp, $to); |
2445
|
|
|
|
|
|
|
} |
2446
|
|
|
|
|
|
|
|
2447
|
|
|
|
|
|
|
my $suppress_triple = 0; |
2448
|
|
|
|
|
|
|
$suppress_triple = $self->{'sub'}->{'ontriple'}($self, $element, $statement) |
2449
|
|
|
|
|
|
|
if ($self->{'sub'}->{'ontriple'}); |
2450
|
|
|
|
|
|
|
return if $suppress_triple; |
2451
|
|
|
|
|
|
|
|
2452
|
|
|
|
|
|
|
$self->{model}->add_statement($statement); |
2453
|
|
|
|
|
|
|
} |
2454
|
|
|
|
|
|
|
|
2455
|
|
|
|
|
|
|
sub _atom_magic |
2456
|
|
|
|
|
|
|
{ |
2457
|
|
|
|
|
|
|
my $self = shift; |
2458
|
|
|
|
|
|
|
my $element = shift; |
2459
|
|
|
|
|
|
|
|
2460
|
|
|
|
|
|
|
return $self->bnode($element, 1); |
2461
|
|
|
|
|
|
|
} |
2462
|
|
|
|
|
|
|
|
2463
|
|
|
|
|
|
|
# Splits things like property="foaf:name rdfs:label" |
2464
|
|
|
|
|
|
|
sub _split_tokens |
2465
|
|
|
|
|
|
|
{ |
2466
|
|
|
|
|
|
|
my ($self, $string) = @_; |
2467
|
|
|
|
|
|
|
$string ||= ''; |
2468
|
|
|
|
|
|
|
$string =~ s/(^\s+|\s+$)//g; |
2469
|
|
|
|
|
|
|
my @return = split /\s+/, $string; |
2470
|
|
|
|
|
|
|
return @return; |
2471
|
|
|
|
|
|
|
} |
2472
|
|
|
|
|
|
|
|
2473
|
|
|
|
|
|
|
sub _element_to_bookmarked_string |
2474
|
|
|
|
|
|
|
{ |
2475
|
|
|
|
|
|
|
my ($self, $bookmark) = @_; |
2476
|
|
|
|
|
|
|
|
2477
|
|
|
|
|
|
|
my @name_attribute; |
2478
|
|
|
|
|
|
|
if ($self->{'options'}->{'bookmark_name'} =~ /^\{(.*)\}(.+)$/) |
2479
|
|
|
|
|
|
|
{ |
2480
|
|
|
|
|
|
|
@name_attribute = $1 ? ($1, $2) : (undef, $2); |
2481
|
|
|
|
|
|
|
} |
2482
|
|
|
|
|
|
|
else |
2483
|
|
|
|
|
|
|
{ |
2484
|
|
|
|
|
|
|
@name_attribute = (undef, $self->{'options'}->{'bookmark_name'}); |
2485
|
|
|
|
|
|
|
} |
2486
|
|
|
|
|
|
|
|
2487
|
|
|
|
|
|
|
my ($endtag_namespace, $endtag_localname); |
2488
|
|
|
|
|
|
|
if ($self->{'options'}->{'bookmark_end'} =~ /^\{(.*)\}(.+)$/) |
2489
|
|
|
|
|
|
|
{ |
2490
|
|
|
|
|
|
|
($endtag_namespace, $endtag_localname) = $1 ? ($1, $2) : (undef, $2); |
2491
|
|
|
|
|
|
|
} |
2492
|
|
|
|
|
|
|
else |
2493
|
|
|
|
|
|
|
{ |
2494
|
|
|
|
|
|
|
($endtag_namespace, $endtag_localname) = (undef, $self->{'options'}->{'bookmark_end'}); |
2495
|
|
|
|
|
|
|
} |
2496
|
|
|
|
|
|
|
|
2497
|
|
|
|
|
|
|
my $string = ''; |
2498
|
|
|
|
|
|
|
my $current = $bookmark; |
2499
|
|
|
|
|
|
|
while ($current) |
2500
|
|
|
|
|
|
|
{ |
2501
|
|
|
|
|
|
|
$current = $self->_find_next_node($current); |
2502
|
|
|
|
|
|
|
|
2503
|
|
|
|
|
|
|
if (defined $current |
2504
|
|
|
|
|
|
|
&& $current->nodeType == XML_TEXT_NODE) |
2505
|
|
|
|
|
|
|
{ |
2506
|
|
|
|
|
|
|
$string .= $current->getData; |
2507
|
|
|
|
|
|
|
} |
2508
|
|
|
|
|
|
|
if (defined $current |
2509
|
|
|
|
|
|
|
&& $current->nodeType == XML_ELEMENT_NODE |
2510
|
|
|
|
|
|
|
&& $current->localname eq $endtag_localname |
2511
|
|
|
|
|
|
|
&& $current->namespaceURI eq $endtag_namespace |
2512
|
|
|
|
|
|
|
&& $current->getAttributeNsSafe(@name_attribute) eq $bookmark->getAttributeNsSafe(@name_attribute)) |
2513
|
|
|
|
|
|
|
{ |
2514
|
|
|
|
|
|
|
$current = undef; |
2515
|
|
|
|
|
|
|
} |
2516
|
|
|
|
|
|
|
} |
2517
|
|
|
|
|
|
|
|
2518
|
|
|
|
|
|
|
return $string; |
2519
|
|
|
|
|
|
|
} |
2520
|
|
|
|
|
|
|
|
2521
|
|
|
|
|
|
|
sub _find_next_node |
2522
|
|
|
|
|
|
|
{ |
2523
|
|
|
|
|
|
|
my ($self, $node) = @_; |
2524
|
|
|
|
|
|
|
|
2525
|
|
|
|
|
|
|
if ($node->nodeType == XML_ELEMENT_NODE) |
2526
|
|
|
|
|
|
|
{ |
2527
|
|
|
|
|
|
|
my @kids = $node->childNodes; |
2528
|
|
|
|
|
|
|
return $kids[0] if @kids; |
2529
|
|
|
|
|
|
|
} |
2530
|
|
|
|
|
|
|
|
2531
|
|
|
|
|
|
|
my $ancestor = $node; |
2532
|
|
|
|
|
|
|
while ($ancestor) |
2533
|
|
|
|
|
|
|
{ |
2534
|
|
|
|
|
|
|
return $ancestor->nextSibling if $ancestor->nextSibling; |
2535
|
|
|
|
|
|
|
$ancestor = $ancestor->parentNode; |
2536
|
|
|
|
|
|
|
} |
2537
|
|
|
|
|
|
|
|
2538
|
|
|
|
|
|
|
return undef; |
2539
|
|
|
|
|
|
|
} |
2540
|
|
|
|
|
|
|
|
2541
|
|
|
|
|
|
|
sub _element_to_string |
2542
|
|
|
|
|
|
|
{ |
2543
|
|
|
|
|
|
|
my $self = shift; |
2544
|
|
|
|
|
|
|
my $dom = shift; |
2545
|
|
|
|
|
|
|
|
2546
|
|
|
|
|
|
|
if ($dom->nodeType == XML_TEXT_NODE) |
2547
|
|
|
|
|
|
|
{ |
2548
|
|
|
|
|
|
|
return $dom->getData; |
2549
|
|
|
|
|
|
|
} |
2550
|
|
|
|
|
|
|
elsif ($dom->nodeType == XML_ELEMENT_NODE) |
2551
|
|
|
|
|
|
|
{ |
2552
|
|
|
|
|
|
|
my $rv = ''; |
2553
|
|
|
|
|
|
|
foreach my $kid ($dom->childNodes) |
2554
|
|
|
|
|
|
|
{ $rv .= $self->_element_to_string($kid); } |
2555
|
|
|
|
|
|
|
return $rv; |
2556
|
|
|
|
|
|
|
} |
2557
|
|
|
|
|
|
|
|
2558
|
|
|
|
|
|
|
return ''; |
2559
|
|
|
|
|
|
|
} |
2560
|
|
|
|
|
|
|
|
2561
|
|
|
|
|
|
|
sub _element_to_xml |
2562
|
|
|
|
|
|
|
{ |
2563
|
|
|
|
|
|
|
my $self = shift; |
2564
|
|
|
|
|
|
|
my $dom = shift; |
2565
|
|
|
|
|
|
|
my $lang = shift; |
2566
|
|
|
|
|
|
|
my $rv; |
2567
|
|
|
|
|
|
|
|
2568
|
|
|
|
|
|
|
foreach my $kid ($dom->childNodes) |
2569
|
|
|
|
|
|
|
{ |
2570
|
|
|
|
|
|
|
my $fakelang = 0; |
2571
|
|
|
|
|
|
|
if (($kid->nodeType == XML_ELEMENT_NODE) && defined $lang) |
2572
|
|
|
|
|
|
|
{ |
2573
|
|
|
|
|
|
|
unless ($kid->hasAttributeNS(XML_XML_NS, 'lang')) |
2574
|
|
|
|
|
|
|
{ |
2575
|
|
|
|
|
|
|
$kid->setAttributeNS(XML_XML_NS, 'lang', $lang); |
2576
|
|
|
|
|
|
|
$fakelang++; |
2577
|
|
|
|
|
|
|
} |
2578
|
|
|
|
|
|
|
} |
2579
|
|
|
|
|
|
|
|
2580
|
|
|
|
|
|
|
$rv .= $kid->toStringEC14N(1); |
2581
|
|
|
|
|
|
|
|
2582
|
|
|
|
|
|
|
if ($fakelang) |
2583
|
|
|
|
|
|
|
{ |
2584
|
|
|
|
|
|
|
$kid->removeAttributeNS(XML_XML_NS, 'lang'); |
2585
|
|
|
|
|
|
|
} |
2586
|
|
|
|
|
|
|
} |
2587
|
|
|
|
|
|
|
|
2588
|
|
|
|
|
|
|
return $rv; |
2589
|
|
|
|
|
|
|
} |
2590
|
|
|
|
|
|
|
|
2591
|
|
|
|
|
|
|
sub bnode |
2592
|
|
|
|
|
|
|
{ |
2593
|
|
|
|
|
|
|
my $self = shift; |
2594
|
|
|
|
|
|
|
my $element = shift; |
2595
|
|
|
|
|
|
|
my $save_me = shift || 0; |
2596
|
|
|
|
|
|
|
my $ident = shift || undef; |
2597
|
|
|
|
|
|
|
|
2598
|
|
|
|
|
|
|
if (defined $element |
2599
|
|
|
|
|
|
|
and $self->{'saved_bnodes'}->{ $element->nodePath }) |
2600
|
|
|
|
|
|
|
{ |
2601
|
|
|
|
|
|
|
return $self->{'saved_bnodes'}->{ $element->nodePath }; |
2602
|
|
|
|
|
|
|
} |
2603
|
|
|
|
|
|
|
|
2604
|
|
|
|
|
|
|
elsif (defined $ident |
2605
|
|
|
|
|
|
|
and $self->{'saved_bnodes'}->{ $ident }) |
2606
|
|
|
|
|
|
|
{ |
2607
|
|
|
|
|
|
|
return $self->{'saved_bnodes'}->{ $ident }; |
2608
|
|
|
|
|
|
|
} |
2609
|
|
|
|
|
|
|
|
2610
|
|
|
|
|
|
|
return sprintf('http://thing-described-by.org/?%s#%s', |
2611
|
|
|
|
|
|
|
$self->uri, |
2612
|
|
|
|
|
|
|
$self->{element}->getAttribute('id')) |
2613
|
|
|
|
|
|
|
if ($self->{options}->{tdb_service} && $element && length $element->getAttribute('id')); |
2614
|
|
|
|
|
|
|
|
2615
|
|
|
|
|
|
|
unless (defined $self->{bnode_prefix}) |
2616
|
|
|
|
|
|
|
{ |
2617
|
|
|
|
|
|
|
$self->{bnode_prefix} = Data::UUID->new->create_str; |
2618
|
|
|
|
|
|
|
$self->{bnode_prefix} =~ s/-//g; |
2619
|
|
|
|
|
|
|
} |
2620
|
|
|
|
|
|
|
|
2621
|
|
|
|
|
|
|
my $rv; |
2622
|
|
|
|
|
|
|
if ($self->{options}->{skolemize}) |
2623
|
|
|
|
|
|
|
{ |
2624
|
|
|
|
|
|
|
$rv = sprintf('tag:buzzword.org.uk,2010:RDF-RDFa-Parser:skolem:%s:%04d', $self->{bnode_prefix}, $self->{bnodes}++); |
2625
|
|
|
|
|
|
|
} |
2626
|
|
|
|
|
|
|
else |
2627
|
|
|
|
|
|
|
{ |
2628
|
|
|
|
|
|
|
$rv = sprintf('_:rdfa%snode%04d', $self->{bnode_prefix}, $self->{bnodes}++); |
2629
|
|
|
|
|
|
|
} |
2630
|
|
|
|
|
|
|
|
2631
|
|
|
|
|
|
|
if ($save_me and defined $element) |
2632
|
|
|
|
|
|
|
{ |
2633
|
|
|
|
|
|
|
$self->{'saved_bnodes'}->{ $element->nodePath } = $rv; |
2634
|
|
|
|
|
|
|
} |
2635
|
|
|
|
|
|
|
|
2636
|
|
|
|
|
|
|
if (defined $ident) |
2637
|
|
|
|
|
|
|
{ |
2638
|
|
|
|
|
|
|
$self->{'saved_bnodes'}->{ $ident } = $rv; |
2639
|
|
|
|
|
|
|
} |
2640
|
|
|
|
|
|
|
|
2641
|
|
|
|
|
|
|
return $rv; |
2642
|
|
|
|
|
|
|
} |
2643
|
|
|
|
|
|
|
|
2644
|
|
|
|
|
|
|
sub _valid_lang |
2645
|
|
|
|
|
|
|
{ |
2646
|
|
|
|
|
|
|
my ($self, $value_to_test) = @_; |
2647
|
|
|
|
|
|
|
|
2648
|
|
|
|
|
|
|
return 1 if (defined $value_to_test) && ($value_to_test eq ''); |
2649
|
|
|
|
|
|
|
return 0 unless defined $value_to_test; |
2650
|
|
|
|
|
|
|
|
2651
|
|
|
|
|
|
|
# Regex for recognizing RFC 4646 well-formed tags |
2652
|
|
|
|
|
|
|
# http://www.rfc-editor.org/rfc/rfc4646.txt |
2653
|
|
|
|
|
|
|
# http://tools.ietf.org/html/draft-ietf-ltru-4646bis-21 |
2654
|
|
|
|
|
|
|
|
2655
|
|
|
|
|
|
|
# The structure requires no forward references, so it reverses the order. |
2656
|
|
|
|
|
|
|
# It uses Java/Perl syntax instead of the old ABNF |
2657
|
|
|
|
|
|
|
# The uppercase comments are fragments copied from RFC 4646 |
2658
|
|
|
|
|
|
|
|
2659
|
|
|
|
|
|
|
# Note: the tool requires that any real "=" or "#" or ";" in the regex be escaped. |
2660
|
|
|
|
|
|
|
|
2661
|
|
|
|
|
|
|
my $alpha = '[a-z]'; # ALPHA |
2662
|
|
|
|
|
|
|
my $digit = '[0-9]'; # DIGIT |
2663
|
|
|
|
|
|
|
my $alphanum = '[a-z0-9]'; # ALPHA / DIGIT |
2664
|
|
|
|
|
|
|
my $x = 'x'; # private use singleton |
2665
|
|
|
|
|
|
|
my $singleton = '[a-wyz]'; # other singleton |
2666
|
|
|
|
|
|
|
my $s = '[_-]'; # separator -- lenient parsers will use [_-] -- strict will use [-] |
2667
|
|
|
|
|
|
|
|
2668
|
|
|
|
|
|
|
# Now do the components. The structure is slightly different to allow for capturing the right components. |
2669
|
|
|
|
|
|
|
# The notation (?:....) is a non-capturing version of (...): so the "?:" can be deleted if someone doesn't care about capturing. |
2670
|
|
|
|
|
|
|
|
2671
|
|
|
|
|
|
|
my $language = '([a-z]{2,8}) | ([a-z]{2,3} $s [a-z]{3})'; |
2672
|
|
|
|
|
|
|
|
2673
|
|
|
|
|
|
|
# ABNF (2*3ALPHA) / 4ALPHA / 5*8ALPHA --- note: because of how | works in regex, don't use $alpha{2,3} | $alpha{4,8} |
2674
|
|
|
|
|
|
|
# We don't have to have the general case of extlang, because there can be only one extlang (except for zh-min-nan). |
2675
|
|
|
|
|
|
|
|
2676
|
|
|
|
|
|
|
# Note: extlang invalid in Unicode language tags |
2677
|
|
|
|
|
|
|
|
2678
|
|
|
|
|
|
|
my $script = '[a-z]{4}' ; # 4ALPHA |
2679
|
|
|
|
|
|
|
|
2680
|
|
|
|
|
|
|
my $region = '(?: [a-z]{2}|[0-9]{3})' ; # 2ALPHA / 3DIGIT |
2681
|
|
|
|
|
|
|
|
2682
|
|
|
|
|
|
|
my $variant = '(?: [a-z0-9]{5,8} | [0-9] [a-z0-9]{3} )' ; # 5*8alphanum / (DIGIT 3alphanum) |
2683
|
|
|
|
|
|
|
|
2684
|
|
|
|
|
|
|
my $extension = '(?: [a-wyz] (?: [_-] [a-z0-9]{2,8} )+ )' ; # singleton 1*("-" (2*8alphanum)) |
2685
|
|
|
|
|
|
|
|
2686
|
|
|
|
|
|
|
my $privateUse = '(?: x (?: [_-] [a-z0-9]{1,8} )+ )' ; # "x" 1*("-" (1*8alphanum)) |
2687
|
|
|
|
|
|
|
|
2688
|
|
|
|
|
|
|
# Define certain grandfathered codes, since otherwise the regex is pretty useless. |
2689
|
|
|
|
|
|
|
# Since these are limited, this is safe even later changes to the registry -- |
2690
|
|
|
|
|
|
|
# the only oddity is that it might change the type of the tag, and thus |
2691
|
|
|
|
|
|
|
# the results from the capturing groups. |
2692
|
|
|
|
|
|
|
# http://www.iana.org/assignments/language-subtag-registry |
2693
|
|
|
|
|
|
|
# Note that these have to be compared case insensitively, requiring (?i) below. |
2694
|
|
|
|
|
|
|
|
2695
|
|
|
|
|
|
|
my $grandfathered = '(?: |
2696
|
|
|
|
|
|
|
(en [_-] GB [_-] oed) |
2697
|
|
|
|
|
|
|
| (i [_-] (?: ami | bnn | default | enochian | hak | klingon | lux | mingo | navajo | pwn | tao | tay | tsu )) |
2698
|
|
|
|
|
|
|
| (no [_-] (?: bok | nyn )) |
2699
|
|
|
|
|
|
|
| (sgn [_-] (?: BE [_-] (?: fr | nl) | CH [_-] de )) |
2700
|
|
|
|
|
|
|
| (zh [_-] min [_-] nan) |
2701
|
|
|
|
|
|
|
)'; |
2702
|
|
|
|
|
|
|
|
2703
|
|
|
|
|
|
|
# old: | zh $s (?: cmn (?: $s Hans | $s Hant )? | gan | min (?: $s nan)? | wuu | yue ); |
2704
|
|
|
|
|
|
|
# For well-formedness, we don't need the ones that would otherwise pass. |
2705
|
|
|
|
|
|
|
# For validity, they need to be checked. |
2706
|
|
|
|
|
|
|
|
2707
|
|
|
|
|
|
|
# $grandfatheredWellFormed = (?: |
2708
|
|
|
|
|
|
|
# art $s lojban |
2709
|
|
|
|
|
|
|
# | cel $s gaulish |
2710
|
|
|
|
|
|
|
# | zh $s (?: guoyu | hakka | xiang ) |
2711
|
|
|
|
|
|
|
# ); |
2712
|
|
|
|
|
|
|
|
2713
|
|
|
|
|
|
|
# Unicode locales: but we are shifting to a compatible form |
2714
|
|
|
|
|
|
|
# $keyvalue = (?: $alphanum+ \= $alphanum+); |
2715
|
|
|
|
|
|
|
# $keywords = ($keyvalue (?: \; $keyvalue)*); |
2716
|
|
|
|
|
|
|
|
2717
|
|
|
|
|
|
|
# We separate items that we want to capture as a single group |
2718
|
|
|
|
|
|
|
|
2719
|
|
|
|
|
|
|
my $variantList = $variant . '(?:' . $s . $variant . ')*' ; # special for multiples |
2720
|
|
|
|
|
|
|
my $extensionList = $extension . '(?:' . $s . $extension . ')*' ; # special for multiples |
2721
|
|
|
|
|
|
|
|
2722
|
|
|
|
|
|
|
my $langtag = " |
2723
|
|
|
|
|
|
|
($language) |
2724
|
|
|
|
|
|
|
($s ( $script ) )? |
2725
|
|
|
|
|
|
|
($s ( $region ) )? |
2726
|
|
|
|
|
|
|
($s ( $variantList ) )? |
2727
|
|
|
|
|
|
|
($s ( $extensionList ) )? |
2728
|
|
|
|
|
|
|
($s ( $privateUse ) )? |
2729
|
|
|
|
|
|
|
"; |
2730
|
|
|
|
|
|
|
|
2731
|
|
|
|
|
|
|
# Here is the final breakdown, with capturing groups for each of these components |
2732
|
|
|
|
|
|
|
# The variants, extensions, grandfathered, and private-use may have interior '-' |
2733
|
|
|
|
|
|
|
|
2734
|
|
|
|
|
|
|
my $r = ($value_to_test =~ |
2735
|
|
|
|
|
|
|
/^( |
2736
|
|
|
|
|
|
|
($langtag) |
2737
|
|
|
|
|
|
|
| ($privateUse) |
2738
|
|
|
|
|
|
|
| ($grandfathered) |
2739
|
|
|
|
|
|
|
)$/xi); |
2740
|
|
|
|
|
|
|
return $r; |
2741
|
|
|
|
|
|
|
} |
2742
|
|
|
|
|
|
|
|
2743
|
|
|
|
|
|
|
sub _expand_curie |
2744
|
|
|
|
|
|
|
{ |
2745
|
|
|
|
|
|
|
my ($self, $token, %args) = @_; |
2746
|
|
|
|
|
|
|
my $r = $self->__expand_curie($token, %args); |
2747
|
|
|
|
|
|
|
|
2748
|
|
|
|
|
|
|
if (defined $self->{'sub'}->{'ontoken'}) |
2749
|
|
|
|
|
|
|
{ |
2750
|
|
|
|
|
|
|
return $self->{'sub'}->{'ontoken'}($self, $args{element}, $token, $r); |
2751
|
|
|
|
|
|
|
} |
2752
|
|
|
|
|
|
|
|
2753
|
|
|
|
|
|
|
return $r; |
2754
|
|
|
|
|
|
|
} |
2755
|
|
|
|
|
|
|
|
2756
|
|
|
|
|
|
|
sub __expand_curie |
2757
|
|
|
|
|
|
|
{ |
2758
|
|
|
|
|
|
|
my ($self, $token, %args) = @_; |
2759
|
|
|
|
|
|
|
|
2760
|
|
|
|
|
|
|
# Blank nodes |
2761
|
|
|
|
|
|
|
{ |
2762
|
|
|
|
|
|
|
my $bnode; |
2763
|
|
|
|
|
|
|
if ($token eq '_:' || $token eq '[_:]') |
2764
|
|
|
|
|
|
|
{ $bnode = $self->bnode(undef, undef, '_:'); } |
2765
|
|
|
|
|
|
|
elsif ($token =~ /^_:(.+)$/i || $token =~ /^\[_:(.+)\]$/i) |
2766
|
|
|
|
|
|
|
{ $bnode = $self->bnode(undef, undef, '_:'.$1); } |
2767
|
|
|
|
|
|
|
|
2768
|
|
|
|
|
|
|
if (defined $bnode) |
2769
|
|
|
|
|
|
|
{ |
2770
|
|
|
|
|
|
|
if ($args{'attribute'} =~ /^(rel|rev|property|datatype)$/i) |
2771
|
|
|
|
|
|
|
{ |
2772
|
|
|
|
|
|
|
$self->_log_error( |
2773
|
|
|
|
|
|
|
ERR_ERROR, |
2774
|
|
|
|
|
|
|
ERR_CODE_BNODE_WRONGPLACE, |
2775
|
|
|
|
|
|
|
"Blank node found in $args{attribute} where URIs are expected as values.", |
2776
|
|
|
|
|
|
|
token => $token, |
2777
|
|
|
|
|
|
|
element => $args{element}, |
2778
|
|
|
|
|
|
|
attribute => $args{attribute}, |
2779
|
|
|
|
|
|
|
); |
2780
|
|
|
|
|
|
|
|
2781
|
|
|
|
|
|
|
return $1 if $token =~ /^\[_:(.+)\]$/i; |
2782
|
|
|
|
|
|
|
return $token; |
2783
|
|
|
|
|
|
|
} |
2784
|
|
|
|
|
|
|
|
2785
|
|
|
|
|
|
|
return $bnode; |
2786
|
|
|
|
|
|
|
} |
2787
|
|
|
|
|
|
|
} |
2788
|
|
|
|
|
|
|
|
2789
|
|
|
|
|
|
|
my $is_safe = 0; |
2790
|
|
|
|
|
|
|
if ($token =~ /^\[(.*)\]$/) |
2791
|
|
|
|
|
|
|
{ |
2792
|
|
|
|
|
|
|
$is_safe = 1; |
2793
|
|
|
|
|
|
|
$token = $1; |
2794
|
|
|
|
|
|
|
} |
2795
|
|
|
|
|
|
|
|
2796
|
|
|
|
|
|
|
# CURIEs - default vocab |
2797
|
|
|
|
|
|
|
if ($token =~ /^($XML::RegExp::NCName)$/ |
2798
|
|
|
|
|
|
|
and ($is_safe || $args{'attribute'} =~ /^(rel|rev|property|typeof|datatype|role)$/i || $args{'allow_unsafe_default_vocab'})) |
2799
|
|
|
|
|
|
|
{ |
2800
|
|
|
|
|
|
|
my $suffix = $token; |
2801
|
|
|
|
|
|
|
|
2802
|
|
|
|
|
|
|
if ($args{'attribute'} eq 'role') |
2803
|
|
|
|
|
|
|
{ return 'http://www.w3.org/1999/xhtml/vocab#' . $suffix; } |
2804
|
|
|
|
|
|
|
elsif (defined $args{'prefixes'}{'(VOCAB)'}) |
2805
|
|
|
|
|
|
|
{ return $args{'prefixes'}{'(VOCAB)'} . $suffix; } |
2806
|
|
|
|
|
|
|
|
2807
|
|
|
|
|
|
|
return undef if $is_safe; |
2808
|
|
|
|
|
|
|
} |
2809
|
|
|
|
|
|
|
|
2810
|
|
|
|
|
|
|
|
2811
|
|
|
|
|
|
|
# Keywords / terms / whatever-they're-called |
2812
|
|
|
|
|
|
|
if ($token =~ /^($XML::RegExp::NCName)$/ |
2813
|
|
|
|
|
|
|
and ($is_safe || $args{'attribute'} =~ /^(rel|rev|property|typeof|datatype|role)$/i || $args{'allow_unsafe_term'})) |
2814
|
|
|
|
|
|
|
{ |
2815
|
|
|
|
|
|
|
my $terms = $args{'terms'}; |
2816
|
|
|
|
|
|
|
my $attr = $args{'attribute'}; |
2817
|
|
|
|
|
|
|
|
2818
|
|
|
|
|
|
|
return $terms->{'sensitive'}{$attr}{$token} |
2819
|
|
|
|
|
|
|
if defined $terms->{'sensitive'}{ $attr }{$token}; |
2820
|
|
|
|
|
|
|
|
2821
|
|
|
|
|
|
|
return $terms->{'sensitive'}{'*'}{$token} |
2822
|
|
|
|
|
|
|
if defined $terms->{'sensitive'}{'*'}{$token}; |
2823
|
|
|
|
|
|
|
|
2824
|
|
|
|
|
|
|
return $terms->{'insensitive'}{$attr}{lc $token} |
2825
|
|
|
|
|
|
|
if defined $terms->{'insensitive'}{$attr}{lc $token}; |
2826
|
|
|
|
|
|
|
|
2827
|
|
|
|
|
|
|
return $terms->{'insensitive'}{'*'}{lc $token} |
2828
|
|
|
|
|
|
|
if defined $terms->{'insensitive'}{'*'}{lc $token}; |
2829
|
|
|
|
|
|
|
} |
2830
|
|
|
|
|
|
|
|
2831
|
|
|
|
|
|
|
# CURIEs - prefixed |
2832
|
|
|
|
|
|
|
if ($token =~ /^($XML::RegExp::NCName)?:(\S*)$/ |
2833
|
|
|
|
|
|
|
and ( |
2834
|
|
|
|
|
|
|
$is_safe |
2835
|
|
|
|
|
|
|
or $args{attribute} =~ /^(rel|rev|property|typeof|datatype|role)$/i |
2836
|
|
|
|
|
|
|
or $self->{options}{safe_optional} |
2837
|
|
|
|
|
|
|
)) |
2838
|
|
|
|
|
|
|
{ |
2839
|
|
|
|
|
|
|
$token =~ /^($XML::RegExp::NCName)?:(\S*)$/; |
2840
|
|
|
|
|
|
|
my $prefix = (defined $1 && length $1) ? $1 : '(DEFAULT PREFIX)'; |
2841
|
|
|
|
|
|
|
my $suffix = $2; |
2842
|
|
|
|
|
|
|
|
2843
|
|
|
|
|
|
|
if (defined $args{'prefixes'}{'(DEFAULT PREFIX)'} && $prefix eq '(DEFAULT PREFIX)') |
2844
|
|
|
|
|
|
|
{ return $args{'prefixes'}{'(DEFAULT PREFIX)'} . $suffix; } |
2845
|
|
|
|
|
|
|
elsif (defined $args{'prefixes'}{'sensitive'}{$prefix}) |
2846
|
|
|
|
|
|
|
{ return $args{'prefixes'}{'sensitive'}{$prefix} . $suffix; } |
2847
|
|
|
|
|
|
|
elsif (defined $args{'prefixes'}{'insensitive'}{lc $prefix}) |
2848
|
|
|
|
|
|
|
{ return $args{'prefixes'}{'insensitive'}{lc $prefix} . $suffix; } |
2849
|
|
|
|
|
|
|
|
2850
|
|
|
|
|
|
|
if ($is_safe) |
2851
|
|
|
|
|
|
|
{ |
2852
|
|
|
|
|
|
|
$prefix = ($prefix eq '(DEFAULT PREFIX)') ? '' : $prefix; |
2853
|
|
|
|
|
|
|
$self->_log_error( |
2854
|
|
|
|
|
|
|
ERR_WARNING, |
2855
|
|
|
|
|
|
|
ERR_CODE_CURIE_UNDEFINED, |
2856
|
|
|
|
|
|
|
"CURIE '$token' used in safe CURIE, but '$prefix' is undefined.", |
2857
|
|
|
|
|
|
|
token => $token, |
2858
|
|
|
|
|
|
|
element => $args{element}, |
2859
|
|
|
|
|
|
|
attribute => $args{attribute}, |
2860
|
|
|
|
|
|
|
prefix => $prefix, |
2861
|
|
|
|
|
|
|
); |
2862
|
|
|
|
|
|
|
return undef; |
2863
|
|
|
|
|
|
|
} |
2864
|
|
|
|
|
|
|
} |
2865
|
|
|
|
|
|
|
|
2866
|
|
|
|
|
|
|
# CURIEs - bare prefixes |
2867
|
|
|
|
|
|
|
if ($self->{options}{prefix_bare} |
2868
|
|
|
|
|
|
|
and $token =~ /^($XML::RegExp::NCName)$/ |
2869
|
|
|
|
|
|
|
and ( |
2870
|
|
|
|
|
|
|
$is_safe |
2871
|
|
|
|
|
|
|
or $args{attribute} =~ /^(rel|rev|property|typeof|datatype|role)$/i |
2872
|
|
|
|
|
|
|
or $self->{options}{safe_optional} |
2873
|
|
|
|
|
|
|
)) |
2874
|
|
|
|
|
|
|
{ |
2875
|
|
|
|
|
|
|
my $prefix = $token; |
2876
|
|
|
|
|
|
|
my $suffix = ''; |
2877
|
|
|
|
|
|
|
|
2878
|
|
|
|
|
|
|
if (defined $args{'prefixes'}{'sensitive'}{$prefix}) |
2879
|
|
|
|
|
|
|
{ return $args{'prefixes'}{'sensitive'}{$prefix} . $suffix; } |
2880
|
|
|
|
|
|
|
elsif (defined $args{'prefixes'}{'insensitive'}{lc $prefix}) |
2881
|
|
|
|
|
|
|
{ return $args{'prefixes'}{'insensitive'}{lc $prefix} . $suffix; } |
2882
|
|
|
|
|
|
|
} |
2883
|
|
|
|
|
|
|
|
2884
|
|
|
|
|
|
|
# Absolute URIs |
2885
|
|
|
|
|
|
|
if ($token =~ /^[A-Z][A-Z0-9\.\+-]*:/i and !$is_safe |
2886
|
|
|
|
|
|
|
and ($self->{'options'}{'full_uris'} || $args{'attribute'} =~ /^(about|resource|graph)$/i)) |
2887
|
|
|
|
|
|
|
{ |
2888
|
|
|
|
|
|
|
return $token; |
2889
|
|
|
|
|
|
|
} |
2890
|
|
|
|
|
|
|
|
2891
|
|
|
|
|
|
|
# Relative URIs |
2892
|
|
|
|
|
|
|
if (!$is_safe and ($args{'attribute'} =~ /^(about|resource|graph)$/i || $args{'allow_relative'})) |
2893
|
|
|
|
|
|
|
{ |
2894
|
|
|
|
|
|
|
return $self->uri($token, {'element'=>$args{'element'}, 'xml_base'=>$args{'xml_base'}}); |
2895
|
|
|
|
|
|
|
} |
2896
|
|
|
|
|
|
|
|
2897
|
|
|
|
|
|
|
$self->_log_error( |
2898
|
|
|
|
|
|
|
ERR_WARNING, |
2899
|
|
|
|
|
|
|
ERR_CODE_CURIE_FELLTHROUGH, |
2900
|
|
|
|
|
|
|
"Couldn't make sense of token '$token'.", |
2901
|
|
|
|
|
|
|
token => $token, |
2902
|
|
|
|
|
|
|
element => $args{element}, |
2903
|
|
|
|
|
|
|
attribute => $args{attribute}, |
2904
|
|
|
|
|
|
|
); |
2905
|
|
|
|
|
|
|
|
2906
|
|
|
|
|
|
|
return undef; |
2907
|
|
|
|
|
|
|
} |
2908
|
|
|
|
|
|
|
|
2909
|
|
|
|
|
|
|
__PACKAGE__ |
2910
|
|
|
|
|
|
|
__END__ |
2911
|
|
|
|
|
|
|
|
2912
|
|
|
|
|
|
|
=head1 NAME |
2913
|
|
|
|
|
|
|
|
2914
|
|
|
|
|
|
|
RDF::RDFa::Parser - flexible RDFa parser |
2915
|
|
|
|
|
|
|
|
2916
|
|
|
|
|
|
|
=head1 SYNOPSIS |
2917
|
|
|
|
|
|
|
|
2918
|
|
|
|
|
|
|
If you're wanting to work with an RDF::Trine::Model that can be queried with SPARQL, etc: |
2919
|
|
|
|
|
|
|
|
2920
|
|
|
|
|
|
|
use RDF::RDFa::Parser; |
2921
|
|
|
|
|
|
|
my $url = 'http://example.com/document.html'; |
2922
|
|
|
|
|
|
|
my $options = RDF::RDFa::Parser::Config->new('xhtml', '1.1'); |
2923
|
|
|
|
|
|
|
my $rdfa = RDF::RDFa::Parser->new_from_url($url, $options); |
2924
|
|
|
|
|
|
|
my $model = $rdfa->graph; |
2925
|
|
|
|
|
|
|
|
2926
|
|
|
|
|
|
|
For dealing with local data: |
2927
|
|
|
|
|
|
|
|
2928
|
|
|
|
|
|
|
use RDF::RDFa::Parser; |
2929
|
|
|
|
|
|
|
my $base_url = 'http://example.com/document.html'; |
2930
|
|
|
|
|
|
|
my $options = RDF::RDFa::Parser::Config->new('xhtml', '1.1'); |
2931
|
|
|
|
|
|
|
my $rdfa = RDF::RDFa::Parser->new($markup, $base_url, $options); |
2932
|
|
|
|
|
|
|
my $model = $rdfa->graph; |
2933
|
|
|
|
|
|
|
|
2934
|
|
|
|
|
|
|
A simple set of operations for working with Open Graph Protocol data: |
2935
|
|
|
|
|
|
|
|
2936
|
|
|
|
|
|
|
use RDF::RDFa::Parser; |
2937
|
|
|
|
|
|
|
my $url = 'http://www.rottentomatoes.com/m/net/'; |
2938
|
|
|
|
|
|
|
my $options = RDF::RDFa::Parser::Config->tagsoup; |
2939
|
|
|
|
|
|
|
my $rdfa = RDF::RDFa::Parser->new_from_url($url, $options); |
2940
|
|
|
|
|
|
|
print $rdfa->opengraph('title') . "\n"; |
2941
|
|
|
|
|
|
|
print $rdfa->opengraph('image') . "\n"; |
2942
|
|
|
|
|
|
|
|
2943
|
|
|
|
|
|
|
=head1 DESCRIPTION |
2944
|
|
|
|
|
|
|
|
2945
|
|
|
|
|
|
|
L<RDF::TrineX::Parser::RDFa> provides a saner interface for this module. |
2946
|
|
|
|
|
|
|
If you are new to parsing RDFa with Perl, then that's the best place to |
2947
|
|
|
|
|
|
|
start. |
2948
|
|
|
|
|
|
|
|
2949
|
|
|
|
|
|
|
=head2 Forthcoming API Changes |
2950
|
|
|
|
|
|
|
|
2951
|
|
|
|
|
|
|
Some of the logic regarding host language and RDFa version guessing |
2952
|
|
|
|
|
|
|
is likely to be removed from RDF::RDFa::Parser and |
2953
|
|
|
|
|
|
|
RDF::RDFa::Parser::Config, and shifted into RDF::TrineX::Parser::RDFa |
2954
|
|
|
|
|
|
|
instead. |
2955
|
|
|
|
|
|
|
|
2956
|
|
|
|
|
|
|
=head2 Constructors |
2957
|
|
|
|
|
|
|
|
2958
|
|
|
|
|
|
|
=over 4 |
2959
|
|
|
|
|
|
|
|
2960
|
|
|
|
|
|
|
=item C<< $p = RDF::RDFa::Parser->new($markup, $base, [$config], [$storage]) >> |
2961
|
|
|
|
|
|
|
|
2962
|
|
|
|
|
|
|
This method creates a new RDF::RDFa::Parser object and returns it. |
2963
|
|
|
|
|
|
|
|
2964
|
|
|
|
|
|
|
The $markup variable may contain an XHTML/XML string, or a |
2965
|
|
|
|
|
|
|
XML::LibXML::Document. If a string, the document is parsed using |
2966
|
|
|
|
|
|
|
XML::LibXML::Parser or HTML::HTML5::Parser, depending on the |
2967
|
|
|
|
|
|
|
configuration in $config. XML well-formedness errors will cause the |
2968
|
|
|
|
|
|
|
function to die. |
2969
|
|
|
|
|
|
|
|
2970
|
|
|
|
|
|
|
$base is a URL used to resolve relative links found in the document. |
2971
|
|
|
|
|
|
|
|
2972
|
|
|
|
|
|
|
$config optionally holds an RDF::RDFa::Parser::Config object which |
2973
|
|
|
|
|
|
|
determines the set of rules used to parse the RDFa. It defaults to |
2974
|
|
|
|
|
|
|
XHTML+RDFa 1.1. |
2975
|
|
|
|
|
|
|
|
2976
|
|
|
|
|
|
|
B<Advanced usage note:> $storage optionally holds an RDF::Trine::Store |
2977
|
|
|
|
|
|
|
object. If undef, then a new temporary store is created. |
2978
|
|
|
|
|
|
|
|
2979
|
|
|
|
|
|
|
=item C<< $p = RDF::RDFa::Parser->new_from_url($url, [$config], [$storage]) >> |
2980
|
|
|
|
|
|
|
|
2981
|
|
|
|
|
|
|
=item C<< $p = RDF::RDFa::Parser->new_from_uri($url, [$config], [$storage]) >> |
2982
|
|
|
|
|
|
|
|
2983
|
|
|
|
|
|
|
$url is a URL to fetch and parse, or an HTTP::Response object. |
2984
|
|
|
|
|
|
|
|
2985
|
|
|
|
|
|
|
$config optionally holds an RDF::RDFa::Parser::Config object which |
2986
|
|
|
|
|
|
|
determines the set of rules used to parse the RDFa. The default is |
2987
|
|
|
|
|
|
|
to determine the configuration by looking at the HTTP response |
2988
|
|
|
|
|
|
|
Content-Type header; it's probably sensible to keep the default. |
2989
|
|
|
|
|
|
|
|
2990
|
|
|
|
|
|
|
$storage optionally holds an RDF::Trine::Store object. If undef, then |
2991
|
|
|
|
|
|
|
a new temporary store is created. |
2992
|
|
|
|
|
|
|
|
2993
|
|
|
|
|
|
|
This function can also be called as C<new_from_url> or C<new_from_uri>. |
2994
|
|
|
|
|
|
|
Same thing. |
2995
|
|
|
|
|
|
|
|
2996
|
|
|
|
|
|
|
=item C<< $p = RDF::RDFa::Parser->new_from_response($response, [$config], [$storage]) >> |
2997
|
|
|
|
|
|
|
|
2998
|
|
|
|
|
|
|
$response is an C<HTTP::Response> object. |
2999
|
|
|
|
|
|
|
|
3000
|
|
|
|
|
|
|
Otherwise the same as C<new_from_url>. |
3001
|
|
|
|
|
|
|
|
3002
|
|
|
|
|
|
|
=back |
3003
|
|
|
|
|
|
|
|
3004
|
|
|
|
|
|
|
=head2 Public Methods |
3005
|
|
|
|
|
|
|
|
3006
|
|
|
|
|
|
|
=over 4 |
3007
|
|
|
|
|
|
|
|
3008
|
|
|
|
|
|
|
=item C<< $p->graph >> |
3009
|
|
|
|
|
|
|
|
3010
|
|
|
|
|
|
|
This will return an RDF::Trine::Model containing all the RDFa |
3011
|
|
|
|
|
|
|
data found on the page. |
3012
|
|
|
|
|
|
|
|
3013
|
|
|
|
|
|
|
B<Advanced usage note:> If passed a graph URI as a parameter, |
3014
|
|
|
|
|
|
|
will return a single named graph from within the page. This |
3015
|
|
|
|
|
|
|
feature is only useful if you're using named graphs. |
3016
|
|
|
|
|
|
|
|
3017
|
|
|
|
|
|
|
=item C<< $p->graphs >> |
3018
|
|
|
|
|
|
|
|
3019
|
|
|
|
|
|
|
B<Advanced usage only.> |
3020
|
|
|
|
|
|
|
|
3021
|
|
|
|
|
|
|
Will return a hashref of all named graphs, where the graph name is a |
3022
|
|
|
|
|
|
|
key and the value is a RDF::Trine::Model tied to a temporary storage. |
3023
|
|
|
|
|
|
|
|
3024
|
|
|
|
|
|
|
This method is only useful if you're using named graphs. |
3025
|
|
|
|
|
|
|
|
3026
|
|
|
|
|
|
|
=item C<< $p->opengraph([$property]) >> |
3027
|
|
|
|
|
|
|
|
3028
|
|
|
|
|
|
|
If $property is provided, will return the value or list of values (if |
3029
|
|
|
|
|
|
|
called in list context) for that Open Graph Protocol property. (In pure |
3030
|
|
|
|
|
|
|
RDF terms, it returns the non-bnode objects of triples where the |
3031
|
|
|
|
|
|
|
subject is the document base URI; and the predicate is $property, |
3032
|
|
|
|
|
|
|
with non-URI $property strings taken as having the implicit prefix |
3033
|
|
|
|
|
|
|
'http://ogp.me/ns#'. There is no distinction between literal and |
3034
|
|
|
|
|
|
|
non-literal values; literal datatypes and languages are dropped.) |
3035
|
|
|
|
|
|
|
|
3036
|
|
|
|
|
|
|
If $property is omitted, returns a list of possible properties. |
3037
|
|
|
|
|
|
|
|
3038
|
|
|
|
|
|
|
Example: |
3039
|
|
|
|
|
|
|
|
3040
|
|
|
|
|
|
|
foreach my $property (sort $p->opengraph) |
3041
|
|
|
|
|
|
|
{ |
3042
|
|
|
|
|
|
|
print "$property :\n"; |
3043
|
|
|
|
|
|
|
foreach my $val (sort $p->opengraph($property)) |
3044
|
|
|
|
|
|
|
{ |
3045
|
|
|
|
|
|
|
print " * $val\n"; |
3046
|
|
|
|
|
|
|
} |
3047
|
|
|
|
|
|
|
} |
3048
|
|
|
|
|
|
|
|
3049
|
|
|
|
|
|
|
See also: L<http://opengraphprotocol.org/>. |
3050
|
|
|
|
|
|
|
|
3051
|
|
|
|
|
|
|
=item C<< $p->dom >> |
3052
|
|
|
|
|
|
|
|
3053
|
|
|
|
|
|
|
Returns the parsed XML::LibXML::Document. |
3054
|
|
|
|
|
|
|
|
3055
|
|
|
|
|
|
|
=item C<< $p->uri( [$other_uri] ) >> |
3056
|
|
|
|
|
|
|
|
3057
|
|
|
|
|
|
|
Returns the base URI of the document being parsed. This will usually be the |
3058
|
|
|
|
|
|
|
same as the base URI provided to the constructor, but may differ if the |
3059
|
|
|
|
|
|
|
document contains a <base> HTML element. |
3060
|
|
|
|
|
|
|
|
3061
|
|
|
|
|
|
|
Optionally it may be passed a parameter - an absolute or relative URI - in |
3062
|
|
|
|
|
|
|
which case it returns the same URI which it was passed as a parameter, but |
3063
|
|
|
|
|
|
|
as an absolute URI, resolved relative to the document's base URI. |
3064
|
|
|
|
|
|
|
|
3065
|
|
|
|
|
|
|
This seems like two unrelated functions, but if you consider the consequence |
3066
|
|
|
|
|
|
|
of passing a relative URI consisting of a zero-length string, it in fact makes |
3067
|
|
|
|
|
|
|
sense. |
3068
|
|
|
|
|
|
|
|
3069
|
|
|
|
|
|
|
=item C<< $p->errors >> |
3070
|
|
|
|
|
|
|
|
3071
|
|
|
|
|
|
|
Returns a list of errors and warnings that occurred during parsing. |
3072
|
|
|
|
|
|
|
|
3073
|
|
|
|
|
|
|
=item C<< $p->processor_graph >> |
3074
|
|
|
|
|
|
|
|
3075
|
|
|
|
|
|
|
As per C<< $p->errors >> but returns data as an RDF model. |
3076
|
|
|
|
|
|
|
|
3077
|
|
|
|
|
|
|
=item C<< $p->output_graph >> |
3078
|
|
|
|
|
|
|
|
3079
|
|
|
|
|
|
|
An alias for C<graph>, but does not accept a parameter. |
3080
|
|
|
|
|
|
|
|
3081
|
|
|
|
|
|
|
=item C<< $p->processor_and_output_graph >> |
3082
|
|
|
|
|
|
|
|
3083
|
|
|
|
|
|
|
Union of the above two graphs. |
3084
|
|
|
|
|
|
|
|
3085
|
|
|
|
|
|
|
=item C<< $p->consume >> |
3086
|
|
|
|
|
|
|
|
3087
|
|
|
|
|
|
|
B<Advanced usage only.> |
3088
|
|
|
|
|
|
|
|
3089
|
|
|
|
|
|
|
The document is parsed for RDFa. As of RDF::RDFa::Parser 1.09x, |
3090
|
|
|
|
|
|
|
this is called automatically when needed; you probably don't need |
3091
|
|
|
|
|
|
|
to touch it unless you're doing interesting things with callbacks. |
3092
|
|
|
|
|
|
|
|
3093
|
|
|
|
|
|
|
Calling C<< $p->consume(survive => 1) >> will avoid crashing (e.g. |
3094
|
|
|
|
|
|
|
when the markup provided cannot be parsed), and instead make more |
3095
|
|
|
|
|
|
|
errors available in C<< $p->errors >>. |
3096
|
|
|
|
|
|
|
|
3097
|
|
|
|
|
|
|
=item C<< $p->set_callbacks(\%callbacks) >> |
3098
|
|
|
|
|
|
|
|
3099
|
|
|
|
|
|
|
B<Advanced usage only.> |
3100
|
|
|
|
|
|
|
|
3101
|
|
|
|
|
|
|
Set callback functions for the parser to call on certain events. These are only necessary if |
3102
|
|
|
|
|
|
|
you want to do something especially unusual. |
3103
|
|
|
|
|
|
|
|
3104
|
|
|
|
|
|
|
$p->set_callbacks({ |
3105
|
|
|
|
|
|
|
'pretriple_resource' => sub { ... } , |
3106
|
|
|
|
|
|
|
'pretriple_literal' => sub { ... } , |
3107
|
|
|
|
|
|
|
'ontriple' => undef , |
3108
|
|
|
|
|
|
|
'onprefix' => \&some_function , |
3109
|
|
|
|
|
|
|
}); |
3110
|
|
|
|
|
|
|
|
3111
|
|
|
|
|
|
|
Either of the two pretriple callbacks can be set to the string 'print' instead of a coderef. |
3112
|
|
|
|
|
|
|
This enables built-in callbacks for printing Turtle to STDOUT. |
3113
|
|
|
|
|
|
|
|
3114
|
|
|
|
|
|
|
For details of the callback functions, see the section CALLBACKS. If used, C<set_callbacks> |
3115
|
|
|
|
|
|
|
must be called I<before> C<consume>. C<set_callbacks> returns a reference to the parser |
3116
|
|
|
|
|
|
|
object itself. |
3117
|
|
|
|
|
|
|
|
3118
|
|
|
|
|
|
|
=item C<< $p->element_subjects >> |
3119
|
|
|
|
|
|
|
|
3120
|
|
|
|
|
|
|
B<Advanced usage only.> |
3121
|
|
|
|
|
|
|
|
3122
|
|
|
|
|
|
|
Gets/sets a hashref of { xpath => RDF::Trine::Node } mappings. |
3123
|
|
|
|
|
|
|
|
3124
|
|
|
|
|
|
|
This is not touched during normal RDFa parsing, only being used by the @role and |
3125
|
|
|
|
|
|
|
@cite features where RDF resources (i.e. URIs and blank nodes) are needed to |
3126
|
|
|
|
|
|
|
represent XML elements themselves. |
3127
|
|
|
|
|
|
|
|
3128
|
|
|
|
|
|
|
=back |
3129
|
|
|
|
|
|
|
|
3130
|
|
|
|
|
|
|
=head1 CALLBACKS |
3131
|
|
|
|
|
|
|
|
3132
|
|
|
|
|
|
|
Several callback functions are provided. These may be set using the C<set_callbacks> function, |
3133
|
|
|
|
|
|
|
which takes a hashref of keys pointing to coderefs. The keys are named for the event to fire the |
3134
|
|
|
|
|
|
|
callback on. |
3135
|
|
|
|
|
|
|
|
3136
|
|
|
|
|
|
|
=head2 ontriple |
3137
|
|
|
|
|
|
|
|
3138
|
|
|
|
|
|
|
This is called once a triple is ready to be added to the graph. (After the pretriple |
3139
|
|
|
|
|
|
|
callbacks.) The parameters passed to the callback function are: |
3140
|
|
|
|
|
|
|
|
3141
|
|
|
|
|
|
|
=over 4 |
3142
|
|
|
|
|
|
|
|
3143
|
|
|
|
|
|
|
=item * A reference to the C<RDF::RDFa::Parser> object |
3144
|
|
|
|
|
|
|
|
3145
|
|
|
|
|
|
|
=item * A hashref of relevant C<XML::LibXML::Element> objects (subject, predicate, object, graph, current) |
3146
|
|
|
|
|
|
|
|
3147
|
|
|
|
|
|
|
=item * An RDF::Trine::Statement object. |
3148
|
|
|
|
|
|
|
|
3149
|
|
|
|
|
|
|
=back |
3150
|
|
|
|
|
|
|
|
3151
|
|
|
|
|
|
|
The callback should return 1 to tell the parser to skip this triple (not add it to |
3152
|
|
|
|
|
|
|
the graph); return 0 otherwise. The callback may modify the RDF::Trine::Statement |
3153
|
|
|
|
|
|
|
object. |
3154
|
|
|
|
|
|
|
|
3155
|
|
|
|
|
|
|
=head2 onprefix |
3156
|
|
|
|
|
|
|
|
3157
|
|
|
|
|
|
|
This is called when a new CURIE prefix is discovered. The parameters passed |
3158
|
|
|
|
|
|
|
to the callback function are: |
3159
|
|
|
|
|
|
|
|
3160
|
|
|
|
|
|
|
=over 4 |
3161
|
|
|
|
|
|
|
|
3162
|
|
|
|
|
|
|
=item * A reference to the C<RDF::RDFa::Parser> object |
3163
|
|
|
|
|
|
|
|
3164
|
|
|
|
|
|
|
=item * A reference to the C<XML::LibXML::Element> being parsed |
3165
|
|
|
|
|
|
|
|
3166
|
|
|
|
|
|
|
=item * The prefix (string, e.g. "foaf") |
3167
|
|
|
|
|
|
|
|
3168
|
|
|
|
|
|
|
=item * The expanded URI (string, e.g. "http://xmlns.com/foaf/0.1/") |
3169
|
|
|
|
|
|
|
|
3170
|
|
|
|
|
|
|
=back |
3171
|
|
|
|
|
|
|
|
3172
|
|
|
|
|
|
|
The return value of this callback is currently ignored, but you should return |
3173
|
|
|
|
|
|
|
0 in case future versions of this module assign significance to the return value. |
3174
|
|
|
|
|
|
|
|
3175
|
|
|
|
|
|
|
=head2 ontoken |
3176
|
|
|
|
|
|
|
|
3177
|
|
|
|
|
|
|
This is called when a CURIE or term has been expanded. The parameters are: |
3178
|
|
|
|
|
|
|
|
3179
|
|
|
|
|
|
|
=over 4 |
3180
|
|
|
|
|
|
|
|
3181
|
|
|
|
|
|
|
=item * A reference to the C<RDF::RDFa::Parser> object |
3182
|
|
|
|
|
|
|
|
3183
|
|
|
|
|
|
|
=item * A reference to the C<XML::LibXML::Element> being parsed |
3184
|
|
|
|
|
|
|
|
3185
|
|
|
|
|
|
|
=item * The CURIE or token as a string (e.g. "foaf:name" or "Stylesheet") |
3186
|
|
|
|
|
|
|
|
3187
|
|
|
|
|
|
|
=item * The fully expanded URI |
3188
|
|
|
|
|
|
|
|
3189
|
|
|
|
|
|
|
=back |
3190
|
|
|
|
|
|
|
|
3191
|
|
|
|
|
|
|
The callback function must return a fully expanded URI, or if it |
3192
|
|
|
|
|
|
|
wants the CURIE to be ignored, undef. |
3193
|
|
|
|
|
|
|
|
3194
|
|
|
|
|
|
|
=head2 onerror |
3195
|
|
|
|
|
|
|
|
3196
|
|
|
|
|
|
|
This is called when an error occurs: |
3197
|
|
|
|
|
|
|
|
3198
|
|
|
|
|
|
|
=over 4 |
3199
|
|
|
|
|
|
|
|
3200
|
|
|
|
|
|
|
=item * A reference to the C<RDF::RDFa::Parser> object |
3201
|
|
|
|
|
|
|
|
3202
|
|
|
|
|
|
|
=item * The error level (RDF::RDFa::Parser::ERR_ERROR or |
3203
|
|
|
|
|
|
|
RDF::RDFa::Parser::ERR_WARNING) |
3204
|
|
|
|
|
|
|
|
3205
|
|
|
|
|
|
|
=item * An error code |
3206
|
|
|
|
|
|
|
|
3207
|
|
|
|
|
|
|
=item * An error message |
3208
|
|
|
|
|
|
|
|
3209
|
|
|
|
|
|
|
=item * A hash of other information |
3210
|
|
|
|
|
|
|
|
3211
|
|
|
|
|
|
|
=back |
3212
|
|
|
|
|
|
|
|
3213
|
|
|
|
|
|
|
The return value of this callback is currently ignored, but you should return |
3214
|
|
|
|
|
|
|
0 in case future versions of this module assign significance to the return value. |
3215
|
|
|
|
|
|
|
|
3216
|
|
|
|
|
|
|
If you do not define an onerror callback, then errors will be output via STDERR |
3217
|
|
|
|
|
|
|
and warnings will be silent. Either way, you can retrieve errors after parsing |
3218
|
|
|
|
|
|
|
using the C<errors> method. |
3219
|
|
|
|
|
|
|
|
3220
|
|
|
|
|
|
|
=head2 pretriple_resource |
3221
|
|
|
|
|
|
|
|
3222
|
|
|
|
|
|
|
B<This callback is deprecated - use ontriple instead.> |
3223
|
|
|
|
|
|
|
|
3224
|
|
|
|
|
|
|
This is called when a triple has been found, but before preparing the triple for |
3225
|
|
|
|
|
|
|
adding to the model. It is only called for triples with a non-literal object value. |
3226
|
|
|
|
|
|
|
|
3227
|
|
|
|
|
|
|
The parameters passed to the callback function are: |
3228
|
|
|
|
|
|
|
|
3229
|
|
|
|
|
|
|
=over 4 |
3230
|
|
|
|
|
|
|
|
3231
|
|
|
|
|
|
|
=item * A reference to the C<RDF::RDFa::Parser> object |
3232
|
|
|
|
|
|
|
|
3233
|
|
|
|
|
|
|
=item * A reference to the C<XML::LibXML::Element> being parsed |
3234
|
|
|
|
|
|
|
|
3235
|
|
|
|
|
|
|
=item * Subject URI or bnode (string) |
3236
|
|
|
|
|
|
|
|
3237
|
|
|
|
|
|
|
=item * Predicate URI (string) |
3238
|
|
|
|
|
|
|
|
3239
|
|
|
|
|
|
|
=item * Object URI or bnode (string) |
3240
|
|
|
|
|
|
|
|
3241
|
|
|
|
|
|
|
=item * Graph URI or bnode (string or undef) |
3242
|
|
|
|
|
|
|
|
3243
|
|
|
|
|
|
|
=back |
3244
|
|
|
|
|
|
|
|
3245
|
|
|
|
|
|
|
The callback should return 1 to tell the parser to skip this triple (not add it to |
3246
|
|
|
|
|
|
|
the graph); return 0 otherwise. |
3247
|
|
|
|
|
|
|
|
3248
|
|
|
|
|
|
|
=head2 pretriple_literal |
3249
|
|
|
|
|
|
|
|
3250
|
|
|
|
|
|
|
B<This callback is deprecated - use ontriple instead.> |
3251
|
|
|
|
|
|
|
|
3252
|
|
|
|
|
|
|
This is the equivalent of pretriple_resource, but is only called for triples with a |
3253
|
|
|
|
|
|
|
literal object value. |
3254
|
|
|
|
|
|
|
|
3255
|
|
|
|
|
|
|
The parameters passed to the callback function are: |
3256
|
|
|
|
|
|
|
|
3257
|
|
|
|
|
|
|
=over 4 |
3258
|
|
|
|
|
|
|
|
3259
|
|
|
|
|
|
|
=item * A reference to the C<RDF::RDFa::Parser> object |
3260
|
|
|
|
|
|
|
|
3261
|
|
|
|
|
|
|
=item * A reference to the C<XML::LibXML::Element> being parsed |
3262
|
|
|
|
|
|
|
|
3263
|
|
|
|
|
|
|
=item * Subject URI or bnode (string) |
3264
|
|
|
|
|
|
|
|
3265
|
|
|
|
|
|
|
=item * Predicate URI (string) |
3266
|
|
|
|
|
|
|
|
3267
|
|
|
|
|
|
|
=item * Object literal (string) |
3268
|
|
|
|
|
|
|
|
3269
|
|
|
|
|
|
|
=item * Datatype URI (string or undef) |
3270
|
|
|
|
|
|
|
|
3271
|
|
|
|
|
|
|
=item * Language (string or undef) |
3272
|
|
|
|
|
|
|
|
3273
|
|
|
|
|
|
|
=item * Graph URI or bnode (string or undef) |
3274
|
|
|
|
|
|
|
|
3275
|
|
|
|
|
|
|
=back |
3276
|
|
|
|
|
|
|
|
3277
|
|
|
|
|
|
|
Beware: sometimes both a datatype I<and> a language will be passed. |
3278
|
|
|
|
|
|
|
This goes beyond the normal RDF data model.) |
3279
|
|
|
|
|
|
|
|
3280
|
|
|
|
|
|
|
The callback should return 1 to tell the parser to skip this triple (not add it to |
3281
|
|
|
|
|
|
|
the graph); return 0 otherwise. |
3282
|
|
|
|
|
|
|
|
3283
|
|
|
|
|
|
|
=head1 FEATURES |
3284
|
|
|
|
|
|
|
|
3285
|
|
|
|
|
|
|
Most features are configurable using L<RDF::RDFa::Parser::Config>. |
3286
|
|
|
|
|
|
|
|
3287
|
|
|
|
|
|
|
=head2 RDFa Versions |
3288
|
|
|
|
|
|
|
|
3289
|
|
|
|
|
|
|
RDF::RDFa::Parser supports RDFa versions 1.0 and 1.1. |
3290
|
|
|
|
|
|
|
|
3291
|
|
|
|
|
|
|
1.1 is currently a moving target; support is experimental. |
3292
|
|
|
|
|
|
|
|
3293
|
|
|
|
|
|
|
1.1 is the default, but this can be configured using RDF::RDFa::Parser::Config. |
3294
|
|
|
|
|
|
|
|
3295
|
|
|
|
|
|
|
=head2 Host Languages |
3296
|
|
|
|
|
|
|
|
3297
|
|
|
|
|
|
|
RDF::RDFa::Parser supports various different RDFa host languages: |
3298
|
|
|
|
|
|
|
|
3299
|
|
|
|
|
|
|
=over 4 |
3300
|
|
|
|
|
|
|
|
3301
|
|
|
|
|
|
|
=item * B<XHTML> |
3302
|
|
|
|
|
|
|
|
3303
|
|
|
|
|
|
|
As per the XHTML+RDFa 1.0 and XHTML+RDFa 1.1 specifications. |
3304
|
|
|
|
|
|
|
|
3305
|
|
|
|
|
|
|
=item * B<HTML 4> |
3306
|
|
|
|
|
|
|
|
3307
|
|
|
|
|
|
|
Uses an HTML5 (sic) parser; uses @lang instead of @xml:lang; keeps prefixes |
3308
|
|
|
|
|
|
|
and terms case-insensitive; recognises the @rel relations defined in the HTML |
3309
|
|
|
|
|
|
|
4 specification. Otherwise the same as XHTML. |
3310
|
|
|
|
|
|
|
|
3311
|
|
|
|
|
|
|
=item * B<HTML5> |
3312
|
|
|
|
|
|
|
|
3313
|
|
|
|
|
|
|
Uses an HTML5 parser; uses @lang as well as @xml:lang; keeps prefixes |
3314
|
|
|
|
|
|
|
and terms case-insensitive; recognises the @rel relations defined in the HTML5 |
3315
|
|
|
|
|
|
|
draft specification. Otherwise the same as XHTML. |
3316
|
|
|
|
|
|
|
|
3317
|
|
|
|
|
|
|
=item * B<XML> |
3318
|
|
|
|
|
|
|
|
3319
|
|
|
|
|
|
|
This is implemented as per the RDFa Core 1.1 specification. There is also |
3320
|
|
|
|
|
|
|
support for "RDFa Core 1.0", for which no specification exists, but has been |
3321
|
|
|
|
|
|
|
reverse-engineered by applying the differences between XHTML+RDFa 1.1 and |
3322
|
|
|
|
|
|
|
RDFa Core 1.1 to the XHTML+RDFa 1.0 specification. |
3323
|
|
|
|
|
|
|
|
3324
|
|
|
|
|
|
|
Embedded chunks of RDF/XML within XML are supported. |
3325
|
|
|
|
|
|
|
|
3326
|
|
|
|
|
|
|
=item * B<SVG> |
3327
|
|
|
|
|
|
|
|
3328
|
|
|
|
|
|
|
For now, a synonym for XML. |
3329
|
|
|
|
|
|
|
|
3330
|
|
|
|
|
|
|
=item * B<Atom> |
3331
|
|
|
|
|
|
|
|
3332
|
|
|
|
|
|
|
The E<lt>feedE<gt> and E<lt>entryE<gt> elements are treated specially, setting |
3333
|
|
|
|
|
|
|
a new subject; IANA-registered rel keywords are recognised. |
3334
|
|
|
|
|
|
|
|
3335
|
|
|
|
|
|
|
By passing C<< atom_parser=>1 >> as a Config option, you can also handle |
3336
|
|
|
|
|
|
|
Atom's native semantics. (Uses L<XML::Atom::OWL>. If this module is not installed, |
3337
|
|
|
|
|
|
|
this option is silently ignored.) |
3338
|
|
|
|
|
|
|
|
3339
|
|
|
|
|
|
|
Otherwise, the same as XML. |
3340
|
|
|
|
|
|
|
|
3341
|
|
|
|
|
|
|
=item * B<DataRSS> |
3342
|
|
|
|
|
|
|
|
3343
|
|
|
|
|
|
|
Defines some default prefixes. Otherwise, the same as Atom. |
3344
|
|
|
|
|
|
|
|
3345
|
|
|
|
|
|
|
=item * B<OpenDocument XML> |
3346
|
|
|
|
|
|
|
|
3347
|
|
|
|
|
|
|
That is, XML content formatted along the lines of 'content.xml' in OpenDocument |
3348
|
|
|
|
|
|
|
files. |
3349
|
|
|
|
|
|
|
|
3350
|
|
|
|
|
|
|
Supports OpenDocument bookmarked ranges used as typed or plain object literals |
3351
|
|
|
|
|
|
|
(though not XML literals); expects RDFa attributes in the XHTML namespace |
3352
|
|
|
|
|
|
|
instead of in no namespace. Otherwise, the same as XML. |
3353
|
|
|
|
|
|
|
|
3354
|
|
|
|
|
|
|
=item * B<OpenDocument> |
3355
|
|
|
|
|
|
|
|
3356
|
|
|
|
|
|
|
That is, a ZIP file containing OpenDocument XML files. RDF::RDFa::Parser |
3357
|
|
|
|
|
|
|
will do all the unzipping and combining for you, so you don't have to. |
3358
|
|
|
|
|
|
|
The unregistered "jar:" URI scheme is used to refer to files within the ZIP. |
3359
|
|
|
|
|
|
|
|
3360
|
|
|
|
|
|
|
=back |
3361
|
|
|
|
|
|
|
|
3362
|
|
|
|
|
|
|
=head2 Embedded RDF/XML |
3363
|
|
|
|
|
|
|
|
3364
|
|
|
|
|
|
|
Though a rarely used feature, XHTML allows other XML markup languages |
3365
|
|
|
|
|
|
|
to be directly embedded into it. In particular, chunks of RDF/XML can |
3366
|
|
|
|
|
|
|
be included in XHTML. While this is not common in XHTML, it's seen quite |
3367
|
|
|
|
|
|
|
often in SVG and other XML markup languages. |
3368
|
|
|
|
|
|
|
|
3369
|
|
|
|
|
|
|
When RDF::RDFa::Parser encounters a chunk of RDF/XML in a document |
3370
|
|
|
|
|
|
|
it's parsing (i.e. an element called 'RDF' with namespace |
3371
|
|
|
|
|
|
|
'http://www.w3.org/1999/02/22-rdf-syntax-ns#'), there are three different |
3372
|
|
|
|
|
|
|
courses of action it can take: |
3373
|
|
|
|
|
|
|
|
3374
|
|
|
|
|
|
|
=over 4 |
3375
|
|
|
|
|
|
|
|
3376
|
|
|
|
|
|
|
=item 0. Continue straight through it. |
3377
|
|
|
|
|
|
|
|
3378
|
|
|
|
|
|
|
This is the behaviour that XHTML+RDFa seems to suggest is the right |
3379
|
|
|
|
|
|
|
option. It should mostly not do any harm: triples encoded in RDF/XML |
3380
|
|
|
|
|
|
|
will be generally ignored (though the chunk itself could theoretically |
3381
|
|
|
|
|
|
|
end up as part of an XML literal). It will waste a bit of time though. |
3382
|
|
|
|
|
|
|
|
3383
|
|
|
|
|
|
|
=item 1. Parse the RDF/XML. |
3384
|
|
|
|
|
|
|
|
3385
|
|
|
|
|
|
|
The parser will parse the RDF/XML properly. If named graphs are |
3386
|
|
|
|
|
|
|
enabled, any triples will be added to a separate graph. This is |
3387
|
|
|
|
|
|
|
the behaviour that SVG Tiny 1.2 seems to suggest is the correct |
3388
|
|
|
|
|
|
|
thing to do. |
3389
|
|
|
|
|
|
|
|
3390
|
|
|
|
|
|
|
=item 2. Skip the chunk. |
3391
|
|
|
|
|
|
|
|
3392
|
|
|
|
|
|
|
This will skip over the RDF element entirely, and thus save you a |
3393
|
|
|
|
|
|
|
bit of time. |
3394
|
|
|
|
|
|
|
|
3395
|
|
|
|
|
|
|
=back |
3396
|
|
|
|
|
|
|
|
3397
|
|
|
|
|
|
|
You can decide which path to take by setting the 'embedded_rdfxml' |
3398
|
|
|
|
|
|
|
Config option. For HTML and XHTML, you probably want |
3399
|
|
|
|
|
|
|
to set embedded_rdfxml to '0' (the default) or '2' (a little faster). |
3400
|
|
|
|
|
|
|
For other XML markup languages (e.g. SVG or Atom), then you probably want to |
3401
|
|
|
|
|
|
|
set it to '1'. |
3402
|
|
|
|
|
|
|
|
3403
|
|
|
|
|
|
|
(There's also an option '3' which controls how embedded RDF/XML interacts |
3404
|
|
|
|
|
|
|
with named graphs, but this is only really intended for internal use, parsing |
3405
|
|
|
|
|
|
|
OpenDocument.) |
3406
|
|
|
|
|
|
|
|
3407
|
|
|
|
|
|
|
=head2 Named Graphs |
3408
|
|
|
|
|
|
|
|
3409
|
|
|
|
|
|
|
The parser has support for named graphs within a single RDFa |
3410
|
|
|
|
|
|
|
document. To switch this on, use the 'graph' Config option. |
3411
|
|
|
|
|
|
|
|
3412
|
|
|
|
|
|
|
See also L<http://buzzword.org.uk/2009/rdfa4/spec>. |
3413
|
|
|
|
|
|
|
|
3414
|
|
|
|
|
|
|
The name of the attribute which indicates graph URIs is by |
3415
|
|
|
|
|
|
|
default 'graph', but can be changed using the 'graph_attr' |
3416
|
|
|
|
|
|
|
Config option. This option accepts Clark Notation to specify a |
3417
|
|
|
|
|
|
|
namespaced attribute. By default, the attribute value is |
3418
|
|
|
|
|
|
|
interpreted as like the 'about' attribute (i.e. CURIEs, URIs, etc), |
3419
|
|
|
|
|
|
|
but if you set the 'graph_type' Config option to 'id', |
3420
|
|
|
|
|
|
|
it will be treated as setting a fragment identifier (like the 'id' |
3421
|
|
|
|
|
|
|
attribute). |
3422
|
|
|
|
|
|
|
|
3423
|
|
|
|
|
|
|
The 'graph_default' Config option allows you to set the default |
3424
|
|
|
|
|
|
|
graph URI/bnode identifier. |
3425
|
|
|
|
|
|
|
|
3426
|
|
|
|
|
|
|
Once you're using named graphs, the C<graphs> method becomes |
3427
|
|
|
|
|
|
|
useful: it returns a hashref of { graph_uri => trine_model } pairs. |
3428
|
|
|
|
|
|
|
The optional parameter to the C<graph> method also becomes useful. |
3429
|
|
|
|
|
|
|
|
3430
|
|
|
|
|
|
|
OpenDocument (ZIP) host language support makes internal use |
3431
|
|
|
|
|
|
|
of named graphs, so if you're parsing OpenDocument, tinker with |
3432
|
|
|
|
|
|
|
the graph Config options at your own risk! |
3433
|
|
|
|
|
|
|
|
3434
|
|
|
|
|
|
|
=head2 Auto Config |
3435
|
|
|
|
|
|
|
|
3436
|
|
|
|
|
|
|
RDF::RDFa::Parser has a lot of different Config options to play with. Sometimes it |
3437
|
|
|
|
|
|
|
might be useful to allow the page being parsed to control some of these options. |
3438
|
|
|
|
|
|
|
If you switch on the 'auto_config' Config option, pages can do this. |
3439
|
|
|
|
|
|
|
|
3440
|
|
|
|
|
|
|
A page can set options using a specially crafted E<lt>metaE<gt> tag: |
3441
|
|
|
|
|
|
|
|
3442
|
|
|
|
|
|
|
<meta name="http://search.cpan.org/dist/RDF-RDFa-Parser/#auto_config" |
3443
|
|
|
|
|
|
|
content="xhtml_lang=1&xml_lang=0" /> |
3444
|
|
|
|
|
|
|
|
3445
|
|
|
|
|
|
|
Note that the C<content> attribute is an application/x-www-form-urlencoded |
3446
|
|
|
|
|
|
|
string (which must then be HTML-escaped of course). Semicolons may be used |
3447
|
|
|
|
|
|
|
instead of ampersands, as these tend to look nicer: |
3448
|
|
|
|
|
|
|
|
3449
|
|
|
|
|
|
|
<meta name="http://search.cpan.org/dist/RDF-RDFa-Parser/#auto_config" |
3450
|
|
|
|
|
|
|
content="xhtml_lang=1;xml_lang=0" /> |
3451
|
|
|
|
|
|
|
|
3452
|
|
|
|
|
|
|
It's possible to use auto config outside XHTML (e.g. in Atom or |
3453
|
|
|
|
|
|
|
SVG) using namespaces: |
3454
|
|
|
|
|
|
|
|
3455
|
|
|
|
|
|
|
<xhtml:meta xmlns:xhtml="http://www.w3.org/1999/xhtml" |
3456
|
|
|
|
|
|
|
name="http://search.cpan.org/dist/RDF-RDFa-Parser/#auto_config" |
3457
|
|
|
|
|
|
|
content="xhtml_lang=0;xml_base=2;atom_elements=1" /> |
3458
|
|
|
|
|
|
|
|
3459
|
|
|
|
|
|
|
Any Config option may be given using auto config, except 'use_rtnlx', 'dom_parser', |
3460
|
|
|
|
|
|
|
and of course 'auto_config' itself. |
3461
|
|
|
|
|
|
|
|
3462
|
|
|
|
|
|
|
=head2 Profiles |
3463
|
|
|
|
|
|
|
|
3464
|
|
|
|
|
|
|
Support for Profiles (an experimental RDFa 1.1 feature) was added in |
3465
|
|
|
|
|
|
|
version 1.09_00, but dropped after version 1.096, because the feature |
3466
|
|
|
|
|
|
|
was removed from draft specs. |
3467
|
|
|
|
|
|
|
|
3468
|
|
|
|
|
|
|
=head1 BUGS |
3469
|
|
|
|
|
|
|
|
3470
|
|
|
|
|
|
|
RDF::RDFa::Parser 0.21 passed all approved tests in the XHTML+RDFa |
3471
|
|
|
|
|
|
|
test suite at the time of its release. |
3472
|
|
|
|
|
|
|
|
3473
|
|
|
|
|
|
|
RDF::RDFa::Parser 0.22 (used in conjunction with HTML::HTML5::Parser |
3474
|
|
|
|
|
|
|
0.01 and HTML::HTML5::Sanity 0.01) additionally passes all approved |
3475
|
|
|
|
|
|
|
tests in the HTML4+RDFa and HTML5+RDFa test suites at the time of |
3476
|
|
|
|
|
|
|
its release; except test cases 0113 and 0121, which the author of |
3477
|
|
|
|
|
|
|
this module believes mandate incorrect HTML parsing. |
3478
|
|
|
|
|
|
|
|
3479
|
|
|
|
|
|
|
RDF::RDFa::Parser 1.096_01 passes all approved tests on the default |
3480
|
|
|
|
|
|
|
graph (not the processor graph) in the RDFa 1.1 test suite for language |
3481
|
|
|
|
|
|
|
versions 1.0 and host languages xhtml1, html4 and html5, with the |
3482
|
|
|
|
|
|
|
following exceptions which are skipped: |
3483
|
|
|
|
|
|
|
|
3484
|
|
|
|
|
|
|
=over |
3485
|
|
|
|
|
|
|
|
3486
|
|
|
|
|
|
|
=item * B<0140> - wilful violation, pending proof that the test is backed up by the spec. |
3487
|
|
|
|
|
|
|
|
3488
|
|
|
|
|
|
|
=item * B<0198> - an XML canonicalisation test that may be dropped in the future. |
3489
|
|
|
|
|
|
|
|
3490
|
|
|
|
|
|
|
=item * B<0212> - wilful violation, as passing this test would require regressing on the old RDFa 1.0 test suite. |
3491
|
|
|
|
|
|
|
|
3492
|
|
|
|
|
|
|
=item * B<0251> to B<0256> pass with RDFa 1.1 and are skipped in RDFa 1.0 because they use RDFa-1.1-specific syntax. |
3493
|
|
|
|
|
|
|
|
3494
|
|
|
|
|
|
|
=item * B<0256> is additionally skipped in HTML4 mode, as the author believes xml:lang should be ignored in HTML versions prior to HTML5. |
3495
|
|
|
|
|
|
|
|
3496
|
|
|
|
|
|
|
=item * B<0303> - wilful violation, as this feature is simply awful. |
3497
|
|
|
|
|
|
|
|
3498
|
|
|
|
|
|
|
=back |
3499
|
|
|
|
|
|
|
|
3500
|
|
|
|
|
|
|
Please report any bugs to L<http://rt.cpan.org/>. |
3501
|
|
|
|
|
|
|
|
3502
|
|
|
|
|
|
|
Common gotchas: |
3503
|
|
|
|
|
|
|
|
3504
|
|
|
|
|
|
|
=over 8 |
3505
|
|
|
|
|
|
|
|
3506
|
|
|
|
|
|
|
=item * Are you using the XML catalogue? |
3507
|
|
|
|
|
|
|
|
3508
|
|
|
|
|
|
|
RDF::RDFa::Parser maintains a locally cached version of the XHTML+RDFa |
3509
|
|
|
|
|
|
|
DTD. This will normally be within your Perl module directory, in a subdirectory |
3510
|
|
|
|
|
|
|
named "auto/share/dist/RDF-RDFa-Parser/catalogue/". |
3511
|
|
|
|
|
|
|
If this is missing, the parser should still work, but will be very slow. |
3512
|
|
|
|
|
|
|
|
3513
|
|
|
|
|
|
|
=back |
3514
|
|
|
|
|
|
|
|
3515
|
|
|
|
|
|
|
=head1 SEE ALSO |
3516
|
|
|
|
|
|
|
|
3517
|
|
|
|
|
|
|
L<RDF::TrineX::Parser::RDFa> provides a saner interface for this module. |
3518
|
|
|
|
|
|
|
|
3519
|
|
|
|
|
|
|
L<RDF::RDFa::Parser::Config>. |
3520
|
|
|
|
|
|
|
|
3521
|
|
|
|
|
|
|
L<XML::LibXML>, L<RDF::Trine>, L<HTML::HTML5::Parser>, L<HTML::HTML5::Sanity>, |
3522
|
|
|
|
|
|
|
L<RDF::RDFa::Generator>, L<RDF::RDFa::Linter>. |
3523
|
|
|
|
|
|
|
|
3524
|
|
|
|
|
|
|
L<http://www.perlrdf.org/>, L<http://rdfa.info>. |
3525
|
|
|
|
|
|
|
|
3526
|
|
|
|
|
|
|
=head1 AUTHOR |
3527
|
|
|
|
|
|
|
|
3528
|
|
|
|
|
|
|
Toby Inkster E<lt>tobyink@cpan.orgE<gt>. |
3529
|
|
|
|
|
|
|
|
3530
|
|
|
|
|
|
|
=head1 ACKNOWLEDGEMENTS |
3531
|
|
|
|
|
|
|
|
3532
|
|
|
|
|
|
|
Kjetil Kjernsmo E<lt>kjetilk@cpan.orgE<gt> wrote much of the stuff for |
3533
|
|
|
|
|
|
|
building RDF::Trine models. Neubert Joachim taught me to use XML |
3534
|
|
|
|
|
|
|
catalogues, which massively speeds up parsing of XHTML files that have |
3535
|
|
|
|
|
|
|
DTDs. |
3536
|
|
|
|
|
|
|
|
3537
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENCE |
3538
|
|
|
|
|
|
|
|
3539
|
|
|
|
|
|
|
Copyright 2008-2012 Toby Inkster |
3540
|
|
|
|
|
|
|
|
3541
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
3542
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
3543
|
|
|
|
|
|
|
|
3544
|
|
|
|
|
|
|
=head1 DISCLAIMER OF WARRANTIES |
3545
|
|
|
|
|
|
|
|
3546
|
|
|
|
|
|
|
THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED |
3547
|
|
|
|
|
|
|
WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF |
3548
|
|
|
|
|
|
|
MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. |