| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package RDF::RDFa::Parser; |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
BEGIN { |
|
4
|
9
|
|
|
9
|
|
306502
|
$RDF::RDFa::Parser::AUTHORITY = 'cpan:TOBYINK'; |
|
5
|
9
|
|
|
|
|
241
|
$RDF::RDFa::Parser::VERSION = '1.097'; |
|
6
|
|
|
|
|
|
|
} |
|
7
|
|
|
|
|
|
|
|
|
8
|
9
|
|
|
9
|
|
85
|
use Carp qw(); |
|
|
9
|
|
|
|
|
18
|
|
|
|
9
|
|
|
|
|
137
|
|
|
9
|
9
|
|
|
9
|
|
15595
|
use Data::UUID; |
|
|
9
|
|
|
|
|
11973
|
|
|
|
9
|
|
|
|
|
688
|
|
|
10
|
9
|
|
|
9
|
|
6018
|
use File::ShareDir qw(dist_file); |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
use HTML::HTML5::Parser; |
|
12
|
|
|
|
|
|
|
use HTML::HTML5::Sanity qw(fix_document); |
|
13
|
|
|
|
|
|
|
use LWP::UserAgent; |
|
14
|
|
|
|
|
|
|
use RDF::RDFa::Parser::Config; |
|
15
|
|
|
|
|
|
|
use RDF::RDFa::Parser::InitialContext; |
|
16
|
|
|
|
|
|
|
use RDF::RDFa::Parser::OpenDocumentObjectModel; |
|
17
|
|
|
|
|
|
|
use RDF::Trine 0.130; |
|
18
|
|
|
|
|
|
|
use Scalar::Util qw(blessed); |
|
19
|
|
|
|
|
|
|
use Storable qw(dclone); |
|
20
|
|
|
|
|
|
|
use URI::Escape; |
|
21
|
|
|
|
|
|
|
use URI; |
|
22
|
|
|
|
|
|
|
use XML::LibXML qw(:all); |
|
23
|
|
|
|
|
|
|
use XML::RegExp; |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
use constant { |
|
26
|
|
|
|
|
|
|
ERR_WARNING => 'w', |
|
27
|
|
|
|
|
|
|
ERR_ERROR => 'e', |
|
28
|
|
|
|
|
|
|
}; |
|
29
|
|
|
|
|
|
|
use constant { |
|
30
|
|
|
|
|
|
|
ERR_CODE_HOST => 'HOST01', |
|
31
|
|
|
|
|
|
|
ERR_CODE_RDFXML_MUDDLE => 'RDFX01', |
|
32
|
|
|
|
|
|
|
ERR_CODE_RDFXML_MESS => 'RDFX02', |
|
33
|
|
|
|
|
|
|
ERR_CODE_PREFIX_BUILTIN => 'PRFX01', |
|
34
|
|
|
|
|
|
|
ERR_CODE_PREFIX_ILLEGAL => 'PRFX02', |
|
35
|
|
|
|
|
|
|
ERR_CODE_PREFIX_DISABLED => 'PRFX03', |
|
36
|
|
|
|
|
|
|
ERR_CODE_INSTANCEOF_USED => 'INST01', |
|
37
|
|
|
|
|
|
|
ERR_CODE_INSTANCEOF_OVERRULED => 'INST02', |
|
38
|
|
|
|
|
|
|
ERR_CODE_CURIE_FELLTHROUGH => 'CURI01', |
|
39
|
|
|
|
|
|
|
ERR_CODE_CURIE_UNDEFINED => 'CURI02', |
|
40
|
|
|
|
|
|
|
ERR_CODE_BNODE_WRONGPLACE => 'BNOD01', |
|
41
|
|
|
|
|
|
|
ERR_CODE_VOCAB_DISABLED => 'VOCA01', |
|
42
|
|
|
|
|
|
|
ERR_CODE_LANG_INVALID => 'LANG01', |
|
43
|
|
|
|
|
|
|
}; |
|
44
|
|
|
|
|
|
|
use constant { |
|
45
|
|
|
|
|
|
|
RDF_XMLLIT => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral', |
|
46
|
|
|
|
|
|
|
RDF_TYPE => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type', |
|
47
|
|
|
|
|
|
|
RDF_FIRST => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#first', |
|
48
|
|
|
|
|
|
|
RDF_REST => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#rest', |
|
49
|
|
|
|
|
|
|
RDF_NIL => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#nil', |
|
50
|
|
|
|
|
|
|
}; |
|
51
|
|
|
|
|
|
|
use common::sense; |
|
52
|
|
|
|
|
|
|
use 5.010; |
|
53
|
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
our $HAS_AWOL; |
|
55
|
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
BEGIN |
|
57
|
|
|
|
|
|
|
{ |
|
58
|
|
|
|
|
|
|
local $@; |
|
59
|
|
|
|
|
|
|
eval "use XML::Atom::OWL;"; |
|
60
|
|
|
|
|
|
|
$HAS_AWOL = $@ ? 0 : 1; |
|
61
|
|
|
|
|
|
|
} |
|
62
|
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
# Constructor.
#
# Arguments (positional):
#   $markup   - the document to parse: either a string of markup or an
#               XML::LibXML::Document object. Required.
#   $base_uri - absolute URI used as the document base. Required; must start
#               with a URI scheme.
#   $config   - undef (default configuration), an RDF::RDFa::Parser::Config
#               object, or a plain hashref of options (back-compat form).
#   $store    - optional store handed to RDF::Trine::Model->new; a temporary
#               in-memory store is created when omitted.
#
# Dies if the configuration is of an unrecognised kind, if the base URI has
# no scheme, or if no markup is supplied. A failure to build the DOM is NOT
# fatal here: the parse runs inside an eval whose error is not inspected, so
# the object is returned with an undefined 'dom' and the problem is reported
# when consume() is called.
sub new
{
    my ($class, $markup, $base_uri, $config, $store) = @_;

    # ---- configuration ------------------------------------------------
    if (defined $config)
    {
        my $is_config_object =
            blessed($config) && $config->isa('RDF::RDFa::Parser::Config');

        unless ($is_config_object)
        {
            # Only a plain hashref is accepted as an alternative; its pairs
            # are layered on top of the default configuration.
            die "Unrecognised configuration\n"
                unless 'HASH' eq ref $config;
            $config = RDF::RDFa::Parser::Config->new(undef, undef, %$config);
        }
    }
    else
    {
        $config = RDF::RDFa::Parser::Config->new;
    }

    # ---- base URI -----------------------------------------------------
    die "Need a valid base URI.\n"
        if $base_uri !~ /^[a-z][a-z0-9\+\-\.]*:/i;

    # ---- markup / DOM -------------------------------------------------
    defined $markup
        or Carp::croak("Need to provide markup to parse.");

    my $dom;
    eval {
        if (blessed($markup) && $markup->isa('XML::LibXML::Document'))
        {
            # Already a DOM: use it directly.
            $dom    = $markup;
            $markup = $dom->toString;
        }
        elsif ($config->{'dom_parser'} =~ /^(opendocument|opendoc|odf|od|odt)$/i)
        {
            $dom = RDF::RDFa::Parser::OpenDocumentObjectModel->new
                ->parse_string($markup, $base_uri);
        }
        elsif ($config->{'dom_parser'} =~ /^(html|tagsoup|soup)$/i)
        {
            $dom = fix_document( HTML::HTML5::Parser->new->parse_string($markup) );
        }
        else
        {
            # Plain XML parse, using the distribution's catalogue when the
            # shared file is readable, and with validation switched off.
            my $xml_parser     = XML::LibXML->new;
            my $catalogue_path = dist_file('RDF-RDFa-Parser', 'catalogue/index.xml');
            if (-r $catalogue_path)
            {
                $xml_parser->load_catalog($catalogue_path);
            }
            $xml_parser->validation(0);

            $dom = $xml_parser->parse_string($markup);
        }
    };

    # ---- store --------------------------------------------------------
    defined $store
        or $store = RDF::Trine::Store::Memory->temporary_store;

    my %fields = (
        baseuri  => $base_uri,
        origbase => $base_uri,
        dom      => $dom,
        model    => RDF::Trine::Model->new($store),
        bnodes   => 0,
        sub      => {},
        options  => $config,
        Graphs   => {},
        errors   => [],
        consumed => 0,
    );
    my $self = bless \%fields, $class;

    $config->auto_config($self);

    if ($config->{guess_rdfa_version})
    {
        $config          = $config->guess_rdfa_version($self);
        $self->{options} = $config;
    }

    # ---- HTML <base> element -------------------------------------------
    # Every <base> element carrying an href is visited; the value taken from
    # the last such element (fragment removed) is the one that is applied.
    if ($dom and $self->{options}{xhtml_base})
    {
        my $href;
        for my $base_elem ($self->dom->getElementsByTagName('base'))
        {
            next unless $base_elem->hasAttribute('href');
            $href = $base_elem->getAttribute('href');
            $href =~ s/#.*$//g;
        }

        if (defined $href && length $href)
        {
            $self->{baseuri} = $self->uri($href);
        }
    }

    return $self;
}
|
164
|
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
# Alternative constructor: build a parser from a URL or from an HTTP
# response object.
#
# Arguments (positional):
#   $url    - either something accepted by the user agent's get() method, or
#             an object that isa HTTP::Message, which is used as the
#             response without any request being made.
#   $config - undef, an RDF::RDFa::Parser::Config object, or a hashref of
#             options.
#   $store  - passed through unchanged to new().
#
# The response's content type is used to pick the host-language
# configuration: an existing Config object is rehost()ed, otherwise a new
# Config is built for that content type (plus any hashref options).
sub new_from_url
{
    my ($class, $url, $config, $store) = @_;

    # Classify $config once; both the user-agent lookup and the final
    # configuration depend on the same three-way distinction.
    my $given_config =
        blessed($config) && $config->isa('RDF::RDFa::Parser::Config');
    my @hash_options =
        (!$given_config && ref($config) eq 'HASH') ? %$config : ();

    my $response;
    if (blessed($url) && $url->isa('HTTP::Message'))
    {
        $response = $url;
    }
    else
    {
        # No response supplied: fetch the document. When the caller gave no
        # Config object, a throw-away 'xml' configuration supplies the UA.
        my $ua = $given_config
            ? $config->lwp_ua
            : RDF::RDFa::Parser::Config->new('xml', undef, @hash_options)->lwp_ua;
        $response = $ua->get($url);
    }

    my $host = $response->content_type;

    $config = $given_config
        ? $config->rehost($host)
        : RDF::RDFa::Parser::Config->new($host, undef, @hash_options);

    return $class->new(
        $response->decoded_content,
        ($response->base || $url) . '',
        $config,
        $store,
    );
}
|
203
|
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
*new_from_uri = \&new_from_url; |
|
205
|
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
*new_from_response = \&new_from_url; |
|
207
|
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
# Returns the parsed data as a model, consuming the document first.
#
# With no argument, the parser's own model is returned. With a graph
# identifier, a fresh temporary model is returned holding the statements
# found in that context: identifiers of the form "_:xyz" name a blank node,
# anything else is treated as a resource resolved against the parser's
# base URI.
sub graph
{
    my ($self, $graph) = @_;

    $self->consume;

    return $self->{model}
        unless defined $graph;

    my $context = ($graph =~ m/^_:(.*)/)
        ? RDF::Trine::Node::Blank->new($1)
        : RDF::Trine::Node::Resource->new($graph, $self->{baseuri});

    my $subset = RDF::Trine::Model->temporary_model;
    my $iter   = $self->{model}->get_statements(undef, undef, undef, $context);
    while (my $found = $iter->next)
    {
        $subset->add_statement($found);
    }

    return $subset;
}
|
239
|
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
# Returns whatever graph() returns when called with no graph identifier.
sub output_graph
{
    my ($self) = @_;
    return $self->graph;
}
|
244
|
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
# Consumes the document, then returns a hashref with one entry per key of
# $self->{Graphs}; each value is the result of graph() for that key.
sub graphs
{
    my ($self) = @_;
    $self->consume;

    my %model_for =
        map { ($_, scalar $self->graph($_)) } keys %{ $self->{Graphs} };

    return \%model_for;
}
|
258
|
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
# Open Graph Protocol convenience accessor.
#
# Looks at statements whose subject is the document's own URI (as given by
# $self->uri with no argument).
#
#   $property defined   - returns the values of that property. The property
#                         may be a short name (looked up in both the
#                         ogp.me and opengraphprotocol.org namespaces), a
#                         full URI in either of those namespaces, or any
#                         other absolute URI.
#   $property undefined - returns the list of property keys present, with
#                         the two Open Graph namespace prefixes removed.
#
# In list context the whole list is returned; in scalar context only the
# first element. Remaining arguments (%opts) are accepted but not used here.
sub opengraph
{
    my ($self, $property, %opts) = @_;
    $self->consume;

    # Reduce a full Open Graph URI to its short name.
    if (defined $property)
    {
        $property = $1
            if $property =~ m'^http://opengraphprotocol\.org/schema/(.*)$';
        $property = $1
            if $property =~ m'^http://ogp\.me/ns#(.*)$';
    }

    # Work out which predicates to query. A single undef acts as a wildcard
    # and is used when no property was requested.
    my @predicates;
    if (!defined $property)
    {
        @predicates = (undef);
    }
    elsif ($property =~ /^[a-z][a-z0-9\-\.\+]*:/i)
    {
        @predicates = ( RDF::Trine::Node::Resource->new($property) );
    }
    else
    {
        @predicates = (
            RDF::Trine::Node::Resource->new('http://ogp.me/ns#'.$property),
            RDF::Trine::Node::Resource->new('http://opengraphprotocol.org/schema/'.$property),
        );
    }

    # Collect object values, grouped by (shortened) predicate.
    my %values_for;
    for my $predicate (@predicates)
    {
        my $model   = $self->graph;
        my $subject = RDF::Trine::Node::Resource->new($self->uri);
        my $iter    = $model->get_statements($subject, $predicate, undef);

        while (my $st = $iter->next)
        {
            my $key = $st->predicate->uri;
            $key = $1
                if $key =~ m'^http://ogp\.me/ns#(.*)$'
                || $key =~ m'^http://opengraphprotocol\.org/schema/(.*)$';

            # Only resources and literals contribute a value.
            my $object = $st->object;
            if ($object->is_resource)
            {
                push @{ $values_for{$key} }, $object->uri;
            }
            elsif ($object->is_literal)
            {
                push @{ $values_for{$key} }, $object->literal_value;
            }
        }
    }

    my @found = defined($property)
        ? @{ $values_for{$property} || [] }
        : keys %values_for;

    return wantarray ? @found : $found[0];
}
|
329
|
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
# Accessor: returns the value stored under the object's 'dom' key.
sub dom
{
    my ($self) = @_;
    my $document = $self->{dom};
    return $document;
}
|
335
|
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
# Resolves a possibly-relative URI reference to an absolute URI string.
#
# Arguments (positional):
#   $param - the URI reference; a false value is treated as the empty
#            string, which resolves to the base itself.
#   $opts  - optional hashref. Keys read here: 'require-absolute' and
#            'xml_base'. An XML::LibXML object may be passed instead, in
#            which case it is wrapped as { element => $object }.
#
# Returns $param unchanged when it already begins with a URI scheme. When it
# does not and 'require-absolute' is set, returns undef. Otherwise $param is
# resolved with the configured uri_class's new_abs() against the parser's
# base URI, or against $opts->{xml_base} when the xml_base option is
# enabled and a value is supplied, and the result is stringified.
sub uri
{
    my ($self, $param, $opts) = @_;
    $param ||= '';
    $opts  ||= {};

    $opts = { 'element' => $opts }
        if ref($opts) =~ /^XML::LibXML/;

    # seems to be an absolute URI, so can safely return "as is".
    return $param
        if $param =~ /^([a-z][a-z0-9\+\.\-]*)\:/i;

    # An explicit undef is returned (not an empty list) so that callers
    # using this in list context still receive one element.
    return undef
        if $opts->{'require-absolute'};

    my $base = $self->{'options'}->{'xml_base'}
        ? ($opts->{'xml_base'} || $self->{baseuri})
        : $self->{baseuri};

    my $resolved = $self->{options}{uri_class}->new_abs($param, $base);
    return "$resolved";
}
|
367
|
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
# Returns the entries accumulated in $self->{errors} as a list (or their
# count in scalar context). Each entry is pushed by _log_error as
# [ level, code, message, \%args ].
sub errors
{
    my ($self) = @_;
    my $log = $self->{errors};
    return @$log;
}
|
373
|
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
# Describes the logged warnings and errors as RDF.
#
# Arguments (positional):
#   $model   - model to add statements to; a new model over a temporary
#              store is created when this is false.
#   $context - optional graph node; when true, statements are added as
#              quads in that context, otherwise as plain triples.
#
# For every entry returned by $self->errors a fresh identifier is minted in
# the module's error namespace and given: an rdf:type of rdfa:Warning or
# rdfa:Error according to its level, an additional rdf:type when the error
# code maps to a non-empty class name, and a dcterms:description holding the
# message. When the entry's args carry an 'element' that can nodePath(), an
# XPath pointer resource is attached via rdfa:context.
#
# Returns the model.
sub processor_graph
{
    my ($self, $model, $context) = @_;
    $model ||= RDF::Trine::Model->new( RDF::Trine::Store->temporary_store );

    my $rdf_ns  = RDF::Trine::Namespace->new('http://www.w3.org/1999/02/22-rdf-syntax-ns#');
    my $rdfa_ns = RDF::Trine::Namespace->new('http://www.w3.org/ns/rdfa#');
    my $cnt_ns  = RDF::Trine::Namespace->new('http://www.w3.org/2011/content#');
    my $ptr_ns  = RDF::Trine::Namespace->new('http://www.w3.org/2009/pointers#');
    my $dc_ns   = RDF::Trine::Namespace->new('http://purl.org/dc/terms/');
    my $err_ns  = RDF::Trine::Namespace->new('tag:buzzword.org.uk,2010:RDF-RDFa-Parser:error:');

    # Mints a new node in the error namespace from a freshly created UUID
    # string.
    my $uuid_source = Data::UUID->new;
    my $mint_iri    = sub
    {
        my $local_name = $uuid_source->create_str;
        return $err_ns->$local_name;
    };

    # Adds one statement to $model. Arguments that are not objects are
    # wrapped as literal nodes first.
    my $assert = sub
    {
        my @nodes =
            map { blessed($_) ? $_ : RDF::Trine::Node::Literal->new($_) } @_;
        my $statement = $context
            ? RDF::Trine::Statement::Quad->new(@nodes, $context)
            : RDF::Trine::Statement->new(@nodes);
        $model->add_statement($statement);
    };

    # Level => class name in the rdfa: namespace.
    my %class_for_level = (
        ERR_WARNING() => 'Warning',
        ERR_ERROR()   => 'Error',
    );

    # Error code => additional class name in the rdfa: namespace. An empty
    # string means no additional type is asserted for that code.
    my %class_for_code = (
        ERR_CODE_HOST()                => 'DocumentError',
        ERR_CODE_RDFXML_MUDDLE()       => '',
        ERR_CODE_RDFXML_MESS()         => 'DocumentError',
        ERR_CODE_PREFIX_BUILTIN()      => 'DocumentError',
        ERR_CODE_PREFIX_ILLEGAL()      => 'DocumentError',
        ERR_CODE_PREFIX_DISABLED()     => '',
        ERR_CODE_INSTANCEOF_USED()     => '',
        ERR_CODE_INSTANCEOF_OVERRULED() => '',
        ERR_CODE_CURIE_FELLTHROUGH()   => '',
        ERR_CODE_CURIE_UNDEFINED()     => 'UnresolvedCURIE',
        ERR_CODE_BNODE_WRONGPLACE()    => '',
        ERR_CODE_VOCAB_DISABLED()      => '',
        ERR_CODE_LANG_INVALID()        => 'DocumentError',
    );

    for my $entry ($self->errors)
    {
        my ($level, $code, $message, $args) = @$entry;
        my $iri = $mint_iri->();

        my $level_class = $class_for_level{$level};
        $assert->($iri, $rdf_ns->type, $rdfa_ns->$level_class)
            if $level_class;

        my $code_class = $class_for_code{$code};
        $assert->($iri, $rdf_ns->type, $rdfa_ns->$code_class)
            if $code_class;

        $assert->($iri, $dc_ns->description, $message);

        my $element = $args->{element};
        next unless blessed($element) and $element->can('nodePath');

        my $pointer_iri = $mint_iri->();
        $assert->($iri,         $rdfa_ns->context,   $pointer_iri);
        $assert->($pointer_iri, $rdf_ns->type,       $ptr_ns->XPathPointer);
        $assert->($pointer_iri, $ptr_ns->expression, $element->nodePath);
    }

    return $model;
}
|
459
|
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
# Returns a new model containing every statement from processor_graph()
# followed by every statement from graph(), both called without arguments.
sub processor_and_output_graph
{
    my ($self) = @_;

    my $combined = RDF::Trine::Model->new;
    for my $source (qw( processor_graph graph ))
    {
        my $statements = $self->$source->get_statements;
        $statements->each(sub { $combined->add_statement(shift) });
    }

    return $combined;
}
|
468
|
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
# Records a warning or error raised during processing.
#
# Arguments (positional, after $self):
#   $level   - ERR_WARNING or ERR_ERROR.
#   $code    - one of the ERR_CODE_* constants; these are alphanumeric
#              strings such as 'HOST01', not numbers.
#   $message - human-readable description.
#   %args    - extra details; 'uri' and 'element' are used for reporting.
#
# If an 'onerror' callback is registered in $self->{sub}, it is called with
# exactly the arguments this method received (including $self) and nothing
# is printed. Otherwise, entries at ERR_ERROR level are reported with
# Carp::carp; warnings are recorded silently. In every case the entry is
# appended to $self->{errors} as [ $level, $code, $message, \%args ].
#
# The value of the final push is returned, as before; callers in this file
# ignore it.
sub _log_error
{
    my ($self, $level, $code, $message, %args) = @_;

    if (defined $self->{'sub'}->{'onerror'})
    {
        $self->{'sub'}->{'onerror'}(@_);
    }
    elsif ($level eq ERR_ERROR)
    {
        # The code is printed as a string. It used to be formatted with
        # %04X, which turned every alphanumeric code into "0000".
        Carp::carp(sprintf("%s: %s\n", $code, $message));
        Carp::carp(sprintf("... with URI <%s>\n", $args{'uri'}))
            if defined $args{'uri'};
        Carp::carp(sprintf("... on element '%s' with path '%s'\n", $args{'element'}->localname, $args{'element'}->nodePath))
            if blessed($args{'element'}) && $args{'element'}->isa('XML::LibXML::Node');
    }

    push @{$self->{errors}}, [$level, $code, $message, \%args];
}
|
488
|
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
# Processes the document, populating the model. Runs at most once per
# object: a second call returns immediately (with no value).
#
# Named arguments:
#   survive - when true, a missing DOM is logged as an error via _log_error
#             instead of being thrown with Carp::croak.
#
# Returns $self after processing (or after logging a missing DOM).
sub consume
{
    my ($self, %args) = @_;

    return if $self->{'consumed'};
    $self->{'consumed'}++;

    # No DOM means the constructor could not parse the input.
    unless ($self->{dom})
    {
        Carp::croak("Input could not be parsed into a DOM!")
            unless $args{survive};

        $self->_log_error(
            ERR_ERROR,
            ERR_CODE_HOST,
            'Input could not be parsed into a DOM!',
        );
        return $self;
    }

    # Fill in named-graph settings that have not been set explicitly.
    if ($self->{options}{graph})
    {
        $self->{options}{graph_attr}    //= 'graph';
        $self->{options}{graph_type}    //= 'about';
        $self->{options}{graph_default} //= $self->bnode;
    }

    # For the duration of this call, XML::LibXML::Element gains two helper
    # methods that use the namespaced accessor when a namespace URI is
    # given and the plain accessor when it is undef.
    local *XML::LibXML::Element::getAttributeNsSafe = sub
    {
        my ($element, $nsuri, $attribute) = @_;
        return $element->getAttributeNS($nsuri, $attribute)
            if defined $nsuri;
        return $element->getAttribute($attribute);
    };
    local *XML::LibXML::Element::hasAttributeNsSafe = sub
    {
        my ($element, $nsuri, $attribute) = @_;
        return $element->hasAttributeNS($nsuri, $attribute)
            if defined $nsuri;
        return $element->hasAttribute($attribute);
    };

    $self->_consume_element($self->dom->documentElement, { init => 1});

    # Optionally hand the same DOM and model to XML::Atom::OWL, provided
    # that module was loadable.
    if ($self->{options}{atom_parser} && $HAS_AWOL)
    {
        my $atom = XML::Atom::OWL->new( $self->dom , $self->uri , undef, $self->{'model'} );
        $atom->{'bnode_generator'} = $self;
        $atom->set_callbacks( $self->{'sub'} );
        $atom->consume;
    }

    return $self;
}
|
546
|
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
sub _consume_element |
|
548
|
|
|
|
|
|
|
# http://www.w3.org/TR/rdfa-syntax/#sec_5.5. |
|
549
|
|
|
|
|
|
|
{ |
|
550
|
|
|
|
|
|
|
my $self = shift; |
|
551
|
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
# Processing begins by applying the processing rules below to the document |
|
553
|
|
|
|
|
|
|
# object, in the context of this initial [evaluation context]. All elements |
|
554
|
|
|
|
|
|
|
# in the tree are also processed according to the rules described below, |
|
555
|
|
|
|
|
|
|
# depth-first, although the [evaluation context] used for each set of rules |
|
556
|
|
|
|
|
|
|
# will be based on previous rules that may have been applied. |
|
557
|
|
|
|
|
|
|
my $current_element = shift; |
|
558
|
|
|
|
|
|
|
|
|
559
|
|
|
|
|
|
|
# shouldn't happen, but return 0 if it does. |
|
560
|
|
|
|
|
|
|
return 0 unless $current_element->nodeType == XML_ELEMENT_NODE; |
|
561
|
|
|
|
|
|
|
|
|
562
|
|
|
|
|
|
|
# The evaluation context. |
|
563
|
|
|
|
|
|
|
my $args = shift; |
|
564
|
|
|
|
|
|
|
my ($base, $parent_subject, $parent_subject_elem, $parent_object, $parent_object_elem, |
|
565
|
|
|
|
|
|
|
$list_mappings, $uri_mappings, $term_mappings, $incomplete_triples, $language, |
|
566
|
|
|
|
|
|
|
$graph, $graph_elem, $xml_base); |
|
567
|
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
if ($args->{'init'}) |
|
569
|
|
|
|
|
|
|
{ |
|
570
|
|
|
|
|
|
|
my $init = RDF::RDFa::Parser::InitialContext->new( |
|
571
|
|
|
|
|
|
|
$self->{options}{initial_context}, |
|
572
|
|
|
|
|
|
|
); |
|
573
|
|
|
|
|
|
|
# At the beginning of processing, an initial [evaluation context] is created |
|
574
|
|
|
|
|
|
|
$base = $self->uri; |
|
575
|
|
|
|
|
|
|
$parent_subject = $base; |
|
576
|
|
|
|
|
|
|
$parent_subject_elem = $self->dom->documentElement; |
|
577
|
|
|
|
|
|
|
$parent_object = undef; |
|
578
|
|
|
|
|
|
|
$parent_object_elem = undef; |
|
579
|
|
|
|
|
|
|
$uri_mappings = +{ insensitive => $init->uri_mappings }; |
|
580
|
|
|
|
|
|
|
$term_mappings = +{ insensitive => $init->term_mappings }; |
|
581
|
|
|
|
|
|
|
$incomplete_triples = []; |
|
582
|
|
|
|
|
|
|
$list_mappings = {}; |
|
583
|
|
|
|
|
|
|
$language = undef; |
|
584
|
|
|
|
|
|
|
$graph = $self->{options}{graph} ? $self->{options}{graph_default} : undef; |
|
585
|
|
|
|
|
|
|
$graph_elem = undef; |
|
586
|
|
|
|
|
|
|
$xml_base = undef; |
|
587
|
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
if ($self->{options}{vocab_default}) |
|
589
|
|
|
|
|
|
|
{ |
|
590
|
|
|
|
|
|
|
$uri_mappings->{'(VOCAB)'} = $self->{options}{vocab_default}; |
|
591
|
|
|
|
|
|
|
} |
|
592
|
|
|
|
|
|
|
|
|
593
|
|
|
|
|
|
|
if ($self->{options}{prefix_default}) |
|
594
|
|
|
|
|
|
|
{ |
|
595
|
|
|
|
|
|
|
$uri_mappings->{'(DEFAULT PREFIX)'} = $self->{options}{prefix_default}; |
|
596
|
|
|
|
|
|
|
} |
|
597
|
|
|
|
|
|
|
} |
|
598
|
|
|
|
|
|
|
else |
|
599
|
|
|
|
|
|
|
{ |
|
600
|
|
|
|
|
|
|
$base = $args->{'base'}; |
|
601
|
|
|
|
|
|
|
$parent_subject = $args->{'parent_subject'}; |
|
602
|
|
|
|
|
|
|
$parent_subject_elem = $args->{'parent_subject_elem'}; |
|
603
|
|
|
|
|
|
|
$parent_object = $args->{'parent_object'}; |
|
604
|
|
|
|
|
|
|
$parent_object_elem = $args->{'parent_object_elem'}; |
|
605
|
|
|
|
|
|
|
$uri_mappings = dclone($args->{'uri_mappings'}); |
|
606
|
|
|
|
|
|
|
$term_mappings = dclone($args->{'term_mappings'}); |
|
607
|
|
|
|
|
|
|
$incomplete_triples = $args->{'incomplete_triples'}; |
|
608
|
|
|
|
|
|
|
$list_mappings = $args->{'list_mappings'}; |
|
609
|
|
|
|
|
|
|
$language = $args->{'language'}; |
|
610
|
|
|
|
|
|
|
$graph = $args->{'graph'}; |
|
611
|
|
|
|
|
|
|
$graph_elem = $args->{'graph_elem'}; |
|
612
|
|
|
|
|
|
|
$xml_base = $args->{'xml_base'}; |
|
613
|
|
|
|
|
|
|
} |
|
614
|
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
# Used by OpenDocument, otherwise usually undef. |
|
616
|
|
|
|
|
|
|
my $rdfans = $self->{options}{ns} || undef; |
|
617
|
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
# First, the local values are initialized |
|
619
|
|
|
|
|
|
|
my $recurse = 1; |
|
620
|
|
|
|
|
|
|
my $skip_element = 0; |
|
621
|
|
|
|
|
|
|
my $new_subject = undef; |
|
622
|
|
|
|
|
|
|
my $new_subject_elem = undef; |
|
623
|
|
|
|
|
|
|
my $current_object_resource = undef; |
|
624
|
|
|
|
|
|
|
my $current_object_resource_elem = undef; |
|
625
|
|
|
|
|
|
|
my $typed_resource = undef; |
|
626
|
|
|
|
|
|
|
my $typed_resource_elem = undef; |
|
627
|
|
|
|
|
|
|
my $local_uri_mappings = $uri_mappings; |
|
628
|
|
|
|
|
|
|
my $local_term_mappings = $term_mappings; |
|
629
|
|
|
|
|
|
|
my $local_incomplete_triples = []; |
|
630
|
|
|
|
|
|
|
my $current_language = $language; |
|
631
|
|
|
|
|
|
|
|
|
632
|
|
|
|
|
|
|
my $activity = 0; |
|
633
|
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
# MOVED THIS SLIGHTLY EARLIER IN THE PROCESSING so that it can apply |
|
635
|
|
|
|
|
|
|
# to RDF/XML chunks. |
|
636
|
|
|
|
|
|
|
# |
|
637
|
|
|
|
|
|
|
# The [current element] is also parsed for any language information, and |
|
638
|
|
|
|
|
|
|
# if present, [current language] is set accordingly. |
|
639
|
|
|
|
|
|
|
# Language information can be provided using the general-purpose XML |
|
640
|
|
|
|
|
|
|
# attribute @xml:lang . |
|
641
|
|
|
|
|
|
|
if ($self->{options}{xhtml_lang} |
|
642
|
|
|
|
|
|
|
&& $current_element->hasAttribute('lang')) |
|
643
|
|
|
|
|
|
|
{ |
|
644
|
|
|
|
|
|
|
if ($self->_valid_lang( $current_element->getAttribute('lang') )) |
|
645
|
|
|
|
|
|
|
{ |
|
646
|
|
|
|
|
|
|
$current_language = $current_element->getAttribute('lang'); |
|
647
|
|
|
|
|
|
|
} |
|
648
|
|
|
|
|
|
|
else |
|
649
|
|
|
|
|
|
|
{ |
|
650
|
|
|
|
|
|
|
$self->_log_error( |
|
651
|
|
|
|
|
|
|
ERR_WARNING, |
|
652
|
|
|
|
|
|
|
ERR_CODE_LANG_INVALID, |
|
653
|
|
|
|
|
|
|
sprintf('Language code "%s" is not valid.', $current_element->getAtrribute('lang')), |
|
654
|
|
|
|
|
|
|
element => $current_element, |
|
655
|
|
|
|
|
|
|
lang => $current_element->getAttribute('lang'), |
|
656
|
|
|
|
|
|
|
) if $@; |
|
657
|
|
|
|
|
|
|
} |
|
658
|
|
|
|
|
|
|
} |
|
659
|
|
|
|
|
|
|
if ($self->{options}{xml_lang} |
|
660
|
|
|
|
|
|
|
&& $current_element->hasAttributeNsSafe(XML_XML_NS, 'lang')) |
|
661
|
|
|
|
|
|
|
{ |
|
662
|
|
|
|
|
|
|
if ($self->_valid_lang( $current_element->getAttributeNsSafe(XML_XML_NS, 'lang') )) |
|
663
|
|
|
|
|
|
|
{ |
|
664
|
|
|
|
|
|
|
$current_language = $current_element->getAttributeNsSafe(XML_XML_NS, 'lang'); |
|
665
|
|
|
|
|
|
|
} |
|
666
|
|
|
|
|
|
|
else |
|
667
|
|
|
|
|
|
|
{ |
|
668
|
|
|
|
|
|
|
$self->_log_error( |
|
669
|
|
|
|
|
|
|
ERR_WARNING, |
|
670
|
|
|
|
|
|
|
ERR_CODE_LANG_INVALID, |
|
671
|
|
|
|
|
|
|
sprintf('Language code "%s" is not valid.', $current_element->getAttributeNsSafe(XML_XML_NS, 'lang')), |
|
672
|
|
|
|
|
|
|
element => $current_element, |
|
673
|
|
|
|
|
|
|
lang => $current_element->getAttributeNsSafe(XML_XML_NS, 'lang'), |
|
674
|
|
|
|
|
|
|
) if $@; |
|
675
|
|
|
|
|
|
|
} |
|
676
|
|
|
|
|
|
|
} |
|
677
|
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
# EXTENSION |
|
679
|
|
|
|
|
|
|
# xml:base - important for RDF/XML extension |
|
680
|
|
|
|
|
|
|
if ($current_element->hasAttributeNsSafe(XML_XML_NS, 'base')) |
|
681
|
|
|
|
|
|
|
{ |
|
682
|
|
|
|
|
|
|
my $old_base = $xml_base; |
|
683
|
|
|
|
|
|
|
$xml_base = $current_element->getAttributeNsSafe(XML_XML_NS, 'base'); |
|
684
|
|
|
|
|
|
|
$xml_base =~ s/#.*$//g; |
|
685
|
|
|
|
|
|
|
$xml_base = $self->uri($xml_base, |
|
686
|
|
|
|
|
|
|
{'element'=>$current_element,'xml_base'=>$old_base}); |
|
687
|
|
|
|
|
|
|
} |
|
688
|
|
|
|
|
|
|
my $hrefsrc_base = $base; |
|
689
|
|
|
|
|
|
|
if ($self->{options}{xml_base}==2 && defined $xml_base) |
|
690
|
|
|
|
|
|
|
{ |
|
691
|
|
|
|
|
|
|
$hrefsrc_base = $xml_base; |
|
692
|
|
|
|
|
|
|
} |
|
693
|
|
|
|
|
|
|
|
|
694
|
|
|
|
|
|
|
# EXTENSION |
|
695
|
|
|
|
|
|
|
# Parses embedded RDF/XML - mostly useful for non-XHTML documents, e.g. SVG. |
|
696
|
|
|
|
|
|
|
if ($self->{options}{embedded_rdfxml} |
|
697
|
|
|
|
|
|
|
&& $current_element->localname eq 'RDF' |
|
698
|
|
|
|
|
|
|
&& $current_element->namespaceURI eq 'http://www.w3.org/1999/02/22-rdf-syntax-ns#') |
|
699
|
|
|
|
|
|
|
{ |
|
700
|
|
|
|
|
|
|
return 1 if $self->{options}{embedded_rdfxml}==2; |
|
701
|
|
|
|
|
|
|
|
|
702
|
|
|
|
|
|
|
my $g = $graph; |
|
703
|
|
|
|
|
|
|
unless ($self->{options}{embedded_rdfxml} == 3) |
|
704
|
|
|
|
|
|
|
{ |
|
705
|
|
|
|
|
|
|
$g = $self->bnode; |
|
706
|
|
|
|
|
|
|
} |
|
707
|
|
|
|
|
|
|
|
|
708
|
|
|
|
|
|
|
my $fake_lang = 0; |
|
709
|
|
|
|
|
|
|
unless ($current_element->hasAttributeNsSafe(XML_XML_NS, 'lang')) |
|
710
|
|
|
|
|
|
|
{ |
|
711
|
|
|
|
|
|
|
$current_element->setAttributeNS(XML_XML_NS, 'lang', $current_language); |
|
712
|
|
|
|
|
|
|
$fake_lang = 1; |
|
713
|
|
|
|
|
|
|
} |
|
714
|
|
|
|
|
|
|
|
|
715
|
|
|
|
|
|
|
my $rdfxml_base = $self->{'origbase'}; |
|
716
|
|
|
|
|
|
|
$rdfxml_base = $base |
|
717
|
|
|
|
|
|
|
if $self->{options}{xhtml_base}==2; |
|
718
|
|
|
|
|
|
|
$rdfxml_base = $xml_base |
|
719
|
|
|
|
|
|
|
if defined $xml_base; |
|
720
|
|
|
|
|
|
|
|
|
721
|
|
|
|
|
|
|
eval { |
|
722
|
|
|
|
|
|
|
my $_map; |
|
723
|
|
|
|
|
|
|
my $bnode_mapper = sub { |
|
724
|
|
|
|
|
|
|
my $orig = shift; |
|
725
|
|
|
|
|
|
|
$_map->{$orig} = $self->bnode |
|
726
|
|
|
|
|
|
|
unless defined $_map->{$orig}; |
|
727
|
|
|
|
|
|
|
return $_map->{$orig}; |
|
728
|
|
|
|
|
|
|
}; |
|
729
|
|
|
|
|
|
|
my $parser = RDF::Trine::Parser->new('rdfxml'); |
|
730
|
|
|
|
|
|
|
my $r = $parser->parse( |
|
731
|
|
|
|
|
|
|
$rdfxml_base, |
|
732
|
|
|
|
|
|
|
$current_element->toStringEC14N, |
|
733
|
|
|
|
|
|
|
sub { |
|
734
|
|
|
|
|
|
|
my $st = shift; |
|
735
|
|
|
|
|
|
|
my ($s, $p, @o); |
|
736
|
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
$s = $st->subject->is_blank ? |
|
738
|
|
|
|
|
|
|
$bnode_mapper->($st->subject->blank_identifier) : |
|
739
|
|
|
|
|
|
|
$st->subject->uri_value ; |
|
740
|
|
|
|
|
|
|
$p = $st->predicate->uri_value ; |
|
741
|
|
|
|
|
|
|
if ($st->object->is_literal) |
|
742
|
|
|
|
|
|
|
{ |
|
743
|
|
|
|
|
|
|
@o = ( |
|
744
|
|
|
|
|
|
|
$st->object->literal_value, |
|
745
|
|
|
|
|
|
|
$st->object->literal_datatype, |
|
746
|
|
|
|
|
|
|
$st->object->literal_value_language, |
|
747
|
|
|
|
|
|
|
); |
|
748
|
|
|
|
|
|
|
$self->_insert_triple_literal({current=>$current_element}, |
|
749
|
|
|
|
|
|
|
$s, $p, @o, |
|
750
|
|
|
|
|
|
|
($self->{options}{graph} ? $g : undef)); |
|
751
|
|
|
|
|
|
|
} |
|
752
|
|
|
|
|
|
|
else |
|
753
|
|
|
|
|
|
|
{ |
|
754
|
|
|
|
|
|
|
push @o, $st->object->is_blank ? |
|
755
|
|
|
|
|
|
|
$bnode_mapper->($st->object->blank_identifier) : |
|
756
|
|
|
|
|
|
|
$st->object->uri_value; |
|
757
|
|
|
|
|
|
|
$self->_insert_triple_resource({current=>$current_element}, |
|
758
|
|
|
|
|
|
|
$s, $p, @o, |
|
759
|
|
|
|
|
|
|
($self->{options}{graph} ? $g : undef)); |
|
760
|
|
|
|
|
|
|
} |
|
761
|
|
|
|
|
|
|
}); |
|
762
|
|
|
|
|
|
|
}; |
|
763
|
|
|
|
|
|
|
|
|
764
|
|
|
|
|
|
|
$self->_log_error( |
|
765
|
|
|
|
|
|
|
ERR_ERROR, |
|
766
|
|
|
|
|
|
|
ERR_CODE_RDFXML_MESS, |
|
767
|
|
|
|
|
|
|
"Could not parse embedded RDF/XML content: ${@}", |
|
768
|
|
|
|
|
|
|
element => $current_element, |
|
769
|
|
|
|
|
|
|
) if $@; |
|
770
|
|
|
|
|
|
|
|
|
771
|
|
|
|
|
|
|
$current_element->removeAttributeNS(XML_XML_NS, 'lang') |
|
772
|
|
|
|
|
|
|
if ($fake_lang); |
|
773
|
|
|
|
|
|
|
|
|
774
|
|
|
|
|
|
|
return 1; |
|
775
|
|
|
|
|
|
|
} |
|
776
|
|
|
|
|
|
|
elsif ($current_element->localname eq 'RDF' |
|
777
|
|
|
|
|
|
|
and $current_element->namespaceURI eq 'http://www.w3.org/1999/02/22-rdf-syntax-ns#') |
|
778
|
|
|
|
|
|
|
{ |
|
779
|
|
|
|
|
|
|
$self->_log_error( |
|
780
|
|
|
|
|
|
|
ERR_WARNING, |
|
781
|
|
|
|
|
|
|
ERR_CODE_RDFXML_MUDDLE, |
|
782
|
|
|
|
|
|
|
'Encountered embedded RDF/XML content, but not configured to parse or skip it.', |
|
783
|
|
|
|
|
|
|
element => $current_element, |
|
784
|
|
|
|
|
|
|
); |
|
785
|
|
|
|
|
|
|
} |
|
786
|
|
|
|
|
|
|
|
|
787
|
|
|
|
|
|
|
# Next the [current element] is parsed for [URI mapping]s and these are |
|
788
|
|
|
|
|
|
|
# added to the [local list of URI mappings]. Note that a [URI mapping] |
|
789
|
|
|
|
|
|
|
# will simply overwrite any current mapping in the list that has the same |
|
790
|
|
|
|
|
|
|
# name |
|
791
|
|
|
|
|
|
|
# |
|
792
|
|
|
|
|
|
|
# Mappings are provided by @xmlns. The value to be mapped is set by |
|
793
|
|
|
|
|
|
|
# the XML namespace prefix, and the value to map is the value of the |
|
794
|
|
|
|
|
|
|
# attribute - a URI. Note that the URI is not processed in any way; |
|
795
|
|
|
|
|
|
|
# in particular if it is a relative path it is not resolved against |
|
796
|
|
|
|
|
|
|
# the current [base]. Authors are advised to follow best practice |
|
797
|
|
|
|
|
|
|
# for using namespaces, which includes not using relative paths. |
|
798
|
|
|
|
|
|
|
if ($self->{'options'}->{'xmlns_attr'}) |
|
799
|
|
|
|
|
|
|
{ |
|
800
|
|
|
|
|
|
|
foreach my $A ($current_element->getAttributes) |
|
801
|
|
|
|
|
|
|
{ |
|
802
|
|
|
|
|
|
|
my $attr = $A->getName; |
|
803
|
|
|
|
|
|
|
|
|
804
|
|
|
|
|
|
|
if ($attr =~ /^xmlns\:(.+)$/i) |
|
805
|
|
|
|
|
|
|
{ |
|
806
|
|
|
|
|
|
|
my $pfx = $self->{'options'}->{'prefix_nocase_xmlns'} ? (lc $1) : $1; |
|
807
|
|
|
|
|
|
|
my $cls = $self->{'options'}->{'prefix_nocase_xmlns'} ? 'insensitive' : 'sensitive'; |
|
808
|
|
|
|
|
|
|
my $uri = $A->getValue; |
|
809
|
|
|
|
|
|
|
|
|
810
|
|
|
|
|
|
|
if ($pfx =~ /^(xml|xmlns|_)$/i) |
|
811
|
|
|
|
|
|
|
{ |
|
812
|
|
|
|
|
|
|
$self->_log_error( |
|
813
|
|
|
|
|
|
|
ERR_ERROR, |
|
814
|
|
|
|
|
|
|
ERR_CODE_PREFIX_BUILTIN, |
|
815
|
|
|
|
|
|
|
"Attempt to redefine built-in CURIE prefix '$pfx' not allowed.", |
|
816
|
|
|
|
|
|
|
element => $current_element, |
|
817
|
|
|
|
|
|
|
prefix => $pfx, |
|
818
|
|
|
|
|
|
|
uri => $uri, |
|
819
|
|
|
|
|
|
|
); |
|
820
|
|
|
|
|
|
|
} |
|
821
|
|
|
|
|
|
|
elsif ($pfx !~ /^($XML::RegExp::NCName)$/) |
|
822
|
|
|
|
|
|
|
{ |
|
823
|
|
|
|
|
|
|
$self->_log_error( |
|
824
|
|
|
|
|
|
|
ERR_ERROR, |
|
825
|
|
|
|
|
|
|
ERR_CODE_PREFIX_ILLEGAL, |
|
826
|
|
|
|
|
|
|
"Attempt to define non-NCName CURIE prefix '$pfx' not allowed.", |
|
827
|
|
|
|
|
|
|
element => $current_element, |
|
828
|
|
|
|
|
|
|
prefix => $pfx, |
|
829
|
|
|
|
|
|
|
uri => $uri, |
|
830
|
|
|
|
|
|
|
); |
|
831
|
|
|
|
|
|
|
} |
|
832
|
|
|
|
|
|
|
elsif ($uri eq XML_XML_NS || $uri eq XML_XMLNS_NS) |
|
833
|
|
|
|
|
|
|
{ |
|
834
|
|
|
|
|
|
|
$self->_log_error( |
|
835
|
|
|
|
|
|
|
ERR_ERROR, |
|
836
|
|
|
|
|
|
|
ERR_CODE_PREFIX_BUILTIN, |
|
837
|
|
|
|
|
|
|
"Attempt to define any CURIE prefix for '$uri' not allowed using \@xmlns.", |
|
838
|
|
|
|
|
|
|
element => $current_element, |
|
839
|
|
|
|
|
|
|
prefix => $pfx, |
|
840
|
|
|
|
|
|
|
uri => $uri, |
|
841
|
|
|
|
|
|
|
); |
|
842
|
|
|
|
|
|
|
} |
|
843
|
|
|
|
|
|
|
else |
|
844
|
|
|
|
|
|
|
{ |
|
845
|
|
|
|
|
|
|
$self->{'sub'}->{'onprefix'}($self, $current_element, $pfx, $uri, $cls) |
|
846
|
|
|
|
|
|
|
if defined $self->{'sub'}->{'onprefix'}; |
|
847
|
|
|
|
|
|
|
|
|
848
|
|
|
|
|
|
|
$local_uri_mappings->{$cls}->{$pfx} = $uri; |
|
849
|
|
|
|
|
|
|
} |
|
850
|
|
|
|
|
|
|
} |
|
851
|
|
|
|
|
|
|
} |
|
852
|
|
|
|
|
|
|
} |
|
853
|
|
|
|
|
|
|
|
|
854
|
|
|
|
|
|
|
# RDFa 1.1 - @prefix support. |
|
855
|
|
|
|
|
|
|
# Note that this overwrites @xmlns:foo. |
|
856
|
|
|
|
|
|
|
if ($self->{'options'}->{'prefix_attr'} |
|
857
|
|
|
|
|
|
|
&& $current_element->hasAttributeNsSafe($rdfans, 'prefix')) |
|
858
|
|
|
|
|
|
|
{ |
|
859
|
|
|
|
|
|
|
my $pfx_attr = $current_element->getAttributeNsSafe($rdfans, 'prefix') . ' '; |
|
860
|
|
|
|
|
|
|
my @bits = split /[\s\r\n]+/, $pfx_attr; |
|
861
|
|
|
|
|
|
|
while (@bits) |
|
862
|
|
|
|
|
|
|
{ |
|
863
|
|
|
|
|
|
|
my ($bit1, $bit2, @rest) = @bits; |
|
864
|
|
|
|
|
|
|
@bits = @rest; |
|
865
|
|
|
|
|
|
|
$bit1 =~ s/:$//; |
|
866
|
|
|
|
|
|
|
|
|
867
|
|
|
|
|
|
|
my $pfx = $self->{'options'}->{'prefix_nocase_attr'} ? (lc $bit1) : $bit1; |
|
868
|
|
|
|
|
|
|
my $cls = $self->{'options'}->{'prefix_nocase_attr'} ? 'insensitive' : 'sensitive'; |
|
869
|
|
|
|
|
|
|
my $uri = $bit2; |
|
870
|
|
|
|
|
|
|
|
|
871
|
|
|
|
|
|
|
unless ($pfx =~ /^$XML::RegExp::NCName$/) |
|
872
|
|
|
|
|
|
|
{ |
|
873
|
|
|
|
|
|
|
$self->_log_error( |
|
874
|
|
|
|
|
|
|
ERR_ERROR, |
|
875
|
|
|
|
|
|
|
ERR_CODE_PREFIX_ILLEGAL, |
|
876
|
|
|
|
|
|
|
"Attempt to define non-NCName CURIE prefix '$pfx' not allowed.", |
|
877
|
|
|
|
|
|
|
element => $current_element, |
|
878
|
|
|
|
|
|
|
prefix => $pfx, |
|
879
|
|
|
|
|
|
|
uri => $uri, |
|
880
|
|
|
|
|
|
|
); |
|
881
|
|
|
|
|
|
|
next; |
|
882
|
|
|
|
|
|
|
} |
|
883
|
|
|
|
|
|
|
|
|
884
|
|
|
|
|
|
|
$self->{'sub'}->{'onprefix'}($self, $current_element, $pfx, $uri, $cls) |
|
885
|
|
|
|
|
|
|
if defined $self->{'sub'}->{'onprefix'}; |
|
886
|
|
|
|
|
|
|
$local_uri_mappings->{$cls}->{$pfx} = $uri; |
|
887
|
|
|
|
|
|
|
} |
|
888
|
|
|
|
|
|
|
} |
|
889
|
|
|
|
|
|
|
elsif ($current_element->hasAttributeNsSafe($rdfans, 'prefix')) |
|
890
|
|
|
|
|
|
|
{ |
|
891
|
|
|
|
|
|
|
$self->_log_error( |
|
892
|
|
|
|
|
|
|
ERR_WARNING, |
|
893
|
|
|
|
|
|
|
ERR_CODE_PREFIX_DISABLED, |
|
894
|
|
|
|
|
|
|
"\@prefix found, but support disabled.", |
|
895
|
|
|
|
|
|
|
element => $current_element, |
|
896
|
|
|
|
|
|
|
); |
|
897
|
|
|
|
|
|
|
} |
|
898
|
|
|
|
|
|
|
|
|
899
|
|
|
|
|
|
|
# RDFa 1.1 - @vocab support |
|
900
|
|
|
|
|
|
|
if ($self->{options}{vocab_attr} |
|
901
|
|
|
|
|
|
|
&& $current_element->hasAttributeNsSafe($rdfans, 'vocab')) |
|
902
|
|
|
|
|
|
|
{ |
|
903
|
|
|
|
|
|
|
if ($current_element->getAttributeNsSafe($rdfans, 'vocab') eq '') |
|
904
|
|
|
|
|
|
|
{ |
|
905
|
|
|
|
|
|
|
$local_uri_mappings->{'(VOCAB)'} = $self->{options}{vocab_default}; |
|
906
|
|
|
|
|
|
|
} |
|
907
|
|
|
|
|
|
|
else |
|
908
|
|
|
|
|
|
|
{ |
|
909
|
|
|
|
|
|
|
$local_uri_mappings->{'(VOCAB)'} = $self->uri( |
|
910
|
|
|
|
|
|
|
$current_element->getAttributeNsSafe($rdfans, 'vocab'), |
|
911
|
|
|
|
|
|
|
{'element'=>$current_element,'xml_base'=>$xml_base}); |
|
912
|
|
|
|
|
|
|
} |
|
913
|
|
|
|
|
|
|
} |
|
914
|
|
|
|
|
|
|
elsif ($current_element->hasAttributeNsSafe($rdfans, 'vocab')) |
|
915
|
|
|
|
|
|
|
{ |
|
916
|
|
|
|
|
|
|
$self->_log_error( |
|
917
|
|
|
|
|
|
|
ERR_WARNING, |
|
918
|
|
|
|
|
|
|
ERR_CODE_VOCAB_DISABLED, |
|
919
|
|
|
|
|
|
|
"\@vocab found, but support disabled.", |
|
920
|
|
|
|
|
|
|
element => $current_element, |
|
921
|
|
|
|
|
|
|
uri => $self->uri( |
|
922
|
|
|
|
|
|
|
$current_element->getAttributeNsSafe($rdfans, 'vocab'), |
|
923
|
|
|
|
|
|
|
{'element'=>$current_element,'xml_base'=>$xml_base}), |
|
924
|
|
|
|
|
|
|
); |
|
925
|
|
|
|
|
|
|
} |
|
926
|
|
|
|
|
|
|
|
|
927
|
|
|
|
|
|
|
# EXTENSION |
|
928
|
|
|
|
|
|
|
# KjetilK's named graphs. |
|
929
|
|
|
|
|
|
|
if ($self->{'options'}->{'graph'}) |
|
930
|
|
|
|
|
|
|
{ |
|
931
|
|
|
|
|
|
|
my ($xmlns, $attr) = ($self->{'options'}->{'graph_attr'} =~ /^(?:\{(.+)\})?(.+)$/); |
|
932
|
|
|
|
|
|
|
unless ($attr) |
|
933
|
|
|
|
|
|
|
{ |
|
934
|
|
|
|
|
|
|
$xmlns = $rdfans; |
|
935
|
|
|
|
|
|
|
$attr = 'graph'; |
|
936
|
|
|
|
|
|
|
} |
|
937
|
|
|
|
|
|
|
|
|
938
|
|
|
|
|
|
|
if ($self->{'options'}->{'graph_type'} eq 'id' |
|
939
|
|
|
|
|
|
|
&& $current_element->hasAttributeNsSafe($xmlns, $attr)) |
|
940
|
|
|
|
|
|
|
{ |
|
941
|
|
|
|
|
|
|
$graph = $self->uri('#' . $current_element->getAttributeNsSafe($xmlns, $attr), |
|
942
|
|
|
|
|
|
|
{'element'=>$current_element,'xml_base'=>$hrefsrc_base}); |
|
943
|
|
|
|
|
|
|
} |
|
944
|
|
|
|
|
|
|
elsif ($self->{'options'}->{'graph_type'} eq 'about' |
|
945
|
|
|
|
|
|
|
&& $current_element->hasAttributeNsSafe($xmlns, $attr)) |
|
946
|
|
|
|
|
|
|
{ |
|
947
|
|
|
|
|
|
|
$graph = $self->_expand_curie( |
|
948
|
|
|
|
|
|
|
$current_element->getAttributeNsSafe($xmlns, $attr), |
|
949
|
|
|
|
|
|
|
element => $current_element, |
|
950
|
|
|
|
|
|
|
attribute => 'graph', |
|
951
|
|
|
|
|
|
|
prefixes => $local_uri_mappings, |
|
952
|
|
|
|
|
|
|
terms => $local_term_mappings, |
|
953
|
|
|
|
|
|
|
xml_base => $xml_base, |
|
954
|
|
|
|
|
|
|
); |
|
955
|
|
|
|
|
|
|
$graph = $self->{'options'}->{'graph_default'} |
|
956
|
|
|
|
|
|
|
unless defined $graph; |
|
957
|
|
|
|
|
|
|
} |
|
958
|
|
|
|
|
|
|
} |
|
959
|
|
|
|
|
|
|
|
|
960
|
|
|
|
|
|
|
if ($self->{options}{vocab_triple} |
|
961
|
|
|
|
|
|
|
and $self->{options}{vocab_attr} |
|
962
|
|
|
|
|
|
|
and $current_element->hasAttributeNsSafe($rdfans, 'vocab') |
|
963
|
|
|
|
|
|
|
and defined $local_uri_mappings->{'(VOCAB)'}) |
|
964
|
|
|
|
|
|
|
{ |
|
965
|
|
|
|
|
|
|
$self->_insert_triple_resource({ |
|
966
|
|
|
|
|
|
|
current => $current_element, |
|
967
|
|
|
|
|
|
|
subject => $current_element->ownerDocument->documentElement, |
|
968
|
|
|
|
|
|
|
predicate => $current_element, |
|
969
|
|
|
|
|
|
|
object => $current_element, |
|
970
|
|
|
|
|
|
|
graph => $graph_elem, |
|
971
|
|
|
|
|
|
|
}, |
|
972
|
|
|
|
|
|
|
$base, |
|
973
|
|
|
|
|
|
|
'http://www.w3.org/ns/rdfa#usesVocabulary', |
|
974
|
|
|
|
|
|
|
$local_uri_mappings->{'(VOCAB)'}, |
|
975
|
|
|
|
|
|
|
$graph); |
|
976
|
|
|
|
|
|
|
} |
|
977
|
|
|
|
|
|
|
|
|
978
|
|
|
|
|
|
|
# EXTENSION: @role |
|
979
|
|
|
|
|
|
|
if ($self->{'options'}->{'role_attr'} |
|
980
|
|
|
|
|
|
|
&& $current_element->hasAttributeNsSafe($rdfans, 'role')) |
|
981
|
|
|
|
|
|
|
{ |
|
982
|
|
|
|
|
|
|
my @role = $self->_split_tokens( $current_element->getAttributeNsSafe($rdfans, 'role') ); |
|
983
|
|
|
|
|
|
|
my @ROLE = map { |
|
984
|
|
|
|
|
|
|
my $x = $self->_expand_curie( |
|
985
|
|
|
|
|
|
|
$_, |
|
986
|
|
|
|
|
|
|
element => $current_element, |
|
987
|
|
|
|
|
|
|
attribute => 'role', |
|
988
|
|
|
|
|
|
|
prefixes => $local_uri_mappings, |
|
989
|
|
|
|
|
|
|
terms => $local_term_mappings, |
|
990
|
|
|
|
|
|
|
xml_base => $xml_base, |
|
991
|
|
|
|
|
|
|
); |
|
992
|
|
|
|
|
|
|
defined $x ? ($x) : (); |
|
993
|
|
|
|
|
|
|
} @role; |
|
994
|
|
|
|
|
|
|
if (@ROLE) |
|
995
|
|
|
|
|
|
|
{ |
|
996
|
|
|
|
|
|
|
if ($current_element->hasAttribute('id') |
|
997
|
|
|
|
|
|
|
and !defined $self->{element_subjects}->{$current_element->nodePath}) |
|
998
|
|
|
|
|
|
|
{ |
|
999
|
|
|
|
|
|
|
$self->{element_subjects}->{$current_element->nodePath} = $self->uri(sprintf('#%s', |
|
1000
|
|
|
|
|
|
|
$current_element->getAttribute('id')), |
|
1001
|
|
|
|
|
|
|
{'element'=>$current_element,'xml_base'=>$hrefsrc_base}); |
|
1002
|
|
|
|
|
|
|
} |
|
1003
|
|
|
|
|
|
|
elsif (!defined $self->{element_subjects}->{$current_element->nodePath}) |
|
1004
|
|
|
|
|
|
|
{ |
|
1005
|
|
|
|
|
|
|
$self->{element_subjects}->{$current_element->nodePath} = $self->bnode; |
|
1006
|
|
|
|
|
|
|
} |
|
1007
|
|
|
|
|
|
|
|
|
1008
|
|
|
|
|
|
|
foreach my $r (@ROLE) |
|
1009
|
|
|
|
|
|
|
{ |
|
1010
|
|
|
|
|
|
|
my $E = { |
|
1011
|
|
|
|
|
|
|
current => $current_element, |
|
1012
|
|
|
|
|
|
|
subject => $current_element, |
|
1013
|
|
|
|
|
|
|
predicate => $current_element, |
|
1014
|
|
|
|
|
|
|
object => $current_element, |
|
1015
|
|
|
|
|
|
|
graph => $graph_elem, |
|
1016
|
|
|
|
|
|
|
}; |
|
1017
|
|
|
|
|
|
|
$self->_insert_triple_resource($E, $self->{element_subjects}->{$current_element->nodePath}, 'http://www.w3.org/1999/xhtml/vocab#role', $r, $graph); |
|
1018
|
|
|
|
|
|
|
} |
|
1019
|
|
|
|
|
|
|
} |
|
1020
|
|
|
|
|
|
|
} |
|
1021
|
|
|
|
|
|
|
|
|
1022
|
|
|
|
|
|
|
# EXTENSION: @cite |
|
1023
|
|
|
|
|
|
|
if ($self->{'options'}->{'cite_attr'} |
|
1024
|
|
|
|
|
|
|
&& $current_element->hasAttributeNsSafe($rdfans, 'cite')) |
|
1025
|
|
|
|
|
|
|
{ |
|
1026
|
|
|
|
|
|
|
my $citation = $self->uri( |
|
1027
|
|
|
|
|
|
|
$current_element->getAttributeNsSafe($rdfans, 'cite'), |
|
1028
|
|
|
|
|
|
|
{'element'=>$current_element,'xml_base'=>$hrefsrc_base} |
|
1029
|
|
|
|
|
|
|
); |
|
1030
|
|
|
|
|
|
|
if (defined $citation) |
|
1031
|
|
|
|
|
|
|
{ |
|
1032
|
|
|
|
|
|
|
if ($current_element->hasAttribute('id') |
|
1033
|
|
|
|
|
|
|
and !defined $self->{element_subjects}->{$current_element->nodePath}) |
|
1034
|
|
|
|
|
|
|
{ |
|
1035
|
|
|
|
|
|
|
$self->{element_subjects}->{$current_element->nodePath} = $self->uri(sprintf('#%s', |
|
1036
|
|
|
|
|
|
|
$current_element->getAttribute('id')), |
|
1037
|
|
|
|
|
|
|
{'element'=>$current_element,'xml_base'=>$hrefsrc_base}); |
|
1038
|
|
|
|
|
|
|
} |
|
1039
|
|
|
|
|
|
|
elsif (!defined $self->{element_subjects}->{$current_element->nodePath}) |
|
1040
|
|
|
|
|
|
|
{ |
|
1041
|
|
|
|
|
|
|
$self->{element_subjects}->{$current_element->nodePath} = $self->bnode; |
|
1042
|
|
|
|
|
|
|
} |
|
1043
|
|
|
|
|
|
|
|
|
1044
|
|
|
|
|
|
|
my $E = { |
|
1045
|
|
|
|
|
|
|
current => $current_element, |
|
1046
|
|
|
|
|
|
|
subject => $current_element, |
|
1047
|
|
|
|
|
|
|
predicate => $current_element, |
|
1048
|
|
|
|
|
|
|
object => $current_element, |
|
1049
|
|
|
|
|
|
|
graph => $graph_elem, |
|
1050
|
|
|
|
|
|
|
}; |
|
1051
|
|
|
|
|
|
|
$self->_insert_triple_resource($E, $self->{element_subjects}->{$current_element->nodePath}, 'http://www.w3.org/1999/xhtml/vocab#cite', $citation, $graph); |
|
1052
|
|
|
|
|
|
|
} |
|
1053
|
|
|
|
|
|
|
} |
|
1054
|
|
|
|
|
|
|
|
|
1055
|
|
|
|
|
|
|
my @rel = $self->_split_tokens( $current_element->getAttributeNsSafe($rdfans, 'rel') ); |
|
1056
|
|
|
|
|
|
|
my @rev = $self->_split_tokens( $current_element->getAttributeNsSafe($rdfans, 'rev') ); |
|
1057
|
|
|
|
|
|
|
|
|
1058
|
|
|
|
|
|
|
# EXTENSION: rel="alternate stylesheet" |
|
1059
|
|
|
|
|
|
|
if ($self->{options}{alt_stylesheet} |
|
1060
|
|
|
|
|
|
|
&& (grep /^alternate$/i, @rel) |
|
1061
|
|
|
|
|
|
|
&& (grep /^stylesheet$/i, @rel)) |
|
1062
|
|
|
|
|
|
|
{ |
|
1063
|
|
|
|
|
|
|
@rel = grep !/^(alternate|stylesheet)$/i, @rel; |
|
1064
|
|
|
|
|
|
|
push @rel, ':ALTERNATE-STYLESHEET'; |
|
1065
|
|
|
|
|
|
|
} |
|
1066
|
|
|
|
|
|
|
|
|
1067
|
|
|
|
|
|
|
my @REL = map { |
|
1068
|
|
|
|
|
|
|
my $x = $self->_expand_curie( |
|
1069
|
|
|
|
|
|
|
$_, |
|
1070
|
|
|
|
|
|
|
element => $current_element, |
|
1071
|
|
|
|
|
|
|
attribute => 'rel', |
|
1072
|
|
|
|
|
|
|
prefixes => $local_uri_mappings, |
|
1073
|
|
|
|
|
|
|
terms => $local_term_mappings, |
|
1074
|
|
|
|
|
|
|
xml_base => $xml_base, |
|
1075
|
|
|
|
|
|
|
); |
|
1076
|
|
|
|
|
|
|
defined $x ? ($x) : (); |
|
1077
|
|
|
|
|
|
|
} @rel; |
|
1078
|
|
|
|
|
|
|
my @REV = map { |
|
1079
|
|
|
|
|
|
|
my $x = $self->_expand_curie( |
|
1080
|
|
|
|
|
|
|
$_, |
|
1081
|
|
|
|
|
|
|
element => $current_element, |
|
1082
|
|
|
|
|
|
|
attribute => 'rev', |
|
1083
|
|
|
|
|
|
|
prefixes => $local_uri_mappings, |
|
1084
|
|
|
|
|
|
|
terms => $local_term_mappings, |
|
1085
|
|
|
|
|
|
|
xml_base => $xml_base, |
|
1086
|
|
|
|
|
|
|
); |
|
1087
|
|
|
|
|
|
|
defined $x ? ($x) : (); |
|
1088
|
|
|
|
|
|
|
} @rev; |
|
1089
|
|
|
|
|
|
|
|
|
1090
|
|
|
|
|
|
|
my $NEW_SUBJECT_ATTR_ABOUT = sub
{
    # Candidate rule: take the subject from @about, expanded through
    # _expand_curie with the current prefix and term mappings.
    # Yields (subject, element), or nothing when @about is absent.
    return
        unless $current_element->hasAttributeNsSafe($rdfans, 'about');

    my $subject = $self->_expand_curie(
        $current_element->getAttributeNsSafe($rdfans, 'about'),
        element   => $current_element,
        attribute => 'about',
        prefixes  => $local_uri_mappings,
        terms     => $local_term_mappings,
        xml_base  => $xml_base,
    );

    return ($subject, $current_element);
};
|
1107
|
|
|
|
|
|
|
|
|
1108
|
|
|
|
|
|
|
my $NEW_SUBJECT_ATTR_SRC = sub
{
    # Candidate rule: take the subject from @src, passed through $self->uri
    # with $hrefsrc_base supplied as the xml_base.
    # Yields (subject, element), or nothing when @src is absent.
    return
        unless $current_element->hasAttributeNsSafe($rdfans, 'src');

    my $resolved = $self->uri(
        $current_element->getAttributeNsSafe($rdfans, 'src'),
        {
            element  => $current_element,
            xml_base => $hrefsrc_base,
        },
    );

    return ($resolved, $current_element);
};
|
1121
|
|
|
|
|
|
|
|
|
1122
|
|
|
|
|
|
|
my $NEW_SUBJECT_DEFAULTS = sub
{
    # Fallback rules for choosing a subject when no attribute supplied one.
    # Yields a (subject, element) pair, or nothing when no rule applies.

    # The document's root element behaves as if it carried an empty @about.
    if ($current_element == $current_element->ownerDocument->documentElement)
    {
        return ($self->uri(undef, {'element'=>$current_element,'xml_base'=>$hrefsrc_base}), $current_element);
    }

    # The element checks below pair namespaceURI with localname rather than
    # tagName: tagName is the qualified name, so a prefixed element such as
    # <h:body> or <atom:entry> would never compare equal to the bare name
    # even though its namespace matches.

    # if the element is the head or body element then act as if
    # there is an empty @about present, and process it according to
    # the rule for @about, above;
    if ($self->{options}{xhtml_elements}
    && ($current_element->namespaceURI eq 'http://www.w3.org/1999/xhtml')
    && ($current_element->localname eq 'head' || $current_element->localname eq 'body'))
    {
        # When the xhtml_elements option equals 2 the parent object is
        # handed back in place of the document URI.
        return ($parent_object, $parent_object_elem)
            if $self->{options}{xhtml_elements}==2;
        return ($self->uri(undef, {'element'=>$current_element,'xml_base'=>$hrefsrc_base}), $current_element);
    }

    # EXTENSION: atom elements
    if ($self->{options}{atom_elements}
    && ($current_element->namespaceURI eq 'http://www.w3.org/2005/Atom')
    && ($current_element->localname eq 'feed' || $current_element->localname eq 'entry'))
    {
        return ($self->_atom_magic($current_element), $current_element);
    }

    return;
};
|
1151
|
|
|
|
|
|
|
|
|
1152
|
|
|
|
|
|
|
my $NEW_SUBJECT_INHERIT = sub
{
    # Last-resort rule: inherit the subject from the parent object.
    # When called with a true argument and the element has no @property,
    # the enclosing $skip_element flag is raised as a side effect.
    # Yields (parent object, its element), or nothing if there is no
    # parent object.
    my ($may_skip) = @_;

    if ($may_skip
    and !$current_element->hasAttributeNsSafe($rdfans, 'property'))
    {
        $skip_element = 1;
    }

    return
        unless $parent_object;

    return ($parent_object, $parent_object_elem);
};
|
1161
|
|
|
|
|
|
|
|
|
1162
|
|
|
|
|
|
|
my $NEW_SUBJECT_ATTR_RESOURCE = sub
{
    # Candidate rule: take the resource from @resource, expanded through
    # _expand_curie with the current prefix and term mappings.
    # Yields (resource, element), or nothing when @resource is absent.
    return
        unless $current_element->hasAttributeNsSafe($rdfans, 'resource');

    my $resource = $self->_expand_curie(
        $current_element->getAttributeNsSafe($rdfans, 'resource'),
        element   => $current_element,
        attribute => 'resource',
        prefixes  => $local_uri_mappings,
        terms     => $local_term_mappings,
        xml_base  => $xml_base,
    );

    return ($resource, $current_element);
};
|
1178
|
|
|
|
|
|
|
|
|
1179
|
|
|
|
|
|
|
my $NEW_SUBJECT_ATTR_HREF = sub
{
    # Candidate rule: take the resource from @href, passed through
    # $self->uri with $hrefsrc_base supplied as the xml_base.
    # Yields (resource, element), or nothing when @href is absent.
    return
        unless $current_element->hasAttributeNsSafe($rdfans, 'href');

    my $target = $self->uri(
        $current_element->getAttributeNsSafe($rdfans, 'href'),
        {
            element  => $current_element,
            xml_base => $hrefsrc_base,
        },
    );

    return ($target, $current_element);
};
|
1191
|
|
|
|
|
|
|
|
|
1192
|
|
|
|
|
|
|
my $NEW_SUBJECT_ATTR_TYPEOF = sub
{
    # Candidate rule: an element carrying @typeof (or the deprecated
    # @instanceof) gets a blank node requested via bnode() for the element.
    # Yields (bnode, element), or nothing when neither attribute is present.
    unless ($current_element->hasAttributeNsSafe($rdfans, 'typeof'))
    {
        return
            unless $current_element->hasAttributeNsSafe($rdfans, 'instanceof');

        # Only @instanceof is present: accept it, but warn that it is
        # deprecated.
        $self->_log_error(
            ERR_WARNING,
            ERR_CODE_INSTANCEOF_USED,
            "Deprecated \@instanceof found; using it anyway.",
            element => $current_element,
        );
    }

    return ($self->bnode($current_element), $current_element);
};
|
1212
|
|
|
|
|
|
|
|
|
1213
|
|
|
|
|
|
|
# If the current element contains no @rel or @rev attribute, then the |
|
1214
|
|
|
|
|
|
|
# next step is to establish a value for new subject. This step has two |
|
1215
|
|
|
|
|
|
|
# possible alternatives. |
|
1216
|
|
|
|
|
|
|
# |
|
1217
|
|
|
|
|
|
|
# If the current element contains the @property attribute, but does not |
|
1218
|
|
|
|
|
|
|
# contain either the @content or @datatype attributes, then |
|
1219
|
|
|
|
|
|
|
# |
|
1220
|
|
|
|
|
|
|
if (!$current_element->hasAttributeNsSafe($rdfans, 'rel') |
|
1221
|
|
|
|
|
|
|
and !$current_element->hasAttributeNsSafe($rdfans, 'rev') |
|
1222
|
|
|
|
|
|
|
and $current_element->hasAttributeNsSafe($rdfans, 'property') |
|
1223
|
|
|
|
|
|
|
and !$current_element->hasAttributeNsSafe($rdfans, 'datatype') |
|
1224
|
|
|
|
|
|
|
and !$current_element->hasAttributeNsSafe($rdfans, 'content') |
|
1225
|
|
|
|
|
|
|
and $self->{options}{property_resources}) |
|
1226
|
|
|
|
|
|
|
{ |
|
1227
|
|
|
|
|
|
|
# new subject is set to the resource obtained from the first match |
|
1228
|
|
|
|
|
|
|
# from the following rule: |
|
1229
|
|
|
|
|
|
|
# |
|
1230
|
|
|
|
|
|
|
# - by using the resource from @about, if present, obtained according |
|
1231
|
|
|
|
|
|
|
# to the section on CURIE and IRI Processing; |
|
1232
|
|
|
|
|
|
|
# - otherwise, if the element is the root element of the document, then |
|
1233
|
|
|
|
|
|
|
# act as if there is an empty @about present, and process it according |
|
1234
|
|
|
|
|
|
|
# to the rule for @about, above; |
|
1235
|
|
|
|
|
|
|
# - otherwise, if parent object is present, new subject is set to the |
|
1236
|
|
|
|
|
|
|
# value of parent object. |
|
1237
|
|
|
|
|
|
|
# |
|
1238
|
|
|
|
|
|
|
# TOBYINK: we add @src to that for RDFa 1.0/1.1 mish-mashes. |
|
1239
|
|
|
|
|
|
|
# |
|
1240
|
|
|
|
|
|
|
foreach my $code ( |
|
1241
|
|
|
|
|
|
|
$NEW_SUBJECT_ATTR_ABOUT, |
|
1242
|
|
|
|
|
|
|
($NEW_SUBJECT_ATTR_SRC) x!$self->{options}{src_sets_object}, |
|
1243
|
|
|
|
|
|
|
$NEW_SUBJECT_DEFAULTS, |
|
1244
|
|
|
|
|
|
|
$NEW_SUBJECT_INHERIT, |
|
1245
|
|
|
|
|
|
|
) { |
|
1246
|
|
|
|
|
|
|
($new_subject, $new_subject_elem) = $code->() unless $new_subject; |
|
1247
|
|
|
|
|
|
|
} |
|
1248
|
|
|
|
|
|
|
|
|
1249
|
|
|
|
|
|
|
# If @typeof is present then typed resource is set to the resource |
|
1250
|
|
|
|
|
|
|
# obtained from the first match from the following rules: |
|
1251
|
|
|
|
|
|
|
# |
|
1252
|
|
|
|
|
|
|
if ($current_element->hasAttributeNsSafe($rdfans, 'typeof') |
|
1253
|
|
|
|
|
|
|
or $current_element->hasAttributeNsSafe($rdfans, 'instanceof')) |
|
1254
|
|
|
|
|
|
|
{ |
|
1255
|
|
|
|
|
|
|
# - by using the resource from @about, if present, obtained |
|
1256
|
|
|
|
|
|
|
# according to the section on CURIE and IRI Processing; |
|
1257
|
|
|
|
|
|
|
# - otherwise, if the element is the root element of the |
|
1258
|
|
|
|
|
|
|
# document, then act as if there is an empty @about present |
|
1259
|
|
|
|
|
|
|
# and process it according to the previous rule; |
|
1260
|
|
|
|
|
|
|
# |
|
1261
|
|
|
|
|
|
|
foreach my $code ( |
|
1262
|
|
|
|
|
|
|
$NEW_SUBJECT_ATTR_ABOUT, |
|
1263
|
|
|
|
|
|
|
($NEW_SUBJECT_ATTR_SRC) x!$self->{options}{src_sets_object}, |
|
1264
|
|
|
|
|
|
|
$NEW_SUBJECT_DEFAULTS, |
|
1265
|
|
|
|
|
|
|
) { |
|
1266
|
|
|
|
|
|
|
($typed_resource, $typed_resource_elem) = $code->() unless $typed_resource; |
|
1267
|
|
|
|
|
|
|
} |
|
1268
|
|
|
|
|
|
|
|
|
1269
|
|
|
|
|
|
|
# - otherwise, |
|
1270
|
|
|
|
|
|
|
unless ($typed_resource) |
|
1271
|
|
|
|
|
|
|
{ |
|
1272
|
|
|
|
|
|
|
# + by using the resource from @resource, if present, |
|
1273
|
|
|
|
|
|
|
# obtained according to the section on CURIE and IRI |
|
1274
|
|
|
|
|
|
|
# Processing; |
|
1275
|
|
|
|
|
|
|
# + otherwise, by using the IRI from @href, if present, |
|
1276
|
|
|
|
|
|
|
# obtained according to the section on CURIE and IRI |
|
1277
|
|
|
|
|
|
|
# Processing; |
|
1278
|
|
|
|
|
|
|
# + otherwise, by using the IRI from @src, if present, |
|
1279
|
|
|
|
|
|
|
# obtained according to the section on CURIE and IRI |
|
1280
|
|
|
|
|
|
|
# Processing; |
|
1281
|
|
|
|
|
|
|
# |
|
1282
|
|
|
|
|
|
|
foreach my $code ( |
|
1283
|
|
|
|
|
|
|
$NEW_SUBJECT_ATTR_RESOURCE, |
|
1284
|
|
|
|
|
|
|
$NEW_SUBJECT_ATTR_HREF, |
|
1285
|
|
|
|
|
|
|
($NEW_SUBJECT_ATTR_SRC) x!!$self->{options}{src_sets_object}, |
|
1286
|
|
|
|
|
|
|
) { |
|
1287
|
|
|
|
|
|
|
($typed_resource, $typed_resource_elem) = $code->() unless $typed_resource; |
|
1288
|
|
|
|
|
|
|
} |
|
1289
|
|
|
|
|
|
|
|
|
1290
|
|
|
|
|
|
|
# + otherwise, the value of typed resource is set to a |
|
1291
|
|
|
|
|
|
|
# newly created bnode. |
|
1292
|
|
|
|
|
|
|
# |
|
1293
|
|
|
|
|
|
|
unless ($typed_resource) |
|
1294
|
|
|
|
|
|
|
{ |
|
1295
|
|
|
|
|
|
|
($typed_resource, $typed_resource_elem) = |
|
1296
|
|
|
|
|
|
|
($self->bnode($current_element), $current_element); |
|
1297
|
|
|
|
|
|
|
} |
|
1298
|
|
|
|
|
|
|
|
|
1299
|
|
|
|
|
|
|
# + The value of the current object resource is then set |
|
1300
|
|
|
|
|
|
|
# to the value of typed resource. |
|
1301
|
|
|
|
|
|
|
# |
|
1302
|
|
|
|
|
|
|
($current_object_resource, $current_object_resource_elem) = |
|
1303
|
|
|
|
|
|
|
($typed_resource, $typed_resource_elem); |
|
1304
|
|
|
|
|
|
|
} |
|
1305
|
|
|
|
|
|
|
} |
|
1306
|
|
|
|
|
|
|
} |
|
1307
|
|
|
|
|
|
|
|
|
1308
|
|
|
|
|
|
|
# otherwise |
|
1309
|
|
|
|
|
|
|
elsif (!$current_element->hasAttributeNsSafe($rdfans, 'rel') |
|
1310
|
|
|
|
|
|
|
and !$current_element->hasAttributeNsSafe($rdfans, 'rev')) |
|
1311
|
|
|
|
|
|
|
{ |
|
1312
|
|
|
|
|
|
|
# - If the element contains an @about, @href, @src, or @resource |
|
1313
|
|
|
|
|
|
|
# attribute, new subject is set to the resource obtained as |
|
1314
|
|
|
|
|
|
|
# follows: |
|
1315
|
|
|
|
|
|
|
# + by using the resource from @about, if present, obtained |
|
1316
|
|
|
|
|
|
|
# according to the section on CURIE and IRI Processing; |
|
1317
|
|
|
|
|
|
|
# + otherwise, by using the resource from @resource, if |
|
1318
|
|
|
|
|
|
|
# present, obtained according to the section on CURIE and |
|
1319
|
|
|
|
|
|
|
# IRI Processing; |
|
1320
|
|
|
|
|
|
|
# + otherwise, by using the IRI from @href, if present, |
|
1321
|
|
|
|
|
|
|
# obtained according to the section on CURIE and IRI |
|
1322
|
|
|
|
|
|
|
# Processing; |
|
1323
|
|
|
|
|
|
|
# + otherwise, by using the IRI from @src, if present, |
|
1324
|
|
|
|
|
|
|
# obtained according to the section on CURIE and IRI |
|
1325
|
|
|
|
|
|
|
# Processing. |
|
1326
|
|
|
|
|
|
|
# - otherwise, if no resource is provided by a resource |
|
1327
|
|
|
|
|
|
|
# attribute, then the first match from the following rules |
|
1328
|
|
|
|
|
|
|
# will apply: |
|
1329
|
|
|
|
|
|
|
# + if the element is the root element of the document, |
|
1330
|
|
|
|
|
|
|
# then act as if there is an empty @about present, and |
|
1331
|
|
|
|
|
|
|
# process it according to the rule for @about, above; |
|
1332
|
|
|
|
|
|
|
# + otherwise, if @typeof is present, then new subject is |
|
1333
|
|
|
|
|
|
|
# set to be a newly created bnode; |
|
1334
|
|
|
|
|
|
|
# + otherwise, if parent object is present, new subject is |
|
1335
|
|
|
|
|
|
|
# set to the value of parent object. Additionally, if |
|
1336
|
|
|
|
|
|
|
# @property is not present then the skip element flag is |
|
1337
|
|
|
|
|
|
|
# set to 'true'. |
|
1338
|
|
|
|
|
|
|
# |
|
1339
|
|
|
|
|
|
|
my $i; |
|
1340
|
|
|
|
|
|
|
foreach my $code ( |
|
1341
|
|
|
|
|
|
|
$NEW_SUBJECT_ATTR_ABOUT, |
|
1342
|
|
|
|
|
|
|
($NEW_SUBJECT_ATTR_SRC) x!$self->{options}{src_sets_object}, |
|
1343
|
|
|
|
|
|
|
$NEW_SUBJECT_ATTR_RESOURCE, |
|
1344
|
|
|
|
|
|
|
$NEW_SUBJECT_ATTR_HREF, |
|
1345
|
|
|
|
|
|
|
($NEW_SUBJECT_ATTR_SRC) x!!$self->{options}{src_sets_object}, |
|
1346
|
|
|
|
|
|
|
$NEW_SUBJECT_DEFAULTS, |
|
1347
|
|
|
|
|
|
|
$NEW_SUBJECT_ATTR_TYPEOF, |
|
1348
|
|
|
|
|
|
|
sub { $NEW_SUBJECT_INHERIT->(1) }, |
|
1349
|
|
|
|
|
|
|
) { |
|
1350
|
|
|
|
|
|
|
last if $new_subject; |
|
1351
|
|
|
|
|
|
|
($new_subject, $new_subject_elem) = $code->(); |
|
1352
|
|
|
|
|
|
|
} |
|
1353
|
|
|
|
|
|
|
|
|
1354
|
|
|
|
|
|
|
# if ($current_element->{'x-foo'}) |
|
1355
|
|
|
|
|
|
|
# { |
|
1356
|
|
|
|
|
|
|
# use Data::Dumper; |
|
1357
|
|
|
|
|
|
|
# print Dumper \%args; |
|
1358
|
|
|
|
|
|
|
# } |
|
1359
|
|
|
|
|
|
|
|
|
1360
|
|
|
|
|
|
|
# - Finally, if @typeof is present, set the typed resource |
|
1361
|
|
|
|
|
|
|
# to the value of new subject. |
|
1362
|
|
|
|
|
|
|
# |
|
1363
|
|
|
|
|
|
|
if ($current_element->hasAttributeNsSafe($rdfans, 'typeof') |
|
1364
|
|
|
|
|
|
|
or $current_element->hasAttributeNsSafe($rdfans, 'instanceof')) |
|
1365
|
|
|
|
|
|
|
{ |
|
1366
|
|
|
|
|
|
|
($typed_resource, $typed_resource_elem) = ($new_subject, $new_subject_elem); |
|
1367
|
|
|
|
|
|
|
} |
|
1368
|
|
|
|
|
|
|
} |
|
1369
|
|
|
|
|
|
|
|
|
1370
|
|
|
|
|
|
|
# If the [current element] does contain a valid @rel or @rev URI, obtained |
|
1371
|
|
|
|
|
|
|
# according to the section on CURIE and URI Processing, then the next step |
|
1372
|
|
|
|
|
|
|
# is to establish both a value for [new subject] and a value for [current |
|
1373
|
|
|
|
|
|
|
# object resource]: |
|
1374
|
|
|
|
|
|
|
else |
|
1375
|
|
|
|
|
|
|
{ |
|
1376
|
|
|
|
|
|
|
foreach my $code ( |
|
1377
|
|
|
|
|
|
|
$NEW_SUBJECT_ATTR_ABOUT, |
|
1378
|
|
|
|
|
|
|
($NEW_SUBJECT_ATTR_SRC) x!$self->{options}{src_sets_object}, |
|
1379
|
|
|
|
|
|
|
($NEW_SUBJECT_ATTR_TYPEOF) x!$self->{options}{typeof_resources}, |
|
1380
|
|
|
|
|
|
|
$NEW_SUBJECT_DEFAULTS, |
|
1381
|
|
|
|
|
|
|
$NEW_SUBJECT_INHERIT, |
|
1382
|
|
|
|
|
|
|
) { |
|
1383
|
|
|
|
|
|
|
($new_subject, $new_subject_elem) = $code->() unless $new_subject; |
|
1384
|
|
|
|
|
|
|
} |
|
1385
|
|
|
|
|
|
|
|
|
1386
|
|
|
|
|
|
|
foreach my $code ( |
|
1387
|
|
|
|
|
|
|
$NEW_SUBJECT_ATTR_RESOURCE, |
|
1388
|
|
|
|
|
|
|
$NEW_SUBJECT_ATTR_HREF, |
|
1389
|
|
|
|
|
|
|
($NEW_SUBJECT_ATTR_SRC) x!!$self->{options}{src_sets_object}, |
|
1390
|
|
|
|
|
|
|
) { |
|
1391
|
|
|
|
|
|
|
($current_object_resource, $current_object_resource_elem) = $code->() unless $current_object_resource; |
|
1392
|
|
|
|
|
|
|
} |
|
1393
|
|
|
|
|
|
|
|
|
1394
|
|
|
|
|
|
|
if ($current_element->hasAttributeNsSafe($rdfans, 'typeof') |
|
1395
|
|
|
|
|
|
|
or $current_element->hasAttributeNsSafe($rdfans, 'instanceof')) |
|
1396
|
|
|
|
|
|
|
{ |
|
1397
|
|
|
|
|
|
|
if ($current_element->hasAttributeNsSafe($rdfans, 'about')) |
|
1398
|
|
|
|
|
|
|
{ |
|
1399
|
|
|
|
|
|
|
($typed_resource, $typed_resource_elem) = ($new_subject, $new_subject_elem); |
|
1400
|
|
|
|
|
|
|
} |
|
1401
|
|
|
|
|
|
|
elsif ($self->{options}{typeof_resources}) |
|
1402
|
|
|
|
|
|
|
{ |
|
1403
|
|
|
|
|
|
|
($current_object_resource, $current_object_resource_elem) = |
|
1404
|
|
|
|
|
|
|
($self->bnode($current_element), $current_element) |
|
1405
|
|
|
|
|
|
|
unless $current_object_resource; |
|
1406
|
|
|
|
|
|
|
|
|
1407
|
|
|
|
|
|
|
($typed_resource, $typed_resource_elem) = ($current_object_resource, $current_object_resource_elem); |
|
1408
|
|
|
|
|
|
|
} |
|
1409
|
|
|
|
|
|
|
else |
|
1410
|
|
|
|
|
|
|
{ |
|
1411
|
|
|
|
|
|
|
($typed_resource, $typed_resource_elem) = ($new_subject, $new_subject_elem); |
|
1412
|
|
|
|
|
|
|
} |
|
1413
|
|
|
|
|
|
|
} |
|
1414
|
|
|
|
|
|
|
} |
|
1415
|
|
|
|
|
|
|
|
|
1416
|
|
|
|
|
|
|
# # NOTE: x876587 |
|
1417
|
|
|
|
|
|
|
# if (!defined $new_subject |
|
1418
|
|
|
|
|
|
|
# and $current_element->nodePath eq $self->dom->documentElement->nodePath) |
|
1419
|
|
|
|
|
|
|
# { |
|
1420
|
|
|
|
|
|
|
# $new_subject = $self->uri(''); |
|
1421
|
|
|
|
|
|
|
# $new_subject_elem = $self->dom->documentElement; |
|
1422
|
|
|
|
|
|
|
# $skip_element = 1 |
|
1423
|
|
|
|
|
|
|
# unless $current_element->hasAttributeNsSafe($rdfans, 'property'); |
|
1424
|
|
|
|
|
|
|
# } |
|
1425
|
|
|
|
|
|
|
|
|
1426
|
|
|
|
|
|
|
# If in any of the previous steps a [typed resource] was set to a non-null |
|
1427
|
|
|
|
|
|
|
# value, it is now used to provide a subject for type values |
|
1428
|
|
|
|
|
|
|
if ($typed_resource |
|
1429
|
|
|
|
|
|
|
&& ( $current_element->hasAttributeNsSafe($rdfans, 'instanceof') |
|
1430
|
|
|
|
|
|
|
|| $current_element->hasAttributeNsSafe($rdfans, 'typeof'))) |
|
1431
|
|
|
|
|
|
|
{ |
|
1432
|
|
|
|
|
|
|
|
|
1433
|
|
|
|
|
|
|
if ($current_element->hasAttributeNsSafe($rdfans, 'instanceof') |
|
1434
|
|
|
|
|
|
|
&& $current_element->hasAttributeNsSafe($rdfans, 'typeof')) |
|
1435
|
|
|
|
|
|
|
{ |
|
1436
|
|
|
|
|
|
|
$self->_log_error( |
|
1437
|
|
|
|
|
|
|
ERR_WARNING, |
|
1438
|
|
|
|
|
|
|
ERR_CODE_INSTANCEOF_OVERRULED, |
|
1439
|
|
|
|
|
|
|
"Deprecated \@instanceof found; ignored because \@typeof also present.", |
|
1440
|
|
|
|
|
|
|
element => $current_element, |
|
1441
|
|
|
|
|
|
|
); |
|
1442
|
|
|
|
|
|
|
} |
|
1443
|
|
|
|
|
|
|
elsif ($current_element->hasAttributeNsSafe($rdfans, 'instanceof')) |
|
1444
|
|
|
|
|
|
|
{ |
|
1445
|
|
|
|
|
|
|
$self->_log_error( |
|
1446
|
|
|
|
|
|
|
ERR_WARNING, |
|
1447
|
|
|
|
|
|
|
ERR_CODE_INSTANCEOF_USED, |
|
1448
|
|
|
|
|
|
|
"Deprecated \@instanceof found; using it anyway.", |
|
1449
|
|
|
|
|
|
|
element => $current_element, |
|
1450
|
|
|
|
|
|
|
); |
|
1451
|
|
|
|
|
|
|
} |
|
1452
|
|
|
|
|
|
|
|
|
1453
|
|
|
|
|
|
|
# One or more 'types' for the [ new subject ] can be set by using |
|
1454
|
|
|
|
|
|
|
# @typeof (or the deprecated @instanceof). If present, the attribute must contain one or more |
|
1455
|
|
|
|
|
|
|
# URIs, obtained according to the section on URI and CURIE Processing... |
|
1456
|
|
|
|
|
|
|
|
|
1457
|
|
|
|
|
|
|
my @instanceof = $self->_split_tokens( $current_element->getAttributeNsSafe($rdfans, 'typeof') |
|
1458
|
|
|
|
|
|
|
|| $current_element->getAttributeNsSafe($rdfans, 'instanceof') ); |
|
1459
|
|
|
|
|
|
|
|
|
1460
|
|
|
|
|
|
|
foreach my $curie (@instanceof) |
|
1461
|
|
|
|
|
|
|
{ |
|
1462
|
|
|
|
|
|
|
my $rdftype = $self->_expand_curie( |
|
1463
|
|
|
|
|
|
|
$curie, |
|
1464
|
|
|
|
|
|
|
element => $current_element, |
|
1465
|
|
|
|
|
|
|
attribute => 'typeof', |
|
1466
|
|
|
|
|
|
|
prefixes => $local_uri_mappings, |
|
1467
|
|
|
|
|
|
|
terms => $local_term_mappings, |
|
1468
|
|
|
|
|
|
|
xml_base => $xml_base, |
|
1469
|
|
|
|
|
|
|
); |
|
1470
|
|
|
|
|
|
|
next unless defined $rdftype; |
|
1471
|
|
|
|
|
|
|
|
|
1472
|
|
|
|
|
|
|
# ... each of which is used to generate a triple as follows: |
|
1473
|
|
|
|
|
|
|
# |
|
1474
|
|
|
|
|
|
|
# subject |
|
1475
|
|
|
|
|
|
|
# [new subject] |
|
1476
|
|
|
|
|
|
|
# predicate |
|
1477
|
|
|
|
|
|
|
# http://www.w3.org/1999/02/22-rdf-syntax-ns#type |
|
1478
|
|
|
|
|
|
|
# object |
|
1479
|
|
|
|
|
|
|
# full URI of 'type' |
|
1480
|
|
|
|
|
|
|
|
|
1481
|
|
|
|
|
|
|
my $E = { # provenance tracking |
|
1482
|
|
|
|
|
|
|
current => $current_element, |
|
1483
|
|
|
|
|
|
|
subject => $typed_resource_elem, |
|
1484
|
|
|
|
|
|
|
predicate => $current_element, |
|
1485
|
|
|
|
|
|
|
object => $current_element, |
|
1486
|
|
|
|
|
|
|
graph => $graph_elem, |
|
1487
|
|
|
|
|
|
|
}; |
|
1488
|
|
|
|
|
|
|
$self->_insert_triple_resource($E, $typed_resource, RDF_TYPE, $rdftype, $graph); |
|
1489
|
|
|
|
|
|
|
$activity++; |
|
1490
|
|
|
|
|
|
|
} |
|
1491
|
|
|
|
|
|
|
} |
|
1492
|
|
|
|
|
|
|
|
|
1493
|
|
|
|
|
|
|
# EXTENSION: @longdesc |
|
1494
|
|
|
|
|
|
|
if ($self->{'options'}->{'longdesc_attr'} |
|
1495
|
|
|
|
|
|
|
&& $current_element->hasAttributeNsSafe($rdfans, 'longdesc')) |
|
1496
|
|
|
|
|
|
|
{ |
|
1497
|
|
|
|
|
|
|
my $longdesc = $self->uri( |
|
1498
|
|
|
|
|
|
|
$current_element->getAttributeNsSafe($rdfans, 'longdesc'), |
|
1499
|
|
|
|
|
|
|
{'element'=>$current_element,'xml_base'=>$hrefsrc_base} |
|
1500
|
|
|
|
|
|
|
); |
|
1501
|
|
|
|
|
|
|
if (defined $longdesc) |
|
1502
|
|
|
|
|
|
|
{ |
|
1503
|
|
|
|
|
|
|
my $E = { |
|
1504
|
|
|
|
|
|
|
current => $new_subject_elem, |
|
1505
|
|
|
|
|
|
|
subject => $current_element, |
|
1506
|
|
|
|
|
|
|
predicate => $current_element, |
|
1507
|
|
|
|
|
|
|
object => $current_element, |
|
1508
|
|
|
|
|
|
|
graph => $graph_elem, |
|
1509
|
|
|
|
|
|
|
}; |
|
1510
|
|
|
|
|
|
|
$self->_insert_triple_resource($E, $new_subject, 'http://www.w3.org/2007/05/powder-s#describedby', $longdesc, $graph); |
|
1511
|
|
|
|
|
|
|
} |
|
1512
|
|
|
|
|
|
|
} |
|
1513
|
|
|
|
|
|
|
|
|
1514
|
|
|
|
|
|
|
# If in any of the previous steps a new subject was set to a non-null value |
|
1515
|
|
|
|
|
|
|
# different from the parent object, the list mapping taken from the |
|
1516
|
|
|
|
|
|
|
# evaluation context is set to a new, empty mapping. |
|
1517
|
|
|
|
|
|
|
if (defined $new_subject |
|
1518
|
|
|
|
|
|
|
and $new_subject ne $parent_subject || !%$list_mappings) |
|
1519
|
|
|
|
|
|
|
{ |
|
1520
|
|
|
|
|
|
|
$list_mappings = { |
|
1521
|
|
|
|
|
|
|
'::meta' => { |
|
1522
|
|
|
|
|
|
|
id => Data::UUID->new->create_str, |
|
1523
|
|
|
|
|
|
|
owner => $current_element, |
|
1524
|
|
|
|
|
|
|
}, |
|
1525
|
|
|
|
|
|
|
}; |
|
1526
|
|
|
|
|
|
|
} |
|
1527
|
|
|
|
|
|
|
|
|
1528
|
|
|
|
|
|
|
# If in any of the previous steps a [current object resource] was set to |
|
1529
|
|
|
|
|
|
|
# a non-null value, it is now used to generate triples and add entries to |
|
1530
|
|
|
|
|
|
|
# the local list mapping |
|
1531
|
|
|
|
|
|
|
if ($current_object_resource) |
|
1532
|
|
|
|
|
|
|
{ |
|
1533
|
|
|
|
|
|
|
# If the element contains both the inlist and the rel attributes: the |
|
1534
|
|
|
|
|
|
|
# rel may contain one or more IRIs, obtained according to the section |
|
1535
|
|
|
|
|
|
|
# on CURIE and IRI Processing each of which is used to add an entry to |
|
1536
|
|
|
|
|
|
|
# the list mapping as follows: |
|
1537
|
|
|
|
|
|
|
if ($current_element->hasAttributeNsSafe($rdfans, 'inlist') |
|
1538
|
|
|
|
|
|
|
and $current_element->hasAttributeNsSafe($rdfans, 'rel')) |
|
1539
|
|
|
|
|
|
|
{ |
|
1540
|
|
|
|
|
|
|
foreach my $r (@REL) |
|
1541
|
|
|
|
|
|
|
{ |
|
1542
|
|
|
|
|
|
|
# if the local list mapping does not contain a list associated with |
|
1543
|
|
|
|
|
|
|
# the IRI, instantiate a new list and add to local list mappings |
|
1544
|
|
|
|
|
|
|
$list_mappings->{$r} = [] unless defined $list_mappings->{$r}; |
|
1545
|
|
|
|
|
|
|
|
|
1546
|
|
|
|
|
|
|
# add the current object resource to the list associated with the IRI |
|
1547
|
|
|
|
|
|
|
# in the local list mapping |
|
1548
|
|
|
|
|
|
|
push @{ $list_mappings->{$r} }, [resource => $current_object_resource]; |
|
1549
|
|
|
|
|
|
|
$activity++; |
|
1550
|
|
|
|
|
|
|
} |
|
1551
|
|
|
|
|
|
|
} |
|
1552
|
|
|
|
|
|
|
|
|
1553
|
|
|
|
|
|
|
# XXX:@inlist doesn't support @rev? |
|
1554
|
|
|
|
|
|
|
# |
|
1555
|
|
|
|
|
|
|
# if ($current_element->hasAttributeNsSafe($rdfans, 'inlist') |
|
1556
|
|
|
|
|
|
|
# and $current_element->hasAttributeNsSafe($rdfans, 'rev')) |
|
1557
|
|
|
|
|
|
|
# { |
|
1558
|
|
|
|
|
|
|
# foreach my $r (@REV) |
|
1559
|
|
|
|
|
|
|
# { |
|
1560
|
|
|
|
|
|
|
# # if the local list mapping does not contain a list associated with |
|
1561
|
|
|
|
|
|
|
# # the IRI, instantiate a new list and add to local list mappings |
|
1562
|
|
|
|
|
|
|
# $list_mappings->{'REV:'.$r} = [] unless defined $list_mappings->{'REV:'.$r}; |
|
1563
|
|
|
|
|
|
|
# |
|
1564
|
|
|
|
|
|
|
# # add the current object resource to the list associated with the IRI |
|
1565
|
|
|
|
|
|
|
# # in the local list mapping |
|
1566
|
|
|
|
|
|
|
# push @{ $list_mappings->{'REV:'.$r} }, [resource => $current_object_resource]; |
|
1567
|
|
|
|
|
|
|
# } |
|
1568
|
|
|
|
|
|
|
# } |
|
1569
|
|
|
|
|
|
|
|
|
1570
|
|
|
|
|
|
|
my $E = { # provenance tracking |
|
1571
|
|
|
|
|
|
|
current => $current_element, |
|
1572
|
|
|
|
|
|
|
subject => $new_subject_elem, |
|
1573
|
|
|
|
|
|
|
predicate => $current_element, |
|
1574
|
|
|
|
|
|
|
object => $current_object_resource_elem, |
|
1575
|
|
|
|
|
|
|
graph => $graph_elem, |
|
1576
|
|
|
|
|
|
|
}; |
|
1577
|
|
|
|
|
|
|
|
|
1578
|
|
|
|
|
|
|
# Predicates for the [ current object resource ] can be set by |
|
1579
|
|
|
|
|
|
|
# using one or both of the @rel and @rev attributes, but, in |
|
1580
|
|
|
|
|
|
|
# case of the @rel attribute, only if the @inlist is not present: |
|
1581
|
|
|
|
|
|
|
# |
|
1582
|
|
|
|
|
|
|
# * If present, @rel will contain one or more URIs, obtained |
|
1583
|
|
|
|
|
|
|
# according to the section on CURIE and URI Processing each |
|
1584
|
|
|
|
|
|
|
# of which is used to generate a triple as follows: |
|
1585
|
|
|
|
|
|
|
# |
|
1586
|
|
|
|
|
|
|
# subject |
|
1587
|
|
|
|
|
|
|
# [new subject] |
|
1588
|
|
|
|
|
|
|
# predicate |
|
1589
|
|
|
|
|
|
|
# full URI |
|
1590
|
|
|
|
|
|
|
# object |
|
1591
|
|
|
|
|
|
|
# [current object resource] |
|
1592
|
|
|
|
|
|
|
|
|
1593
|
|
|
|
|
|
|
unless ($current_element->hasAttributeNsSafe($rdfans, 'inlist')) |
|
1594
|
|
|
|
|
|
|
{ |
|
1595
|
|
|
|
|
|
|
foreach my $r (@REL) |
|
1596
|
|
|
|
|
|
|
{ |
|
1597
|
|
|
|
|
|
|
$self->_insert_triple_resource($E, $new_subject, $r, $current_object_resource, $graph); |
|
1598
|
|
|
|
|
|
|
$activity++; |
|
1599
|
|
|
|
|
|
|
} |
|
1600
|
|
|
|
|
|
|
} |
|
1601
|
|
|
|
|
|
|
|
|
1602
|
|
|
|
|
|
|
# * If present, @rev will contain one or more URIs, obtained |
|
1603
|
|
|
|
|
|
|
# according to the section on CURIE and URI Processing each |
|
1604
|
|
|
|
|
|
|
# of which is used to generate a triple as follows: |
|
1605
|
|
|
|
|
|
|
# |
|
1606
|
|
|
|
|
|
|
# subject |
|
1607
|
|
|
|
|
|
|
# [current object resource] |
|
1608
|
|
|
|
|
|
|
# predicate |
|
1609
|
|
|
|
|
|
|
# full URI |
|
1610
|
|
|
|
|
|
|
# object |
|
1611
|
|
|
|
|
|
|
# [new subject] |
|
1612
|
|
|
|
|
|
|
|
|
1613
|
|
|
|
|
|
|
$E = { # provenance tracking |
|
1614
|
|
|
|
|
|
|
current => $current_element, |
|
1615
|
|
|
|
|
|
|
subject => $current_object_resource_elem, |
|
1616
|
|
|
|
|
|
|
predicate => $current_element, |
|
1617
|
|
|
|
|
|
|
object => $new_subject_elem, |
|
1618
|
|
|
|
|
|
|
graph => $graph_elem, |
|
1619
|
|
|
|
|
|
|
}; |
|
1620
|
|
|
|
|
|
|
foreach my $r (@REV) |
|
1621
|
|
|
|
|
|
|
{ |
|
1622
|
|
|
|
|
|
|
$self->_insert_triple_resource($E, $current_object_resource, $r, $new_subject, $graph); |
|
1623
|
|
|
|
|
|
|
$activity++; |
|
1624
|
|
|
|
|
|
|
} |
|
1625
|
|
|
|
|
|
|
} |
|
1626
|
|
|
|
|
|
|
|
|
1627
|
|
|
|
|
|
|
# If however [current object resource] was set to null, but there are |
|
1628
|
|
|
|
|
|
|
# predicates present, then they must be stored as [incomplete triple]s, |
|
1629
|
|
|
|
|
|
|
# pending the discovery of a subject that can be used as the object. Also, |
|
1630
|
|
|
|
|
|
|
# [current object resource] should be set to a newly created [bnode] |
|
1631
|
|
|
|
|
|
|
elsif ((scalar @REL) || (scalar @REV)) |
|
1632
|
|
|
|
|
|
|
{ |
|
1633
|
|
|
|
|
|
|
# Predicates for [incomplete triple]s can be set by using one or |
|
1634
|
|
|
|
|
|
|
# both of the @rel and @rev attributes: |
|
1635
|
|
|
|
|
|
|
# |
|
1636
|
|
|
|
|
|
|
# * If present, @rel must contain one or more URIs, obtained |
|
1637
|
|
|
|
|
|
|
# according to the section on CURIE and URI Processing each |
|
1638
|
|
|
|
|
|
|
# of which is added to the [local list of incomplete triples] |
|
1639
|
|
|
|
|
|
|
# as follows: |
|
1640
|
|
|
|
|
|
|
# |
|
1641
|
|
|
|
|
|
|
# predicate |
|
1642
|
|
|
|
|
|
|
# full URI |
|
1643
|
|
|
|
|
|
|
# direction |
|
1644
|
|
|
|
|
|
|
# forward |
|
1645
|
|
|
|
|
|
|
|
|
1646
|
|
|
|
|
|
|
push @$local_incomplete_triples, |
|
1647
|
|
|
|
|
|
|
map { |
|
1648
|
|
|
|
|
|
|
$current_element->hasAttributeNsSafe($rdfans, 'inlist') |
|
1649
|
|
|
|
|
|
|
?{ |
|
1650
|
|
|
|
|
|
|
list => do { $list_mappings->{$_} = [] unless defined $list_mappings->{$_}; $list_mappings->{$_} }, |
|
1651
|
|
|
|
|
|
|
direction => 'none', |
|
1652
|
|
|
|
|
|
|
} |
|
1653
|
|
|
|
|
|
|
:{ |
|
1654
|
|
|
|
|
|
|
predicate => $_, |
|
1655
|
|
|
|
|
|
|
direction => 'forward', |
|
1656
|
|
|
|
|
|
|
graph => $graph, |
|
1657
|
|
|
|
|
|
|
predicate_element => $current_element, |
|
1658
|
|
|
|
|
|
|
graph_element => $graph_elem, |
|
1659
|
|
|
|
|
|
|
} |
|
1660
|
|
|
|
|
|
|
} @REL; |
|
1661
|
|
|
|
|
|
|
|
|
1662
|
|
|
|
|
|
|
# * If present, @rev must contain one or more URIs, obtained |
|
1663
|
|
|
|
|
|
|
# according to the section on CURIE and URI Processing, each |
|
1664
|
|
|
|
|
|
|
# of which is added to the [local list of incomplete triples] |
|
1665
|
|
|
|
|
|
|
# as follows: |
|
1666
|
|
|
|
|
|
|
# |
|
1667
|
|
|
|
|
|
|
# predicate |
|
1668
|
|
|
|
|
|
|
# full URI |
|
1669
|
|
|
|
|
|
|
# direction |
|
1670
|
|
|
|
|
|
|
# reverse |
|
1671
|
|
|
|
|
|
|
|
|
1672
|
|
|
|
|
|
|
push @$local_incomplete_triples, |
|
1673
|
|
|
|
|
|
|
map { |
|
1674
|
|
|
|
|
|
|
# $current_element->hasAttributeNsSafe($rdfans, 'inlist') |
|
1675
|
|
|
|
|
|
|
# ?{ |
|
1676
|
|
|
|
|
|
|
# list => do { $list_mappings->{'REV:'.$_} = [] unless defined $list_mappings->{'REV:'.$_}; $list_mappings->{'REV:'.$_}; }, |
|
1677
|
|
|
|
|
|
|
# direction => 'none', |
|
1678
|
|
|
|
|
|
|
# } |
|
1679
|
|
|
|
|
|
|
# :{ |
|
1680
|
|
|
|
|
|
|
+{ |
|
1681
|
|
|
|
|
|
|
predicate => $_, |
|
1682
|
|
|
|
|
|
|
direction => 'reverse', |
|
1683
|
|
|
|
|
|
|
graph => $graph, |
|
1684
|
|
|
|
|
|
|
predicate_element => $current_element, |
|
1685
|
|
|
|
|
|
|
graph_element => $graph_elem, |
|
1686
|
|
|
|
|
|
|
} |
|
1687
|
|
|
|
|
|
|
} @REV; |
|
1688
|
|
|
|
|
|
|
|
|
1689
|
|
|
|
|
|
|
$current_object_resource = $self->bnode; |
|
1690
|
|
|
|
|
|
|
$current_object_resource_elem = $current_element; |
|
1691
|
|
|
|
|
|
|
} |
|
1692
|
|
|
|
|
|
|
|
|
1693
|
|
|
|
|
|
|
# The next step of the iteration is to establish any [current |
|
1694
|
|
|
|
|
|
|
# property value] |
|
1695
|
|
|
|
|
|
|
my @current_property_value; |
|
1696
|
|
|
|
|
|
|
|
|
1697
|
|
|
|
|
|
|
my @prop = $self->_split_tokens( $current_element->getAttributeNsSafe($rdfans, 'property') ); |
|
1698
|
|
|
|
|
|
|
|
|
1699
|
|
|
|
|
|
|
my $has_datatype = 0; |
|
1700
|
|
|
|
|
|
|
my $datatype = undef; |
|
1701
|
|
|
|
|
|
|
if ($current_element->hasAttributeNsSafe($rdfans, 'datatype')) |
|
1702
|
|
|
|
|
|
|
{ |
|
1703
|
|
|
|
|
|
|
$has_datatype = 1; |
|
1704
|
|
|
|
|
|
|
$datatype = $self->_expand_curie( |
|
1705
|
|
|
|
|
|
|
$current_element->getAttributeNsSafe($rdfans, 'datatype'), |
|
1706
|
|
|
|
|
|
|
element => $current_element, |
|
1707
|
|
|
|
|
|
|
attribute => 'datatype', |
|
1708
|
|
|
|
|
|
|
prefixes => $local_uri_mappings, |
|
1709
|
|
|
|
|
|
|
terms => $local_term_mappings, |
|
1710
|
|
|
|
|
|
|
xml_base => $xml_base, |
|
1711
|
|
|
|
|
|
|
); |
|
1712
|
|
|
|
|
|
|
} |
|
1713
|
|
|
|
|
|
|
|
|
1714
|
|
|
|
|
|
|
if (@prop) |
|
1715
|
|
|
|
|
|
|
{ |
|
1716
|
|
|
|
|
|
|
# Predicates for the [current object literal] can be set by using |
|
1717
|
|
|
|
|
|
|
# @property. If present, one or more URIs are obtained according |
|
1718
|
|
|
|
|
|
|
# to the section on CURIE and URI Processing and then the actual |
|
1719
|
|
|
|
|
|
|
# literal value is obtained as follows: |
|
1720
|
|
|
|
|
|
|
|
|
1721
|
|
|
|
|
|
|
# HTML+RDFa |
|
1722
|
|
|
|
|
|
|
if ($self->{options}{datetime_attr} |
|
1723
|
|
|
|
|
|
|
and ( |
|
1724
|
|
|
|
|
|
|
$current_element->hasAttributeNsSafe($rdfans, 'datetime') |
|
1725
|
|
|
|
|
|
|
or $current_element->namespaceURI eq 'http://www.w3.org/1999/xhtml' |
|
1726
|
|
|
|
|
|
|
&& lc($current_element->tagName) eq 'time' |
|
1727
|
|
|
|
|
|
|
)) { |
|
1728
|
|
|
|
|
|
|
@current_property_value = ( |
|
1729
|
|
|
|
|
|
|
$current_element->hasAttributeNsSafe($rdfans, 'datetime') |
|
1730
|
|
|
|
|
|
|
? $current_element->getAttributeNsSafe($rdfans, 'datetime') |
|
1731
|
|
|
|
|
|
|
: $self->_element_to_string($current_element) |
|
1732
|
|
|
|
|
|
|
); |
|
1733
|
|
|
|
|
|
|
|
|
1734
|
|
|
|
|
|
|
push @current_property_value, do |
|
1735
|
|
|
|
|
|
|
{ |
|
1736
|
|
|
|
|
|
|
local $_ = $current_property_value[0]; |
|
1737
|
|
|
|
|
|
|
|
|
1738
|
|
|
|
|
|
|
if (!!$has_datatype == !!1) |
|
1739
|
|
|
|
|
|
|
{ $datatype } |
|
1740
|
|
|
|
|
|
|
elsif (/^(\-?\d{4,})-(\d{2})-(\d{2})T(\d{2}):(\d{2})(:(\d{2})(?:\.\d+)?)?(Z|(?:[\+\-]\d{2}:?\d{2}))?$/i) |
|
1741
|
|
|
|
|
|
|
{ 'http://www.w3.org/2001/XMLSchema#dateTime' } |
|
1742
|
|
|
|
|
|
|
elsif (/^(\d{2}):(\d{2})(:(\d{2})(?:\.\d+)?)?(Z|(?:[\+\-]\d{2}:?\d{2}))?$/i) |
|
1743
|
|
|
|
|
|
|
{ 'http://www.w3.org/2001/XMLSchema#time' } |
|
1744
|
|
|
|
|
|
|
elsif (/^(\-?\d{4,})-(\d{2})-(\d{2})(Z|(?:[\+\-]\d{2}:?\d{2}))?$/i) |
|
1745
|
|
|
|
|
|
|
{ 'http://www.w3.org/2001/XMLSchema#date' } |
|
1746
|
|
|
|
|
|
|
elsif (/^(\-?\d{4,})-(\d{2})(Z|(?:[\+\-]\d{2}:?\d{2}))?$/i) |
|
1747
|
|
|
|
|
|
|
{ 'http://www.w3.org/2001/XMLSchema#gYearMonth' } # XXX: not in spec! |
|
1748
|
|
|
|
|
|
|
elsif (/^(\-?\d{4,})(Z|(?:[\+\-]\d{2}:?\d{2}))?$/i) |
|
1749
|
|
|
|
|
|
|
{ 'http://www.w3.org/2001/XMLSchema#gYear' } # XXX: not in spec! |
|
1750
|
|
|
|
|
|
|
elsif (/^--(\d{2})-(\d{2})(Z|(?:[\+\-]\d{2}:?\d{2}))?$/i) |
|
1751
|
|
|
|
|
|
|
{ 'http://www.w3.org/2001/XMLSchema#gMonthDay' } # XXX: not in spec! |
|
1752
|
|
|
|
|
|
|
elsif (/^---(\d{2})(Z|(?:[\+\-]\d{2}:?\d{2}))?$/i) |
|
1753
|
|
|
|
|
|
|
{ 'http://www.w3.org/2001/XMLSchema#gDay' } # XXX: not in spec! |
|
1754
|
|
|
|
|
|
|
elsif (/^--(\d{2})(Z|(?:[\+\-]\d{2}:?\d{2}))?$/i) |
|
1755
|
|
|
|
|
|
|
{ 'http://www.w3.org/2001/XMLSchema#gMonth' } # XXX: not in spec! |
|
1756
|
|
|
|
|
|
|
elsif (/^P([\d\.]+Y)?([\d\.]+M)?([\d\.]+D)?(T([\d\.]+H)?([\d\.]+M)?([\d\.]+S)?)?$/i) |
|
1757
|
|
|
|
|
|
|
{ 'http://www.w3.org/2001/XMLSchema#duration' } |
|
1758
|
|
|
|
|
|
|
else |
|
1759
|
|
|
|
|
|
|
{ undef } |
|
1760
|
|
|
|
|
|
|
}, $current_language; |
|
1761
|
|
|
|
|
|
|
} |
|
1762
|
|
|
|
|
|
|
|
|
1763
|
|
|
|
|
|
|
# HTML+RDFa |
|
1764
|
|
|
|
|
|
|
elsif ($self->{options}{value_attr} |
|
1765
|
|
|
|
|
|
|
and $current_element->hasAttributeNsSafe($rdfans, 'value')) |
|
1766
|
|
|
|
|
|
|
{ |
|
1767
|
|
|
|
|
|
|
@current_property_value = ( |
|
1768
|
|
|
|
|
|
|
$current_element->getAttributeNsSafe($rdfans, 'value'), |
|
1769
|
|
|
|
|
|
|
($has_datatype ? $datatype : undef), |
|
1770
|
|
|
|
|
|
|
$current_language, |
|
1771
|
|
|
|
|
|
|
); |
|
1772
|
|
|
|
|
|
|
} |
|
1773
|
|
|
|
|
|
|
|
|
1774
|
|
|
|
|
|
|
# as a [ plain literal ] if: |
|
1775
|
|
|
|
|
|
|
# |
|
1776
|
|
|
|
|
|
|
# @content is present; |
|
1777
|
|
|
|
|
|
|
elsif ($current_element->hasAttributeNsSafe($rdfans, 'content')) |
|
1778
|
|
|
|
|
|
|
{ |
|
1779
|
|
|
|
|
|
|
@current_property_value = ( |
|
1780
|
|
|
|
|
|
|
$current_element->getAttributeNsSafe($rdfans, 'content'), |
|
1781
|
|
|
|
|
|
|
($has_datatype ? $datatype : undef), |
|
1782
|
|
|
|
|
|
|
$current_language, |
|
1783
|
|
|
|
|
|
|
); |
|
1784
|
|
|
|
|
|
|
} |
|
1785
|
|
|
|
|
|
|
|
|
1786
|
|
|
|
|
|
|
# OpenDocument 1.2 extension |
|
1787
|
|
|
|
|
|
|
elsif (defined $self->{options}{bookmark_end} |
|
1788
|
|
|
|
|
|
|
and defined $self->{options}{bookmark_name} |
|
1789
|
|
|
|
|
|
|
and sprintf('{%s}%s', $current_element->namespaceURI, $current_element->localname) |
|
1790
|
|
|
|
|
|
|
~~ ['{}'.$self->{options}{bookmark_start}, $self->{options}{bookmark_start}] |
|
1791
|
|
|
|
|
|
|
) { |
|
1792
|
|
|
|
|
|
|
@current_property_value = ( |
|
1793
|
|
|
|
|
|
|
$self->_element_to_bookmarked_string($current_element), |
|
1794
|
|
|
|
|
|
|
($has_datatype ? $datatype: undef), |
|
1795
|
|
|
|
|
|
|
$current_language, |
|
1796
|
|
|
|
|
|
|
); |
|
1797
|
|
|
|
|
|
|
} |
|
1798
|
|
|
|
|
|
|
|
|
1799
|
|
|
|
|
|
|
# Additionally, if there is a value for [current language] then |
|
1800
|
|
|
|
|
|
|
# the value of the [plain literal] should include this language |
|
1801
|
|
|
|
|
|
|
# information, as described in [RDF-CONCEPTS]. The actual literal |
|
1802
|
|
|
|
|
|
|
# is either the value of @content (if present) or a string created |
|
1803
|
|
|
|
|
|
|
# by concatenating the text content of each of the descendant |
|
1804
|
|
|
|
|
|
|
# elements of the [current element] in document order. |
|
1805
|
|
|
|
|
|
|
|
|
1806
|
|
|
|
|
|
|
# or all children of the [current element] are text nodes; |
|
1807
|
|
|
|
|
|
|
# or there are no child nodes; |
|
1808
|
|
|
|
|
|
|
# or the body of the [ current element ] does have non-text |
|
1809
|
|
|
|
|
|
|
# child nodes but @datatype is present, with an empty value. |
|
1810
|
|
|
|
|
|
|
elsif ($has_datatype and $datatype eq '') |
|
1811
|
|
|
|
|
|
|
{ |
|
1812
|
|
|
|
|
|
|
@current_property_value = ( |
|
1813
|
|
|
|
|
|
|
$self->_element_to_string($current_element), |
|
1814
|
|
|
|
|
|
|
($has_datatype ? $datatype: undef), |
|
1815
|
|
|
|
|
|
|
$current_language, |
|
1816
|
|
|
|
|
|
|
); |
|
1817
|
|
|
|
|
|
|
} |
|
1818
|
|
|
|
|
|
|
|
|
1819
|
|
|
|
|
|
|
# as an [XML literal] if: explicitly rdf:XMLLiteral. |
|
1820
|
|
|
|
|
|
|
elsif ($datatype eq RDF_XMLLIT) |
|
1821
|
|
|
|
|
|
|
{ |
|
1822
|
|
|
|
|
|
|
@current_property_value = ( |
|
1823
|
|
|
|
|
|
|
$self->_element_to_xml($current_element, $current_language), |
|
1824
|
|
|
|
|
|
|
RDF_XMLLIT, |
|
1825
|
|
|
|
|
|
|
$current_language, |
|
1826
|
|
|
|
|
|
|
); |
|
1827
|
|
|
|
|
|
|
$recurse = $self->{options}{xmllit_recurse}; |
|
1828
|
|
|
|
|
|
|
} |
|
1829
|
|
|
|
|
|
|
|
|
1830
|
|
|
|
|
|
|
# as a [typed literal] if: |
|
1831
|
|
|
|
|
|
|
# |
|
1832
|
|
|
|
|
|
|
# * @datatype is present, and does not have an empty value. |
|
1833
|
|
|
|
|
|
|
# |
|
1834
|
|
|
|
|
|
|
# The actual literal is either the value of @content (if present) |
|
1835
|
|
|
|
|
|
|
# or a string created by concatenating the value of all descendant |
|
1836
|
|
|
|
|
|
|
# text nodes, of the [current element] in turn. The final string |
|
1837
|
|
|
|
|
|
|
# includes the datatype URI, as described in [RDF-CONCEPTS], which |
|
1838
|
|
|
|
|
|
|
# will have been obtained according to the section on CURIE and URI |
|
1839
|
|
|
|
|
|
|
# Processing. |
|
1840
|
|
|
|
|
|
|
elsif ($has_datatype) |
|
1841
|
|
|
|
|
|
|
{ |
|
1842
|
|
|
|
|
|
|
if ($current_element->hasAttributeNsSafe($rdfans, 'content')) |
|
1843
|
|
|
|
|
|
|
{ |
|
1844
|
|
|
|
|
|
|
@current_property_value = ( |
|
1845
|
|
|
|
|
|
|
$current_element->getAttributeNsSafe($rdfans, 'content'), |
|
1846
|
|
|
|
|
|
|
$datatype, |
|
1847
|
|
|
|
|
|
|
$current_language, |
|
1848
|
|
|
|
|
|
|
); |
|
1849
|
|
|
|
|
|
|
} |
|
1850
|
|
|
|
|
|
|
else |
|
1851
|
|
|
|
|
|
|
{ |
|
1852
|
|
|
|
|
|
|
@current_property_value = ( |
|
1853
|
|
|
|
|
|
|
$self->_element_to_string($current_element), |
|
1854
|
|
|
|
|
|
|
$datatype, |
|
1855
|
|
|
|
|
|
|
$current_language, |
|
1856
|
|
|
|
|
|
|
); |
|
1857
|
|
|
|
|
|
|
} |
|
1858
|
|
|
|
|
|
|
} |
|
1859
|
|
|
|
|
|
|
|
|
1860
|
|
|
|
|
|
|
elsif ($self->{options}{property_resources} |
|
1861
|
|
|
|
|
|
|
and !$current_element->hasAttributeNsSafe($rdfans, 'datatype') |
|
1862
|
|
|
|
|
|
|
and !$current_element->hasAttributeNsSafe($rdfans, 'content') |
|
1863
|
|
|
|
|
|
|
and !$current_element->hasAttributeNsSafe($rdfans, 'rel') |
|
1864
|
|
|
|
|
|
|
and !$current_element->hasAttributeNsSafe($rdfans, 'rev') |
|
1865
|
|
|
|
|
|
|
and ( |
|
1866
|
|
|
|
|
|
|
$current_element->hasAttributeNsSafe($rdfans, 'resource') |
|
1867
|
|
|
|
|
|
|
or $current_element->hasAttributeNsSafe($rdfans, 'href') |
|
1868
|
|
|
|
|
|
|
or $current_element->hasAttributeNsSafe($rdfans, 'src') |
|
1869
|
|
|
|
|
|
|
&& $self->{options}{src_sets_object} |
|
1870
|
|
|
|
|
|
|
)) |
|
1871
|
|
|
|
|
|
|
{ |
|
1872
|
|
|
|
|
|
|
my $resource; |
|
1873
|
|
|
|
|
|
|
foreach my $attr (qw(resource href src)) |
|
1874
|
|
|
|
|
|
|
{ |
|
1875
|
|
|
|
|
|
|
next unless $current_element->hasAttributeNsSafe($rdfans, $attr); |
|
1876
|
|
|
|
|
|
|
$resource = $self->_expand_curie( |
|
1877
|
|
|
|
|
|
|
$current_element->getAttributeNsSafe($rdfans, $attr), |
|
1878
|
|
|
|
|
|
|
element => $current_element, |
|
1879
|
|
|
|
|
|
|
attribute => $attr, |
|
1880
|
|
|
|
|
|
|
prefixes => $local_uri_mappings, |
|
1881
|
|
|
|
|
|
|
terms => $local_term_mappings, |
|
1882
|
|
|
|
|
|
|
xml_base => $xml_base, |
|
1883
|
|
|
|
|
|
|
); |
|
1884
|
|
|
|
|
|
|
last if defined $resource; |
|
1885
|
|
|
|
|
|
|
} |
|
1886
|
|
|
|
|
|
|
@current_property_value = ([ $resource ]) if defined $resource; |
|
1887
|
|
|
|
|
|
|
} |
|
1888
|
|
|
|
|
|
|
|
|
1889
|
|
|
|
|
|
|
elsif ($self->{options}{property_resources} |
|
1890
|
|
|
|
|
|
|
and defined $typed_resource |
|
1891
|
|
|
|
|
|
|
and $current_element->hasAttributeNsSafe($rdfans, 'typeof') |
|
1892
|
|
|
|
|
|
|
and !$current_element->hasAttributeNsSafe($rdfans, 'about')) |
|
1893
|
|
|
|
|
|
|
{ |
|
1894
|
|
|
|
|
|
|
@current_property_value = ([ $typed_resource ]); |
|
1895
|
|
|
|
|
|
|
} |
|
1896
|
|
|
|
|
|
|
|
|
1897
|
|
|
|
|
|
|
# or all children of the [current element] are text nodes; |
|
1898
|
|
|
|
|
|
|
# or there are no child nodes; |
|
1899
|
|
|
|
|
|
|
# or the body of the [ current element ] does have non-text |
|
1900
|
|
|
|
|
|
|
# child nodes but @datatype is present, with an empty value. |
|
1901
|
|
|
|
|
|
|
elsif (not $current_element->getElementsByTagName('*')) |
|
1902
|
|
|
|
|
|
|
{ |
|
1903
|
|
|
|
|
|
|
@current_property_value = ( |
|
1904
|
|
|
|
|
|
|
$self->_element_to_string($current_element), |
|
1905
|
|
|
|
|
|
|
($has_datatype ? $datatype: undef), |
|
1906
|
|
|
|
|
|
|
$current_language, |
|
1907
|
|
|
|
|
|
|
); |
|
1908
|
|
|
|
|
|
|
} |
|
1909
|
|
|
|
|
|
|
|
|
1910
|
|
|
|
|
|
|
# In RDFa 1.0 by default generate an XML Literal; |
|
1911
|
|
|
|
|
|
|
# in RDFa 1.1 by default generate a plain literal. |
|
1912
|
|
|
|
|
|
|
elsif (!$has_datatype and $current_element->getElementsByTagName('*')) |
|
1913
|
|
|
|
|
|
|
{ |
|
1914
|
|
|
|
|
|
|
if ($self->{options}{xmllit_default}) |
|
1915
|
|
|
|
|
|
|
{ |
|
1916
|
|
|
|
|
|
|
@current_property_value = ($self->_element_to_xml($current_element, $current_language), |
|
1917
|
|
|
|
|
|
|
RDF_XMLLIT, |
|
1918
|
|
|
|
|
|
|
$current_language); |
|
1919
|
|
|
|
|
|
|
$recurse = $self->{options}{xmllit_recurse}; |
|
1920
|
|
|
|
|
|
|
} |
|
1921
|
|
|
|
|
|
|
else |
|
1922
|
|
|
|
|
|
|
{ |
|
1923
|
|
|
|
|
|
|
@current_property_value = ($self->_element_to_string($current_element), |
|
1924
|
|
|
|
|
|
|
undef, |
|
1925
|
|
|
|
|
|
|
$current_language); |
|
1926
|
|
|
|
|
|
|
} |
|
1927
|
|
|
|
|
|
|
} |
|
1928
|
|
|
|
|
|
|
|
|
1929
|
|
|
|
|
|
|
else |
|
1930
|
|
|
|
|
|
|
{ |
|
1931
|
|
|
|
|
|
|
die("How did we get here??\n"); |
|
1932
|
|
|
|
|
|
|
} |
|
1933
|
|
|
|
|
|
|
} |
|
1934
|
|
|
|
|
|
|
|
|
1935
|
|
|
|
|
|
|
my $E = { # provenance tracking |
|
1936
|
|
|
|
|
|
|
current => $current_element, |
|
1937
|
|
|
|
|
|
|
subject => $new_subject_elem, |
|
1938
|
|
|
|
|
|
|
predicate => $current_element, |
|
1939
|
|
|
|
|
|
|
object => $current_element, |
|
1940
|
|
|
|
|
|
|
graph => $graph_elem, |
|
1941
|
|
|
|
|
|
|
}; |
|
1942
|
|
|
|
|
|
|
foreach my $property (@prop) |
|
1943
|
|
|
|
|
|
|
{ |
|
1944
|
|
|
|
|
|
|
next unless defined $current_property_value[0]; |
|
1945
|
|
|
|
|
|
|
|
|
1946
|
|
|
|
|
|
|
# The [current property value] is then used with each predicate to |
|
1947
|
|
|
|
|
|
|
# generate a triple as follows: |
|
1948
|
|
|
|
|
|
|
# |
|
1949
|
|
|
|
|
|
|
# subject |
|
1950
|
|
|
|
|
|
|
# [new subject] |
|
1951
|
|
|
|
|
|
|
# predicate |
|
1952
|
|
|
|
|
|
|
# full URI |
|
1953
|
|
|
|
|
|
|
# object |
|
1954
|
|
|
|
|
|
|
# [current object literal] |
|
1955
|
|
|
|
|
|
|
|
|
1956
|
|
|
|
|
|
|
my $p = $self->_expand_curie( |
|
1957
|
|
|
|
|
|
|
$property, |
|
1958
|
|
|
|
|
|
|
element => $current_element, |
|
1959
|
|
|
|
|
|
|
attribute => 'property', |
|
1960
|
|
|
|
|
|
|
prefixes => $local_uri_mappings, |
|
1961
|
|
|
|
|
|
|
terms => $local_term_mappings, |
|
1962
|
|
|
|
|
|
|
xml_base => $xml_base, |
|
1963
|
|
|
|
|
|
|
); |
|
1964
|
|
|
|
|
|
|
next unless defined $p; |
|
1965
|
|
|
|
|
|
|
|
|
1966
|
|
|
|
|
|
|
if (ref $current_property_value[0] eq 'ARRAY') |
|
1967
|
|
|
|
|
|
|
{ |
|
1968
|
|
|
|
|
|
|
if ($current_element->hasAttributeNsSafe($rdfans, 'inlist')) |
|
1969
|
|
|
|
|
|
|
{ |
|
1970
|
|
|
|
|
|
|
$list_mappings->{$p} = [] unless defined $list_mappings->{$p}; |
|
1971
|
|
|
|
|
|
|
push @{ $list_mappings->{$p} }, [resource => $current_property_value[0][0]]; |
|
1972
|
|
|
|
|
|
|
} |
|
1973
|
|
|
|
|
|
|
else |
|
1974
|
|
|
|
|
|
|
{ |
|
1975
|
|
|
|
|
|
|
$self->_insert_triple_resource($E, $new_subject, $p, $current_property_value[0][0], $graph); |
|
1976
|
|
|
|
|
|
|
$activity++; |
|
1977
|
|
|
|
|
|
|
} |
|
1978
|
|
|
|
|
|
|
} |
|
1979
|
|
|
|
|
|
|
else |
|
1980
|
|
|
|
|
|
|
{ |
|
1981
|
|
|
|
|
|
|
if ($current_element->hasAttributeNsSafe($rdfans, 'inlist')) |
|
1982
|
|
|
|
|
|
|
{ |
|
1983
|
|
|
|
|
|
|
$list_mappings->{$p} = [] unless defined $list_mappings->{$p}; |
|
1984
|
|
|
|
|
|
|
push @{ $list_mappings->{$p} }, [literal => @current_property_value]; |
|
1985
|
|
|
|
|
|
|
} |
|
1986
|
|
|
|
|
|
|
else |
|
1987
|
|
|
|
|
|
|
{ |
|
1988
|
|
|
|
|
|
|
$self->_insert_triple_literal($E, $new_subject, $p, @current_property_value, $graph); |
|
1989
|
|
|
|
|
|
|
$activity++; |
|
1990
|
|
|
|
|
|
|
} |
|
1991
|
|
|
|
|
|
|
} |
|
1992
|
|
|
|
|
|
|
# Once the triple has been created, if the [datatype] of the |
|
1993
|
|
|
|
|
|
|
# [current object literal] is rdf:XMLLiteral, then the [recurse] |
|
1994
|
|
|
|
|
|
|
# flag is set to false. |
|
1995
|
|
|
|
|
|
|
# $recurse = 0 |
|
1996
|
|
|
|
|
|
|
# if $datatype eq RDF_XMLLIT; |
|
1997
|
|
|
|
|
|
|
} |
|
1998
|
|
|
|
|
|
|
|
|
1999
|
|
|
|
|
|
|
# # If the [skip element] flag is 'false', and either: the previous step |
|
2000
|
|
|
|
|
|
|
# # resulted in a 'true' flag, or [new subject] was set to a non-null and |
|
2001
|
|
|
|
|
|
|
# # non-bnode value, then any [incomplete triple]s within the current context |
|
2002
|
|
|
|
|
|
|
# # should be completed: |
|
2003
|
|
|
|
|
|
|
# if (!$skip_element && ($flag || ((defined $new_subject) && ($new_subject !~ /^bnodeXXX:/)))) |
|
2004
|
|
|
|
|
|
|
# { |
|
2005
|
|
|
|
|
|
|
|
|
2006
|
|
|
|
|
|
|
if (!$skip_element && defined $new_subject) |
|
2007
|
|
|
|
|
|
|
{ |
|
2008
|
|
|
|
|
|
|
# Loop through list of incomplete triples... |
|
2009
|
|
|
|
|
|
|
foreach my $it (@$incomplete_triples) |
|
2010
|
|
|
|
|
|
|
{ |
|
2011
|
|
|
|
|
|
|
my $direction = $it->{direction}; |
|
2012
|
|
|
|
|
|
|
my $predicate = $it->{predicate}; |
|
2013
|
|
|
|
|
|
|
my $parent_graph = $it->{graph}; |
|
2014
|
|
|
|
|
|
|
|
|
2015
|
|
|
|
|
|
|
if ($direction eq 'none' and defined $it->{list}) |
|
2016
|
|
|
|
|
|
|
{ |
|
2017
|
|
|
|
|
|
|
push @{$it->{list}}, [resource => $new_subject]; |
|
2018
|
|
|
|
|
|
|
} |
|
2019
|
|
|
|
|
|
|
elsif ($direction eq 'forward') |
|
2020
|
|
|
|
|
|
|
{ |
|
2021
|
|
|
|
|
|
|
my $E = { # provenance tracking |
|
2022
|
|
|
|
|
|
|
current => $current_element, |
|
2023
|
|
|
|
|
|
|
subject => $parent_subject_elem, |
|
2024
|
|
|
|
|
|
|
predicate => $it->{predicate_element}, |
|
2025
|
|
|
|
|
|
|
object => $new_subject_elem, |
|
2026
|
|
|
|
|
|
|
graph => $it->{graph_element}, |
|
2027
|
|
|
|
|
|
|
}; |
|
2028
|
|
|
|
|
|
|
|
|
2029
|
|
|
|
|
|
|
$self->_insert_triple_resource($E, $parent_subject, $predicate, $new_subject, $parent_graph); |
|
2030
|
|
|
|
|
|
|
$activity++; |
|
2031
|
|
|
|
|
|
|
} |
|
2032
|
|
|
|
|
|
|
elsif ($direction eq 'reverse') |
|
2033
|
|
|
|
|
|
|
{ |
|
2034
|
|
|
|
|
|
|
my $E = { # provenance tracking |
|
2035
|
|
|
|
|
|
|
current => $current_element, |
|
2036
|
|
|
|
|
|
|
subject => $new_subject_elem, |
|
2037
|
|
|
|
|
|
|
predicate => $it->{predicate_element}, |
|
2038
|
|
|
|
|
|
|
object => $parent_subject_elem, |
|
2039
|
|
|
|
|
|
|
graph => $it->{graph_element}, |
|
2040
|
|
|
|
|
|
|
}; |
|
2041
|
|
|
|
|
|
|
|
|
2042
|
|
|
|
|
|
|
$self->_insert_triple_resource($E, $new_subject, $predicate, $parent_subject, $parent_graph); |
|
2043
|
|
|
|
|
|
|
$activity++; |
|
2044
|
|
|
|
|
|
|
} |
|
2045
|
|
|
|
|
|
|
else |
|
2046
|
|
|
|
|
|
|
{ |
|
2047
|
|
|
|
|
|
|
die "Direction is '$direction'??"; |
|
2048
|
|
|
|
|
|
|
} |
|
2049
|
|
|
|
|
|
|
} |
|
2050
|
|
|
|
|
|
|
} |
|
2051
|
|
|
|
|
|
|
|
|
2052
|
|
|
|
|
|
|
# If the [recurse] flag is 'true', all elements that are children of the |
|
2053
|
|
|
|
|
|
|
# [current element] are processed using the rules described here, using a |
|
2054
|
|
|
|
|
|
|
# new [evaluation context], initialized as follows |
|
2055
|
|
|
|
|
|
|
my $flag = 0; |
|
2056
|
|
|
|
|
|
|
if ($recurse) |
|
2057
|
|
|
|
|
|
|
{ |
|
2058
|
|
|
|
|
|
|
my $evaluation_context; |
|
2059
|
|
|
|
|
|
|
|
|
2060
|
|
|
|
|
|
|
# If the [skip element] flag is 'true' then the new [evaluation context] |
|
2061
|
|
|
|
|
|
|
# is a copy of the current context that was passed in to this level of |
|
2062
|
|
|
|
|
|
|
# processing, with the [language] and [list of URI mappings] values |
|
2063
|
|
|
|
|
|
|
# replaced with the local values; |
|
2064
|
|
|
|
|
|
|
if ($skip_element) |
|
2065
|
|
|
|
|
|
|
{ |
|
2066
|
|
|
|
|
|
|
$evaluation_context = { |
|
2067
|
|
|
|
|
|
|
%$args, |
|
2068
|
|
|
|
|
|
|
base => $base, |
|
2069
|
|
|
|
|
|
|
language => $current_language, |
|
2070
|
|
|
|
|
|
|
uri_mappings => $uri_mappings, |
|
2071
|
|
|
|
|
|
|
term_mappings => $term_mappings, |
|
2072
|
|
|
|
|
|
|
list_mappings => $list_mappings, |
|
2073
|
|
|
|
|
|
|
# parent_subject => $parent_subject, |
|
2074
|
|
|
|
|
|
|
# parent_subject_elem => $parent_subject_elem, |
|
2075
|
|
|
|
|
|
|
# parent_object => $parent_object, |
|
2076
|
|
|
|
|
|
|
# parent_object_elem => $parent_object_elem, |
|
2077
|
|
|
|
|
|
|
# incomplete_triples => $incomplete_triples, |
|
2078
|
|
|
|
|
|
|
graph => $graph, |
|
2079
|
|
|
|
|
|
|
graph_elem => $graph_elem, |
|
2080
|
|
|
|
|
|
|
xml_base => $xml_base, |
|
2081
|
|
|
|
|
|
|
parent => $args, |
|
2082
|
|
|
|
|
|
|
}; |
|
2083
|
|
|
|
|
|
|
} |
|
2084
|
|
|
|
|
|
|
|
|
2085
|
|
|
|
|
|
|
# Otherwise, the values are: |
|
2086
|
|
|
|
|
|
|
else |
|
2087
|
|
|
|
|
|
|
{ |
|
2088
|
|
|
|
|
|
|
$evaluation_context = { |
|
2089
|
|
|
|
|
|
|
base => $base, |
|
2090
|
|
|
|
|
|
|
parent_subject => $new_subject, |
|
2091
|
|
|
|
|
|
|
parent_subject_elem => $new_subject_elem, |
|
2092
|
|
|
|
|
|
|
parent_object => (defined $current_object_resource ? $current_object_resource : (defined $new_subject ? $new_subject : $parent_subject)), |
|
2093
|
|
|
|
|
|
|
parent_object_elem => (defined $current_object_resource_elem ? $current_object_resource_elem : (defined $new_subject_elem ? $new_subject_elem : $parent_subject_elem)), |
|
2094
|
|
|
|
|
|
|
uri_mappings => $local_uri_mappings, |
|
2095
|
|
|
|
|
|
|
term_mappings => $local_term_mappings, |
|
2096
|
|
|
|
|
|
|
incomplete_triples => $local_incomplete_triples, |
|
2097
|
|
|
|
|
|
|
list_mappings => $list_mappings, |
|
2098
|
|
|
|
|
|
|
language => $current_language, |
|
2099
|
|
|
|
|
|
|
graph => $graph, |
|
2100
|
|
|
|
|
|
|
graph_elem => $graph_elem, |
|
2101
|
|
|
|
|
|
|
xml_base => $xml_base, |
|
2102
|
|
|
|
|
|
|
parent => $args, |
|
2103
|
|
|
|
|
|
|
}; |
|
2104
|
|
|
|
|
|
|
} |
|
2105
|
|
|
|
|
|
|
|
|
2106
|
|
|
|
|
|
|
foreach my $kid ($current_element->getChildrenByTagName('*')) |
|
2107
|
|
|
|
|
|
|
{ |
|
2108
|
|
|
|
|
|
|
$flag = $self->_consume_element($kid, $evaluation_context) || $flag; |
|
2109
|
|
|
|
|
|
|
} |
|
2110
|
|
|
|
|
|
|
} |
|
2111
|
|
|
|
|
|
|
|
|
2112
|
|
|
|
|
|
|
# Once all the child elements have been traversed, list triples are |
|
2113
|
|
|
|
|
|
|
# generated, if necessary. |
|
2114
|
|
|
|
|
|
|
if ($list_mappings->{'::meta'}{owner} == $current_element) |
|
2115
|
|
|
|
|
|
|
{ |
|
2116
|
|
|
|
|
|
|
foreach my $iri (keys %$list_mappings) |
|
2117
|
|
|
|
|
|
|
{ |
|
2118
|
|
|
|
|
|
|
next if $iri eq '::meta'; |
|
2119
|
|
|
|
|
|
|
|
|
2120
|
|
|
|
|
|
|
# For each IRI in the local list mapping, if the equivalent list does |
|
2121
|
|
|
|
|
|
|
# not exist in the evaluation context, indicating that the list was |
|
2122
|
|
|
|
|
|
|
# originally defined on the current element, use the list as follows: |
|
2123
|
|
|
|
|
|
|
if ($args->{list_mappings}{$iri} == $list_mappings->{$iri} |
|
2124
|
|
|
|
|
|
|
and ref $args->{list_mappings}{$iri} eq 'HASH' |
|
2125
|
|
|
|
|
|
|
and %{ $args->{list_mappings}{$iri} }) |
|
2126
|
|
|
|
|
|
|
{ |
|
2127
|
|
|
|
|
|
|
next; |
|
2128
|
|
|
|
|
|
|
} |
|
2129
|
|
|
|
|
|
|
|
|
2130
|
|
|
|
|
|
|
# Create a new 'bnode' array containing newly created bnodes, one for |
|
2131
|
|
|
|
|
|
|
# each element in the list |
|
2132
|
|
|
|
|
|
|
my @bnode = map { $self->bnode; } @{ $list_mappings->{$iri} }; |
|
2133
|
|
|
|
|
|
|
my $first = @bnode ? $bnode[0] : undef; |
|
2134
|
|
|
|
|
|
|
|
|
2135
|
|
|
|
|
|
|
while (my $bnode = shift @bnode) |
|
2136
|
|
|
|
|
|
|
{ |
|
2137
|
|
|
|
|
|
|
my $value = shift @{ $list_mappings->{$iri} }; |
|
2138
|
|
|
|
|
|
|
my $type = shift @$value; |
|
2139
|
|
|
|
|
|
|
|
|
2140
|
|
|
|
|
|
|
my $E = { # provenance tracking |
|
2141
|
|
|
|
|
|
|
current => $current_element, |
|
2142
|
|
|
|
|
|
|
graph => $graph_elem, |
|
2143
|
|
|
|
|
|
|
}; |
|
2144
|
|
|
|
|
|
|
if ($type eq 'literal') |
|
2145
|
|
|
|
|
|
|
{ |
|
2146
|
|
|
|
|
|
|
$self->_insert_triple_literal($E, $bnode, RDF_FIRST, @$value, $graph); |
|
2147
|
|
|
|
|
|
|
} |
|
2148
|
|
|
|
|
|
|
else |
|
2149
|
|
|
|
|
|
|
{ |
|
2150
|
|
|
|
|
|
|
$self->_insert_triple_resource($E, $bnode, RDF_FIRST, @$value, $graph); |
|
2151
|
|
|
|
|
|
|
} |
|
2152
|
|
|
|
|
|
|
|
|
2153
|
|
|
|
|
|
|
if (exists $bnode[0]) |
|
2154
|
|
|
|
|
|
|
{ |
|
2155
|
|
|
|
|
|
|
$self->_insert_triple_resource($E, $bnode, RDF_REST, $bnode[0], $graph); |
|
2156
|
|
|
|
|
|
|
} |
|
2157
|
|
|
|
|
|
|
else |
|
2158
|
|
|
|
|
|
|
{ |
|
2159
|
|
|
|
|
|
|
$self->_insert_triple_resource($E, $bnode, RDF_REST, RDF_NIL, $graph); |
|
2160
|
|
|
|
|
|
|
} |
|
2161
|
|
|
|
|
|
|
} |
|
2162
|
|
|
|
|
|
|
|
|
2163
|
|
|
|
|
|
|
my $E = { # provenance tracking |
|
2164
|
|
|
|
|
|
|
current => $current_element, |
|
2165
|
|
|
|
|
|
|
subject => $new_subject_elem, |
|
2166
|
|
|
|
|
|
|
predicate => $current_element, |
|
2167
|
|
|
|
|
|
|
graph => $graph_elem, |
|
2168
|
|
|
|
|
|
|
}; |
|
2169
|
|
|
|
|
|
|
|
|
2170
|
|
|
|
|
|
|
#my ($attr, $iri) = split /:/, $iri, 2; |
|
2171
|
|
|
|
|
|
|
my $attr = 'REL'; |
|
2172
|
|
|
|
|
|
|
|
|
2173
|
|
|
|
|
|
|
if (defined $first) |
|
2174
|
|
|
|
|
|
|
{ |
|
2175
|
|
|
|
|
|
|
$attr eq 'REV' |
|
2176
|
|
|
|
|
|
|
? $self->_insert_triple_resource($E, $first, $iri, $new_subject, $graph) |
|
2177
|
|
|
|
|
|
|
: $self->_insert_triple_resource($E, $new_subject, $iri, $first, $graph); |
|
2178
|
|
|
|
|
|
|
} |
|
2179
|
|
|
|
|
|
|
else |
|
2180
|
|
|
|
|
|
|
{ |
|
2181
|
|
|
|
|
|
|
$attr eq 'REV' |
|
2182
|
|
|
|
|
|
|
? $self->_insert_triple_resource($E, RDF_NIL, $iri, $new_subject, $graph) |
|
2183
|
|
|
|
|
|
|
: $self->_insert_triple_resource($E, $new_subject, $iri, RDF_NIL, $graph); |
|
2184
|
|
|
|
|
|
|
} |
|
2185
|
|
|
|
|
|
|
|
|
2186
|
|
|
|
|
|
|
$activity++; |
|
2187
|
|
|
|
|
|
|
} |
|
2188
|
|
|
|
|
|
|
} |
|
2189
|
|
|
|
|
|
|
|
|
2190
|
|
|
|
|
|
|
return 1 if $activity || $new_subject || $flag; |
|
2191
|
|
|
|
|
|
|
return 0; |
|
2192
|
|
|
|
|
|
|
} |
|
2193
|
|
|
|
|
|
|
|
|
2194
|
|
|
|
|
|
|
sub set_callbacks
# Installs a hashref of callback functions on the parser.
#
# The single argument must be an unblessed hashref. It is stored by
# reference (not copied) in $self->{sub}. As a shorthand, a
# 'pretriple_resource' or 'pretriple_literal' entry whose value is the
# string 'print' (compared case-insensitively) is replaced by a reference
# to _print0 or _print1 respectively. Any other kind of argument makes
# the method die. Returns $self.
{
	my ($self, $callbacks) = @_;

	die "Unsupported set_callbacks call.\n"
		unless ref($callbacks) eq 'HASH';

	$self->{'sub'} = $callbacks;

	# Hooks that accept the 'print' shorthand, and the printer each maps to.
	my %printer_for = (
		'pretriple_resource' => \&_print0,
		'pretriple_literal'  => \&_print1,
	);

	foreach my $hook (keys %printer_for)
	{
		my $requested = $callbacks->{$hook} || '';
		next unless lc($requested) eq 'print';
		$callbacks->{$hook} = $printer_for{$hook};
	}

	return $self;
}
|
2214
|
|
|
|
|
|
|
|
|
2215
|
|
|
|
|
|
|
sub _print0
# Prints a resource-object triple as a line of Turtle.
#
# Arguments after $self: the element the triple was found on (or a false
# value), subject, predicate, object and graph. A "# GRAPH" comment line
# is printed first when $graph is true, then a comment naming the
# element's nodePath (or a plain "# Triple." when there is no element).
# Subject and object are printed verbatim when they start with "_:" and
# wrapped in angle brackets otherwise. Always returns nothing.
{
	my ($self, $element, $subject, $pred, $object, $graph) = @_;

	print "# GRAPH $graph\n" if $graph;

	if ($element)
	{
		printf("# Triple on element %s.\n", $element->nodePath);
	}
	else
	{
		printf("# Triple.\n");
	}

	# Blank node labels go out as-is; everything else is treated as an IRI.
	my ($turtle_subject, $turtle_object)
		= map { /^_:/ ? $_ : "<$_>" } ($subject, $object);

	printf("%s %s %s .\n", $turtle_subject, "<$pred>", $turtle_object);

	return;
}
|
2245
|
|
|
|
|
|
|
|
|
2246
|
|
|
|
|
|
|
sub _print1
# Prints a literal-object triple as a line of Turtle.
#
# Arguments after $self: the element the triple was found on (or a false
# value), subject, predicate, literal value, datatype, language and graph.
# Backslash, newline, carriage return, tab and double quote in the literal
# are backslash-escaped. A non-empty datatype is appended as ^^<datatype>;
# the language tag is appended only when there is no datatype.
# Always returns nothing.
{
	my ($self, $element, $subject, $pred, $object, $dt, $lang, $graph) = @_;

	# Clumsy, but probably works. Escaping in a single pass gives the same
	# result as escaping backslashes first and the other characters after.
	my %escape_for = (
		"\\" => "\\\\",
		"\n" => "\\n",
		"\r" => "\\r",
		"\t" => "\\t",
		"\"" => "\\\"",
	);
	$object =~ s/([\\\n\r\t"])/$escape_for{$1}/g;

	print "# GRAPH $graph\n" if $graph;

	if ($element)
	{
		printf("# Triple on element %s.\n", $element->nodePath);
	}
	else
	{
		printf("# Triple.\n");
	}

	my $has_datatype    = length $dt;
	my $datatype_suffix = $has_datatype ? "^^<$dt>" : '';
	my $language_suffix = (length $lang && !$has_datatype) ? "\@$lang" : '';

	printf("%s %s %s%s%s .\n",
		($subject =~ /^_:/ ? $subject : "<$subject>"),
		"<$pred>",
		"\"$object\"",
		$datatype_suffix,
		$language_suffix,
	);

	return;
}
|
2288
|
|
|
|
|
|
|
|
|
2289
|
|
|
|
|
|
|
sub element_subjects
# Combined getter/setter for $self->{element_subjects}.
#
# $self->consume is always called first. If an argument is supplied, the
# first one replaces the stored value. The (possibly updated) stored value
# is returned.
{
	my ($self, @new_value) = @_;

	$self->consume;

	if (@new_value)
	{
		$self->{element_subjects} = $new_value[0];
	}

	return $self->{element_subjects};
}
|
2296
|
|
|
|
|
|
|
|
|
2297
|
|
|
|
|
|
|
sub _insert_triple_resource
# Emits one triple whose object is a resource (IRI or blank node).
#
#   $element    provenance data for the triple; when it is a reference,
#               its {current} entry is what the callback receives
#   $subject    subject URI or bnode
#   $predicate  predicate URI
#   $object     resource URI, or "_:label" for a bnode
#   $graph      graph URI or bnode (if named graphs feature is enabled)
#
# A registered 'pretriple_resource' callback is called first; a true
# return value suppresses the triple and the method returns nothing.
# Otherwise the object is wrapped in an RDF::Trine blank or resource node
# and the result of _insert_triple_common is returned.
{
	my ($self, $element, $subject, $predicate, $object, $graph) = @_;

	my $pretriple = $self->{'sub'}->{'pretriple_resource'};
	if (defined $pretriple)
	{
		my $suppress_triple = $pretriple->(
			$self,
			(ref $element ? $element->{current} : undef),
			$subject,
			$predicate,
			$object,
			$graph,
		);
		return if $suppress_triple;
	}

	# First make sure the object node type is ok.
	my $to = ($object =~ m/^_:(.*)/)
		? RDF::Trine::Node::Blank->new($1)
		: RDF::Trine::Node::Resource->new($object);

	# Run the common function
	return $self->_insert_triple_common($element, $subject, $predicate, $to, $graph);
}
|
2333
|
|
|
|
|
|
|
|
|
2334
|
|
|
|
|
|
|
sub _insert_triple_literal
# Emits one triple whose object is a literal.
#
#   $element    provenance data for the triple; when it is a reference,
#               its {current} entry is what the callback receives
#   $subject    subject URI or bnode
#   $predicate  predicate URI
#   $object     literal value
#   $datatype   datatype URI (possibly undef or '')
#   $language   language (possibly undef or '')
#   $graph      graph URI or bnode (if named graphs feature is enabled)
#
# A registered 'pretriple_literal' callback is called first; a true return
# value suppresses the triple and the method returns nothing. Otherwise an
# RDF::Trine literal node is built (the language is only used when no
# datatype is defined) and the result of _insert_triple_common is returned.
{
	my ($self, $element, $subject, $predicate, $object, $datatype, $language, $graph) = @_;

	my $pretriple = $self->{'sub'}->{'pretriple_literal'};
	if (defined $pretriple)
	{
		my $suppress_triple = $pretriple->(
			$self,
			(ref $element ? $element->{current} : undef),
			$subject,
			$predicate,
			$object,
			$datatype,
			$language,
			$graph,
		);
		return if $suppress_triple;
	}

	# Now we know there's a literal
	my $to;

	# Work around bad Unicode handling in RDF::Trine.
	# $object = encode_utf8($object);

	if (!defined $datatype)
	{
		$to = RDF::Trine::Node::Literal->new($object, $language, undef);
	}
	elsif ($datatype ne RDF_XMLLIT)
	{
		$to = RDF::Trine::Node::Literal->new($object, undef, $datatype);
	}
	else
	{
		if ($self->{options}{use_rtnlx})
		{
			# Best effort: any failure here falls through to the plain
			# literal below.
			# NOTE(review): the callers visible in this file pass the
			# provenance hashref as $element, so ->childNodes looks like
			# it will always die inside this eval and the fallback will
			# always run. Confirm which DOM node was intended before
			# changing it.
			eval
			{
				require RDF::Trine::Node::Literal::XML;
				$to = RDF::Trine::Node::Literal::XML->new($element->childNodes);
			};
		}

		# Decide on the result of the attempt itself, not on $@, which may
		# still hold an error from an unrelated earlier eval.
		if (!defined $to)
		{
			# local() restores the previous setting even if the
			# constructor throws, so the global can never be left at 0.
			local $RDF::Trine::Node::Literal::USE_XMLLITERALS = 0;
			$to = RDF::Trine::Node::Literal->new($object, undef, $datatype);
		}
	}

	# Run the common function
	return $self->_insert_triple_common($element, $subject, $predicate, $to, $graph);
}
|
2400
|
|
|
|
|
|
|
|
|
2401
|
|
|
|
|
|
|
sub _insert_triple_common
# Builds an RDF::Trine statement and adds it to the parser's model.
#
#   $element    provenance data, passed through to the 'ontriple' callback
#   $subject    subject URI or bnode
#   $predicate  predicate URI
#   $to         RDF::Trine::Node to use as the object
#   $graph      graph URI or bnode (if named graphs feature is enabled)
#
# When the 'graph' option is on and $graph is true, a quad is built and the
# graph is counted in $self->{Graphs}; otherwise a plain triple is built.
# A registered 'ontriple' callback may suppress the statement by returning
# true, in which case nothing is added and nothing is returned. Otherwise
# the result of the model's add_statement call is returned.
{
	my ($self, $element, $subject, $predicate, $to, $graph) = @_;

	# Turns "_:label" into a blank node and anything else into a resource.
	my $node_for = sub
	{
		my ($identifier) = @_;
		if ($identifier =~ m/^_:(.*)/)
		{
			return RDF::Trine::Node::Blank->new($1);
		}
		return RDF::Trine::Node::Resource->new($identifier);
	};

	# First, make sure subject and predicates are the right kind of nodes
	my $tp = RDF::Trine::Node::Resource->new($predicate);
	my $ts = $node_for->($subject);

	my $statement;
	if ($self->{'options'}->{'graph'} && $graph)
	{
		# If we are configured for it, and graph name can be found, add it.
		$self->{Graphs}->{$graph}++;

		my $tg = $node_for->($graph);
		$statement = RDF::Trine::Statement::Quad->new($ts, $tp, $to, $tg);
	}
	else
	{
		# If no graph name, just add triples
		$statement = RDF::Trine::Statement->new($ts, $tp, $to);
	}

	my $ontriple = $self->{'sub'}->{'ontriple'};
	if ($ontriple)
	{
		my $suppress_triple = $ontriple->($self, $element, $statement);
		return if $suppress_triple;
	}

	return $self->{model}->add_statement($statement);
}
|
2454
|
|
|
|
|
|
|
|
|
2455
|
|
|
|
|
|
|
sub _atom_magic
# Delegates to $self->bnode($element, 1) and returns whatever it returns.
{
	my ($self, $element) = @_;
	return $self->bnode($element, 1);
}
|
2462
|
|
|
|
|
|
|
|
|
2463
|
|
|
|
|
|
|
# Splits things like property="foaf:name rdfs:label"
sub _split_tokens
# Returns the list of whitespace-separated tokens in $string, ignoring
# leading and trailing whitespace. An undefined or all-whitespace string
# yields an empty list.
{
	my ($self, $string) = @_;

	# Only a missing value means "no tokens". Testing definedness rather
	# than truth keeps the single token "0" from being thrown away.
	$string = '' unless defined $string;

	$string =~ s/^\s+//;
	$string =~ s/\s+$//;

	my @tokens = split /\s+/, $string;
	return @tokens;
}
|
2472
|
|
|
|
|
|
|
|
|
2473
|
|
|
|
|
|
|
sub _element_to_bookmarked_string
# Collects the text found between a bookmark element and its end marker.
#
# Starting from $bookmark, nodes are visited one at a time through
# $self->_find_next_node. The data of every text node is appended to the
# result. The walk stops when there is no next node, or at the first
# element whose local name and namespace equal the 'bookmark_end' option
# and whose name attribute (the 'bookmark_name' option) has the same value
# as on $bookmark. Returns the accumulated string.
{
	my ($self, $bookmark) = @_;

	# Both options may be written as "{namespace}localname". A false
	# namespace part, or a value without braces, gives an undef namespace.
	my $split_qname = sub
	{
		my ($qname) = @_;
		if ($qname =~ /^\{(.*)\}(.+)$/)
		{
			return $1 ? ($1, $2) : (undef, $2);
		}
		return (undef, $qname);
	};

	my @name_attribute = $split_qname->($self->{'options'}->{'bookmark_name'});
	my ($endtag_namespace, $endtag_localname)
		= $split_qname->($self->{'options'}->{'bookmark_end'});

	my $text = '';
	my $node = $bookmark;
	while ($node)
	{
		$node = $self->_find_next_node($node);
		last unless defined $node;

		my $type = $node->nodeType;
		if ($type == XML_TEXT_NODE)
		{
			$text .= $node->getData;
		}
		elsif ($type == XML_ELEMENT_NODE
			&& $node->localname eq $endtag_localname
			&& $node->namespaceURI eq $endtag_namespace
			&& $node->getAttributeNsSafe(@name_attribute) eq $bookmark->getAttributeNsSafe(@name_attribute))
		{
			# Matching end marker reached.
			last;
		}
	}

	return $text;
}
|
2520
|
|
|
|
|
|
|
|
|
2521
|
|
|
|
|
|
|
# Returns the node that follows $node in document order, or undef when
# there is none: the first child of an element if it has children,
# otherwise the next sibling of $node or of its nearest ancestor that
# has one.
sub _find_next_node
{
	my ($self, $node) = @_;

	# Descend into an element before moving sideways.
	if ($node->nodeType == XML_ELEMENT_NODE)
	{
		if (my @children = $node->childNodes)
		{
			return $children[0];
		}
	}

	# Otherwise climb until some level has a following sibling.
	for (my $cursor = $node; $cursor; $cursor = $cursor->parentNode)
	{
		my $sibling = $cursor->nextSibling;
		return $sibling if $sibling;
	}

	return undef;
}
|
2540
|
|
|
|
|
|
|
|
|
2541
|
|
|
|
|
|
|
# Returns the plain-text content of $dom: the data of a text node, the
# concatenated text of all descendants of an element, and the empty
# string for any other kind of node.
sub _element_to_string
{
	my ($self, $dom) = @_;
	my $type = $dom->nodeType;

	return $dom->getData if $type == XML_TEXT_NODE;
	return '' unless ($type == XML_ELEMENT_NODE);

	# Recurse into each child in order and glue the pieces together.
	return join '', map { scalar $self->_element_to_string($_) } $dom->childNodes;
}
|
2560
|
|
|
|
|
|
|
|
|
2561
|
|
|
|
|
|
|
# Serialises the children of $dom with toStringEC14N(1) and returns the
# concatenation (undef when $dom has no children).
#
# When $lang is defined, every child element that lacks an xml:lang
# attribute is given xml:lang="$lang" for the duration of its own
# serialisation; the attribute is removed again straight afterwards.
sub _element_to_xml
{
	my ($self, $dom, $lang) = @_;
	my $xml;

	for my $child ($dom->childNodes)
	{
		my $borrow_lang = defined $lang
			&& ($child->nodeType == XML_ELEMENT_NODE)
			&& !$child->hasAttributeNS(XML_XML_NS, 'lang');

		$child->setAttributeNS(XML_XML_NS, 'lang', $lang) if $borrow_lang;
		$xml .= $child->toStringEC14N(1);
		$child->removeAttributeNS(XML_XML_NS, 'lang') if $borrow_lang;
	}

	return $xml;
}
|
2590
|
|
|
|
|
|
|
|
|
2591
|
|
|
|
|
|
|
# Returns an identifier for a blank node.
#
#   $element - optional element the node stands for
#   $save_me - if true (and $element is defined), remember the identifier
#              under the element's nodePath so later calls return it again
#   $ident   - optional label (e.g. "_:foo"); the identifier is remembered
#              under this label too
#
# A previously remembered identifier for $element, or failing that for
# $ident, is returned as-is. Otherwise, if the 'tdb_service' option is on
# and $element has a non-empty id attribute, a thing-described-by.org URI
# built from the document URI and that id is returned (and not remembered).
# In all other cases a fresh identifier is minted from the per-parser
# prefix and counter: a tag: URI when the 'skolemize' option is on, else
# a "_:" label.
sub bnode
{
	my $self    = shift;
	my $element = shift;
	my $save_me = shift || 0;
	my $ident   = shift || undef;

	if (defined $element
	and $self->{'saved_bnodes'}->{ $element->nodePath })
	{
		return $self->{'saved_bnodes'}->{ $element->nodePath };
	}
	elsif (defined $ident
	and $self->{'saved_bnodes'}->{ $ident })
	{
		return $self->{'saved_bnodes'}->{ $ident };
	}

	if ($self->{options}->{tdb_service} && $element)
	{
		# The id must come from the element that was passed in: this is the
		# element the guard inspects. (It used to be read from
		# $self->{element}, which the visible code never sets.)
		my $id = $element->getAttribute('id');
		return sprintf('http://thing-described-by.org/?%s#%s', $self->uri, $id)
			if defined $id && length $id;
	}

	# Lazily create the prefix shared by every identifier of this parser.
	unless (defined $self->{bnode_prefix})
	{
		$self->{bnode_prefix} = Data::UUID->new->create_str;
		$self->{bnode_prefix} =~ s/-//g;
	}

	my $rv;
	if ($self->{options}->{skolemize})
	{
		$rv = sprintf('tag:buzzword.org.uk,2010:RDF-RDFa-Parser:skolem:%s:%04d', $self->{bnode_prefix}, $self->{bnodes}++);
	}
	else
	{
		$rv = sprintf('_:rdfa%snode%04d', $self->{bnode_prefix}, $self->{bnodes}++);
	}

	if ($save_me and defined $element)
	{
		$self->{'saved_bnodes'}->{ $element->nodePath } = $rv;
	}

	if (defined $ident)
	{
		$self->{'saved_bnodes'}->{ $ident } = $rv;
	}

	return $rv;
}
|
2643
|
|
|
|
|
|
|
|
|
2644
|
|
|
|
|
|
|
# Tests whether $value_to_test is a well-formed language tag.
#
# Returns 0 for undef, 1 for the empty string, and otherwise a true value
# if the string matches the pattern below and a false value if it does
# not. Only the shape of the tag is checked; subtags are not looked up in
# any registry.
#
# The pattern is a regex rendering of the RFC 4646 grammar
#   http://www.rfc-editor.org/rfc/rfc4646.txt
#   http://tools.ietf.org/html/draft-ietf-ltru-4646bis-21
# written bottom-up so that no forward references are needed. Matching is
# case-insensitive, and "_" is accepted as well as "-" between subtags.
sub _valid_lang
{
	my ($self, $value_to_test) = @_;

	return 0 unless defined $value_to_test;
	return 1 if $value_to_test eq '';

	# Separator. Lenient parsers use [_-]; a strict one would use [-].
	my $s = '[_-]';

	# Every fragment that mentions $s is double-quoted so the separator is
	# interpolated here and now. Left inside a single-quoted fragment, "$s"
	# would reach the regex as the anchor "$" followed by a literal "s",
	# and that alternative could never match.

	# Primary language, optionally followed by one extlang subtag
	# (which is what makes tags such as "zh-yue" well-formed).
	my $language      = "(?: [a-z]{2,8} | [a-z]{2,3} $s [a-z]{3} )";
	my $script        = '[a-z]{4}';                                  # 4ALPHA
	my $region        = '(?: [a-z]{2} | [0-9]{3} )';                 # 2ALPHA / 3DIGIT
	my $variant       = '(?: [a-z0-9]{5,8} | [0-9] [a-z0-9]{3} )';   # 5*8alphanum / (DIGIT 3alphanum)
	my $extension     = "(?: [a-wyz] (?: $s [a-z0-9]{2,8} )+ )";     # singleton 1*("-" (2*8alphanum))
	my $privateUse    = "(?: x (?: $s [a-z0-9]{1,8} )+ )";           # "x" 1*("-" (1*8alphanum))

	# Grandfathered codes that the general grammar would not accept.
	# http://www.iana.org/assignments/language-subtag-registry
	my $grandfathered = "(?:
		  en $s GB $s oed
		| i $s (?: ami | bnn | default | enochian | hak | klingon | lux | mingo | navajo | pwn | tao | tay | tsu )
		| no $s (?: bok | nyn )
		| sgn $s (?: BE $s (?: fr | nl ) | CH $s de )
		| zh $s min $s nan
	)";

	# Variants and extensions may each be repeated.
	my $variantList   = "$variant (?: $s $variant )*";
	my $extensionList = "$extension (?: $s $extension )*";

	my $langtag = "
		$language
		(?: $s $script )?
		(?: $s $region )?
		(?: $s $variantList )?
		(?: $s $extensionList )?
		(?: $s $privateUse )?
	";

	my $r = ($value_to_test =~
		/^(?:
			  $langtag
			| $privateUse
			| $grandfathered
		)$/xi);
	return $r;
}
|
2742
|
|
|
|
|
|
|
|
|
2743
|
|
|
|
|
|
|
# Expands $token through __expand_curie() and, when an 'ontoken' callback
# has been registered, lets that callback decide the final value.
#
# The callback is invoked as ($parser, $args{element}, $token, $expansion)
# and whatever it returns is returned to the caller. Without a callback
# the expansion itself is returned.
sub _expand_curie
{
	my ($self, $token, %args) = @_;

	my $expansion = $self->__expand_curie($token, %args);

	my $hook = $self->{'sub'}->{'ontoken'};
	return $expansion unless defined $hook;

	return $hook->($self, $args{element}, $token, $expansion);
}
|
2755
|
|
|
|
|
|
|
|
|
2756
|
|
|
|
|
|
|
# Worker behind _expand_curie(): turns one attribute token into a URI or
# blank node identifier, or returns undef if nothing applies.
#
# Named arguments read from %args: attribute, element, xml_base, prefixes,
# terms, allow_unsafe_default_vocab, allow_unsafe_term, allow_relative.
#
# Interpretations are tried in this order, first hit wins:
#   1. blank node ("_:x" or "[_:x]")
#   2. term in the default vocabulary
#   3. registered term / keyword
#   4. prefixed CURIE
#   5. bare prefix (only with the 'prefix_bare' option)
#   6. absolute URI
#   7. relative URI
# A token wrapped in [square brackets] is a safe CURIE: the brackets are
# stripped and the URI interpretations (6 and 7) are not tried.
sub __expand_curie
{
	my ($self, $token, %args) = @_;

	# Attributes whose values are CURIE-like / URI-like by default.
	my $curie_attr = qr/^(rel|rev|property|typeof|datatype|role)$/i;
	my $uri_attr   = qr/^(about|resource|graph)$/i;

	# Blank nodes
	{
		my $label;
		if ($token eq '_:' || $token eq '[_:]')
		{
			$label = '_:';
		}
		elsif ($token =~ /^_:(.+)$/i || $token =~ /^\[_:(.+)\]$/i)
		{
			$label = '_:'.$1;
		}

		# The bnode is minted before the attribute is checked.
		my $bnode = defined $label ? $self->bnode(undef, undef, $label) : undef;

		if (defined $bnode)
		{
			return $bnode
				unless $args{'attribute'} =~ /^(rel|rev|property|datatype)$/i;

			$self->_log_error(
				ERR_ERROR,
				ERR_CODE_BNODE_WRONGPLACE,
				"Blank node found in $args{attribute} where URIs are expected as values.",
				token     => $token,
				element   => $args{element},
				attribute => $args{attribute},
				);

			return (($token =~ /^\[_:(.+)\]$/i) ? $1 : $token);
		}
	}

	# Safe CURIE: remember the fact and work on the inside of the brackets.
	my $is_safe = 0;
	if ($token =~ /^\[(.*)\]$/)
	{
		($is_safe, $token) = (1, $1);
	}

	# Several of the cases below only apply to a bare NCName.
	my $is_ncname = ($token =~ /^($XML::RegExp::NCName)$/);

	# CURIEs - default vocab
	if ($is_ncname
	and ($is_safe || $args{'attribute'} =~ $curie_attr || $args{'allow_unsafe_default_vocab'}))
	{
		return 'http://www.w3.org/1999/xhtml/vocab#' . $token
			if $args{'attribute'} eq 'role';

		return $args{'prefixes'}{'(VOCAB)'} . $token
			if defined $args{'prefixes'}{'(VOCAB)'};

		return undef if $is_safe;
	}

	# Keywords / terms / whatever-they're-called
	if ($is_ncname
	and ($is_safe || $args{'attribute'} =~ $curie_attr || $args{'allow_unsafe_term'}))
	{
		my $terms = $args{'terms'};
		my $attr  = $args{'attribute'};

		# Case-sensitive tables first, attribute-specific before '*'.
		for my $lookup (
			[ 'sensitive',   $attr, $token    ],
			[ 'sensitive',   '*',   $token    ],
			[ 'insensitive', $attr, lc $token ],
			[ 'insensitive', '*',   lc $token ],
		)
		{
			my ($table, $scope, $key) = @$lookup;
			my $uri = $terms->{$table}{$scope}{$key};
			return $uri if defined $uri;
		}
	}

	# CURIEs - prefixed
	my @parts = ($token =~ /^($XML::RegExp::NCName)?:(\S*)$/);
	if (@parts
	and (
		$is_safe
		or $args{attribute} =~ $curie_attr
		or $self->{options}{safe_optional}
	))
	{
		my ($prefix, $suffix) = @parts;
		$prefix = '(DEFAULT PREFIX)' unless (defined $prefix && length $prefix);

		return $args{'prefixes'}{'(DEFAULT PREFIX)'} . $suffix
			if defined $args{'prefixes'}{'(DEFAULT PREFIX)'} && $prefix eq '(DEFAULT PREFIX)';

		return $args{'prefixes'}{'sensitive'}{$prefix} . $suffix
			if defined $args{'prefixes'}{'sensitive'}{$prefix};

		return $args{'prefixes'}{'insensitive'}{lc $prefix} . $suffix
			if defined $args{'prefixes'}{'insensitive'}{lc $prefix};

		if ($is_safe)
		{
			$prefix = '' if $prefix eq '(DEFAULT PREFIX)';
			$self->_log_error(
				ERR_WARNING,
				ERR_CODE_CURIE_UNDEFINED,
				"CURIE '$token' used in safe CURIE, but '$prefix' is undefined.",
				token     => $token,
				element   => $args{element},
				attribute => $args{attribute},
				prefix    => $prefix,
				);
			return undef;
		}
	}

	# CURIEs - bare prefixes
	if ($self->{options}{prefix_bare}
	and $is_ncname
	and (
		$is_safe
		or $args{attribute} =~ $curie_attr
		or $self->{options}{safe_optional}
	))
	{
		# The whole token is the prefix; there is no suffix to append.
		return $args{'prefixes'}{'sensitive'}{$token} . ''
			if defined $args{'prefixes'}{'sensitive'}{$token};

		return $args{'prefixes'}{'insensitive'}{lc $token} . ''
			if defined $args{'prefixes'}{'insensitive'}{lc $token};
	}

	# Absolute URIs
	return $token
		if !$is_safe
		and $token =~ /^[A-Z][A-Z0-9\.\+-]*:/i
		and ($self->{'options'}{'full_uris'} || $args{'attribute'} =~ $uri_attr);

	# Relative URIs
	return $self->uri($token, {'element'=>$args{'element'}, 'xml_base'=>$args{'xml_base'}})
		if !$is_safe
		and ($args{'attribute'} =~ $uri_attr || $args{'allow_relative'});

	$self->_log_error(
		ERR_WARNING,
		ERR_CODE_CURIE_FELLTHROUGH,
		"Couldn't make sense of token '$token'.",
		token     => $token,
		element   => $args{element},
		attribute => $args{attribute},
		);

	return undef;
}
|
2908
|
|
|
|
|
|
|
|
|
2909
|
|
|
|
|
|
|
__PACKAGE__ |
|
2910
|
|
|
|
|
|
|
__END__ |
|
2911
|
|
|
|
|
|
|
|
|
2912
|
|
|
|
|
|
|
=head1 NAME |
|
2913
|
|
|
|
|
|
|
|
|
2914
|
|
|
|
|
|
|
RDF::RDFa::Parser - flexible RDFa parser |
|
2915
|
|
|
|
|
|
|
|
|
2916
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
2917
|
|
|
|
|
|
|
|
|
2918
|
|
|
|
|
|
|
If you want to work with an RDF::Trine::Model that can be queried with SPARQL, etc: |
|
2919
|
|
|
|
|
|
|
|
|
2920
|
|
|
|
|
|
|
use RDF::RDFa::Parser; |
|
2921
|
|
|
|
|
|
|
my $url = 'http://example.com/document.html'; |
|
2922
|
|
|
|
|
|
|
my $options = RDF::RDFa::Parser::Config->new('xhtml', '1.1'); |
|
2923
|
|
|
|
|
|
|
my $rdfa = RDF::RDFa::Parser->new_from_url($url, $options); |
|
2924
|
|
|
|
|
|
|
my $model = $rdfa->graph; |
|
2925
|
|
|
|
|
|
|
|
|
2926
|
|
|
|
|
|
|
For dealing with local data: |
|
2927
|
|
|
|
|
|
|
|
|
2928
|
|
|
|
|
|
|
use RDF::RDFa::Parser; |
|
2929
|
|
|
|
|
|
|
my $base_url = 'http://example.com/document.html'; |
|
2930
|
|
|
|
|
|
|
my $options = RDF::RDFa::Parser::Config->new('xhtml', '1.1'); |
|
2931
|
|
|
|
|
|
|
my $rdfa = RDF::RDFa::Parser->new($markup, $base_url, $options); |
|
2932
|
|
|
|
|
|
|
my $model = $rdfa->graph; |
|
2933
|
|
|
|
|
|
|
|
|
2934
|
|
|
|
|
|
|
A simple set of operations for working with Open Graph Protocol data: |
|
2935
|
|
|
|
|
|
|
|
|
2936
|
|
|
|
|
|
|
use RDF::RDFa::Parser; |
|
2937
|
|
|
|
|
|
|
my $url = 'http://www.rottentomatoes.com/m/net/'; |
|
2938
|
|
|
|
|
|
|
my $options = RDF::RDFa::Parser::Config->tagsoup; |
|
2939
|
|
|
|
|
|
|
my $rdfa = RDF::RDFa::Parser->new_from_url($url, $options); |
|
2940
|
|
|
|
|
|
|
print $rdfa->opengraph('title') . "\n"; |
|
2941
|
|
|
|
|
|
|
print $rdfa->opengraph('image') . "\n"; |
|
2942
|
|
|
|
|
|
|
|
|
2943
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
2944
|
|
|
|
|
|
|
|
|
2945
|
|
|
|
|
|
|
L<RDF::TrineX::Parser::RDFa> provides a saner interface for this module. |
|
2946
|
|
|
|
|
|
|
If you are new to parsing RDFa with Perl, then that's the best place to |
|
2947
|
|
|
|
|
|
|
start. |
|
2948
|
|
|
|
|
|
|
|
|
2949
|
|
|
|
|
|
|
=head2 Forthcoming API Changes |
|
2950
|
|
|
|
|
|
|
|
|
2951
|
|
|
|
|
|
|
Some of the logic regarding host language and RDFa version guessing |
|
2952
|
|
|
|
|
|
|
is likely to be removed from RDF::RDFa::Parser and |
|
2953
|
|
|
|
|
|
|
RDF::RDFa::Parser::Config, and shifted into RDF::TrineX::Parser::RDFa |
|
2954
|
|
|
|
|
|
|
instead. |
|
2955
|
|
|
|
|
|
|
|
|
2956
|
|
|
|
|
|
|
=head2 Constructors |
|
2957
|
|
|
|
|
|
|
|
|
2958
|
|
|
|
|
|
|
=over 4 |
|
2959
|
|
|
|
|
|
|
|
|
2960
|
|
|
|
|
|
|
=item C<< $p = RDF::RDFa::Parser->new($markup, $base, [$config], [$storage]) >> |
|
2961
|
|
|
|
|
|
|
|
|
2962
|
|
|
|
|
|
|
This method creates a new RDF::RDFa::Parser object and returns it. |
|
2963
|
|
|
|
|
|
|
|
|
2964
|
|
|
|
|
|
|
The $markup variable may contain an XHTML/XML string, or a |
|
2965
|
|
|
|
|
|
|
XML::LibXML::Document. If a string, the document is parsed using |
|
2966
|
|
|
|
|
|
|
XML::LibXML::Parser or HTML::HTML5::Parser, depending on the |
|
2967
|
|
|
|
|
|
|
configuration in $config. XML well-formedness errors will cause the |
|
2968
|
|
|
|
|
|
|
function to die. |
|
2969
|
|
|
|
|
|
|
|
|
2970
|
|
|
|
|
|
|
$base is a URL used to resolve relative links found in the document. |
|
2971
|
|
|
|
|
|
|
|
|
2972
|
|
|
|
|
|
|
$config optionally holds an RDF::RDFa::Parser::Config object which |
|
2973
|
|
|
|
|
|
|
determines the set of rules used to parse the RDFa. It defaults to |
|
2974
|
|
|
|
|
|
|
XHTML+RDFa 1.1. |
|
2975
|
|
|
|
|
|
|
|
|
2976
|
|
|
|
|
|
|
B<Advanced usage note:> $storage optionally holds an RDF::Trine::Store |
|
2977
|
|
|
|
|
|
|
object. If undef, then a new temporary store is created. |
|
2978
|
|
|
|
|
|
|
|
|
2979
|
|
|
|
|
|
|
=item C<< $p = RDF::RDFa::Parser->new_from_url($url, [$config], [$storage]) >> |
|
2980
|
|
|
|
|
|
|
|
|
2981
|
|
|
|
|
|
|
=item C<< $p = RDF::RDFa::Parser->new_from_uri($url, [$config], [$storage]) >> |
|
2982
|
|
|
|
|
|
|
|
|
2983
|
|
|
|
|
|
|
$url is a URL to fetch and parse, or an HTTP::Response object. |
|
2984
|
|
|
|
|
|
|
|
|
2985
|
|
|
|
|
|
|
$config optionally holds an RDF::RDFa::Parser::Config object which |
|
2986
|
|
|
|
|
|
|
determines the set of rules used to parse the RDFa. The default is |
|
2987
|
|
|
|
|
|
|
to determine the configuration by looking at the HTTP response |
|
2988
|
|
|
|
|
|
|
Content-Type header; it's probably sensible to keep the default. |
|
2989
|
|
|
|
|
|
|
|
|
2990
|
|
|
|
|
|
|
$storage optionally holds an RDF::Trine::Store object. If undef, then |
|
2991
|
|
|
|
|
|
|
a new temporary store is created. |
|
2992
|
|
|
|
|
|
|
|
|
2993
|
|
|
|
|
|
|
This function can also be called as C<new_from_url> or C<new_from_uri>. |
|
2994
|
|
|
|
|
|
|
Same thing. |
|
2995
|
|
|
|
|
|
|
|
|
2996
|
|
|
|
|
|
|
=item C<< $p = RDF::RDFa::Parser->new_from_response($response, [$config], [$storage]) >> |
|
2997
|
|
|
|
|
|
|
|
|
2998
|
|
|
|
|
|
|
$response is an C<HTTP::Response> object. |
|
2999
|
|
|
|
|
|
|
|
|
3000
|
|
|
|
|
|
|
Otherwise the same as C<new_from_url>. |
|
3001
|
|
|
|
|
|
|
|
|
3002
|
|
|
|
|
|
|
=back |
|
3003
|
|
|
|
|
|
|
|
|
3004
|
|
|
|
|
|
|
=head2 Public Methods |
|
3005
|
|
|
|
|
|
|
|
|
3006
|
|
|
|
|
|
|
=over 4 |
|
3007
|
|
|
|
|
|
|
|
|
3008
|
|
|
|
|
|
|
=item C<< $p->graph >> |
|
3009
|
|
|
|
|
|
|
|
|
3010
|
|
|
|
|
|
|
This will return an RDF::Trine::Model containing all the RDFa |
|
3011
|
|
|
|
|
|
|
data found on the page. |
|
3012
|
|
|
|
|
|
|
|
|
3013
|
|
|
|
|
|
|
B<Advanced usage note:> If passed a graph URI as a parameter, |
|
3014
|
|
|
|
|
|
|
will return a single named graph from within the page. This |
|
3015
|
|
|
|
|
|
|
feature is only useful if you're using named graphs. |
|
3016
|
|
|
|
|
|
|
|
|
3017
|
|
|
|
|
|
|
=item C<< $p->graphs >> |
|
3018
|
|
|
|
|
|
|
|
|
3019
|
|
|
|
|
|
|
B<Advanced usage only.> |
|
3020
|
|
|
|
|
|
|
|
|
3021
|
|
|
|
|
|
|
Will return a hashref of all named graphs, where the graph name is a |
|
3022
|
|
|
|
|
|
|
key and the value is a RDF::Trine::Model tied to a temporary storage. |
|
3023
|
|
|
|
|
|
|
|
|
3024
|
|
|
|
|
|
|
This method is only useful if you're using named graphs. |
|
3025
|
|
|
|
|
|
|
|
|
3026
|
|
|
|
|
|
|
=item C<< $p->opengraph([$property]) >> |
|
3027
|
|
|
|
|
|
|
|
|
3028
|
|
|
|
|
|
|
If $property is provided, will return the value or list of values (if |
|
3029
|
|
|
|
|
|
|
called in list context) for that Open Graph Protocol property. (In pure |
|
3030
|
|
|
|
|
|
|
RDF terms, it returns the non-bnode objects of triples where the |
|
3031
|
|
|
|
|
|
|
subject is the document base URI; and the predicate is $property, |
|
3032
|
|
|
|
|
|
|
with non-URI $property strings taken as having the implicit prefix |
|
3033
|
|
|
|
|
|
|
'http://ogp.me/ns#'. There is no distinction between literal and |
|
3034
|
|
|
|
|
|
|
non-literal values; literal datatypes and languages are dropped.) |
|
3035
|
|
|
|
|
|
|
|
|
3036
|
|
|
|
|
|
|
If $property is omitted, returns a list of possible properties. |
|
3037
|
|
|
|
|
|
|
|
|
3038
|
|
|
|
|
|
|
Example: |
|
3039
|
|
|
|
|
|
|
|
|
3040
|
|
|
|
|
|
|
foreach my $property (sort $p->opengraph) |
|
3041
|
|
|
|
|
|
|
{ |
|
3042
|
|
|
|
|
|
|
print "$property :\n"; |
|
3043
|
|
|
|
|
|
|
foreach my $val (sort $p->opengraph($property)) |
|
3044
|
|
|
|
|
|
|
{ |
|
3045
|
|
|
|
|
|
|
print " * $val\n"; |
|
3046
|
|
|
|
|
|
|
} |
|
3047
|
|
|
|
|
|
|
} |
|
3048
|
|
|
|
|
|
|
|
|
3049
|
|
|
|
|
|
|
See also: L<http://opengraphprotocol.org/>. |
|
3050
|
|
|
|
|
|
|
|
|
3051
|
|
|
|
|
|
|
=item C<< $p->dom >> |
|
3052
|
|
|
|
|
|
|
|
|
3053
|
|
|
|
|
|
|
Returns the parsed XML::LibXML::Document. |
|
3054
|
|
|
|
|
|
|
|
|
3055
|
|
|
|
|
|
|
=item C<< $p->uri( [$other_uri] ) >> |
|
3056
|
|
|
|
|
|
|
|
|
3057
|
|
|
|
|
|
|
Returns the base URI of the document being parsed. This will usually be the |
|
3058
|
|
|
|
|
|
|
same as the base URI provided to the constructor, but may differ if the |
|
3059
|
|
|
|
|
|
|
document contains a <base> HTML element. |
|
3060
|
|
|
|
|
|
|
|
|
3061
|
|
|
|
|
|
|
Optionally it may be passed a parameter - an absolute or relative URI - in |
|
3062
|
|
|
|
|
|
|
which case it returns the same URI which it was passed as a parameter, but |
|
3063
|
|
|
|
|
|
|
as an absolute URI, resolved relative to the document's base URI. |
|
3064
|
|
|
|
|
|
|
|
|
3065
|
|
|
|
|
|
|
This seems like two unrelated functions, but if you consider the consequence |
|
3066
|
|
|
|
|
|
|
of passing a relative URI consisting of a zero-length string, it in fact makes |
|
3067
|
|
|
|
|
|
|
sense. |
|
3068
|
|
|
|
|
|
|
|
|
3069
|
|
|
|
|
|
|
=item C<< $p->errors >> |
|
3070
|
|
|
|
|
|
|
|
|
3071
|
|
|
|
|
|
|
Returns a list of errors and warnings that occurred during parsing. |
|
3072
|
|
|
|
|
|
|
|
|
3073
|
|
|
|
|
|
|
=item C<< $p->processor_graph >> |
|
3074
|
|
|
|
|
|
|
|
|
3075
|
|
|
|
|
|
|
As per C<< $p->errors >> but returns data as an RDF model. |
|
3076
|
|
|
|
|
|
|
|
|
3077
|
|
|
|
|
|
|
=item C<< $p->output_graph >> |
|
3078
|
|
|
|
|
|
|
|
|
3079
|
|
|
|
|
|
|
An alias for C<graph>, but does not accept a parameter. |
|
3080
|
|
|
|
|
|
|
|
|
3081
|
|
|
|
|
|
|
=item C<< $p->processor_and_output_graph >> |
|
3082
|
|
|
|
|
|
|
|
|
3083
|
|
|
|
|
|
|
Union of the above two graphs. |
|
3084
|
|
|
|
|
|
|
|
|
3085
|
|
|
|
|
|
|
=item C<< $p->consume >> |
|
3086
|
|
|
|
|
|
|
|
|
3087
|
|
|
|
|
|
|
B<Advanced usage only.> |
|
3088
|
|
|
|
|
|
|
|
|
3089
|
|
|
|
|
|
|
The document is parsed for RDFa. As of RDF::RDFa::Parser 1.09x, |
|
3090
|
|
|
|
|
|
|
this is called automatically when needed; you probably don't need |
|
3091
|
|
|
|
|
|
|
to touch it unless you're doing interesting things with callbacks. |
|
3092
|
|
|
|
|
|
|
|
|
3093
|
|
|
|
|
|
|
Calling C<< $p->consume(survive => 1) >> will avoid crashing (e.g. |
|
3094
|
|
|
|
|
|
|
when the markup provided cannot be parsed), and instead make more |
|
3095
|
|
|
|
|
|
|
errors available in C<< $p->errors >>. |
|
3096
|
|
|
|
|
|
|
|
|
3097
|
|
|
|
|
|
|
=item C<< $p->set_callbacks(\%callbacks) >> |
|
3098
|
|
|
|
|
|
|
|
|
3099
|
|
|
|
|
|
|
B<Advanced usage only.> |
|
3100
|
|
|
|
|
|
|
|
|
3101
|
|
|
|
|
|
|
Set callback functions for the parser to call on certain events. These are only necessary if |
|
3102
|
|
|
|
|
|
|
you want to do something especially unusual. |
|
3103
|
|
|
|
|
|
|
|
|
3104
|
|
|
|
|
|
|
$p->set_callbacks({ |
|
3105
|
|
|
|
|
|
|
'pretriple_resource' => sub { ... } , |
|
3106
|
|
|
|
|
|
|
'pretriple_literal' => sub { ... } , |
|
3107
|
|
|
|
|
|
|
'ontriple' => undef , |
|
3108
|
|
|
|
|
|
|
'onprefix' => \&some_function , |
|
3109
|
|
|
|
|
|
|
}); |
|
3110
|
|
|
|
|
|
|
|
|
3111
|
|
|
|
|
|
|
Either of the two pretriple callbacks can be set to the string 'print' instead of a coderef. |
|
3112
|
|
|
|
|
|
|
This enables built-in callbacks for printing Turtle to STDOUT. |
|
3113
|
|
|
|
|
|
|
|
|
3114
|
|
|
|
|
|
|
For details of the callback functions, see the section CALLBACKS. If used, C<set_callbacks> |
|
3115
|
|
|
|
|
|
|
must be called I<before> C<consume>. C<set_callbacks> returns a reference to the parser |
|
3116
|
|
|
|
|
|
|
object itself. |
|
3117
|
|
|
|
|
|
|
|
|
3118
|
|
|
|
|
|
|
=item C<< $p->element_subjects >> |
|
3119
|
|
|
|
|
|
|
|
|
3120
|
|
|
|
|
|
|
B<Advanced usage only.> |
|
3121
|
|
|
|
|
|
|
|
|
3122
|
|
|
|
|
|
|
Gets/sets a hashref of { xpath => RDF::Trine::Node } mappings. |
|
3123
|
|
|
|
|
|
|
|
|
3124
|
|
|
|
|
|
|
This is not touched during normal RDFa parsing, only being used by the @role and |
|
3125
|
|
|
|
|
|
|
@cite features where RDF resources (i.e. URIs and blank nodes) are needed to |
|
3126
|
|
|
|
|
|
|
represent XML elements themselves. |
|
3127
|
|
|
|
|
|
|
|
|
3128
|
|
|
|
|
|
|
=back |
|
3129
|
|
|
|
|
|
|
|
|
3130
|
|
|
|
|
|
|
=head1 CALLBACKS |
|
3131
|
|
|
|
|
|
|
|
|
3132
|
|
|
|
|
|
|
Several callback functions are provided. These may be set using the C<set_callbacks> function, |
|
3133
|
|
|
|
|
|
|
which takes a hashref of keys pointing to coderefs. The keys are named for the event to fire the |
|
3134
|
|
|
|
|
|
|
callback on. |
|
3135
|
|
|
|
|
|
|
|
|
3136
|
|
|
|
|
|
|
=head2 ontriple |
|
3137
|
|
|
|
|
|
|
|
|
3138
|
|
|
|
|
|
|
This is called once a triple is ready to be added to the graph. (After the pretriple |
|
3139
|
|
|
|
|
|
|
callbacks.) The parameters passed to the callback function are: |
|
3140
|
|
|
|
|
|
|
|
|
3141
|
|
|
|
|
|
|
=over 4 |
|
3142
|
|
|
|
|
|
|
|
|
3143
|
|
|
|
|
|
|
=item * A reference to the C<RDF::RDFa::Parser> object |
|
3144
|
|
|
|
|
|
|
|
|
3145
|
|
|
|
|
|
|
=item * A hashref of relevant C<XML::LibXML::Element> objects (subject, predicate, object, graph, current) |
|
3146
|
|
|
|
|
|
|
|
|
3147
|
|
|
|
|
|
|
=item * An RDF::Trine::Statement object. |
|
3148
|
|
|
|
|
|
|
|
|
3149
|
|
|
|
|
|
|
=back |
|
3150
|
|
|
|
|
|
|
|
|
3151
|
|
|
|
|
|
|
The callback should return 1 to tell the parser to skip this triple (not add it to |
|
3152
|
|
|
|
|
|
|
the graph); return 0 otherwise. The callback may modify the RDF::Trine::Statement |
|
3153
|
|
|
|
|
|
|
object. |
|
3154
|
|
|
|
|
|
|
|
|
3155
|
|
|
|
|
|
|
=head2 onprefix |
|
3156
|
|
|
|
|
|
|
|
|
3157
|
|
|
|
|
|
|
This is called when a new CURIE prefix is discovered. The parameters passed |
|
3158
|
|
|
|
|
|
|
to the callback function are: |
|
3159
|
|
|
|
|
|
|
|
|
3160
|
|
|
|
|
|
|
=over 4 |
|
3161
|
|
|
|
|
|
|
|
|
3162
|
|
|
|
|
|
|
=item * A reference to the C<RDF::RDFa::Parser> object |
|
3163
|
|
|
|
|
|
|
|
|
3164
|
|
|
|
|
|
|
=item * A reference to the C<XML::LibXML::Element> being parsed |
|
3165
|
|
|
|
|
|
|
|
|
3166
|
|
|
|
|
|
|
=item * The prefix (string, e.g. "foaf") |
|
3167
|
|
|
|
|
|
|
|
|
3168
|
|
|
|
|
|
|
=item * The expanded URI (string, e.g. "http://xmlns.com/foaf/0.1/") |
|
3169
|
|
|
|
|
|
|
|
|
3170
|
|
|
|
|
|
|
=back |
|
3171
|
|
|
|
|
|
|
|
|
3172
|
|
|
|
|
|
|
The return value of this callback is currently ignored, but you should return |
|
3173
|
|
|
|
|
|
|
0 in case future versions of this module assign significance to the return value. |
|
3174
|
|
|
|
|
|
|
|
|
3175
|
|
|
|
|
|
|
=head2 ontoken |
|
3176
|
|
|
|
|
|
|
|
|
3177
|
|
|
|
|
|
|
This is called when a CURIE or term has been expanded. The parameters are: |
|
3178
|
|
|
|
|
|
|
|
|
3179
|
|
|
|
|
|
|
=over 4 |
|
3180
|
|
|
|
|
|
|
|
|
3181
|
|
|
|
|
|
|
=item * A reference to the C<RDF::RDFa::Parser> object |
|
3182
|
|
|
|
|
|
|
|
|
3183
|
|
|
|
|
|
|
=item * A reference to the C<XML::LibXML::Element> being parsed |
|
3184
|
|
|
|
|
|
|
|
|
3185
|
|
|
|
|
|
|
=item * The CURIE or token as a string (e.g. "foaf:name" or "Stylesheet") |
|
3186
|
|
|
|
|
|
|
|
|
3187
|
|
|
|
|
|
|
=item * The fully expanded URI |
|
3188
|
|
|
|
|
|
|
|
|
3189
|
|
|
|
|
|
|
=back |
|
3190
|
|
|
|
|
|
|
|
|
3191
|
|
|
|
|
|
|
The callback function must return a fully expanded URI, or if it |
|
3192
|
|
|
|
|
|
|
wants the CURIE to be ignored, undef. |
|
3193
|
|
|
|
|
|
|
|
|
3194
|
|
|
|
|
|
|
=head2 onerror |
|
3195
|
|
|
|
|
|
|
|
|
3196
|
|
|
|
|
|
|
This is called when an error occurs: |
|
3197
|
|
|
|
|
|
|
|
|
3198
|
|
|
|
|
|
|
=over 4 |
|
3199
|
|
|
|
|
|
|
|
|
3200
|
|
|
|
|
|
|
=item * A reference to the C<RDF::RDFa::Parser> object |
|
3201
|
|
|
|
|
|
|
|
|
3202
|
|
|
|
|
|
|
=item * The error level (RDF::RDFa::Parser::ERR_ERROR or |
|
3203
|
|
|
|
|
|
|
RDF::RDFa::Parser::ERR_WARNING) |
|
3204
|
|
|
|
|
|
|
|
|
3205
|
|
|
|
|
|
|
=item * An error code |
|
3206
|
|
|
|
|
|
|
|
|
3207
|
|
|
|
|
|
|
=item * An error message |
|
3208
|
|
|
|
|
|
|
|
|
3209
|
|
|
|
|
|
|
=item * A hash of other information |
|
3210
|
|
|
|
|
|
|
|
|
3211
|
|
|
|
|
|
|
=back |
|
3212
|
|
|
|
|
|
|
|
|
3213
|
|
|
|
|
|
|
The return value of this callback is currently ignored, but you should return |
|
3214
|
|
|
|
|
|
|
0 in case future versions of this module assign significance to the return value. |
|
3215
|
|
|
|
|
|
|
|
|
3216
|
|
|
|
|
|
|
If you do not define an onerror callback, then errors will be output via STDERR |
|
3217
|
|
|
|
|
|
|
and warnings will be silent. Either way, you can retrieve errors after parsing |
|
3218
|
|
|
|
|
|
|
using the C<errors> method. |
|
3219
|
|
|
|
|
|
|
|
|
3220
|
|
|
|
|
|
|
=head2 pretriple_resource |
|
3221
|
|
|
|
|
|
|
|
|
3222
|
|
|
|
|
|
|
B<This callback is deprecated - use ontriple instead.> |
|
3223
|
|
|
|
|
|
|
|
|
3224
|
|
|
|
|
|
|
This is called when a triple has been found, but before preparing the triple for |
|
3225
|
|
|
|
|
|
|
adding to the model. It is only called for triples with a non-literal object value. |
|
3226
|
|
|
|
|
|
|
|
|
3227
|
|
|
|
|
|
|
The parameters passed to the callback function are: |
|
3228
|
|
|
|
|
|
|
|
|
3229
|
|
|
|
|
|
|
=over 4 |
|
3230
|
|
|
|
|
|
|
|
|
3231
|
|
|
|
|
|
|
=item * A reference to the C<RDF::RDFa::Parser> object |
|
3232
|
|
|
|
|
|
|
|
|
3233
|
|
|
|
|
|
|
=item * A reference to the C<XML::LibXML::Element> being parsed |
|
3234
|
|
|
|
|
|
|
|
|
3235
|
|
|
|
|
|
|
=item * Subject URI or bnode (string) |
|
3236
|
|
|
|
|
|
|
|
|
3237
|
|
|
|
|
|
|
=item * Predicate URI (string) |
|
3238
|
|
|
|
|
|
|
|
|
3239
|
|
|
|
|
|
|
=item * Object URI or bnode (string) |
|
3240
|
|
|
|
|
|
|
|
|
3241
|
|
|
|
|
|
|
=item * Graph URI or bnode (string or undef) |
|
3242
|
|
|
|
|
|
|
|
|
3243
|
|
|
|
|
|
|
=back |
|
3244
|
|
|
|
|
|
|
|
|
3245
|
|
|
|
|
|
|
The callback should return 1 to tell the parser to skip this triple (not add it to |
|
3246
|
|
|
|
|
|
|
the graph); return 0 otherwise. |
|
3247
|
|
|
|
|
|
|
|
|
3248
|
|
|
|
|
|
|
=head2 pretriple_literal |
|
3249
|
|
|
|
|
|
|
|
|
3250
|
|
|
|
|
|
|
B<This callback is deprecated - use ontriple instead.> |
|
3251
|
|
|
|
|
|
|
|
|
3252
|
|
|
|
|
|
|
This is the equivalent of pretriple_resource, but is only called for triples with a |
|
3253
|
|
|
|
|
|
|
literal object value. |
|
3254
|
|
|
|
|
|
|
|
|
3255
|
|
|
|
|
|
|
The parameters passed to the callback function are: |
|
3256
|
|
|
|
|
|
|
|
|
3257
|
|
|
|
|
|
|
=over 4 |
|
3258
|
|
|
|
|
|
|
|
|
3259
|
|
|
|
|
|
|
=item * A reference to the C<RDF::RDFa::Parser> object |
|
3260
|
|
|
|
|
|
|
|
|
3261
|
|
|
|
|
|
|
=item * A reference to the C<XML::LibXML::Element> being parsed |
|
3262
|
|
|
|
|
|
|
|
|
3263
|
|
|
|
|
|
|
=item * Subject URI or bnode (string) |
|
3264
|
|
|
|
|
|
|
|
|
3265
|
|
|
|
|
|
|
=item * Predicate URI (string) |
|
3266
|
|
|
|
|
|
|
|
|
3267
|
|
|
|
|
|
|
=item * Object literal (string) |
|
3268
|
|
|
|
|
|
|
|
|
3269
|
|
|
|
|
|
|
=item * Datatype URI (string or undef) |
|
3270
|
|
|
|
|
|
|
|
|
3271
|
|
|
|
|
|
|
=item * Language (string or undef) |
|
3272
|
|
|
|
|
|
|
|
|
3273
|
|
|
|
|
|
|
=item * Graph URI or bnode (string or undef) |
|
3274
|
|
|
|
|
|
|
|
|
3275
|
|
|
|
|
|
|
=back |
|
3276
|
|
|
|
|
|
|
|
|
3277
|
|
|
|
|
|
|
Beware: sometimes both a datatype I<and> a language will be passed. |
|
3278
|
|
|
|
|
|
|
This goes beyond the normal RDF data model. |
|
3279
|
|
|
|
|
|
|
|
|
3280
|
|
|
|
|
|
|
The callback should return 1 to tell the parser to skip this triple (not add it to |
|
3281
|
|
|
|
|
|
|
the graph); return 0 otherwise. |
|
3282
|
|
|
|
|
|
|
|
|
3283
|
|
|
|
|
|
|
=head1 FEATURES |
|
3284
|
|
|
|
|
|
|
|
|
3285
|
|
|
|
|
|
|
Most features are configurable using L<RDF::RDFa::Parser::Config>. |
|
3286
|
|
|
|
|
|
|
|
|
3287
|
|
|
|
|
|
|
=head2 RDFa Versions |
|
3288
|
|
|
|
|
|
|
|
|
3289
|
|
|
|
|
|
|
RDF::RDFa::Parser supports RDFa versions 1.0 and 1.1. |
|
3290
|
|
|
|
|
|
|
|
|
3291
|
|
|
|
|
|
|
1.1 is currently a moving target; support is experimental. |
|
3292
|
|
|
|
|
|
|
|
|
3293
|
|
|
|
|
|
|
1.1 is the default, but this can be configured using RDF::RDFa::Parser::Config. |
|
3294
|
|
|
|
|
|
|
|
|
3295
|
|
|
|
|
|
|
=head2 Host Languages |
|
3296
|
|
|
|
|
|
|
|
|
3297
|
|
|
|
|
|
|
RDF::RDFa::Parser supports various different RDFa host languages: |
|
3298
|
|
|
|
|
|
|
|
|
3299
|
|
|
|
|
|
|
=over 4 |
|
3300
|
|
|
|
|
|
|
|
|
3301
|
|
|
|
|
|
|
=item * B<XHTML> |
|
3302
|
|
|
|
|
|
|
|
|
3303
|
|
|
|
|
|
|
As per the XHTML+RDFa 1.0 and XHTML+RDFa 1.1 specifications. |
|
3304
|
|
|
|
|
|
|
|
|
3305
|
|
|
|
|
|
|
=item * B<HTML 4> |
|
3306
|
|
|
|
|
|
|
|
|
3307
|
|
|
|
|
|
|
Uses an HTML5 (sic) parser; uses @lang instead of @xml:lang; keeps prefixes |
|
3308
|
|
|
|
|
|
|
and terms case-insensitive; recognises the @rel relations defined in the HTML |
|
3309
|
|
|
|
|
|
|
4 specification. Otherwise the same as XHTML. |
|
3310
|
|
|
|
|
|
|
|
|
3311
|
|
|
|
|
|
|
=item * B<HTML5> |
|
3312
|
|
|
|
|
|
|
|
|
3313
|
|
|
|
|
|
|
Uses an HTML5 parser; uses @lang as well as @xml:lang; keeps prefixes |
|
3314
|
|
|
|
|
|
|
and terms case-insensitive; recognises the @rel relations defined in the HTML5 |
|
3315
|
|
|
|
|
|
|
draft specification. Otherwise the same as XHTML. |
|
3316
|
|
|
|
|
|
|
|
|
3317
|
|
|
|
|
|
|
=item * B<XML> |
|
3318
|
|
|
|
|
|
|
|
|
3319
|
|
|
|
|
|
|
This is implemented as per the RDFa Core 1.1 specification. There is also |
|
3320
|
|
|
|
|
|
|
support for "RDFa Core 1.0", for which no specification exists, but has been |
|
3321
|
|
|
|
|
|
|
reverse-engineered by applying the differences between XHTML+RDFa 1.1 and |
|
3322
|
|
|
|
|
|
|
RDFa Core 1.1 to the XHTML+RDFa 1.0 specification. |
|
3323
|
|
|
|
|
|
|
|
|
3324
|
|
|
|
|
|
|
Embedded chunks of RDF/XML within XML are supported. |
|
3325
|
|
|
|
|
|
|
|
|
3326
|
|
|
|
|
|
|
=item * B<SVG> |
|
3327
|
|
|
|
|
|
|
|
|
3328
|
|
|
|
|
|
|
For now, a synonym for XML. |
|
3329
|
|
|
|
|
|
|
|
|
3330
|
|
|
|
|
|
|
=item * B<Atom> |
|
3331
|
|
|
|
|
|
|
|
|
3332
|
|
|
|
|
|
|
The E<lt>feedE<gt> and E<lt>entryE<gt> elements are treated specially, setting |
|
3333
|
|
|
|
|
|
|
a new subject; IANA-registered rel keywords are recognised. |
|
3334
|
|
|
|
|
|
|
|
|
3335
|
|
|
|
|
|
|
By passing C<< atom_parser=>1 >> as a Config option, you can also handle |
|
3336
|
|
|
|
|
|
|
Atom's native semantics. (Uses L<XML::Atom::OWL>. If this module is not installed, |
|
3337
|
|
|
|
|
|
|
this option is silently ignored.) |
|
3338
|
|
|
|
|
|
|
|
|
3339
|
|
|
|
|
|
|
Otherwise, the same as XML. |
|
3340
|
|
|
|
|
|
|
|
|
3341
|
|
|
|
|
|
|
=item * B<DataRSS> |
|
3342
|
|
|
|
|
|
|
|
|
3343
|
|
|
|
|
|
|
Defines some default prefixes. Otherwise, the same as Atom. |
|
3344
|
|
|
|
|
|
|
|
|
3345
|
|
|
|
|
|
|
=item * B<OpenDocument XML> |
|
3346
|
|
|
|
|
|
|
|
|
3347
|
|
|
|
|
|
|
That is, XML content formatted along the lines of 'content.xml' in OpenDocument |
|
3348
|
|
|
|
|
|
|
files. |
|
3349
|
|
|
|
|
|
|
|
|
3350
|
|
|
|
|
|
|
Supports OpenDocument bookmarked ranges used as typed or plain object literals |
|
3351
|
|
|
|
|
|
|
(though not XML literals); expects RDFa attributes in the XHTML namespace |
|
3352
|
|
|
|
|
|
|
instead of in no namespace. Otherwise, the same as XML. |
|
3353
|
|
|
|
|
|
|
|
|
3354
|
|
|
|
|
|
|
=item * B<OpenDocument> |
|
3355
|
|
|
|
|
|
|
|
|
3356
|
|
|
|
|
|
|
That is, a ZIP file containing OpenDocument XML files. RDF::RDFa::Parser |
|
3357
|
|
|
|
|
|
|
will do all the unzipping and combining for you, so you don't have to. |
|
3358
|
|
|
|
|
|
|
The unregistered "jar:" URI scheme is used to refer to files within the ZIP. |
|
3359
|
|
|
|
|
|
|
|
|
3360
|
|
|
|
|
|
|
=back |
|
3361
|
|
|
|
|
|
|
|
|
3362
|
|
|
|
|
|
|
=head2 Embedded RDF/XML |
|
3363
|
|
|
|
|
|
|
|
|
3364
|
|
|
|
|
|
|
Though a rarely used feature, XHTML allows other XML markup languages |
|
3365
|
|
|
|
|
|
|
to be directly embedded into it. In particular, chunks of RDF/XML can |
|
3366
|
|
|
|
|
|
|
be included in XHTML. While this is not common in XHTML, it's seen quite |
|
3367
|
|
|
|
|
|
|
often in SVG and other XML markup languages. |
|
3368
|
|
|
|
|
|
|
|
|
3369
|
|
|
|
|
|
|
When RDF::RDFa::Parser encounters a chunk of RDF/XML in a document |
|
3370
|
|
|
|
|
|
|
it's parsing (i.e. an element called 'RDF' with namespace |
|
3371
|
|
|
|
|
|
|
'http://www.w3.org/1999/02/22-rdf-syntax-ns#'), there are three different |
|
3372
|
|
|
|
|
|
|
courses of action it can take: |
|
3373
|
|
|
|
|
|
|
|
|
3374
|
|
|
|
|
|
|
=over 4 |
|
3375
|
|
|
|
|
|
|
|
|
3376
|
|
|
|
|
|
|
=item 0. Continue straight through it. |
|
3377
|
|
|
|
|
|
|
|
|
3378
|
|
|
|
|
|
|
This is the behaviour that XHTML+RDFa seems to suggest is the right |
|
3379
|
|
|
|
|
|
|
option. It should mostly not do any harm: triples encoded in RDF/XML |
|
3380
|
|
|
|
|
|
|
will be generally ignored (though the chunk itself could theoretically |
|
3381
|
|
|
|
|
|
|
end up as part of an XML literal). It will waste a bit of time though. |
|
3382
|
|
|
|
|
|
|
|
|
3383
|
|
|
|
|
|
|
=item 1. Parse the RDF/XML. |
|
3384
|
|
|
|
|
|
|
|
|
3385
|
|
|
|
|
|
|
The parser will parse the RDF/XML properly. If named graphs are |
|
3386
|
|
|
|
|
|
|
enabled, any triples will be added to a separate graph. This is |
|
3387
|
|
|
|
|
|
|
the behaviour that SVG Tiny 1.2 seems to suggest is the correct |
|
3388
|
|
|
|
|
|
|
thing to do. |
|
3389
|
|
|
|
|
|
|
|
|
3390
|
|
|
|
|
|
|
=item 2. Skip the chunk. |
|
3391
|
|
|
|
|
|
|
|
|
3392
|
|
|
|
|
|
|
This will skip over the RDF element entirely, and thus save you a |
|
3393
|
|
|
|
|
|
|
bit of time. |
|
3394
|
|
|
|
|
|
|
|
|
3395
|
|
|
|
|
|
|
=back |
|
3396
|
|
|
|
|
|
|
|
|
3397
|
|
|
|
|
|
|
You can decide which path to take by setting the 'embedded_rdfxml' |
|
3398
|
|
|
|
|
|
|
Config option. For HTML and XHTML, you probably want |
|
3399
|
|
|
|
|
|
|
to set embedded_rdfxml to '0' (the default) or '2' (a little faster). |
|
3400
|
|
|
|
|
|
|
For other XML markup languages (e.g. SVG or Atom), you probably want to |
|
3401
|
|
|
|
|
|
|
set it to '1'. |
|
3402
|
|
|
|
|
|
|
|
|
3403
|
|
|
|
|
|
|
(There's also an option '3' which controls how embedded RDF/XML interacts |
|
3404
|
|
|
|
|
|
|
with named graphs, but this is only really intended for internal use, parsing |
|
3405
|
|
|
|
|
|
|
OpenDocument.) |
|
3406
|
|
|
|
|
|
|
|
|
3407
|
|
|
|
|
|
|
=head2 Named Graphs |
|
3408
|
|
|
|
|
|
|
|
|
3409
|
|
|
|
|
|
|
The parser has support for named graphs within a single RDFa |
|
3410
|
|
|
|
|
|
|
document. To switch this on, use the 'graph' Config option. |
|
3411
|
|
|
|
|
|
|
|
|
3412
|
|
|
|
|
|
|
See also L<http://buzzword.org.uk/2009/rdfa4/spec>. |
|
3413
|
|
|
|
|
|
|
|
|
3414
|
|
|
|
|
|
|
The name of the attribute which indicates graph URIs is by |
|
3415
|
|
|
|
|
|
|
default 'graph', but can be changed using the 'graph_attr' |
|
3416
|
|
|
|
|
|
|
Config option. This option accepts Clark Notation to specify a |
|
3417
|
|
|
|
|
|
|
namespaced attribute. By default, the attribute value is |
|
3418
|
|
|
|
|
|
|
interpreted like the 'about' attribute (i.e. CURIEs, URIs, etc.), |
|
3419
|
|
|
|
|
|
|
but if you set the 'graph_type' Config option to 'id', |
|
3420
|
|
|
|
|
|
|
it will be treated as setting a fragment identifier (like the 'id' |
|
3421
|
|
|
|
|
|
|
attribute). |
|
3422
|
|
|
|
|
|
|
|
|
3423
|
|
|
|
|
|
|
The 'graph_default' Config option allows you to set the default |
|
3424
|
|
|
|
|
|
|
graph URI/bnode identifier. |
|
3425
|
|
|
|
|
|
|
|
|
3426
|
|
|
|
|
|
|
Once you're using named graphs, the C<graphs> method becomes |
|
3427
|
|
|
|
|
|
|
useful: it returns a hashref of { graph_uri => trine_model } pairs. |
|
3428
|
|
|
|
|
|
|
The optional parameter to the C<graph> method also becomes useful. |
|
3429
|
|
|
|
|
|
|
|
|
3430
|
|
|
|
|
|
|
OpenDocument (ZIP) host language support makes internal use |
|
3431
|
|
|
|
|
|
|
of named graphs, so if you're parsing OpenDocument, tinker with |
|
3432
|
|
|
|
|
|
|
the graph Config options at your own risk! |
|
3433
|
|
|
|
|
|
|
|
|
3434
|
|
|
|
|
|
|
=head2 Auto Config |
|
3435
|
|
|
|
|
|
|
|
|
3436
|
|
|
|
|
|
|
RDF::RDFa::Parser has a lot of different Config options to play with. Sometimes it |
|
3437
|
|
|
|
|
|
|
might be useful to allow the page being parsed to control some of these options. |
|
3438
|
|
|
|
|
|
|
If you switch on the 'auto_config' Config option, pages can do this. |
|
3439
|
|
|
|
|
|
|
|
|
3440
|
|
|
|
|
|
|
A page can set options using a specially crafted E<lt>metaE<gt> tag: |
|
3441
|
|
|
|
|
|
|
|
|
3442
|
|
|
|
|
|
|
<meta name="http://search.cpan.org/dist/RDF-RDFa-Parser/#auto_config" |
|
3443
|
|
|
|
|
|
|
content="xhtml_lang=1&xml_lang=0" /> |
|
3444
|
|
|
|
|
|
|
|
|
3445
|
|
|
|
|
|
|
Note that the C<content> attribute is an application/x-www-form-urlencoded |
|
3446
|
|
|
|
|
|
|
string (which must then be HTML-escaped of course). Semicolons may be used |
|
3447
|
|
|
|
|
|
|
instead of ampersands, as these tend to look nicer: |
|
3448
|
|
|
|
|
|
|
|
|
3449
|
|
|
|
|
|
|
<meta name="http://search.cpan.org/dist/RDF-RDFa-Parser/#auto_config" |
|
3450
|
|
|
|
|
|
|
content="xhtml_lang=1;xml_lang=0" /> |
|
3451
|
|
|
|
|
|
|
|
|
3452
|
|
|
|
|
|
|
It's possible to use auto config outside XHTML (e.g. in Atom or |
|
3453
|
|
|
|
|
|
|
SVG) using namespaces: |
|
3454
|
|
|
|
|
|
|
|
|
3455
|
|
|
|
|
|
|
<xhtml:meta xmlns:xhtml="http://www.w3.org/1999/xhtml" |
|
3456
|
|
|
|
|
|
|
name="http://search.cpan.org/dist/RDF-RDFa-Parser/#auto_config" |
|
3457
|
|
|
|
|
|
|
content="xhtml_lang=0;xml_base=2;atom_elements=1" /> |
|
3458
|
|
|
|
|
|
|
|
|
3459
|
|
|
|
|
|
|
Any Config option may be given using auto config, except 'use_rtnlx', 'dom_parser', |
|
3460
|
|
|
|
|
|
|
and of course 'auto_config' itself. |
|
3461
|
|
|
|
|
|
|
|
|
3462
|
|
|
|
|
|
|
=head2 Profiles |
|
3463
|
|
|
|
|
|
|
|
|
3464
|
|
|
|
|
|
|
Support for Profiles (an experimental RDFa 1.1 feature) was added in |
|
3465
|
|
|
|
|
|
|
version 1.09_00, but dropped after version 1.096, because the feature |
|
3466
|
|
|
|
|
|
|
was removed from draft specs. |
|
3467
|
|
|
|
|
|
|
|
|
3468
|
|
|
|
|
|
|
=head1 BUGS |
|
3469
|
|
|
|
|
|
|
|
|
3470
|
|
|
|
|
|
|
RDF::RDFa::Parser 0.21 passed all approved tests in the XHTML+RDFa |
|
3471
|
|
|
|
|
|
|
test suite at the time of its release. |
|
3472
|
|
|
|
|
|
|
|
|
3473
|
|
|
|
|
|
|
RDF::RDFa::Parser 0.22 (used in conjunction with HTML::HTML5::Parser |
|
3474
|
|
|
|
|
|
|
0.01 and HTML::HTML5::Sanity 0.01) additionally passes all approved |
|
3475
|
|
|
|
|
|
|
tests in the HTML4+RDFa and HTML5+RDFa test suites at the time of |
|
3476
|
|
|
|
|
|
|
its release; except test cases 0113 and 0121, which the author of |
|
3477
|
|
|
|
|
|
|
this module believes mandate incorrect HTML parsing. |
|
3478
|
|
|
|
|
|
|
|
|
3479
|
|
|
|
|
|
|
RDF::RDFa::Parser 1.096_01 passes all approved tests on the default |
|
3480
|
|
|
|
|
|
|
graph (not the processor graph) in the RDFa 1.1 test suite for language |
|
3481
|
|
|
|
|
|
|
versions 1.0 and host languages xhtml1, html4 and html5, with the |
|
3482
|
|
|
|
|
|
|
following exceptions which are skipped: |
|
3483
|
|
|
|
|
|
|
|
|
3484
|
|
|
|
|
|
|
=over |
|
3485
|
|
|
|
|
|
|
|
|
3486
|
|
|
|
|
|
|
=item * B<0140> - wilful violation, pending proof that the test is backed up by the spec. |
|
3487
|
|
|
|
|
|
|
|
|
3488
|
|
|
|
|
|
|
=item * B<0198> - an XML canonicalisation test that may be dropped in the future. |
|
3489
|
|
|
|
|
|
|
|
|
3490
|
|
|
|
|
|
|
=item * B<0212> - wilful violation, as passing this test would require regressing on the old RDFa 1.0 test suite. |
|
3491
|
|
|
|
|
|
|
|
|
3492
|
|
|
|
|
|
|
=item * B<0251> to B<0256> pass with RDFa 1.1 and are skipped in RDFa 1.0 because they use RDFa-1.1-specific syntax. |
|
3493
|
|
|
|
|
|
|
|
|
3494
|
|
|
|
|
|
|
=item * B<0256> is additionally skipped in HTML4 mode, as the author believes xml:lang should be ignored in HTML versions prior to HTML5. |
|
3495
|
|
|
|
|
|
|
|
|
3496
|
|
|
|
|
|
|
=item * B<0303> - wilful violation, as this feature is simply awful. |
|
3497
|
|
|
|
|
|
|
|
|
3498
|
|
|
|
|
|
|
=back |
|
3499
|
|
|
|
|
|
|
|
|
3500
|
|
|
|
|
|
|
Please report any bugs to L<http://rt.cpan.org/>. |
|
3501
|
|
|
|
|
|
|
|
|
3502
|
|
|
|
|
|
|
Common gotchas: |
|
3503
|
|
|
|
|
|
|
|
|
3504
|
|
|
|
|
|
|
=over 8 |
|
3505
|
|
|
|
|
|
|
|
|
3506
|
|
|
|
|
|
|
=item * Are you using the XML catalogue? |
|
3507
|
|
|
|
|
|
|
|
|
3508
|
|
|
|
|
|
|
RDF::RDFa::Parser maintains a locally cached version of the XHTML+RDFa |
|
3509
|
|
|
|
|
|
|
DTD. This will normally be within your Perl module directory, in a subdirectory |
|
3510
|
|
|
|
|
|
|
named "auto/share/dist/RDF-RDFa-Parser/catalogue/". |
|
3511
|
|
|
|
|
|
|
If this is missing, the parser should still work, but will be very slow. |
|
3512
|
|
|
|
|
|
|
|
|
3513
|
|
|
|
|
|
|
=back |
|
3514
|
|
|
|
|
|
|
|
|
3515
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
3516
|
|
|
|
|
|
|
|
|
3517
|
|
|
|
|
|
|
L<RDF::TrineX::Parser::RDFa> provides a saner interface for this module. |
|
3518
|
|
|
|
|
|
|
|
|
3519
|
|
|
|
|
|
|
L<RDF::RDFa::Parser::Config>. |
|
3520
|
|
|
|
|
|
|
|
|
3521
|
|
|
|
|
|
|
L<XML::LibXML>, L<RDF::Trine>, L<HTML::HTML5::Parser>, L<HTML::HTML5::Sanity>, |
|
3522
|
|
|
|
|
|
|
L<RDF::RDFa::Generator>, L<RDF::RDFa::Linter>. |
|
3523
|
|
|
|
|
|
|
|
|
3524
|
|
|
|
|
|
|
L<http://www.perlrdf.org/>, L<http://rdfa.info>. |
|
3525
|
|
|
|
|
|
|
|
|
3526
|
|
|
|
|
|
|
=head1 AUTHOR |
|
3527
|
|
|
|
|
|
|
|
|
3528
|
|
|
|
|
|
|
Toby Inkster E<lt>tobyink@cpan.orgE<gt>. |
|
3529
|
|
|
|
|
|
|
|
|
3530
|
|
|
|
|
|
|
=head1 ACKNOWLEDGEMENTS |
|
3531
|
|
|
|
|
|
|
|
|
3532
|
|
|
|
|
|
|
Kjetil Kjernsmo E<lt>kjetilk@cpan.orgE<gt> wrote much of the stuff for |
|
3533
|
|
|
|
|
|
|
building RDF::Trine models. Neubert Joachim taught me to use XML |
|
3534
|
|
|
|
|
|
|
catalogues, which massively speeds up parsing of XHTML files that have |
|
3535
|
|
|
|
|
|
|
DTDs. |
|
3536
|
|
|
|
|
|
|
|
|
3537
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENCE |
|
3538
|
|
|
|
|
|
|
|
|
3539
|
|
|
|
|
|
|
Copyright 2008-2012 Toby Inkster |
|
3540
|
|
|
|
|
|
|
|
|
3541
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
|
3542
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
|
3543
|
|
|
|
|
|
|
|
|
3544
|
|
|
|
|
|
|
=head1 DISCLAIMER OF WARRANTIES |
|
3545
|
|
|
|
|
|
|
|
|
3546
|
|
|
|
|
|
|
THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED |
|
3547
|
|
|
|
|
|
|
WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF |
|
3548
|
|
|
|
|
|
|
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. |