line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package HTTP::OAIPMH::Validator; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
=head1 NAME |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
HTTP::OAIPMH::Validator - OAI-PMH validator class |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 SYNOPSIS |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
Validation suite for OAI-PMH data providers that checks for responses |
10
|
|
|
|
|
|
|
in accord with OAI-PMH v2 |
11
|
|
|
|
|
|
|
L<http://www.openarchives.org/OAI/2.0/openarchivesprotocol.htm>. |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
Typical use: |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
use HTTP::OAIPMH::Validator; |
16
|
|
|
|
|
|
|
use Try::Tiny; |
17
|
|
|
|
|
|
|
my $val = HTTP::OAIPMH::Validator->new( base_url=>'http://example.com/oai' ); |
18
|
|
|
|
|
|
|
try { |
19
|
|
|
|
|
|
|
$val->run_complete_validation; |
20
|
|
|
|
|
|
|
} catch { |
21
|
|
|
|
|
|
|
warn "oops, validation didn't run to completion: $_\n"; |
22
|
|
|
|
|
|
|
}; |
23
|
|
|
|
|
|
|
print "Validation status of data provider ".$val->base_url." is ".$val->status."\n"; |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
=cut |
26
|
|
|
|
|
|
|
|
27
|
1
|
|
|
1
|
|
37934
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
47
|
|
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
our $VERSION = '1.05'; |
30
|
|
|
|
|
|
|
|
31
|
1
|
|
|
1
|
|
3
|
use base qw(Class::Accessor::Fast); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
476
|
|
32
|
1
|
|
|
1
|
|
2335
|
use Data::UUID; |
|
1
|
|
|
|
|
566
|
|
|
1
|
|
|
|
|
49
|
|
33
|
1
|
|
|
1
|
|
361
|
use Date::Manip; |
|
1
|
|
|
|
|
100517
|
|
|
1
|
|
|
|
|
119
|
|
34
|
1
|
|
|
1
|
|
397
|
use HTTP::Request; # for rendering http queries |
|
1
|
|
|
|
|
732
|
|
|
1
|
|
|
|
|
20
|
|
35
|
1
|
|
|
1
|
|
5
|
use HTTP::Headers; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
18
|
|
36
|
1
|
|
|
1
|
|
431
|
use HTTP::Request::Common; # makes POST easier |
|
1
|
|
|
|
|
1597
|
|
|
1
|
|
|
|
|
48
|
|
37
|
1
|
|
|
1
|
|
5
|
use HTTP::Status; # for checking error codes |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
218
|
|
38
|
1
|
|
|
1
|
|
667
|
use LWP::UserAgent; # send http requests |
|
1
|
|
|
|
|
9853
|
|
|
1
|
|
|
|
|
27
|
|
39
|
1
|
|
|
1
|
|
494
|
use LWP::Protocol::https; # explicit include so we fail without https support |
|
1
|
|
|
|
|
67356
|
|
|
1
|
|
|
|
|
43
|
|
40
|
1
|
|
|
1
|
|
7
|
use URI::Escape; # escape special characters |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
53
|
|
41
|
1
|
|
|
1
|
|
764
|
use XML::DOM; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
use HTTP::OAIPMH::Log; |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
=head2 METHODS |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
=head3 new(%args) |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
Create new HTTP::OAIPMH::Validator object and initialize counters. |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
The following instance variables may be set via %args and have read-write |
51
|
|
|
|
|
|
|
accessors (via L<Class::Accessor::Fast>): |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
base_url - base URL of the data provider being validated |
54
|
|
|
|
|
|
|
run_id - UUID identifying the run (will be generated if none supplied) |
55
|
|
|
|
|
|
|
protocol_version - protocol version supported |
56
|
|
|
|
|
|
|
admin_email - admin email extracted from Identify response |
57
|
|
|
|
|
|
|
granularity - datestamp granularity (defaults to 'days', else 'seconds') |
58
|
|
|
|
|
|
|
uses_https - set true if the validator sees an https URL at any stage |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
debug - set true to add extra debugging output |
61
|
|
|
|
|
|
|
log - logging object (usually L<HTTP::OAIPMH::Log>) |
62
|
|
|
|
|
|
|
parser - XML DOM parser instance |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
identify_response - string of identify response (used for registration record) |
65
|
|
|
|
|
|
|
earliest_datestamp - value extracted from earliestDatestamp in Identify response |
66
|
|
|
|
|
|
|
namespace_id - if the oai-identifier is used then this records the namespace identifier extracted |
67
|
|
|
|
|
|
|
set_names - array of all the set names reported in listSets |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
example_record_id - example id used for tests that require a specific identifier |
70
|
|
|
|
|
|
|
example_set_spec - example setSpec ("&set=name") used for tests that require a set |
71
|
|
|
|
|
|
|
example_metadata_prefix - example metadataPrefix which defaults to 'oai_dc' |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=cut |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
# Generate read-write accessors for every instance variable used by the
# validator. The names are grouped by role; the set and order of names are
# unchanged.
HTTP::OAIPMH::Validator->mk_accessors(
    # Target repository and the features discovered while validating it
    qw(base_url protocol_version admin_email granularity uses_503 uses_https),
    # Run control and HTTP/XML plumbing
    qw(debug parser run_id ua allow_https doc save_all_responses
       response_number http_timeout max_retries max_size),
    # URLs of the protocol specification and the repository guidelines
    qw(protocol guidelines),
    # Values extracted from responses
    qw(identify_response earliest_datestamp namespace_id set_names),
    # Example values used by tests that need a concrete id/set/prefix
    qw(example_record_id example_set_spec example_metadata_prefix),
    # Reporting
    qw(log status),
);
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
# Constructor. May be called on the class or on an existing instance (in
# which case the new object is blessed into the instance's class). Any
# name => value pairs passed in override the defaults below. A run_id and a
# user agent are created unless the caller supplied them.
sub new {
    my $proto = shift;
    my $class = ref($proto) || $proto;

    my %defaults = (
        'base_url'         => undef,
        'protocol_version' => undef,
        # Repository features extracted
        'granularity' => 'days',   # can also be "seconds"
        'uses_503'    => 0,        # set true if 503 responses ever used
        'uses_https'  => 0,        # set to true if https is ever used
        # Control
        'debug'              => 0,
        'parser'             => XML::DOM::Parser->new(),
        'run_id'             => undef,
        'ua'                 => undef,
        'allow_https'        => 0,          # allow https URIs
        'doc'                => undef,      # current parsed xml document
        'save_all_responses' => 0,          # set True to save all HTTP responses
        'response_number'    => 1,          # initial response number
        'http_timeout'       => 600,
        'max_retries'        => 5,          # number of 503's in a row that we will accept
        'max_size'           => 100000000,  # max response size in bytes (100MB)
        'protocol'   => 'http://www.openarchives.org/OAI/2.0/openarchivesprotocol.htm', # URL of protocol spec
        'guidelines' => 'http://www.openarchives.org/OAI/2.0/guidelines-repository.htm', # URL of repository guidelines doc
        # Results
        'namespace_id'            => undef,
        'set_names'               => [],
        'example_record_id'       => undef,
        'example_set_spec'        => undef,
        'example_metadata_prefix' => 'oai_dc',
        'log'                     => HTTP::OAIPMH::Log->new(),
        'status'                  => 'unknown',
    );

    # Caller-supplied pairs come last so that they win over the defaults.
    my $self = bless({ %defaults, @_ }, $class);

    unless ($self->run_id) {
        $self->setup_run_id;
    }
    unless ($self->ua) {
        $self->setup_user_agent;
    }
    return($self);
}
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=head3 setup_run_id() |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
Set a UUID for the run_id. |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
=cut |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
# Generate a fresh UUID with Data::UUID and store its lower-cased string
# form as the run_id of this validator.
sub setup_run_id {
    my ($self) = @_;
    my $generator = Data::UUID->new;
    my $uuid      = $generator->create;
    my $uuid_text = lc($generator->to_string($uuid));
    return $self->run_id($uuid_text);
}
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
=head3 setup_user_agent() |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
Setup L<LWP::UserAgent> for the validator. |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
=cut |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
# Create the LWP::UserAgent used to send all HTTP requests, configure it
# from this object's http_timeout and max_size settings, and store it in
# the ua accessor.
sub setup_user_agent {
    my ($self) = @_;

    my $agent = LWP::UserAgent->new();

    # Response timeout (the default http_timeout of 600 gives 10 minutes)
    $agent->timeout($self->http_timeout);
    # Response size limit.
    # Historical note: seems to break http://eprints.soton.ac.uk/perl/oai2 [Simeon/2005-06-06]
    $agent->max_size($self->max_size);
    # No automatic redirects: redirects are followed manually
    $agent->requests_redirectable([]);
    # Identify ourselves in the User-Agent header
    $agent->agent('OAIPMH_Validator');
    # Default From: value -> direct to google group for discussion
    $agent->from('https://groups.google.com/d/forum/oai-pmh');

    return $self->ua($agent);
}
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
=head3 abort($msg) |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
Special purpose "die" routine because tests cannot continue. Logs |
157
|
|
|
|
|
|
|
failure and then dies. |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
=cut |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
# Record a fatal problem and stop the run: logs "ABORT: $msg" as a failure,
# sets the status to 'FAILED' and then dies with the same text followed by
# a newline.
sub abort {
    my ($self, $msg) = @_;
    my $text = 'ABORT: '.$msg;
    $self->log->fail($text);
    $self->status('FAILED');
    die($text."\n");
}
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
=head3 run_complete_validation($skip_test_identify) |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
Run all tests for a complete validation and return true if the data provider passes, |
173
|
|
|
|
|
|
|
false otherwise. All actions are logged and may be accessed to provide a report |
174
|
|
|
|
|
|
|
(including warnings that do not indicate failure) after the run. |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
Arguments: |
177
|
|
|
|
|
|
|
$skip_identify - set true to skip the test_identify() step |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
=cut |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
# Run the complete sequence of validation tests against the data provider.
#
# Arguments:
#   $skip_identify - set true to skip the test_identify() step
#
# Returns true if no failures were logged, false otherwise. Also sets
# $self->status to 'COMPLIANT' or 'FAILED' accordingly. Individual tests
# may die (via abort) if validation cannot continue.
sub run_complete_validation {
    my $self=shift;
    my ($skip_identify)=@_;

    $self->response_number(1);
    $self->test_identify unless ($skip_identify);
    $self->test_list_sets;
    $self->test_list_identifiers;

    my ($formats, $gotDC) = $self->test_list_metadata_formats;

    # If the repository doesn't support oai_dc then this is a failure (because
    # the standard demands it) but see whether we can find another metadataPrefix
    # in order to continue the tests
    if ( $gotDC ) {
        $self->log->pass("Data provider supports oai_dc metadataPrefix");
    } else {
        # Text node of the first advertised metadataPrefix, if any. An empty
        # element has no first child, so it is treated the same as having no
        # formats rather than calling getData on an undefined value.
        my $first_prefix;
        if ($formats and $formats->getLength()>0) {
            $first_prefix = $formats->item(0)->getFirstChild;
        }
        if ($first_prefix) {
            $self->example_metadata_prefix( $first_prefix->getData );
            $self->log->fail("Data provider does not support the simple Dublin Core metadata ".
                             "format with metadataPrefix oai_dc. Tests that require a ".
                             "metadataPrefix to be specified will use '".
                             $self->example_metadata_prefix."'");
        } else {
            $self->log->fail("There are no metadata formats available to use with the GetRecord ".
                             "request. The metadataPrefix ".
                             $self->example_metadata_prefix.
                             " will be used for later tests even though it seems unsupported.");
        }
    }

    my ($dateStamp)=$self->test_get_record($self->example_record_id,$self->example_metadata_prefix);
    $self->test_list_records($dateStamp,$self->example_metadata_prefix);

    # Check responses to erroneous queries
    $self->test_expected_errors($self->example_record_id);

    if ($self->protocol_version eq '2.0') {
        $self->test_expected_v2_errors($self->earliest_datestamp,$self->example_metadata_prefix);
        # As of version 2.0, data providers must support HTTP POST requests
        $self->test_post_requests($self->example_metadata_prefix);
    }
    $self->test_resumption_tokens;

    # Getting here with no failures means that the data provider is compliant
    # (there may be warnings which are not counted in num_fail)
    $self->status( $self->log->num_fail==0 ? 'COMPLIANT' : 'FAILED' );
    return($self->log->num_fail==0);
}
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
=head3 failures() |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
Return Markdown summary of failure log entries, along with the appropriate |
236
|
|
|
|
|
|
|
titles and request details. Will return empty string if there are no |
237
|
|
|
|
|
|
|
failures in the log. |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
=cut |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
# Return whatever the log object's failures() method reports for this run.
sub failures {
    my ($self) = @_;
    my $log = $self->log;
    return($log->failures());
}
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
=head3 summary() |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
Return summary statistics for the validation in Markdown (designed to agree |
250
|
|
|
|
|
|
|
with conversion to HTML by L). |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
=cut |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
# Build the Markdown summary of the run: a heading stating success or
# failure, optional lines for the declared oai-identifier namespace and for
# use of 503 / https, then the pass, warning and failure counts from the
# log and the validation status ('unknown' if no status is set).
sub summary {
    my ($self) = @_;

    my $outcome = 'success';
    if ($self->log->num_fail > 0) {
        $outcome = 'failure';
    }
    my @parts = ("\n## Summary - *$outcome*\n\n");

    if (my $namespace_id = $self->namespace_id) {
        # v2.0 namespace identifiers contain a '.', v1.1 ones do not
        my $ns_line = ($namespace_id =~ /\./)
            ? " * Namespace declared for v2.0 oai-identifiers is $namespace_id\n"
            : " * Namespace declared for v1.1 oai-identifiers (the repositoryIdentifier) is $namespace_id\n";
        push(@parts, $ns_line);
    }
    if ($self->uses_503) {
        push(@parts, " * Uses 503 for flow control\n");
    }
    if ($self->uses_https) {
        push(@parts, " * Uses https URIs (not specified in protocol)\n");
    }
    push(@parts, " * Total tests passed: ".$self->log->num_pass."\n");
    push(@parts, " * Total warnings: ".$self->log->num_warn."\n");
    push(@parts, " * Total error count: ".$self->log->num_fail."\n");
    push(@parts, " * Validation status: ".($self->status || 'unknown')."\n");

    return(join('', @parts));
}
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
=head2 METHODS TESTING SPECIFIC OAI-PMH VERBS |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
=head3 test_identify() |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
Check response to an Identify request. Returns false if tests cannot |
283
|
|
|
|
|
|
|
continue, true otherwise. |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
Side effects based on values extracted: |
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
- $self->admin_email set to email extracted from adminEmail element |
288
|
|
|
|
|
|
|
- $self->granularity set to 'days' or 'seconds' |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
=cut |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
# Check the response to an Identify request against the base URL.
#
# Returns true if later tests can continue, false otherwise. Calls
# $self->abort (which logs the failure and dies) when no usable Identify
# response is obtained: HTTP failure, unparsable XML, wrong root element,
# an OAI-PMH error response, a missing Identify block, or a missing/bad
# adminEmail.
#
# Side effects based on values extracted:
#   - $self->admin_email set to the email extracted by get_admin_email
#   - $self->identify_response set to the raw content of the response
#   - $self->namespace_id set when a valid oai-identifier
#     repositoryIdentifier is declared
#   - check_protocol_version, parse_granularity and get_earliest_datestamp
#     are called; judging by how their results are used here they set
#     protocol_version, granularity and earliest_datestamp
#     (NOTE(review): defined outside this block - confirm)
sub test_identify {
    my $self=shift;

    my $cantContinue=0;
    $self->log->start("Checking Identify response");

    # Send the verb request to the base URL
    my $burl=$self->base_url;
    my $req = $burl."?verb=Identify";

    my $response = $self->make_request($req); #don't use make_request_and_validate() just do simplest thing here
    unless ($response->is_success) {
        my $r="Server at base URL '$burl' failed to respond to Identify. The HTTP GET request with URL $req received response code '".$response->code()."'.";
        if ($response->code() == 301) {
            $self->log->fail("$r HTTP code 301 'Moved Permanently' is not widely supported by ".
                             "harvesters and is anyway inappropriate for registration of a ".
                             "service. If requests must be redirected then an HTTP response 302 ".
                             "may be used as outlined in the guidelines [".
                             $self->guidelines."#LoadBalancing].");
        } else {
            $self->log->fail($r);
        }
        $self->abort("Failed to get Identify response from server at base URL '$burl'.\n");
        return;
    }

    # Parse the XML response
    unless ($self->parse_response($req,$response)) {
        $self->log->fail("Failed to parse Identify response");
        $self->abort("Failed to parse Identify response from server at base URL '$burl'.\n");
        return;
    }

    # Check that this really is an Identify response
    my $oaipmhNode=$self->doc->getFirstChild();
    # Skip over any processing instructions (such as XML stylesheets) and
    # comments. Stop if we run out of nodes so that a document without any
    # element is reported below instead of dying on an undefined value.
    while (defined $oaipmhNode and
           ($oaipmhNode->getNodeType==PROCESSING_INSTRUCTION_NODE or
            $oaipmhNode->getNodeType==COMMENT_NODE)) {
        $oaipmhNode=$oaipmhNode->getNextSibling();
    }
    unless (defined $oaipmhNode and $oaipmhNode->getNodeName eq 'OAI-PMH') {
        my $found = defined($oaipmhNode)
            ? "Found node named '".$oaipmhNode->getNodeName."' instead"
            : "Found no root element";
        $self->log->fail("Identify response does not have OAI-PMH as root element! ".
                         $found);
        $self->abort("Identify response from server at base URL '$burl' does not have ".
                     "OAI-PMH as root element!\n");
        return;
    }
    my $identifyNode=$oaipmhNode->getElementsByTagName('Identify',0);
    unless ($identifyNode->getLength()>0) {
        my $errorNode=$oaipmhNode->getElementsByTagName('error',0);
        if ($errorNode->getLength()>0) {
            # give specific message if response is an error
            $self->log->fail("Error response to Identify request!\n");
            $self->abort("Error response to Identify request from server at base URL '$burl'.\n");
            return;
        } else {
            $self->log->fail("Identify response does not contain <Identify> block.\n");
            $self->abort("Identify response does not contain Identify block from server at base URL '$burl'.\n");
            return;
        }
    }

    # Extract admin email and protocol version numbers, check
    my ($admin_email,$email_error)=$self->get_admin_email;
    if (not $admin_email or $email_error) {
        $self->abort(($email_error || "Failed to extract adminEmail").", aborting.\n");
        return;
    }
    $self->admin_email($admin_email);
    $self->check_protocol_version; # bails if not Version 2.0

    # URL is valid, Identify response was provided, extract content as string
    $self->identify_response( $response->content );

    my $baseURL = $self->doc->getElementsByTagName('baseURL');

    # BUG FOUND ON AUGUST 26, 2002: empty baseURL still returns length > 0
    # So it is necessary to explicitly check for an empty element.
    if ( $baseURL->getLength() > 0 ) {
        $baseURL = $baseURL->item(0)->getFirstChild;
        if ( $baseURL ) { $baseURL = $baseURL->getData; }

        # $burl is the one given on the form; $baseURL is the one in the XML doc.
        if ($burl eq $baseURL) {
            $self->log->pass("baseURL supplied matches the Identify response");
        } else {
            # report the error, but keep the form URL
            # (at least it answered Identify!)
            $self->log->fail("baseURL supplied '$burl' does not match the baseURL in the ".
                             "Identify response '$baseURL'. The baseURL you enter must EXACTLY ".
                             "match the baseURL returned in the Identify response. It must ".
                             "match in case (http://Wibble.org/ does not match http://wibble.org/) ".
                             "and include any trailing slashes etc.");
            $cantContinue++;
        }
    }

    # For Version 2.0, Check for seconds granularity
    if ($self->protocol_version eq '2.0') {
        my $gran_el = $self->doc->getElementsByTagName('granularity');
        if ($self->parse_granularity($gran_el)) {
            $self->log->pass("Datestamp granularity is '".$self->granularity."'");
        } else {
            $cantContinue++;
        }
    }

    # For an exception check new to Version 2.0, extract the earliest date
    # and also check that its granularity is right
    if (my $err=$self->get_earliest_datestamp) {
        $self->log->fail("Bad earliestDatestamp: $err");
        $cantContinue++;
    } else {
        $self->log->pass("Extracted earliestDatestamp ".$self->earliest_datestamp);
    }

    # Check for OAI-identifier. If already in use by another base URL, bump
    # the error count to avoid having this URL register.
    #
    my $oaiIds = $self->doc->getElementsByTagName('oai-identifier');
    if ($oaiIds and $oaiIds->getLength()>0) {
        if ($oaiIds->getLength()>1) {
            $self->log->fail("Found more than one oai-identifier element. The intention ".
                             "is that this declaration only be used by repositories ".
                             "declaring the use of a single identifier namespace.");
            $cantContinue++;
        } else {
            $oaiIds=$oaiIds->item(0);

            # Now find out if this is v1.1 or v2.0
            my $oai_id_version='2.0';
            if (my $xmlns=$oaiIds->getAttribute('xmlns')) { #FIXME this requires default namespace to be set to oai-id
                if ($xmlns eq 'http://www.openarchives.org/OAI/2.0/oai-identifier') {
                    $oai_id_version='2.0';
                    $self->log->pass("oai-identifier description for version $oai_id_version is being used");
                } elsif ($xmlns eq 'http://www.openarchives.org/OAI/1.1/oai-identifier') {
                    $oai_id_version='1.1';
                    $self->log->pass("oai-identifier description for version $oai_id_version is being used");
                } else {
                    # $xmlns is known to be non-empty here, so anything
                    # else is an unrecognized namespace
                    $self->log->fail("Unrecognized namespace declaration '$xmlns' for ".
                                     "oai-identifier, expected ".
                                     "http://www.openarchives.org/OAI/2.0/oai-identifier ".
                                     "(for v2.0) or ".
                                     "http://www.openarchives.org/OAI/1.1/oai-identifier ".
                                     "(for v1.1). Assuming version $oai_id_version.");
                }
            } else {
                $self->log->fail("Can't find namespace declaration for the oai-identifier description. ".
                                 "This must be added as ".
                                 "(or 1.1), there will likely also be schema validation errors. Will ".
                                 "assume that the oai-identifier is version $oai_id_version for ".
                                 "later tests");
            }
            my $repoIds = $oaiIds->getElementsByTagName('repositoryIdentifier');
            if ($repoIds) {
                my $temp = $repoIds->item(0);
                if (!defined($temp)) {
                    $self->log->fail("No namespace-identifier (repositoryIdentifier element) in ".
                                     "the oai-identifier block of the Identify description");
                    return;
                }
                my $nsel = $temp->getFirstChild;
                unless ( $nsel ) {
                    # Empty repositoryIdentifier element, squawk loudly
                    $self->log->fail("Empty namespace-identifier (repositoryIdentifier element) in ".
                                     "the oai-identifier block of the Identify description");
                    return;
                }
                my $namespace_id = $nsel->getData;

                # The allowed form of the namespace-identifier and the
                # specification to cite depend on the oai-identifier version
                # determined above; the checks are otherwise identical.
                my ($pattern, $spec_url);
                if ($oai_id_version eq '2.0') {
                    # v2.0 schema
                    $pattern  = qr/^[a-z][a-z0-9\-]*(\.[a-z][a-z0-9\-]+)+$/i;
                    $spec_url = 'http://www.openarchives.org/OAI/2.0/guidelines-oai-identifier.htm';
                } else {
                    # v1.1 schema
                    $pattern  = qr/^[a-z0-9]+$/i;
                    $spec_url = 'http://www.openarchives.org/OAI/1.1/guidelines-oai-identifier.htm';
                }
                unless ($namespace_id=~$pattern) {
                    $self->log->fail("Bad namespace-identifier (repositoryIdentifier element) ".
                                     "'$namespace_id' in oai-identifier declaration. See section ".
                                     "2.1 of the OAI Identifier specification for details ".
                                     "($spec_url).");
                    $cantContinue++;
                } else {
                    $self->log->pass("namespace-identifier (repositoryIdentifier element) in oai-identifier ".
                                     "declaration is $namespace_id");
                    $self->namespace_id( $namespace_id );
                }
            }
        }
    }
    return(not $cantContinue);
}
498
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
=head3 test_list_sets() |
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
Check response to the ListSets verb. |
503
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
Save the setSpecs for later use. |
505
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
Note that any set might be empty. So if test_list_identifiers doesn't |
507
|
|
|
|
|
|
|
get a match, we need to try the second set identifier, and so on. |
508
|
|
|
|
|
|
|
So keep a list of the setSpec elements. |
509
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
=cut |
511
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
# Check the response to the ListSets verb and remember the setSpecs found.
#
# Side effects:
#   - $self->set_names is reset to [] and then filled with the text of every
#     non-empty setSpec element in the response.
#   - $self->example_set_spec is reset to '' and, if at least one set name
#     was extracted, set to "&set=<first set name>" (ready to append to a
#     request URL).
#
# Returns early (empty return) if no ListSets response was obtained. Calls
# $self->abort if the response cannot be parsed (abort is presumably fatal,
# see the try/catch in the SYNOPSIS - confirm against its definition).
sub test_list_sets {
    my $self=shift;

    $self->log->start("Checking ListSets response");
    my $req=$self->base_url."?verb=ListSets";
    my $response = $self->make_request_and_validate("ListSets", $req);
    unless ($response) {
        $self->log->fail("Can't check set names");
        return;
    }

    unless ($self->parse_response($req,$response)) {
        $self->log->fail("Can't parse response");
        $self->abort("failed to parse response to ListSets");
    }

    $self->set_names( [] );
    $self->example_set_spec( '' );
    my $set_elements=$self->doc->getElementsByTagName('setSpec');
    if (not defined($set_elements) or ($set_elements->getLength<1)) {
        # No setSpec elements, so there should be an error element
        my $details={};
        if ($self->is_error_response($details)) {
            if ($details->{'noSetHierarchy'}) {
                $self->log->pass("Repository does not support sets and this is correctly reported with a ".
                                 "noSetHierarchy exception in the ListSets response");
            } else {
                $self->log->fail("Failed to extract any setSpec elements from ListSets ".
                                 "but did not find a noSetHierarchy exception. Found instead a '".
                                 join(', ',keys %{$details})."' exception(s). See <".
                                 $self->protocol."#ListSets>.");
            }
        } else {
            $self->log->fail("Failed to extract any setSpec elements from ListSets but did not ".
                             "find an exception message. If sets are not supported by the ".
                             "repository then the ListSets response must be the noSetHierarchy ".
                             "error. See <".$self->protocol."#ListSets>.");
        }
    } else {
        # Have setSpec elements, record all set names and pick an example set spec
        my $num_elements=$set_elements->getLength;
        for my $j (0..$num_elements-1) {
            # An empty <setSpec/> has no child node; skip it rather than
            # dying on an undefined value. The count check below reports it.
            my $text_node=$set_elements->item($j)->getFirstChild;
            next unless ($text_node);
            ##FIXME - should validate each set name
            push(@{$self->set_names},$text_node->getData);
        }
        # Sanity check, did we get the number we expected?
        my $num_sets=scalar(@{$self->set_names});
        if ($num_sets!=$num_elements) {
            $self->log->fail("Failed to extract the expected number of set names (got ".
                             "$num_sets, expected ".$num_elements.")");
        }
        # Only claim success (and choose an example set) if something usable
        # was actually extracted.
        if ($num_sets>0) {
            $self->example_set_spec( "&set=".$self->set_names->[0] );
            # Show at most the first three set names in the log message
            my $last_shown=($num_sets>3 ? 2 : $num_sets-1);
            my $msg=join('', map { " ".$_ } @{$self->set_names}[0..$last_shown]);
            $msg.=" ..." if ($num_sets>3);
            $self->log->pass("Extracted $num_sets set names: {$msg }, will use setSpec ".
                             $self->example_set_spec." in tests");
        }
    }
}
573
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
=head3 test_list_identifiers() |
576
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
Check response to ListIdentifiers and record an example record id in |
578
|
|
|
|
|
|
|
$self->example_record_id to be used in other tests. |
579
|
|
|
|
|
|
|
|
580
|
|
|
|
|
|
|
If there are no identifiers, but the response is legal, stop the test with |
581
|
|
|
|
|
|
|
errors=0, number of verbs checked is three. |
582
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
As of version 2.0, a metadataPrefix argument is required. Unfortunately |
584
|
|
|
|
|
|
|
we need to call test_list_identifiers first in order to get an id for |
585
|
|
|
|
|
|
|
GetRecord, so we simply use oai_dc. |
586
|
|
|
|
|
|
|
|
587
|
|
|
|
|
|
|
=cut |
588
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
# Check the ListIdentifiers response and record the identifier of the first
# non-deleted record in $self->example_record_id for use by later tests.
#
# The request always uses metadataPrefix=oai_dc. If an example set spec is
# available it is added to the request; if that fails, each remaining entry
# of $self->set_names is tried in turn, and finally the request is repeated
# without any set restriction. When one of the alternative sets works,
# $self->example_set_spec is updated to it.
#
# Calls $self->abort when no usable response or no usable identifier can be
# obtained (abort is presumably fatal - confirm against its definition).
sub test_list_identifiers {
    my $self=shift;

    $self->log->start("Checking ListIdentifiers response");

    ### FIXME -- skip the set= restriction because this code doesn't
    ### FIXME work right for set hierarchies - 2002-10-17
    ### FIXME 2015-01-02 - put/left in, is it OK?
    my $set_spec = $self->example_set_spec;
    my $req = $self->base_url."?verb=ListIdentifiers&metadataPrefix=oai_dc".$set_spec;
    my $response = $self->make_request_and_validate("ListIdentifiers", $req);

    # Note: $response will come back null if an error code was returned
    # An error code of "noRecordsMatch" comes back if specified set is
    # empty. In that case we should drop the set and try again.
    # NOTE(review): if is_no_records_match can be true while $response is
    # also true, the loop below will not run at all - confirm against
    # make_request_and_validate / is_no_records_match.
    if ( $set_spec and (! $response or $self->is_no_records_match ) ) {
        $self->log->note("Empty set made ListIdentifiers fail - trying other sets...");
        # Set 0 was the example set just tried, so start from index 1 and
        # advance through the remaining sets until one yields a response.
        my $last_index = $#{$self->set_names};
        foreach my $i (1..$last_index) {
            last if ($response);
            $set_spec = "&set=".$self->set_names->[$i];
            $self->log->note("Trying set ".$set_spec);
            $req = $self->base_url."?verb=ListIdentifiers&metadataPrefix=oai_dc".$set_spec;
            $response = $self->make_request_and_validate("ListIdentifiers", $req);
        }
        # If we were successful then set the example_set_spec for any future tests
        if ($response) {
            $self->example_set_spec( $set_spec );
        }
    }

    # None of the sets had any identifiers in them. Try the whole entire
    # list of identifiers.
    if ( $set_spec and !$response ) {
        $self->log->note("Last attempt is without any sets...");
        $req = $self->base_url."?verb=ListIdentifiers&metadataPrefix=oai_dc";
        $response = $self->make_request_and_validate("ListIdentifiers",$req);
    }

    # Now we are for real in trouble if $response is null
    unless ($response) {
        $self->log->fail("No ListIdentifiers response with content");
        $self->log->note("The base URL did not respond to the ListIdentifiers verb. ".
                         "Without that, we cannot proceed with the validation test. Exiting.");
        $self->abort("The base URL did not respond to the ListIdentifiers verb. Without that, we cannot proceed with the validation test. Exiting.");
    }

    # Grab the first identifier for later use
    unless ($self->parse_response($req,$response)) {
        $self->log->fail("Can't parse ListIdentifiers response");
        $self->abort("unable to parse response");
    }
    #
    # Now look for the identifier of a non-deleted record
    # If there are no identifiers to be harvested, we cannot complete the
    # validation test.
    #
    # FIXME - this still doesn't solve the problem that there may be no
    # non-deleted items listed in the particular response or partial
    # response that we are looking at [Simeon/2005-07-20]
    #
    my $headers = $self->doc->getElementsByTagName('header');
    my $num_headers = $headers->getLength();
    my $record_id;
    for my $h (0..$num_headers-1) {
        my $hdnode=$headers->item($h);
        my $idnode=$hdnode->getElementsByTagName('identifier',0);
        next unless ($idnode and $idnode->getLength()==1);
        # An empty <identifier/> has no child node; skip it rather than
        # dying on an undefined value.
        my $text_node=$idnode->item(0)->getFirstChild;
        next unless ($text_node);
        my $candidate_id=$text_node->getData;
        my $status=$hdnode->getAttribute('status');
        unless ($status and $status eq 'deleted') {
            $record_id=$candidate_id;
            last;
        }
        $self->log->warn("Identifier ".($h+1).", '$candidate_id', is for a deleted record, skipping");
    }
    unless (defined($record_id)) {
        # No usable identifiers were in the ListIdentifiers response. Further
        # testing is not possible.
        $self->log->fail("The response to the ListIdentifiers verb with metadataPrefix oai_dc ".
                         "contained no identifiers. Without at least one identifier, we cannot ".
                         "proceed with the validation tests.");
        $self->abort("No identifiers in response to ListIdentifiers. Without an identifier ".
                     "we cannot proceed with validation tests.");
    }
    $self->log->pass("Good ListIdentifiers response, extracted id '$record_id' for use in future tests.");
    $self->example_record_id( $record_id );
}
673
|
|
|
|
|
|
|
|
674
|
|
|
|
|
|
|
|
675
|
|
|
|
|
|
|
=head3 test_list_metadata_formats() |
676
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
Vet the verb as usual, and then make sure that Dublin Core is included. |
678
|
|
|
|
|
|
|
In particular, we will check the metadata formats available for "record_id", |
679
|
|
|
|
|
|
|
obtained from checking the ListIdentifiers verb. |
680
|
|
|
|
|
|
|
Side effect: save available formats for later use (global "formats"). |
681
|
|
|
|
|
|
|
NOTE: if there are no formats, error will be picked up by getRecord |
682
|
|
|
|
|
|
|
|
683
|
|
|
|
|
|
|
=cut |
684
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
# Check the ListMetadataFormats response for the example record
# ($self->example_record_id) and look for the oai_dc format in it.
#
# Returns ($formats, $gotDC) where $formats is the node list of
# metadataPrefix elements from the response and $gotDC is 1 if one of them
# contains oai_dc (surrounding whitespace ignored), else 0.
# Returns empty, after logging a failure, when there is no example record
# id, no response, an unparsable response, or no metadataPrefix elements.
sub test_list_metadata_formats {
    my $self=shift;

    $self->log->start("Checking ListMetadataFormats response");

    # Do we have an example record id to check with?
    my $record_id = $self->example_record_id;
    unless ($record_id) {
        $self->log->fail("Cannot check ListMetadataFormats as we do not have an example id");
        return;
    }

    my $req = $self->base_url."?verb=ListMetadataFormats&identifier=".url_encode($record_id);
    my $response = $self->make_request_and_validate("ListMetadataFormats",$req);
    unless ($response) {
        $self->log->fail("Can't check metadataFormats available for item $record_id, no ".
                         "response to ListMetadataFormats request.");
        return;
    }

    # Check for Dublin Core
    unless ($self->parse_response($req,$response)) {
        $self->log->fail("Can't parse response to ListMetadataFormats request for item $record_id.");
        return;
    }

    my $formats = $self->doc->getElementsByTagName('metadataPrefix');
    my $num_formats = $formats->getLength();
    unless ($num_formats > 0) {
        $self->log->fail("No metadata formats are listed in the response to a ListMetadataFormats ".
                         "request for item $record_id.");
        return;
    }

    if ($self->debug) {
        $self->log->note("debug: ".$num_formats." formats supported for identifier '$record_id'");
    }
    my $gotDC=0;
    for my $i (0..$num_formats-1) {
        # Assume the element has a single child holding the prefix text. An
        # empty <metadataPrefix/> has no child at all, so skip it rather
        # than dying on an undefined value.
        my $text_node = $formats->item($i)->getFirstChild;
        next unless ($text_node);
        if ( $text_node->getData =~ /^\s*oai_dc\s*$/ ) {
            $gotDC = 1;
            last;
        }
    }
    if ($gotDC) {
        $self->log->pass("Good ListMetadataFormats response, includes oai_dc");
    } else {
        # NOTE(review): a missing oai_dc is still logged as a pass here;
        # presumably the caller acts on the returned $gotDC - confirm.
        $self->log->pass("Good ListMetadataFormats response, BUT DID NOT FIND oai_dc");
    }
    return($formats, $gotDC);
}
737
|
|
|
|
|
|
|
|
738
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
=head3 test_get_record($record_id, $format) |
740
|
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
Try to get record $record_id in $format. |
742
|
|
|
|
|
|
|
|
743
|
|
|
|
|
|
|
If either $record_id or $format are undef then we have an error |
744
|
|
|
|
|
|
|
right off the bat. Else make the request and return the |
745
|
|
|
|
|
|
|
datestamp of the record. |
746
|
|
|
|
|
|
|
|
747
|
|
|
|
|
|
|
=cut |
748
|
|
|
|
|
|
|
|
749
|
|
|
|
|
|
|
# Request record $record_id in metadata format $format with GetRecord and
# check the datestamp (and, if sets are in use, the setSpec) of the result.
#
# Parameters:
#   $record_id - identifier of the record to fetch
#   $format    - metadataPrefix to request
#
# Returns the datestamp string extracted from the response (undef if it
# could not be extracted). Returns empty, after logging a failure, if
# either argument is undefined. Calls $self->abort when there is no
# response, the response cannot be parsed, it is an OAI exception, or it
# contains no datestamp element (abort is presumably fatal - confirm).
sub test_get_record {
    my $self=shift;
    my ($record_id, $format)=@_;

    $self->log->start("Checking GetRecord response");

    unless (defined $format) {
        $self->log->fail("Skipping GetRecord test as no metadata format is listed as being available.");
        return;
    }
    unless (defined $record_id) {
        $self->log->fail("Skipping GetRecord test as no items are listed as having metadata available.");
        return;
    }

    my $numerr=0; #count up non-fatal errors

    my $req = $self->base_url."?verb=GetRecord&identifier=".url_encode($record_id)."&metadataPrefix=".url_encode($format);
    my $response = $self->make_request_and_validate("GetRecord", $req);
    unless ($response) {
        $self->log->fail("Can't complete datestamp check for GetRecord");
        $self->abort("Can't complete datestamp check for GetRecord");
    }

    # Save the datestamp for later use by ListRecords
    # As of version 2.0, Identify response can have a granularity and the
    # datestamp MUST be in the finest granularity supported by the repository
    unless ($self->parse_response($req,$response)) {
        $self->log->fail("Can't parse response");
        $self->abort("Unable to parse response from GetRecord");
    }

    if (my $msg=$self->is_error_response) {
        $self->log->fail("The response to the GetRecord verb was the OAI exception $msg. ".
                         "It is thus not possible to extract a valid datestamp for remaining tests");
        $self->abort("Unexpected OAI exception response");
    }

    my $datestamps = $self->doc->getElementsByTagName('datestamp');
    # If there is no datestamp ... but there should be a record
    unless ( $datestamps->getLength() > 0 ) {
        $self->log->fail("The response to the GetRecord verb did not have a datestamp, which is ".
                         "needed to continue checking verbs.");
        $self->abort("No datestamp in the response for GetRecord");
    }

    # The eval guards against an empty datestamp element (no child node);
    # in that case $datestamp simply stays undefined.
    my $datestamp=undef;
    eval {
        $datestamp = $datestamps->item(0)->getFirstChild->getData;
    };
    if (not defined($datestamp)) {
        $self->log->fail("Failed to extract datestamp from the GetRecord response. See <".
                         $self->protocol."#Dates>.");
        $numerr++;
    } elsif ( my $granularity=$self->get_datestamp_granularity($datestamp) ) {
        $self->log->pass("Datestamp in GetRecord response ($datestamp) has the correct form for ".
                         "$granularity granularity.");
        if ( $granularity eq $self->granularity ) {
            # The granularity in v2.0 must match the finest granularity supported (see sec3.3.2)
            $self->log->pass("Datestamp in GetRecord response ($datestamp) matched the ".
                             $self->granularity." granularity specified in the Identify response. ");
        } else {
            $self->log->fail("Datestamp in GetRecord response ($datestamp) is not consistent ".
                             "with the ".$self->granularity." granularity specified in the ".
                             "Identify response");
            $numerr++;
        }
    } else {
        $self->log->fail("Datestamp in GetRecord response ($datestamp) is not valid. See <".
                         $self->protocol."#Dates>.");
        $numerr++;
    }

    # As of OAI-PMH Version 2.0, GetRecord must return a set spec if the
    # repository supports sets and the item is in a set
    if (not $self->example_set_spec) {
        $self->log->pass("Valid GetRecord response") unless ($numerr>0);
        return($datestamp);
    }

    my $set_list = $self->doc->getElementsByTagName('setSpec');
    my $set_value = $self->example_set_spec;
    $set_value =~ s/&set=//;
    $self->log->note("Looking for set '".$set_value."' or a descendant set.") if $self->debug;
    my $found_set = 0;
    my $subset_str = '';
    for my $i (0..$set_list->getLength-1) {
        # Skip an empty <setSpec/> (no child node) instead of dying
        my $text_node = $set_list->item($i)->getFirstChild;
        next unless ($text_node);
        my $s = $text_node->getData;
        if ($s eq $set_value) {
            $found_set = 1;
            last;
        }
        # \Q..\E: a setSpec may contain regex metacharacters such as
        # . * ( ) which must be matched literally here.
        if ($s =~ m/^\Q$set_value\E:/) {
            $subset_str = " (implied by a descendant setSpec)";
            $found_set = 1;
            last;
        }
    }
    if ($found_set) {
        $self->log->pass("Expected setSpec was returned in the response".$subset_str);
    } else { # error
        $self->log->fail("Expected setSpec was missing from the response. The GetRecord ".
                         "response for identifier $record_id did not contain a set ".
                         "specification for $set_value");
    }
    return($datestamp);
}
852
|
|
|
|
|
|
|
|
853
|
|
|
|
|
|
|
|
854
|
|
|
|
|
|
|
=head3 test_list_records($datestamp,$metadata_prefix) |
855
|
|
|
|
|
|
|
|
856
|
|
|
|
|
|
|
Test the response for the ListRecords verb. In addition, if there is |
857
|
|
|
|
|
|
|
no Dublin Core available for this repository, this is an error. |
858
|
|
|
|
|
|
|
(And the error has already been counted in test_get_record) |
859
|
|
|
|
|
|
|
We can still test the verb, however, with one of the available |
860
|
|
|
|
|
|
|
formats found by test_list_metadata_formats. Since the output of |
861
|
|
|
|
|
|
|
ListRecords is likely to be large, use the datestamp of the one |
862
|
|
|
|
|
|
|
record we did retrieve to limit the output. |
863
|
|
|
|
|
|
|
|
864
|
|
|
|
|
|
|
=cut |
865
|
|
|
|
|
|
|
|
866
|
|
|
|
|
|
|
# Check the ListRecords response, limited (when a datestamp is given) to
# from=$datestamp and until=$datestamp, and verify that the record with
# identifier $self->example_record_id is included.
#
# Parameters:
#   $datestamp       - datestamp of the example record, used for both the
#                      from and until arguments; omitted from the request
#                      (with a warning) if false
#   $metadata_prefix - metadataPrefix to request
#
# If the record is not found in a response that carries a resumptionToken,
# the request is repeated with that token until the record is found, the
# list ends, or an error occurs. Results are reported via $self->log only;
# the return value is not meaningful.
sub test_list_records {
    my $self=shift;
    my ($datestamp,$metadata_prefix)=@_;

    $self->log->start("Checking ListRecords response");

    my $req = $self->base_url."?verb=ListRecords";
    if ($datestamp) {
        $req.="&from=".$datestamp."&until=".$datestamp;
    } else {
        $self->log->warn("Omitting datestamp parameter as none was obtained from earlier test");
    }
    $req.="&metadataPrefix=".$metadata_prefix;
    my $list_not_complete=1;

    while ($list_not_complete) {
        $list_not_complete=0;
        my $response = $self->make_request_and_validate("ListRecords", $req);
        unless ($response) {
            #Nothing else to say since we don't do other tests
            return;
        }

        if ($self->parse_response($req,$response)) {
            $self->log->pass("Response is well formed");
        } else {
            $self->log->fail("The ListRecords response was not well formed XML");
            # Without a parsed document the checks below would not be
            # looking at this response, so stop here.
            return;
        }

        # Now check to make sure that we got back the record for the identifier
        # $self->example_record_id if there is one specified, else fail that
        # test.
        my $record_id=$self->example_record_id;
        unless ($record_id) {
            $self->log->fail("Cannot check for correct record inclusion without an example record id");
            return;
        }
        my $details={};
        if ($self->is_error_response($details)) {
            if ($details->{'noRecordsMatch'}) {
                $self->log->fail("ListRecords response gave a noRecordsMatch error when it should ".
                                 "have included at least the record with identifier $record_id. ".
                                 "The from and until parameters of the request were set to the ".
                                 "datestamp of this record ($datestamp). The from and until parameters ".
                                 "are inclusive, see protocol spec section 2.7.1. The message ".
                                 "included in the error response was: '".
                                 $details->{'noRecordsMatch'}."'");
            } else {
                my @txt=();
                foreach my $k (keys %$details) {
                    push(@txt,"$k (".$details->{$k}.")");
                }
                $self->log->fail("ListRecords gave an unexpected error response to a request using ".
                                 "from and until datestamps taken from the previous GetRecord response: ".
                                 join(', ',@txt));
            }
        } else {
            my $id_list = $self->doc->getElementsByTagName('identifier');
            my $num_ids = $id_list->getLength;
            # $i is declared outside the loop because its final value tells
            # us whether the scan stopped early (match or empty element).
            my $i;
            my $badly_formed=0;
            for ($i=0; $i<$num_ids; $i++) {
                if (my $child=$id_list->item($i)->getFirstChild()) {
                    last if ($child->getData eq $record_id);
                } else {
                    $badly_formed++;
                    last;
                }
            }
            if ($badly_formed) {
                $self->log->fail("ListRecords response badly formed, identifier element for record ".
                                 ($i+1)." is empty");
            } elsif ($i<$num_ids) {
                $self->log->pass("ListRecords response correctly included record with identifier $record_id");
            } elsif (my $token=$self->get_resumption_token) {
                # More responses to come, may just not have got to the
                # record yet... roll around for more:
                $self->log->pass("ListRecords response includes resumptionToken. Haven't found ".
                                 "record with identifier $record_id yet, will continue with resumptionToken...");
                $list_not_complete=1;
                $req = $self->base_url."?verb=ListRecords&resumptionToken=".url_encode($token);
            } else {
                $self->log->fail("ListRecords response did not include the identifier $record_id ".
                                 "which should have been included because both the from and until ".
                                 "parameters were set to the datestamp of this record ($datestamp). ".
                                 "The from and until parameters are inclusive, see protocol spec ".
                                 "section 2.7.1");
            }
        }
    }
}
956
|
|
|
|
|
|
|
|
957
|
|
|
|
|
|
|
|
958
|
|
|
|
|
|
|
=head3 test_resumption_tokens() |
959
|
|
|
|
|
|
|
|
960
|
|
|
|
|
|
|
Request an unlimited ListRecords. If there is a resumption token, see |
961
|
|
|
|
|
|
|
if it works. It is an error if resumption is there but doesn't work. |
962
|
|
|
|
|
|
|
Empty resumption tokens are OK -- this ends the list. |
963
|
|
|
|
|
|
|
|
964
|
|
|
|
|
|
|
CGI takes care of URL-encoding the resumption token. |
965
|
|
|
|
|
|
|
|
966
|
|
|
|
|
|
|
=cut |
967
|
|
|
|
|
|
|
|
968
|
|
|
|
|
|
|
# Check that a resumptionToken, if the repository issues one, can be used.
#
# Makes an unrestricted ListRecords request (metadataPrefix=oai_dc). If the
# response carries exactly one non-empty resumptionToken, checks any cursor
# attribute on it (which must be zero in the first response) and then
# issues a second request using the token, which must yield parsable XML
# containing no error element.
#
# Results are reported via $self->log; the return value is not meaningful.
sub test_resumption_tokens {
    my $self=shift;

    $self->log->start("Checking for correct use of resumptionToken (if used)");

    my $req = $self->base_url."?verb=ListRecords&metadataPrefix=oai_dc";
    my $response = $self->make_request($req);
    # Same guard as for the follow-up request below: don't try to parse a
    # response we never got.
    unless ( $response ) {
        $self->log->fail("Site failed to respond to request: $req");
        return;
    }

    # was there a resumption token?
    unless ($self->parse_response($req,$response)) {
        $self->log->fail("Can't parse malformed XML in response to ListRecords request. ".
                         "Cannot complete test for correct use of resumptionToken (if used)");
        return;
    }

    my $tokenList = $self->doc->getElementsByTagName('resumptionToken');
    if ( !$tokenList or $tokenList->getLength()==0 ) {
        $self->log->pass("resumptionToken not used");
        return;
    }
    if ( $tokenList->getLength()>1 ) {
        $self->log->fail("More than one resumptionToken in response!");
        return;
    }

    # Dig out the resumption token from the document
    my $tokenElement = $tokenList->item(0);

    # Try getting the resumption token, $tokenString will be undefined
    # unless the element has content
    my $token = $tokenElement->getFirstChild;
    my $tokenString;
    if ($token) {
        $tokenString = $token->getData;
    }
    # Test for defined/non-empty rather than truth so that a token whose
    # text is "0" is not mistaken for an empty one.
    unless (defined($tokenString) and $tokenString ne '') {
        $self->log->fail("Empty resumption token in response to $req. There should never ".
                         "be an empty resumptionToken in response to a request without a ".
                         "resumptionToken argument");
        return;
    }

    # If there is a 'cursor' value given then check that it is
    # correct. It must have the value 0 in the first response.
    # The presence test must not rely on truth: the correct value "0" is
    # false in Perl. An absent attribute is treated as undef or ''.
    my $cursor = $tokenElement->getAttribute('cursor');
    if (defined($cursor) and $cursor ne '') {
        # Accept any lexical form of integer zero (0, 00, +0, -0); anything
        # else, including non-numeric text, is wrong.
        if ($cursor =~ /\A\s*[+-]?0+\s*\z/) {
            $self->log->pass("A cursor value was supplied with the resumptionToken and it ".
                             "correctly had the value zero in the first response");
        } else {
            $self->log->fail("A cursor value was supplied with the resumptionToken but it ".
                             "did not have the correct value zero for the first response. ".
                             "The value was '$cursor'.");
        }
    }

    $self->log->note("Got resumptionToken ".$tokenString);

    # Try using the resumption token. Before including a resumptionToken in
    # the URL of a subsequent request, we must encode all special characters
    # getData in this version of XML::DOM expands entities
    $req = $self->base_url."?verb=ListRecords&resumptionToken=".url_encode($tokenString);
    $response = $self->make_request($req);
    unless ( $response ) {
        $self->log->fail("Site failed to respond to request using resumptionToken: $req");
        return;
    }
    unless ( $self->parse_response($req,$response)) {
        $self->log->fail("Response to request using resumptionToken is not valid XML: $req");
        return;
    }

    my $errorList = $self->doc->getElementsByTagName('error');
    if ( $errorList and $errorList->getLength() > 0 ) {
        $self->log->fail("Response to request using resumptionToken was an error code: $req");
        return;
    }

    ###FIXME: put in test for cursor again, should be number of items returned in the
    ###FIXME: first response [Simeon/2005-10-11]

    $self->log->pass("Resumption tokens appear to work");
}
1052
|
|
|
|
|
|
|
|
1053
|
|
|
|
|
|
|
|
1054
|
|
|
|
|
|
|
=head2 METHODS CHECKING ERRORS AND EXCEPTIONS |
1055
|
|
|
|
|
|
|
|
1056
|
|
|
|
|
|
|
=head3 test_expected_errors($record_id) |
1057
|
|
|
|
|
|
|
|
1058
|
|
|
|
|
|
|
Each one of these requests should get a 400 response in OAI-PMH v1.1, |
1059
|
|
|
|
|
|
|
or a 200 response in 2.0, along with a Reason_Phrase. Bump error_count |
1060
|
|
|
|
|
|
|
if this does not hold. Return the number of erroneous responses. |
1061
|
|
|
|
|
|
|
|
1062
|
|
|
|
|
|
|
$record_id is a valid record identifier to be used in tests that require |
1063
|
|
|
|
|
|
|
one. |
1064
|
|
|
|
|
|
|
|
1065
|
|
|
|
|
|
|
=cut |
1066
|
|
|
|
|
|
|
|
1067
|
|
|
|
|
|
|
# Send a series of deliberately invalid requests and check that each one is
# answered with an appropriate exception.
#
# For protocol version 1.1 the HTTP status code must be 400. For version 2.0
# the response must parse and contain at least one of the acceptable error
# codes. Any other protocol version is logged as a failure and the run is
# aborted.
#
# $record_id - a valid record identifier for requests that need one.
#
# Returns the number of requests that were not handled as expected.
sub test_expected_errors {
    my $self=shift;
    my ($record_id)=@_;

    $self->log->start("Checking exception handling (errors)");

    # Format of entries:
    #   [ request_string, [acceptable error codes],
    #     hint used when the response cannot be parsed,
    #     hint used when no acceptable error code is found ]
    my @request_list = (
        [ 'junk', [ 'badVerb' ], '', '' ],
        [ 'verb=junk', [ 'badVerb' ], '', '' ],
        [ 'verb=GetRecord&metadataPrefix=oai_dc', [ 'badArgument' ], '', '' ],
        [ 'verb=GetRecord&identifier='.$record_id, [ 'badArgument' ], '', '' ],
        [ 'verb=GetRecord&identifier=invalid"id&metadataPrefix=oai_dc', [ 'badArgument','idDoesNotExist' ], 'An XML parsing error may be due to incorrectly including the invalid identifier in the element of your XML error response; only valid arguments should be included. A response that includes ..baseURL.. is not well-formed XML because of the quotation mark (") in the identifier.', 'Either the badArgument or idDoesNotExist error codes would be appropriate to report this case.' ],
        [ 'verb=ListIdentifiers&until=junk', [ 'badArgument' ], '', '' ],
        [ 'verb=ListIdentifiers&from=junk', [ 'badArgument' ], '', '' ],
        [ 'verb=ListIdentifiers&resumptionToken=junk&until=2000-02-05', [ 'badArgument','badResumptionToken' ], '', 'Either the badArgument and/or badResumptionToken error codes may be reported in this case. If only one is reported then the badArgument error is to be preferred because the resumptionToken and until parameters are exclusive.' ],
        [ 'verb=ListRecords&metadataPrefix=oai_dc&from=junk', [ 'badArgument' ], '', '' ],
        [ 'verb=ListRecords&resumptionToken=junk', [ 'badResumptionToken' ], '', '' ],
        [ 'verb=ListRecords&metadataPrefix=oai_dc&resumptionToken=junk&until=1990-01-10', [ 'badArgument','badResumptionToken' ] , '', 'Either the badArgument and/or badResumptionToken error codes may be reported in this case. If only one is reported then the badArgument error is to be preferred because the resumptionToken and until parameters are exclusive.' ],
        [ 'verb=ListRecords&metadataPrefix=oai_dc&until=junk', [ 'badArgument' ], '', '' ],
        [ 'verb=ListRecords', [ 'badArgument' ], '', '' ]
    );

    my $n=0;
    foreach my $rrr ( @request_list ) {
        my ($request_string, $error_codes, $xml_reason, $reason)=@$rrr;
        my $req = $self->base_url.'?'.$request_string;
        my $ok_errors=join(' or ',@$error_codes);

        my $response=$self->make_request($req);

        # make_request can return a false value when the site does not
        # respond; count that as a failure instead of calling methods on it.
        unless ($response) {
            $self->log->fail("Site failed to respond to request: $req");
            $n++;
            next;
        }

        # TBD: $response->status_line should also be checked? see output from
        # physnet.uni-oldenburg.de/oai/oai.php
        if ($self->protocol_version eq "1.1") {
            if ($response->code ne "400") {
                $self->log->note("Invalid requests which failed to return 400:") if $n == 0;
                $n++;
                $self->log->fail("Expected 400 from: $request_string");
            }
        } elsif ($self->protocol_version eq "2.0") {
            # The document must contain the proper error code
            unless ($self->parse_response($req,$response,$xml_reason)) {
                $self->log->fail("Can't parse malformed response. ".html_escape($xml_reason));
                $n++;
                next;
            }
            # check that the error code is in the error_list
            my $error_elements = $self->doc->getElementsByTagName('error');
            if (my $matching_code=$self->error_elements_include($error_elements, $error_codes)) {
                $self->log->pass("Error response correctly includes error code '$matching_code'");
            } else {
                $self->log->fail("Exception/error response did not contain error code ".
                                 "'$ok_errors' ".html_escape($reason));
                $n++;
            }
        } else {
            $self->log->fail("Invalid protocol version returned");
            $self->abort("test_expected_errors - invalid protocol version");
        }
    }

    my $total = scalar @request_list;
    if ($n==0) {
        $self->log->pass("All $total error requests properly handled");
    } else {
        $self->log->warn("Only ".($total-$n)." out of $total error requests properly handled");
    }
    return($n);
}
1135
|
|
|
|
|
|
|
|
1136
|
|
|
|
|
|
|
|
1137
|
|
|
|
|
|
|
=head3 test_expected_v2_errors($earliest_datestamp,$metadata_prefix) |
1138
|
|
|
|
|
|
|
|
1139
|
|
|
|
|
|
|
There are some additional exception tests for OAI-PMH version 2.0. |
1140
|
|
|
|
|
|
|
|
1141
|
|
|
|
|
|
|
=cut |
1142
|
|
|
|
|
|
|
|
1143
|
|
|
|
|
|
|
# Run the exception tests that only apply to OAI-PMH 2.0: mixed from/until
# granularity (expects badArgument) and an until date one year before the
# earliest datestamp (expects noRecordsMatch). Each outcome is written to
# the log; nothing is returned.
sub test_expected_v2_errors {
    my ($self, $earliest_datestamp, $metadata_prefix) = @_;

    $self->log->start("Checking for version 2.0 specific exceptions");

    my $too_early_date = one_year_before($earliest_datestamp);
    my $list_records = "verb=ListRecords&metadataPrefix=".url_encode($metadata_prefix);

    # Format of entries: [ request_string, [error_codes_acceptable], reason ]
    my @cases = (
        [ $list_records."&from=2002-02-05&until=2002-02-06T05:35:00Z",
          ['badArgument'],
          'The request has different granularities for the from and until parameters.' ],
        [ $list_records."&until=$too_early_date",
          ['noRecordsMatch'],
          'The request specified a date one year before the earliestDatestamp given in the Identify response. '.
          'There should therefore not be any records with datestamps on or before this date and a noRecordsMatch '.
          'error code should be returned.' ],
    );

    for my $case (@cases) {
        my ($request_string, $error_codes, $reason) = @$case;

        my $req = $self->base_url."?$request_string";
        my $response = $self->make_request($req);

        # The response has to parse before an error code can be looked for
        unless ( $self->parse_response($req,$response) ) {
            $self->log->fail("Error in parsing XML response to exception request: $request_string");
            next;
        }

        my $error_elements = $self->doc->getElementsByTagName('error');
        unless ( $error_elements and $error_elements->getLength != 0 ) {
            $self->log->fail("Failed to extract error code from the response to request: ".
                             "$request_string $reason");
            next;
        }

        # At least one of the acceptable codes must be present
        my $matching_code = $self->error_elements_include($error_elements, $error_codes);
        if ($matching_code) {
            $self->log->pass("Error response correctly includes error code '$matching_code'");
            next;
        }

        my $ok_errors = join(' or ', @$error_codes);
        $self->log->fail("Error code $ok_errors not found in response but should be given ".
                         "to the request: $request_string $reason");
    }
    return;
}
1186
|
|
|
|
|
|
|
|
1187
|
|
|
|
|
|
|
|
1188
|
|
|
|
|
|
|
=head2 METHODS TO TEST USE OF HTTP POST |
1189
|
|
|
|
|
|
|
|
1190
|
|
|
|
|
|
|
=head3 test_post_requests() |
1191
|
|
|
|
|
|
|
|
1192
|
|
|
|
|
|
|
Test responses to POST requests. Do both the simplest possible -- the Identify |
1193
|
|
|
|
|
|
|
verb -- and a GetRecord request which uses two additional parameters. |
1194
|
|
|
|
|
|
|
|
1195
|
|
|
|
|
|
|
=cut |
1196
|
|
|
|
|
|
|
|
1197
|
|
|
|
|
|
|
# Exercise HTTP POST handling with two requests: a bare Identify, and a
# GetRecord that carries an identifier and a metadataPrefix. The GetRecord
# test is logged as failed if no example record identifier is available.
sub test_post_requests {
    my ($self, $metadata_prefix) = @_;

    $self->log->start("Checking that HTTP POST requests are handled correctly");

    # Test 1: no arguments beyond the verb
    $self->test_post_request(1, { verb => "Identify" });

    # Test 2: needs a known record to ask for
    my $record_id = $self->example_record_id;
    unless ($record_id) {
        return $self->log->fail("Cannot test GetRecord via POST without and example record identifier");
    }

    my %get_record = (
        verb           => "GetRecord",
        identifier     => $record_id,
        metadataPrefix => $metadata_prefix,
    );
    return $self->test_post_request(2, \%get_record);
}
1212
|
|
|
|
|
|
|
|
1213
|
|
|
|
|
|
|
|
1214
|
|
|
|
|
|
|
# Called just by test_post_requests to actually run the test |
1215
|
|
|
|
|
|
|
# |
1216
|
|
|
|
|
|
|
# Issue one POST request to the base URL and log whether it produced a
# response for the requested verb.
#
# $num       - test number, used only in log messages
# $post_data - hashref of POST parameters; must include the key 'verb'
#
# Logs a pass when the response is for the verb, and a fail when the site
# does not respond, the HTTP status is not a success, an OAI error response
# comes back, or the response is neither a verb response nor an error.
sub test_post_request {
    my $self=shift;
    my ($num, $post_data) = @_;
    my $response = $self->make_request($self->base_url, $post_data);

    # make_request can return a false value when the site does not respond;
    # log that instead of calling methods on it.
    unless ($response) {
        $self->log->fail("POST test $num was unsuccessful. Site failed to respond to the request");
        return;
    }

    if ($response->is_success) {
        my $verb = $post_data->{verb};
        if ( $self->is_verb_response($response,$verb) ) {
            $self->log->pass("POST test $num for $verb was successful");
        } elsif ( $self->check_error_response($response) ) {
            $self->log->fail("POST test $num for $verb was unsuccessful, an OAI error ".
                             "response was received");
        } else {
            $self->log->fail("POST test $num for $verb was unsuccessful, got neither a ".
                             "valid response nor an error");
        }
    } else {
        $self->log->fail("POST test $num was unsuccessful. Server returned HTTP Status: '".
                         $response->status_line."'");
    }
}
1236
|
|
|
|
|
|
|
|
1237
|
|
|
|
|
|
|
|
1238
|
|
|
|
|
|
|
=head2 METHODS CHECKING ELEMENTS WITHIN VERB AND ERROR RESPONSES |
1239
|
|
|
|
|
|
|
|
1240
|
|
|
|
|
|
|
=head3 check_response_date($req, $doc) |
1241
|
|
|
|
|
|
|
|
1242
|
|
|
|
|
|
|
Check responseDate for being in UTC format |
1243
|
|
|
|
|
|
|
(should perhaps also check that it is at least the current day?) |
1244
|
|
|
|
|
|
|
|
1245
|
|
|
|
|
|
|
=cut |
1246
|
|
|
|
|
|
|
|
1247
|
|
|
|
|
|
|
# Check that the first responseDate element of the current document holds a
# UTC datetime of the form YYYY-MM-DDThh:mm:ssZ, and log a pass or fail.
#
# $req, $doc - accepted for interface compatibility but not used; the
#              document examined is always $self->doc.
sub check_response_date {
    my $self=shift;
    my ($req, $doc) = @_;

    my $elements = $self->doc->getElementsByTagName('responseDate');
    # assume rest of validity already checked, just take first
    my $item;
    my $child;
    if ($elements and $item=$elements->item(0) and $child=$item->getFirstChild()) {
        my $date = $child->getData();
        # Anchor the pattern so that extra characters before or after the
        # datetime are rejected; surrounding whitespace is tolerated.
        if ($date=~/\A\s*\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\dZ\s*\z/) {
            $self->log->pass("responseDate has correct format: $date");
        } else {
            $self->log->fail("Bad responseDate of $date, this is not in UTC DateTime ".
                             "(YYYY-MM-DDThh:mm:ssZ) format");
        }
    } else {
        $self->log->fail("Failed to extract responseDate");
    }
}
1267
|
|
|
|
|
|
|
|
1268
|
|
|
|
|
|
|
|
1269
|
|
|
|
|
|
|
=head3 check_schema_name($req, $doc) |
1270
|
|
|
|
|
|
|
|
1271
|
|
|
|
|
|
|
Given the response to one of the OAI verbs, make sure that it is |
1272
|
|
|
|
|
|
|
going to be validated against the "official" OAI schema, and not |
1273
|
|
|
|
|
|
|
one that the repository made up for itself. If the response can't |
1274
|
|
|
|
|
|
|
be parsed, or if there is no OAI-PMH element, or if the schema is |
1275
|
|
|
|
|
|
|
incorrect, print an error message and bump the error_count. |
1276
|
|
|
|
|
|
|
|
1277
|
|
|
|
|
|
|
Return true if the schema name and date check out, else return undef |
1278
|
|
|
|
|
|
|
|
1279
|
|
|
|
|
|
|
=cut |
1280
|
|
|
|
|
|
|
|
1281
|
|
|
|
|
|
|
# Check that the OAI-PMH root element of the current document pairs the
# OAI-PMH 2.0 namespace with the official schema location in its
# xsi:schemaLocation attribute.
#
# $req - request URL, used only in the log message
# $doc - accepted for interface compatibility but not used; the document
#        examined is always $self->doc.
#
# Returns 1 if the pairing is correct, otherwise logs a failure and
# returns 0.
sub check_schema_name {
    my $self=shift;
    my ($req, $doc) = @_;

    my $namespace = 'http://www.openarchives.org/OAI/2.0/';
    my $location = 'http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd';

    my $elements = $self->doc->getElementsByTagName('OAI-PMH'); #NodeList
    unless ( $elements->getLength() > 0 ) {
        $self->log->fail("Response to $req did not contain a OAI-PMH element");
        return(0);
    }
    my $attributes = $elements->item(0)->getAttributes; #Node->NamedNodeMap
    my $attr = $attributes->getNamedItem('xsi:schemaLocation'); #Node
    unless ( $attr ) {
        $self->log->fail("No xsi:schemaLocation attribute for the OAI-PMH element was ".
                         "found, expected xsi:schemaLocation=\"$namespace $location\"");
        return(0);
    }
    my $pair = $attr->getNodeValue(); # must pair OAI namespace with schema
    # \Q..\E makes the URLs match literally (an unescaped '.' would match any
    # character). The two URLs must be separated by whitespace, and the
    # location must end at whitespace or at the end of the value.
    unless ( $pair =~ /\A\s*\Q$namespace\E\s+\Q$location\E(?:\s|\z)/ ) {
        $self->log->fail("Error in pairing OAI namespace with schema location, expected: ".
                         "xsi:schemaLocation=\"$namespace $location\" but got $pair");
        return(0);
    }
    return(1);
}
1309
|
|
|
|
|
|
|
|
1310
|
|
|
|
|
|
|
|
1311
|
|
|
|
|
|
|
=head3 check_protocol_version |
1312
|
|
|
|
|
|
|
|
1313
|
|
|
|
|
|
|
Extract the protocol version being used from the Identify response, check that it is |
1314
|
|
|
|
|
|
|
valid and then abort unless 2.0. |
1315
|
|
|
|
|
|
|
|
1316
|
|
|
|
|
|
|
=cut |
1317
|
|
|
|
|
|
|
|
1318
|
|
|
|
|
|
|
# Read the protocolVersion element from the current document (the Identify
# response), store it in $self->protocol_version, and abort unless it is
# '2.0'.
#
# Aborts with an explanatory message when the element is missing or empty,
# when the value is not one of 1.0, 1.1 or 2.0, and when it is a recognised
# version other than 2.0. Logs a pass for 2.0.
sub check_protocol_version {
    my $self=shift;

    # Extract the version number of the validator to run
    my $x = $self->doc->getElementsByTagName('protocolVersion');

    # The node list object is true even when it is empty, so check its
    # length and that the element actually has content.
    my $version_element = ($x and $x->getLength > 0) ? $x->item(0) : undef;
    my $version_text = $version_element ? $version_element->getFirstChild : undef;
    unless ($version_text) {
        $self->abort("Unknown protocol version, failed to extract protocolVersion element from Identify response");
        return; # reached only if abort returns; nothing more can be checked
    }

    my $protocol_version = $version_text->getData;
    if ($protocol_version ne '2.0' and
        $protocol_version ne '1.1' and
        $protocol_version ne '1.0') {
        $self->abort("Invalid protocol version ($protocol_version)");
    }
    $self->protocol_version( $protocol_version );
    if ($protocol_version ne '2.0') {
        $self->abort("Repository reports OAI-PMH protocol version $protocol_version and will not be validated. Guidelines for upgrading to 2.0 can be found at http://www.openarchives.org/OAI/2.0/migration.htm\n\n");
    }
    $self->log->pass("Correctly reports OAI-PMH protocol version 2.0");
}
1338
|
|
|
|
|
|
|
|
1339
|
|
|
|
|
|
|
|
1340
|
|
|
|
|
|
|
=head2 is_verb_response($response,$verb) |
1341
|
|
|
|
|
|
|
|
1342
|
|
|
|
|
|
|
Return true if $response is a response for the specified $verb. |
1343
|
|
|
|
|
|
|
|
1344
|
|
|
|
|
|
|
FIXME -- need better checks! |
1345
|
|
|
|
|
|
|
|
1346
|
|
|
|
|
|
|
=cut |
1347
|
|
|
|
|
|
|
|
1348
|
|
|
|
|
|
|
# Decide whether $response looks like a reply to $verb: its content must
# parse, and the parsed document must contain exactly one element named
# after the verb. Returns 1 in that case and nothing otherwise.
sub is_verb_response {
    my ($self, $response, $verb) = @_;

    # A parse that dies or yields nothing means this is not a valid document
    my $parsed = eval { $self->parser->parse($response->content) };
    return unless $parsed;

    my $matches = $parsed->getElementsByTagName($verb);
    return unless $matches;
    return unless $matches->getLength == 1; # not the one element we expected
    return(1);
}
1358
|
|
|
|
|
|
|
|
1359
|
|
|
|
|
|
|
|
1360
|
|
|
|
|
|
|
=head3 error_elements_include($error_elements,$error_codes) |
1361
|
|
|
|
|
|
|
|
1362
|
|
|
|
|
|
|
Determine whether the list of error elements ($error_elements) includes at least |
1363
|
|
|
|
|
|
|
one of the desired codes. Return string with first matching error code, else |
1364
|
|
|
|
|
|
|
return false/nothing. |
1365
|
|
|
|
|
|
|
|
1366
|
|
|
|
|
|
|
Does a sanity check on $error_elements to check that it is set and has length>0 |
1367
|
|
|
|
|
|
|
before trying to match, so code calling it can simply do a |
1368
|
|
|
|
|
|
|
getElementsByTagName or similar before calling. |
1369
|
|
|
|
|
|
|
|
1370
|
|
|
|
|
|
|
=cut |
1371
|
|
|
|
|
|
|
|
1372
|
|
|
|
|
|
|
# Look through the error elements for one whose 'code' attribute equals any
# of the acceptable codes.
#
# $error_elements - node list of error elements (may be undef or empty)
# $error_codes    - arrayref of acceptable error code strings
#
# Elements are examined in document order and, for each element, the codes
# in the order given. Returns the first acceptable code that matches, or
# nothing if there is no match. Each comparison is noted in the log when
# $self->debug is set.
sub error_elements_include {
    my ($self, $error_elements, $error_codes) = @_;

    # sanity check
    return unless $error_elements;
    my $count = $error_elements->getLength;
    return if $count == 0;

    for my $index (0 .. $count - 1) {
        # An element without a code attribute gets a placeholder value
        my $code = $error_elements->item($index)->getAttribute('code') || 'no-code-attribute';
        for my $wanted (@$error_codes) {
            $self->log->note("$code =? $wanted") if ($self->debug);
            return($wanted) if ($code eq $wanted);
        }
    }
    return;
}
1386
|
|
|
|
|
|
|
|
1387
|
|
|
|
|
|
|
|
1388
|
|
|
|
|
|
|
|
1389
|
|
|
|
|
|
|
=head3 check_error_response($response) |
1390
|
|
|
|
|
|
|
|
1391
|
|
|
|
|
|
|
Given the response to an HTTP request, check whether it is an
OAI-PMH error message. The $response is assumed to be an HTTP success.
Returns true (1) if the response can be parsed and contains at least one
error element; returns nothing if the response cannot be parsed or does
not contain an error element.
1395
|
|
|
|
|
|
|
|
1396
|
|
|
|
|
|
|
FIXME -- need better checks! |
1397
|
|
|
|
|
|
|
|
1398
|
|
|
|
|
|
|
FIXME -- need to merge this functionality in with is_error_response |
1399
|
|
|
|
|
|
|
|
1400
|
|
|
|
|
|
|
=cut |
1401
|
|
|
|
|
|
|
|
1402
|
|
|
|
|
|
|
# Decide whether $response is an OAI-PMH error response: its content must
# parse, and the parsed document must contain at least one error element.
# Returns 1 in that case and nothing otherwise.
sub check_error_response {
    my ($self, $response) = @_;

    # A parse that dies or yields nothing cannot be a valid error response
    my $parsed = eval { $self->parser->parse($response->content) };
    return unless $parsed;

    my $errors = $parsed->getElementsByTagName('error');
    return unless $errors;
    return unless $errors->getLength() > 0; # no error codes so not an error response
    return(1);
}
1412
|
|
|
|
|
|
|
|
1413
|
|
|
|
|
|
|
|
1414
|
|
|
|
|
|
|
=head3 get_earliest_datestamp() |
1415
|
|
|
|
|
|
|
|
1416
|
|
|
|
|
|
|
A new exception check for Version 2.0 raises noRecordsMatch errorcode |
1417
|
|
|
|
|
|
|
if the set of records returned by ListRecords is empty. This requires |
1418
|
|
|
|
|
|
|
that we know the earliest date in the repository. Also check that the |
1419
|
|
|
|
|
|
|
earliest date matches the specified granularity. |
1420
|
|
|
|
|
|
|
|
1421
|
|
|
|
|
|
|
Called only for version 2.0 or greater. |
1422
|
|
|
|
|
|
|
|
1423
|
|
|
|
|
|
|
Since the Identify response has already been validated, we know |
1424
|
|
|
|
|
|
|
there is exactly one earliestDatestamp element in the current document. |
1425
|
|
|
|
|
|
|
Extract this value, check it, and if it looks good then set |
1426
|
|
|
|
|
|
|
$self->earliest_datestamp and return false. |
1427
|
|
|
|
|
|
|
|
1428
|
|
|
|
|
|
|
If there is an error then return string explaining that. |
1429
|
|
|
|
|
|
|
|
1430
|
|
|
|
|
|
|
=cut |
1431
|
|
|
|
|
|
|
|
1432
|
|
|
|
|
|
|
# Extract the earliestDatestamp from the current document (the Identify
# response), check that its form agrees with $self->granularity, and store
# it in $self->earliest_datestamp.
#
# Returns nothing on success. On failure returns a string describing the
# problem and leaves $self->earliest_datestamp unchanged.
sub get_earliest_datestamp {
    my $self=shift;

    my $earliest = $self->doc->getElementsByTagName('earliestDatestamp');
    my $el = $earliest->item(0);
    return("Can't get earliestDatestamp element from Identify response.") unless ($el);
    return("earliestDatestamp element is empty in Identify response.") unless ($el->getFirstChild);

    my $error='';
    my $earliest_datestamp = $el->getFirstChild->getData;
    $self->log->note("Earliest datestamp in repository is $earliest_datestamp") if $self->debug;

    # Capture into lexicals: after a failed match $1..$4 would still hold
    # values from an earlier successful match, so they cannot be trusted to
    # tell whether this datestamp matched. On failure $year is undefined.
    my ($year, $month, $day, $time_part) =
        ( $earliest_datestamp =~ /^([0-9]{4})-([0-9][0-9])-([0-9][0-9])(.*)$/ );
    if (not defined $year) {
        $error="is not in ISO8601 format";
    } elsif ( $time_part eq '' and $self->granularity eq 'seconds') {
        $error="must have seconds granularity (format YYYY-MM-DDThh:mm:ssZ) to match ".
               "the granularity for the repository. The granularity has been set to seconds ".
               "by the granularity element of the Identify response.\n";
    } elsif ( $time_part ne '' and $self->granularity eq 'days') {
        $error="must have days granularity (format YYYY-MM-DD) to match the granularity for ".
               "the repository. The granularity has been set to days by the granularity ".
               "element of the Identify response (or that element is bad/missing).\n";
    } elsif ( $self->granularity eq 'seconds' and $time_part !~ /^T\d\d:\d\d:\d\d(\.\d+)?Z$/ ) {
        $error="does not have the correct format for the time part of the UTCdatetime. The ".
               "overall format must be YYYY-MM-DDThh:mm:ssZ.\n";
    }
    if ($error) {
        # Sanitize for error message
        return("The earliestDatestamp in the identify response (".
               sanitize($earliest_datestamp).") $error");
    } else {
        $self->earliest_datestamp($earliest_datestamp);
        return;
    }
}
1468
|
|
|
|
|
|
|
|
1469
|
|
|
|
|
|
|
|
1470
|
|
|
|
|
|
|
=head3 parse_granularity($granularity_element) |
1471
|
|
|
|
|
|
|
|
1472
|
|
|
|
|
|
|
Parse contents of the granularity element of the Identify response. Returns either |
1473
|
|
|
|
|
|
|
'days', 'seconds' or nothing on failure. Sets $self->granularity if valid, otherwise |
1474
|
|
|
|
|
|
|
does not change setting. |
1475
|
|
|
|
|
|
|
|
1476
|
|
|
|
|
|
|
As of v2.0 the granularity element is mandatory, see: |
1477
|
|
|
|
|
|
|
http://www.openarchives.org/OAI/openarchivesprotocol.html#Identify |
1478
|
|
|
|
|
|
|
|
1479
|
|
|
|
|
|
|
=cut |
1480
|
|
|
|
|
|
|
|
1481
|
|
|
|
|
|
|
# Interpret the granularity element(s) of the Identify response.
#
# $gran - node list of granularity elements (may be undef or empty)
#
# Exactly one element is required. Its text must be 'YYYY-MM-DD' or
# 'YYYY-MM-DDThh:mm:ssZ', which set $self->granularity to 'days' or
# 'seconds' respectively; that value is then returned. In every other case
# a failure is logged, $self->granularity is left alone and nothing is
# returned.
sub parse_granularity {
    my ($self, $gran) = @_;

    unless ($gran and $gran->getLength != 0) {
        $self->log->fail("Missing granularity element");
        return;
    }
    if ($gran->getLength > 1) {
        $self->log->fail("Multiple granularity elements");
        return;
    }

    #schema validation guarantees that there is a spec here
    my $spec = $gran->item(0)->getFirstChild->getData;

    my %granularity_for = (
        'YYYY-MM-DD'           => 'days',
        'YYYY-MM-DDThh:mm:ssZ' => 'seconds',
    );
    if (my $name = $granularity_for{$spec}) {
        $self->granularity($name);
        return($self->granularity);
    }

    $self->log->fail("Bad value for the granularity element '$spec', must be either ".
                     "YYYY-MM-DD or YYYY-MM-DDThh:mm:ssZ");
    return;
}
1505
|
|
|
|
|
|
|
|
1506
|
|
|
|
|
|
|
|
1507
|
|
|
|
|
|
|
=head3 get_datestamp_granularity($datestamp) |
1508
|
|
|
|
|
|
|
|
1509
|
|
|
|
|
|
|
Parse the datestamp supplied and return 'days' if it is valid with granularity |
1510
|
|
|
|
|
|
|
of days, 'seconds' if it is valid for seconds granularity, and nothing if it is not |
1511
|
|
|
|
|
|
|
valid. |
1512
|
|
|
|
|
|
|
|
1513
|
|
|
|
|
|
|
# FIXME - should add more validation |
1514
|
|
|
|
|
|
|
|
1515
|
|
|
|
|
|
|
=cut |
1516
|
|
|
|
|
|
|
|
1517
|
|
|
|
|
|
|
# Classify $datestamp as 'days' (YYYY-MM-DD) or 'seconds'
# (YYYY-MM-DDThh:mm:ssZ, optional fractional seconds). Returns nothing
# when the value is undefined, has neither shape, or has a field outside
# its range. Only simple range checks are made: the day is not checked
# against the length of the particular month.
sub get_datestamp_granularity {
    my $self=shift;
    my ($datestamp)=@_;
    return unless (defined $datestamp);
    if ($datestamp=~/^(\d\d\d\d)-(\d\d)-(\d\d)$/) {
        my ($month,$day)=($2,$3);
        return 'days' if ($month>=1 and $month<=12 and $day>=1 and $day<=31);
    } elsif ($datestamp=~/^(\d\d\d\d)-(\d\d)-(\d\d)T(\d\d):(\d\d):(\d\d)(\.\d+)?Z$/) {
        my ($month,$day,$hour,$min,$sec)=($2,$3,$4,$5,$6);
        # The seconds field is range checked too; 60 is tolerated so that
        # a leap second is not rejected.
        return 'seconds' if ($month>=1 and $month<=12 and $day>=1 and $day<=31
                             and $hour<24 and $min<60 and $sec<=60);
    }
    return;
}
1527
|
|
|
|
|
|
|
|
1528
|
|
|
|
|
|
|
|
1529
|
|
|
|
|
|
|
=head3 is_no_records_match |
1530
|
|
|
|
|
|
|
|
1531
|
|
|
|
|
|
|
Returns true if the current document contains an error code element with the code "noRecordsMatch"
1532
|
|
|
|
|
|
|
|
1533
|
|
|
|
|
|
|
### FIXME - should be merged into an extended is_error_response |
1534
|
|
|
|
|
|
|
|
1535
|
|
|
|
|
|
|
=cut |
1536
|
|
|
|
|
|
|
|
1537
|
|
|
|
|
|
|
# Ask error_elements_include whether any 'error' element of the current
# document ($self->doc) carries the code noRecordsMatch, and hand its
# answer straight back to the caller.
sub is_no_records_match {
    my ($self) = @_;
    my @wanted_codes = ('noRecordsMatch');
    # Assign to a scalar so that the element lookup runs in scalar context
    my $errors = $self->doc->getElementsByTagName('error');
    return( $self->error_elements_include($errors, \@wanted_codes) );
}
1542
|
|
|
|
|
|
|
|
1543
|
|
|
|
|
|
|
|
1544
|
|
|
|
|
|
|
=head3 get_resumption_token() |
1545
|
|
|
|
|
|
|
|
1546
|
|
|
|
|
|
|
See if there is a resumptionToken with this response, return |
1547
|
|
|
|
|
|
|
value if present, empty if not or if there is some other error. |
1548
|
|
|
|
|
|
|
|
1549
|
|
|
|
|
|
|
=cut |
1550
|
|
|
|
|
|
|
|
1551
|
|
|
|
|
|
|
# Return the text of the first resumptionToken element in $self->doc.
# Nothing is returned when the document has no such element or when the
# first one has no child node (an empty element).
sub get_resumption_token {
    my ($self) = @_;

    my $tokens = $self->doc->getElementsByTagName('resumptionToken');
    # No resumptionToken at all
    return unless ( $tokens and $tokens->getLength()!=0 );

    # Only the first token element is considered; its first child is
    # undefined unless the element has content
    my $content_node = $tokens->item(0)->getFirstChild();
    return unless ($content_node);

    return($content_node->getData());
}
1569
|
|
|
|
|
|
|
|
1570
|
|
|
|
|
|
|
|
1571
|
|
|
|
|
|
|
=head3 is_error_response($details) |
1572
|
|
|
|
|
|
|
|
1573
|
|
|
|
|
|
|
Look at the parsed response in $self->doc to see if it is an error response, |
1574
|
|
|
|
|
|
|
parse data and return true if it is. |
1575
|
|
|
|
|
|
|
|
1576
|
|
|
|
|
|
|
Returns true (a printable string containing the error messages) if response was a valid |
1577
|
|
|
|
|
|
|
OAI_PMH error response, codes in %$details if a hash reference is passed in. |
1578
|
|
|
|
|
|
|
|
1579
|
|
|
|
|
|
|
=cut |
1580
|
|
|
|
|
|
|
|
1581
|
|
|
|
|
|
|
# Report whether $self->doc contains any 'error' elements.
#
# With at least one error element the return value is a string with one
# "[code: message] " (or "[code] " when the element has no content) entry
# per element, in document order. Each code is also recorded in %$details
# when a hash reference is supplied. Nothing is returned when there are
# no error elements.
sub is_error_response {
    my ($self, $details) = @_;
    # Use a throw-away hash unless the caller supplied one
    $details = {} if (ref($details) ne 'HASH');

    my $errors = $self->doc->getElementsByTagName('error');
    return unless ($errors and $errors->getLength()>=1);

    my @entries;
    my $last_index = $errors->getLength - 1;
    foreach my $index (0 .. $last_index) {
        my $element = $errors->item($index);
        my $code = $element->getAttribute("code");
        my $text_node = $element->getFirstChild();
        if ($text_node) {
            $details->{$code} = $text_node->getData();
            push(@entries, "[$code: $details->{$code}] ");
            next;
        }
        # No content: warn, except for noSetHierarchy where the error
        # code really is sufficient
        if ($code ne 'noSetHierarchy') {
            $self->log->warn("No human readable message included in error element for ".
                             "$code error, this is discouraged");
        }
        $details->{$code} = '[NO MESSAGE RETURNED]';
        push(@entries, "[$code] ");
    }
    return(join('', @entries));
}
1611
|
|
|
|
|
|
|
|
1612
|
|
|
|
|
|
|
|
1613
|
|
|
|
|
|
|
=head3 get_admin_email() |
1614
|
|
|
|
|
|
|
|
1615
|
|
|
|
|
|
|
Extract admin email from a parsed Identify response (in $self->doc).
1616
|
|
|
|
|
|
|
Also note that the email target may have been set via form option |
1617
|
|
|
|
|
|
|
|
1618
|
|
|
|
|
|
|
Returns the pair of ($email,$error) where $email is the combined |
1619
|
|
|
|
|
|
|
set of email addresses (comma separated). $error will be undef |
1620
|
|
|
|
|
|
|
or a string with error message to users. |
1621
|
|
|
|
|
|
|
|
1622
|
|
|
|
|
|
|
=cut |
1623
|
|
|
|
|
|
|
|
1624
|
|
|
|
|
|
|
# Collect the addresses from all adminEmail elements of $self->doc.
#
# Returns the comma separated addresses on success (and logs a pass).
# Returns (undef,$msg) as soon as bad_admin_email objects to an address,
# and undef after logging a failure when there is no adminEmail element
# or when any adminEmail element has no content. A "mailto:" prefix is
# stripped from an address with a warning.
sub get_admin_email {
    my $self=shift;

    my $adminEmailElements = $self->doc->getElementsByTagName('adminEmail');
    my $n = $adminEmailElements->getLength;
    unless ($n > 0) {
        $self->log->fail("No adminEmail element!");
        return(undef);
    }

    my @emails=();
    for my $i (0 .. $n-1) {
        # Every element is checked for content, not just the first one,
        # so that a later empty element is reported instead of causing a
        # fatal method call on undef.
        my $text_node = $adminEmailElements->item($i)->getFirstChild;
        unless ($text_node) {
            $self->log->fail("adminEmail element is empty!");
            return(undef);
        }
        my $e=$text_node->getData;
        if ($e=~s/mailto://g) {
            $self->log->warn("Stripped mailto: prefix from adminEmail address, this ".
                             "should not be included.");
        }
        if (my $msg=$self->bad_admin_email($e)) {
            return(undef,$msg);
        }
        push(@emails,$e);
    }

    my $email=join(',',@emails);
    $self->log->pass("Administrator email address is '$email'");
    return($email);
}
1656
|
|
|
|
|
|
|
|
1657
|
|
|
|
|
|
|
|
1658
|
|
|
|
|
|
|
=head3 bad_admin_email($admin_email) |
1659
|
|
|
|
|
|
|
|
1660
|
|
|
|
|
|
|
Check for some stupid email addresses to avoid so much bounced email. |
1661
|
|
|
|
|
|
|
Returns a string (True) if bad, else nothing. |
1662
|
|
|
|
|
|
|
|
1663
|
|
|
|
|
|
|
=cut |
1664
|
|
|
|
|
|
|
|
1665
|
|
|
|
|
|
|
# Screen $admin_email for obviously unusable values.
#
# Returns a short (true) description and logs a failure when the address
# is at localhost or does not look like local-part@domain.tld. Returns
# nothing for an acceptable address. This is a plausibility check only,
# it is not a full address syntax validation.
sub bad_admin_email {
    my $self=shift;
    my ($admin_email)=@_;
    # Host names are case insensitive, so LOCALHOST is just as local
    if ($admin_email=~/\@localhost$/i) {
        $self->log->fail("adminEmail '$admin_email' is local. This must be corrected to a ".
                         "valid globally resolvable email address before tests can continue");
        return("local adminEmail");
    # The local part may be a single character and may include '+'; the
    # top level domain may be written in either case.
    } elsif ($admin_email!~/^\w[\w\-\.\+]*\@[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,}$/) {
        $self->log->fail("adminEmail '$admin_email' looks bogus. This must be corrected to ".
                         "a valid email address before tests can continue");
        return("looks like bogus adminEmail");
    }
    return;
}
1679
|
|
|
|
|
|
|
|
1680
|
|
|
|
|
|
|
|
1681
|
|
|
|
|
|
|
=head2 METHODS FOR MAKING REQUESTS AND PARSING RESPONSES |
1682
|
|
|
|
|
|
|
|
1683
|
|
|
|
|
|
|
=head3 make_request_and_validate($verb, $req) |
1684
|
|
|
|
|
|
|
|
1685
|
|
|
|
|
|
|
Given the base URL that we are validating, the Verb that we are checking |
1686
|
|
|
|
|
|
|
and the complete query to be sent to the OAI server, get the response to |
1687
|
|
|
|
|
|
|
the verb. Validation has already been done, so we need only do some |
1688
|
|
|
|
|
|
|
special checks here. Return the response to the OAI verb, |
1689
|
|
|
|
|
|
|
or undef if the OAI server failed to respond to that verb. |
1690
|
|
|
|
|
|
|
|
1691
|
|
|
|
|
|
|
Side effects: errors may be printed and error_count bumped. |
1692
|
|
|
|
|
|
|
If the verb involved is "Identify" then set the version number and the |
1693
|
|
|
|
|
|
|
email address, assuming that some response has been obtained. |
1694
|
|
|
|
|
|
|
|
1695
|
|
|
|
|
|
|
Simple well-formedness is checked by this routine. An undef exit means |
1696
|
|
|
|
|
|
|
that any calling code should fail the test but need not report 'no response'. |
1697
|
|
|
|
|
|
|
|
1698
|
|
|
|
|
|
|
If the response is true then $self->doc contains a parsed XML |
1699
|
|
|
|
|
|
|
document. |
1700
|
|
|
|
|
|
|
|
1701
|
|
|
|
|
|
|
This is the usual way we make requests with integrated parsing and error |
1702
|
|
|
|
|
|
|
checking. This method is built around calls to L</make_request($url,$post_data)> and
1703
|
|
|
|
|
|
|
L</parse_response($request_url,$response,$xml_reason)>.
1704
|
|
|
|
|
|
|
|
1705
|
|
|
|
|
|
|
=cut |
1706
|
|
|
|
|
|
|
|
1707
|
|
|
|
|
|
|
# Issue the request $req via make_request, parse the response via
# parse_response and run the responseDate and schema name checks.
#
# $verb is used only in the failure message. Returns the response object
# on success. Returns nothing, after logging a failure, when the request
# was not successful or the response could not be parsed.
sub make_request_and_validate {
    my ($self, $verb, $req) = @_;

    my $response = $self->make_request($req);

    if (not $response->is_success) {
        # Report the HTTP level details to help diagnose the problem
        my $http_status = $response->status_line;
        my $http_age = $response->current_age;
        my $http_lifetime = $response->freshness_lifetime;
        my $http_fresh = $response->is_fresh;
        $self->log->fail("Server failed to respond to the $verb request (HTTP header ".
                         "values: status=$http_status, age=$http_age, lifetime=$http_lifetime, ".
                         "is fresh:=$http_fresh)");
        return;
    }

    if (not $self->parse_response($req, $response)) {
        $self->log->fail("Failed to parse response");
        return;
    }

    # Checks on the parsed document: responseDate format, then the
    # schema name
    $self->check_response_date($req,$self->doc);
    $self->check_schema_name($req,$self->doc);

    return($response);
}
1736
|
|
|
|
|
|
|
|
1737
|
|
|
|
|
|
|
|
1738
|
|
|
|
|
|
|
=head3 make_request($url,$post_data) |
1739
|
|
|
|
|
|
|
|
1740
|
|
|
|
|
|
|
Routine to GET or POST a request, handle 503's, and return the response |
1741
|
|
|
|
|
|
|
|
1742
|
|
|
|
|
|
|
Second parameter, $post_data, must be a hashref of POST data to indicate that
1743
|
|
|
|
|
|
|
the request should be an HTTP POST request instead of a GET. |
1744
|
|
|
|
|
|
|
|
1745
|
|
|
|
|
|
|
=cut |
1746
|
|
|
|
|
|
|
|
1747
|
|
|
|
|
|
|
# Send a request for $url and return the last response received.
#
# A true $post_data (hashref of form fields) selects POST, otherwise GET
# is used. A 503 response is retried after a pause, a 302 response is
# followed with a fresh request to its Location, and a 501 response
# leads to $self->abort. Every response received counts towards
# $self->max_retries. Each response is written to a file under /tmp when
# $self->save_all_responses is set. A final response with an
# X-Content-Range header is reported as oversize and has its content
# blanked before being returned.
sub make_request {
    my ($self, $url, $post_data) = @_;

    # Is this https and do we allow that?
    if (is_https_uri($url)) {
        $self->uses_https(1);
        unless ($self->allow_https) {
            $self->abort("URI $url is https. Use of https URIs is not allowed ".
                         "by the OAI-PMH v2.0 specification");
        }
    }

    my $request;
    if ($post_data) {
        # Nice string to report, keys in alpha order for consistent behavior
        my $content_msg = join('', map { "$_:$post_data->{$_} " } sort keys(%$post_data));
        $self->log->request($url,'POST',$content_msg);
        $request = POST($url,'Content'=>$post_data);
    } else {
        $self->log->request($url,'GET');
        $request = GET($url);
    }

    my $response;
    my $tries = 0;
    ATTEMPT: while (1) {
        $response = $self->ua->request($request);

        # Write response if requested
        if ($self->save_all_responses) {
            my $response_file = "/tmp/".$self->run_id.".".$self->response_number;
            open(my $fh,'>',$response_file) or $self->abort("Can't write response $response_file: $!");
            print {$fh} $response->content();
            $self->log->note("Response saved as $response_file") if ($self->debug);
            close($fh);
            $self->{response_number}++;
        }

        $tries++;
        if ($tries > $self->max_retries) {
            $self->abort("Too many 503 Retry-After or 302 Redirect responses received in a row");
        }

        # Only 503 and 302 lead to another attempt
        my $status = $response->code;
        if ($status eq '503') {
            # 503 (Retry-After), expect to get a time too
            $self->uses_503(1);
            my $retry_after = $response->header("Retry-After");
            if (not defined $retry_after) {
                $self->log->warn("503 response without Retry-After time, will wait 10s");
                sleep 10;
            } elsif ($retry_after!~/^\d+$/) {
                $self->log->fail("503 response with bad (non-numeric) Retry-After time, ".
                                 "will wait 10s");
                sleep 10;
            } elsif ($retry_after > 3600) {
                $self->abort("503 response with Retry-After > 1hour (3600s), aborting");
            } else {
                # Wait one second longer than we were asked to
                my $sleep_time = 1 + $retry_after;
                $self->log->note("Status: ".$status.
                                 " -- going to sleep for $sleep_time seconds.");
                sleep $sleep_time;
            }
        } elsif ($status eq '302') {
            # 302 (Found) redirect
            my $loc = $response->header('Location');
            unless ($loc=~m%^http://([^\?&]+)%) {
                # Not a plain http URI: acceptable only if it is https
                # and https is allowed
                if (is_https_uri($loc)) {
                    $self->uses_https(1);
                    unless ($self->allow_https) {
                        $self->abort("Redirect URI specified in 302 response is https. Use of ".
                                     "https URIs is not allowed by the OAI-PMH v2.0 specification");
                    }
                } else {
                    $self->abort("Bad redirect URI specified in 302 response");
                }
            }
            # Make new request, but don't do POST if new Location includes ?
            my $repeat_post = ($post_data and $loc!~/\?/);
            $request = $repeat_post ? POST($loc,'Content'=>$post_data) : GET($loc);
        } elsif ($status eq '501') {
            $self->abort("Got 501 Not Implemented response which may either have come from ".
                         "the server or have been generated within the validator because the ".
                         "request type (perhaps https) is not supported.");
        } else {
            last ATTEMPT;
        }
    }

    # Check for oversize limit (indicated by X-Content-Range header)
    my $content_range = $response->header('X-Content-Range');
    if (defined $content_range) {
        $self->log->fail("Response to <$url> exceeds maximum size limit (".$self->max_size." bytes), discarded. ".
                         "While this limit is set only in this validation program you should not use excessively ".
                         "large responses as service providers will likely have problems parsing the XML. You ".
                         "should split the responses using the resumptionToken mechanism. (X-Content-Range: '".
                         $content_range."' Content-Length: '".$response->content_length."')\n");
        $response->content('');
    }
    return $response;
}
1859
|
|
|
|
|
|
|
|
1860
|
|
|
|
|
|
|
|
1861
|
|
|
|
|
|
|
=head3 parse_response($request_url,$response,$xml_reason) |
1862
|
|
|
|
|
|
|
|
1863
|
|
|
|
|
|
|
Attempt to parse the HTTP response $response, examining both the response code |
1864
|
|
|
|
|
|
|
and then attempting to parse the content as XML. |
1865
|
|
|
|
|
|
|
|
1866
|
|
|
|
|
|
|
If $xml_reason is specified then this is added to the failure message, if |
1867
|
|
|
|
|
|
|
nothing is specified then a standard message about UTF-8 issues is |
1868
|
|
|
|
|
|
|
added. |
1869
|
|
|
|
|
|
|
|
1870
|
|
|
|
|
|
|
Returns true on success and sets $self->doc with the parsed XML document. |
1871
|
|
|
|
|
|
|
If unsuccessful, log an error message, bump the error count, and |
1872
|
|
|
|
|
|
|
return false. |
1873
|
|
|
|
|
|
|
|
1874
|
|
|
|
|
|
|
=cut |
1875
|
|
|
|
|
|
|
|
1876
|
|
|
|
|
|
|
# Parse the content of $response as XML with $self->parser.
#
# Returns 1 and stores the parsed document with $self->doc(...) on
# success. Returns nothing after logging a warning when $response is
# undefined or not a reference, or when its status code starts with 4 or
# 5. Also returns nothing when parsing fails; in that case a warning
# (ending with $xml_reason, or a standard hint about UTF-8 problems if
# none was given) is logged only if $request_url is true.
sub parse_response {
    my ($self, $request_url, $response, $xml_reason) = @_;
    $xml_reason = '' if (not defined $xml_reason);

    # Need a response object to work with
    unless (defined($response) and ref($response)) {
        $self->log->warn("Bad response from server");
        return;
    }

    # Unpack the bits we want from response object
    my $code = $response->code;
    my $content = $response->content;

    # Check return code (if given)
    if ($code and $code=~/^[45]/) {
        $self->log->warn("Bad HTTP status code from server: $code");
        return;
    }

    # Check content
    my $doc = eval { $self->parser->parse($content) };
    if ($doc) {
        $self->doc( $doc );
        return(1);
    }

    # Parse failed, tidy up the parser's message before reporting it
    my $err = $@;
    $err=~s/^\s+//;
    $err=~s%at\s+/usr/lib/perl.*%%i; #trim stuff about our perl installation
    if ($request_url) {
        if (not $xml_reason) {
            $xml_reason="The most common reason for malformed responses is illegal bytes in ".
                        "UTF-8 streams (e.g. the inclusion of Latin1 characters with codes>127 ".
                        "without creating proper UTF-8 mutli-byte sequences). You might find ".
                        "the utf8conditioner, found on the OAI tools page helpful for debugging.";
        }
        $self->log->warn("Malformed response: $err. $xml_reason");
    }
    return;
}
1918
|
|
|
|
|
|
|
|
1919
|
|
|
|
|
|
|
|
1920
|
|
|
|
|
|
|
=head2 UTILITY FUNCTIONS |
1921
|
|
|
|
|
|
|
|
1922
|
|
|
|
|
|
|
=head3 html_escape($str) |
1923
|
|
|
|
|
|
|
|
1924
|
|
|
|
|
|
|
Escapes characters which have special meanings in HTML |
1925
|
|
|
|
|
|
|
|
1926
|
|
|
|
|
|
|
=cut |
1927
|
|
|
|
|
|
|
|
1928
|
|
|
|
|
|
|
# Return a copy of $string with the characters & < > " ' replaced by
# HTML entities. An undefined value is returned unchanged.
sub html_escape {
    my $string = shift;
    return $string unless (defined $string);
    # The apostrophe uses a numeric reference because &apos; is not
    # defined in HTML 4.
    my %entity_for = (
        '&' => '&amp;',
        '<' => '&lt;',
        '>' => '&gt;',
        '"' => '&quot;',
        "'" => '&#39;',
    );
    # A single pass means the '&' of an entity we have just inserted is
    # never escaped a second time.
    $string =~ s/([&<>"'])/$entity_for{$1}/g;
    return $string;
}
1937
|
|
|
|
|
|
|
|
1938
|
|
|
|
|
|
|
=head3 one_year_before($date) |
1939
|
|
|
|
|
|
|
|
1940
|
|
|
|
|
|
|
Assumes properly formatted date, decrements year by one |
1941
|
|
|
|
|
|
|
via string manipulation and returns date. |
1942
|
|
|
|
|
|
|
|
1943
|
|
|
|
|
|
|
=cut |
1944
|
|
|
|
|
|
|
|
1945
|
|
|
|
|
|
|
# Return $date with its leading four digit year reduced by one; the rest
# of the string is left as it is. No calendar arithmetic is done, so
# YYYY-02-29 is not adjusted. A value that is undefined or does not
# start with four digits is returned unchanged.
sub one_year_before {
    my ($date)=@_;
    return($date) unless (defined $date and $date =~ /^([0-9]{4})/);
    my $year_minus_one = sprintf('%04d',($1 - 1)); #make sure we get leading zeros
    # Replace exactly the four year characters at the start
    substr($date,0,4,$year_minus_one);
    return($date);
}
1952
|
|
|
|
|
|
|
|
1953
|
|
|
|
|
|
|
=head3 url_encode($str) |
1954
|
|
|
|
|
|
|
|
1955
|
|
|
|
|
|
|
Escape/encode any characters that aren't in the small safe set for URLs |
1956
|
|
|
|
|
|
|
|
1957
|
|
|
|
|
|
|
=cut |
1958
|
|
|
|
|
|
|
|
1959
|
|
|
|
|
|
|
# Percent-encode $str for use in a URL query. ASCII letters, digits,
# '_', '/', ',' and '-' are left alone, a space becomes '+', and every
# other character becomes %XX. A character string is converted to UTF-8
# octets first so that each escape is always a single octet.
sub url_encode {
    my $str=shift;
    # Without this a character above 0xFF would produce an escape with
    # more than two hex digits, which is not a valid percent-encoding.
    utf8::encode($str) if (utf8::is_utf8($str));
    # Explicit ASCII set rather than \w, which also matches non-ASCII
    # letters in character strings and would leave them unescaped.
    $str =~ s/([^A-Za-z0-9_\/\,\- ])/sprintf("%%%02X",ord($1))/eg;
    $str =~ tr/ /+/;
    return($str);
}
1965
|
|
|
|
|
|
|
|
1966
|
|
|
|
|
|
|
|
1967
|
|
|
|
|
|
|
=head3 is_https_uri($uri) |
1968
|
|
|
|
|
|
|
|
1969
|
|
|
|
|
|
|
Return true if the URI is an https URI, false otherwise. |
1970
|
|
|
|
|
|
|
|
1971
|
|
|
|
|
|
|
=cut |
1972
|
|
|
|
|
|
|
|
1973
|
|
|
|
|
|
|
# Return true if $uri starts with the https scheme, false otherwise
# (including when $uri is undefined). The scheme is matched without
# regard to case, so "HTTPS://..." is recognized as https too.
sub is_https_uri {
    my $uri=shift;
    return unless (defined $uri);
    return($uri=~m%^https:%i);
}
1977
|
|
|
|
|
|
|
|
1978
|
|
|
|
|
|
|
|
1979
|
|
|
|
|
|
|
=head3 sanitize($str) |
1980
|
|
|
|
|
|
|
|
1981
|
|
|
|
|
|
|
Return a sanitized version of $str that doesn't contain odd |
1982
|
|
|
|
|
|
|
characters and is not over 80 chars long. Will have the
1983
|
|
|
|
|
|
|
string '(sanitized)' appended if changed. |
1984
|
|
|
|
|
|
|
|
1985
|
|
|
|
|
|
|
=cut |
1986
|
|
|
|
|
|
|
|
1987
|
|
|
|
|
|
|
# Make $str safe to show in a message: every character outside the
# allowed set (word characters, space and -:;.!@#%^*()) becomes '_' and
# the result is cut to at most 80 characters. The marker '(sanitized)'
# is appended whenever the result differs from the input.
sub sanitize {
    my ($str)=@_;
    (my $clean = $str) =~ s/[^\w\-:;.!@#%^*\(\) ]/_/g;
    $clean = substr($clean,0,80);
    return($clean) if ($clean eq $str);
    return($clean.'(sanitized)');
}
1997
|
|
|
|
|
|
|
|
1998
|
|
|
|
|
|
|
|
1999
|
|
|
|
|
|
|
=head1 SUPPORT |
2000
|
|
|
|
|
|
|
|
2001
|
|
|
|
|
|
|
Please report any bugs or questions about validation via the
2002
|
|
|
|
|
|
|
OAI-PMH discussion list at L. |
2003
|
|
|
|
|
|
|
Be sure to make it clear that you are talking about the |
2004
|
|
|
|
|
|
|
HTTP::OAIPMH::Validator module. |
2005
|
|
|
|
|
|
|
|
2006
|
|
|
|
|
|
|
=head1 AUTHORS |
2007
|
|
|
|
|
|
|
|
2008
|
|
|
|
|
|
|
Simeon Warner, Donna Bergmark |
2009
|
|
|
|
|
|
|
|
2010
|
|
|
|
|
|
|
=head1 HISTORY |
2011
|
|
|
|
|
|
|
|
2012
|
|
|
|
|
|
|
This module is based on an OAI-PMH validator first written by Donna Bergmark |
2013
|
|
|
|
|
|
|
(Cornell University) in 2001-01 for the OAI-PMH validation and registration |
2014
|
|
|
|
|
|
|
service (L). |
2015
|
|
|
|
|
|
|
Simeon Warner (Cornell University) took over the validator and operation of |
2016
|
|
|
|
|
|
|
the registration service in 2004-01, and then did a significant tidy/rework |
2017
|
|
|
|
|
|
|
of the code. That code ran the validation and registration service with |
2018
|
|
|
|
|
|
|
few changes through 2015-01. Some of the early work on the OAI-PMH validation |
2019
|
|
|
|
|
|
|
service was supported through NSF award number 0127308. |
2020
|
|
|
|
|
|
|
|
2021
|
|
|
|
|
|
|
Code was abstracted into this module 2015-01 by Simeon Warner and is |
2022
|
|
|
|
|
|
|
used for the OAI-PMH validation and registration service on |
2023
|
|
|
|
|
|
|
L. |
2024
|
|
|
|
|
|
|
|
2025
|
|
|
|
|
|
|
=head1 COPYRIGHT |
2026
|
|
|
|
|
|
|
|
2027
|
|
|
|
|
|
|
Copyright 2001..2017 by Simeon Warner, Donna Bergmark. |
2028
|
|
|
|
|
|
|
|
2029
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify it under |
2030
|
|
|
|
|
|
|
the same terms as Perl itself. |
2031
|
|
|
|
|
|
|
|
2032
|
|
|
|
|
|
|
=cut |
2033
|
|
|
|
|
|
|
|
2034
|
|
|
|
|
|
|
1; |