| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package HTTP::OAIPMH::Validator; |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
=head1 NAME |
|
4
|
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
HTTP::OAIPMH::Validator - OAI-PMH validator class |
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
Validation suite for OAI-PMH data providers that checks for responses |
|
10
|
|
|
|
|
|
|
in accord with OAI-PMH v2 |
|
11
|
|
|
|
|
|
|
L<http://www.openarchives.org/OAI/2.0/openarchivesprotocol.htm>. |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
Typical use: |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
use HTTP::OAIPMH::Validator; |
|
16
|
|
|
|
|
|
|
use Try::Tiny; |
|
17
|
|
|
|
|
|
|
my $val = HTTP::OAIPMH::Validator->new( base_url=>'http://example.com/oai' ); |
|
18
|
|
|
|
|
|
|
try { |
|
19
|
|
|
|
|
|
|
$val->run_complete_validation; |
|
20
|
|
|
|
|
|
|
} catch { |
|
21
|
|
|
|
|
|
|
warn "oops, validation didn't run to completion: $_\n"; |
|
22
|
|
|
|
|
|
|
}; |
|
23
|
|
|
|
|
|
|
print "Validation status of data provider ".$val->base_url." is ".$val->status."\n"; |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
=cut |
|
26
|
|
|
|
|
|
|
|
|
27
|
1
|
|
|
1
|
|
37934
|
use strict; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
47
|
|
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
our $VERSION = '1.05'; |
|
30
|
|
|
|
|
|
|
|
|
31
|
1
|
|
|
1
|
|
3
|
use base qw(Class::Accessor::Fast); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
476
|
|
|
32
|
1
|
|
|
1
|
|
2335
|
use Data::UUID; |
|
|
1
|
|
|
|
|
566
|
|
|
|
1
|
|
|
|
|
49
|
|
|
33
|
1
|
|
|
1
|
|
361
|
use Date::Manip; |
|
|
1
|
|
|
|
|
100517
|
|
|
|
1
|
|
|
|
|
119
|
|
|
34
|
1
|
|
|
1
|
|
397
|
use HTTP::Request; # for rendering http queries |
|
|
1
|
|
|
|
|
732
|
|
|
|
1
|
|
|
|
|
20
|
|
|
35
|
1
|
|
|
1
|
|
5
|
use HTTP::Headers; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
18
|
|
|
36
|
1
|
|
|
1
|
|
431
|
use HTTP::Request::Common; # makes POST easier |
|
|
1
|
|
|
|
|
1597
|
|
|
|
1
|
|
|
|
|
48
|
|
|
37
|
1
|
|
|
1
|
|
5
|
use HTTP::Status; # for checking error codes |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
218
|
|
|
38
|
1
|
|
|
1
|
|
667
|
use LWP::UserAgent; # send http requests |
|
|
1
|
|
|
|
|
9853
|
|
|
|
1
|
|
|
|
|
27
|
|
|
39
|
1
|
|
|
1
|
|
494
|
use LWP::Protocol::https; # explicit include so we fail without https support |
|
|
1
|
|
|
|
|
67356
|
|
|
|
1
|
|
|
|
|
43
|
|
|
40
|
1
|
|
|
1
|
|
7
|
use URI::Escape; # escape special characters |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
53
|
|
|
41
|
1
|
|
|
1
|
|
764
|
use XML::DOM; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
use HTTP::OAIPMH::Log; |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
=head2 METHODS |
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
=head3 new(%args) |
|
47
|
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
Create new HTTP::OAIPMH::Validator object and initialize counters. |
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
The following instance variables may be set via %args and have read-write |
|
51
|
|
|
|
|
|
|
accessors (via L<Class::Accessor::Fast>): |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
base_url - base URL of the data provider being validated |
|
54
|
|
|
|
|
|
|
run_id - UUID identifying the run (will be generated if none supplied) |
|
55
|
|
|
|
|
|
|
protocol_version - protocol version supported |
|
56
|
|
|
|
|
|
|
admin_email - admin email extracted from Identify response |
|
57
|
|
|
|
|
|
|
granularity - datestamp granularity (defaults to 'days', else 'seconds') |
|
58
|
|
|
|
|
|
|
uses_https - set true if the validator sees an https URL at any stage |
|
59
|
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
debug - set true to add extra debugging output |
|
61
|
|
|
|
|
|
|
log - logging object (usually L<HTTP::OAIPMH::Log>) |
|
62
|
|
|
|
|
|
|
parser - XML DOM parser instance |
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
identify_response - string of identify response (used for registration record) |
|
65
|
|
|
|
|
|
|
earliest_datestamp - value extracted from earliestDatestamp in Identify response |
|
66
|
|
|
|
|
|
|
namespace_id - if the oai-identifier is used then this records the namespace identifier extracted |
|
67
|
|
|
|
|
|
|
set_names - array of all the set names reported in listSets |
|
68
|
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
example_record_id - example id used for tests that require a specific identifier |
|
70
|
|
|
|
|
|
|
example_set_spec - example setSpec ("&set=name") used for tests that require a set |
|
71
|
|
|
|
|
|
|
example_metadata_prefix - example metadataPrefix which defaults to 'oai_dc' |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=cut |
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
# Generate read-write accessors (via the Class::Accessor::Fast base class)
# for every instance attribute of the validator.
__PACKAGE__->mk_accessors(
    qw(base_url protocol_version),
    qw(admin_email granularity uses_503 uses_https),
    qw(debug parser run_id ua allow_https doc save_all_responses),
    qw(response_number http_timeout max_retries max_size),
    qw(protocol guidelines),
    qw(identify_response earliest_datestamp namespace_id set_names),
    qw(example_record_id example_set_spec example_metadata_prefix),
    qw(log status),
);
|
84
|
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
sub new {
    # Constructor. May be called on the class or on an existing instance;
    # remaining arguments are name => value pairs that override the defaults.
    my ($this, @args) = @_;
    my $class = ref($this) || $this;

    my %defaults = (
        base_url         => undef,
        protocol_version => undef,

        # Repository features extracted
        granularity => 'days',    # can also be "seconds"
        uses_503    => 0,         # becomes true if a 503 response is ever seen
        uses_https  => 0,         # becomes true if https is ever used

        # Control
        debug              => 0,
        parser             => XML::DOM::Parser->new(),
        run_id             => undef,
        ua                 => undef,
        allow_https        => 0,            # allow https URIs
        doc                => undef,        # current parsed XML document
        save_all_responses => 0,            # true to save all HTTP responses
        response_number    => 1,            # initial response number
        http_timeout       => 600,
        max_retries        => 5,            # number of 503's in a row that we will accept
        max_size           => 100000000,    # max response size in bytes (100MB)
        protocol           => 'http://www.openarchives.org/OAI/2.0/openarchivesprotocol.htm',    # URL of protocol spec
        guidelines         => 'http://www.openarchives.org/OAI/2.0/guidelines-repository.htm',   # URL of repository guidelines doc

        # Results
        namespace_id            => undef,
        set_names               => [],
        example_record_id       => undef,
        example_set_spec        => undef,
        example_metadata_prefix => 'oai_dc',
        log                     => HTTP::OAIPMH::Log->new(),
        status                  => 'unknown',
    );

    # Caller-supplied pairs come last so that they win over the defaults.
    my $self = bless { %defaults, @args }, $class;

    # Fill in a run id and a user agent unless the caller provided them.
    $self->setup_run_id     unless $self->run_id;
    $self->setup_user_agent unless $self->ua;
    return $self;
}
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=head3 setup_run_id() |
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
Set a UUID for the run_id. |
|
127
|
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
=cut |
|
129
|
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
sub setup_run_id {
    # Create a fresh UUID with Data::UUID and store its lower-cased string
    # form in run_id. Returns whatever the run_id accessor returns.
    my ($self) = @_;
    my $generator = Data::UUID->new;
    my $uuid      = $generator->create;
    return $self->run_id( lc( $generator->to_string($uuid) ) );
}
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
=head3 setup_user_agent() |
|
137
|
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
Setup L<LWP::UserAgent> for the validator. |
|
139
|
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
=cut |
|
141
|
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
sub setup_user_agent {
    # Build the LWP::UserAgent used to render HTTP requests, configure it
    # from this object's settings, and store it in the ua attribute.
    my ($self) = @_;

    my $agent = LWP::UserAgent->new();

    # Response timeout in seconds, taken from http_timeout (new() defaults it to 600)
    $agent->timeout( $self->http_timeout );

    # Response size limit ##seems to break http://eprints.soton.ac.uk/perl/oai2 [Simeon/2005-06-06]
    $agent->max_size( $self->max_size );

    # No automatic redirects: the validator handles redirects manually
    $agent->requests_redirectable( [] );

    # Identify ourselves
    $agent->agent('OAIPMH_Validator');

    # Default From: value -> points at the google group for discussion
    $agent->from('https://groups.google.com/d/forum/oai-pmh');

    return $self->ua($agent);
}
|
152
|
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
=head3 abort($msg) |
|
155
|
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
Special purpose "die" routine because tests cannot continue. Logs |
|
157
|
|
|
|
|
|
|
failure and then dies. |
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
=cut |
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
sub abort {
    # Special purpose "die" used when the tests cannot continue.
    #
    # Arguments:
    #   $msg - explanation of why the run is being aborted
    #
    # Logs 'ABORT: <msg>' as a failure, sets status to 'FAILED', then dies
    # with 'ABORT: <msg>' followed by a single newline. Never returns.
    my ($self, $msg) = @_;

    # Callers commonly pass messages that already end in "\n". Strip
    # trailing whitespace so that the log entry has no stray newline and
    # the die text ends in exactly one. Treat a missing message as empty.
    $msg = '' unless defined $msg;
    $msg =~ s/\s+\z//;

    my $text = 'ABORT: ' . $msg;
    $self->log->fail($text);
    $self->status('FAILED');
    die( $text . "\n" );
}
|
168
|
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
=head3 run_complete_validation($skip_identify) |
|
171
|
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
Run all tests for a complete validation and return true if the data provider passes, |
|
173
|
|
|
|
|
|
|
false otherwise. All actions are logged and may be accessed to provide a report |
|
174
|
|
|
|
|
|
|
(including warnings that do not indicate failure) after the run. |
|
175
|
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
Arguments: |
|
177
|
|
|
|
|
|
|
$skip_identify - set true to skip the test_identify() step |
|
178
|
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
=cut |
|
180
|
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
sub run_complete_validation {
    # Run every test in sequence and record the overall outcome.
    #
    # Arguments:
    #   $skip_identify - set true to skip the test_identify() step
    #
    # Sets status to 'COMPLIANT' or 'FAILED' and returns true only if the
    # log holds no failures at the end of the run. Individual tests may
    # die (see abort()), in which case this method does not return.
    my $self=shift;
    my ($skip_identify)=@_;

    $self->response_number(1);
    $self->test_identify unless ($skip_identify);
    $self->test_list_sets;
    $self->test_list_identifiers;

    my ($formats, $gotDC) = $self->test_list_metadata_formats;

    # If the repository doesn't support oai_dc then this is a failure (because
    # the standard demands it) but see whether we can find another metadataPrefix
    # in order to continue the tests
    if ( $gotDC ) {
        $self->log->pass("Data provider supports oai_dc metadataPrefix");
    } else {
        # Take the first listed format that actually has text content. An
        # empty element has no first child, so calling getData on it
        # unguarded would die instead of reporting a failure.
        my $alt_prefix;
        if ($formats) {
            foreach my $i (0 .. $formats->getLength()-1) {
                my $text = $formats->item($i)->getFirstChild;
                next unless ($text);
                $alt_prefix = $text->getData;
                last;
            }
        }
        if (defined $alt_prefix) {
            $self->example_metadata_prefix( $alt_prefix );
            $self->log->fail("Data provider does not support the simple Dublin Core metadata ".
                             "format with metadataPrefix oai_dc. Tests that require a ".
                             "metadataPrefix to be specified will use '".
                             $self->example_metadata_prefix."'");
        } else {
            $self->log->fail("There are no metadata formats available to use with the GetRecord ".
                             "request. The metadataPrefix ".
                             $self->example_metadata_prefix.
                             " will be used for later tests even though it seems unsupported.");
        }
    }

    my ($dateStamp)=$self->test_get_record($self->example_record_id,$self->example_metadata_prefix);
    $self->test_list_records($dateStamp,$self->example_metadata_prefix);

    # Check responses to erroneous queries
    $self->test_expected_errors($self->example_record_id);

    # protocol_version may still be undef if the Identify step was skipped
    my $version = $self->protocol_version;
    if (defined $version and $version eq '2.0') {
        $self->test_expected_v2_errors($self->earliest_datestamp,$self->example_metadata_prefix);
        # As of version 2.0, data providers must support HTTP POST requests
        $self->test_post_requests($self->example_metadata_prefix);
    }
    $self->test_resumption_tokens;

    # Getting here with no failures means that the data provider is compliant
    # (there may be warnings which are not counted in num_fail)
    my $compliant = ($self->log->num_fail==0);
    $self->status( $compliant ? 'COMPLIANT' : 'FAILED' );
    return($compliant);
}
|
231
|
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
=head3 failures() |
|
234
|
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
Return Markdown summary of failure log entries, along with the appropriate |
|
236
|
|
|
|
|
|
|
titles and request details. Will return empty string if there are no |
|
237
|
|
|
|
|
|
|
failures in the log. |
|
238
|
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
=cut |
|
240
|
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
sub failures {
    # Delegate to the log object's failures() method and hand its result
    # straight back to the caller.
    my ($self) = @_;
    my $log = $self->log;
    return $log->failures();
}
|
245
|
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
=head3 summary() |
|
248
|
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
Return summary statistics for the validation in Markdown (designed to agree |
|
250
|
|
|
|
|
|
|
with conversion to HTML by L). |
|
251
|
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
=cut |
|
253
|
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
sub summary {
    # Build the Markdown summary of the run: a heading showing success or
    # failure followed by one bullet per fact (declared namespace, 503 and
    # https usage, pass/warning/error totals, validation status).
    # Returns the summary as a single string.
    my ($self) = @_;

    my $outcome = 'success';
    $outcome = 'failure' if ($self->log->num_fail > 0);

    my @parts = ("\n## Summary - *$outcome*\n\n");

    if (my $ns = $self->namespace_id) {
        # A '.' in the namespace marks the v2.0 form; otherwise it is v1.1
        if ($ns =~ /\./) {
            push(@parts, " * Namespace declared for v2.0 oai-identifiers is $ns\n");
        } else {
            push(@parts, " * Namespace declared for v1.1 oai-identifiers (the repositoryIdentifier) is $ns\n");
        }
    }

    if ($self->uses_503) {
        push(@parts, " * Uses 503 for flow control\n");
    }
    if ($self->uses_https) {
        push(@parts, " * Uses https URIs (not specified in protocol)\n");
    }

    push(@parts, " * Total tests passed: " . $self->log->num_pass . "\n");
    push(@parts, " * Total warnings: " . $self->log->num_warn . "\n");
    push(@parts, " * Total error count: " . $self->log->num_fail . "\n");
    push(@parts, " * Validation status: " . ($self->status || 'unknown') . "\n");

    return join('', @parts);
}
|
276
|
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
=head2 METHODS TESTING SPECIFIC OAI-PMH VERBS |
|
279
|
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
=head3 test_identify() |
|
281
|
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
Check response to an Identify request. Returns false if tests cannot |
|
283
|
|
|
|
|
|
|
continue, true otherwise. |
|
284
|
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
Side effects based on values extracted: |
|
286
|
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
- $self->admin_email set to email extracted from adminEmail element |
|
288
|
|
|
|
|
|
|
- $self->granularity set to 'days' or 'seconds' |
|
289
|
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
=cut |
|
291
|
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
sub test_identify {
    # Check the response to an Identify request.
    #
    # Aborts (dies via abort()) if no usable Identify response can be
    # obtained: HTTP failure, unparsable XML, wrong root element, error
    # response, missing Identify block, or bad adminEmail.
    #
    # Otherwise returns true if later tests can continue, false if not.
    #
    # Side effects based on values extracted: sets admin_email,
    # identify_response and (when valid) namespace_id; the helpers called
    # here are expected to set protocol_version, granularity and
    # earliest_datestamp.
    my $self=shift;

    my $cantContinue=0;
    $self->log->start("Checking Identify response");

    # Send the verb request to the base URL
    my $burl=$self->base_url;
    my $req = $burl."?verb=Identify";

    my $response = $self->make_request($req); #don't use make_request_and_validate() just do simplest thing here
    unless ($response->is_success) {
        my $r="Server at base URL '$burl' failed to respond to Identify. The HTTP GET request with URL $req received response code '".$response->code()."'.";
        if ($response->code() == 301) {
            $self->log->fail("$r HTTP code 301 'Moved Permanently' is not widely supported by ".
                             "harvesters and is anyway inappropriate for registration of a ".
                             "service. If requests must be redirected then an HTTP response 302 ".
                             "may be used as outlined in the guidelines [".
                             $self->guidelines."#LoadBalancing].");
        } else {
            $self->log->fail($r);
        }
        $self->abort("Failed to get Identify response from server at base URL '$burl'.\n");
        return;
    }

    # Parse the XML response
    unless ($self->parse_response($req,$response)) {
        $self->log->fail("Failed to parse Identify response");
        $self->abort("Failed to parse Identify response from server at base URL '$burl'.\n");
        return;
    }

    # Check that this really is an Identify response
    my $oaipmhNode=$self->doc->getFirstChild();
    # Skip over any processing instructions (such as XML stylesheets) and
    # comments. Stop if we run out of siblings: getNextSibling then gives
    # undef and no further method may be called on it.
    while (defined $oaipmhNode and
           ($oaipmhNode->getNodeType==PROCESSING_INSTRUCTION_NODE or
            $oaipmhNode->getNodeType==COMMENT_NODE)) {
        $oaipmhNode=$oaipmhNode->getNextSibling();
    }
    unless (defined $oaipmhNode and $oaipmhNode->getNodeName eq 'OAI-PMH') {
        my $found = (defined $oaipmhNode
                     ? "Found node named '".$oaipmhNode->getNodeName."' instead"
                     : "Found no root element");
        $self->log->fail("Identify response does not have OAI-PMH as root element! ".
                         $found);
        $self->abort("Identify response from server at base URL '$burl' does not have ".
                     "OAI-PMH as root element!\n");
        return;
    }
    my $identifyNode=$oaipmhNode->getElementsByTagName('Identify',0);
    unless ($identifyNode->getLength()>0) {
        my $errorNode=$oaipmhNode->getElementsByTagName('error',0);
        if ($errorNode->getLength()>0) {
            # give specific message if response is an error
            $self->log->fail("Error response to Identify request!\n");
            $self->abort("Error response to Identify request from server at base URL '$burl'.\n");
            return;
        } else {
            $self->log->fail("Identify response does not contain <Identify> block.\n");
            $self->abort("Identify response does not contain Identify block from server at base URL '$burl'.\n");
            return;
        }
    }

    # Extract admin email and protocol version numbers, check
    my ($admin_email,$email_error)=$self->get_admin_email;
    if (not $admin_email or $email_error) {
        $self->abort(($email_error || "Failed to extract adminEmail").", aborting.\n");
        return;
    }
    $self->admin_email($admin_email);
    $self->check_protocol_version; # bails if not Version 2.0

    # URL is valid, Identify response was provided, extract content as string
    $self->identify_response( $response->content );

    my $baseURLs = $self->doc->getElementsByTagName('baseURL');

    # BUG FOUND ON AUGUST 26, 2002: empty baseURL still returns length > 0
    # So it is necessary to explicitly check for an empty element.
    if ( $baseURLs->getLength() > 0 ) {
        my $text = $baseURLs->item(0)->getFirstChild;
        # An empty element has no text child; treat its value as '' so the
        # comparison and the message below never see an undefined value.
        my $baseURL = ($text ? $text->getData : '');
        $baseURL = '' unless (defined $baseURL);

        # $burl is the one given on the form; $baseURL is the one in the XML doc.
        if ($burl eq $baseURL) {
            $self->log->pass("baseURL supplied matches the Identify response");
        } else {
            # report the error, but keep the form URL
            # (at least it answered Identify!)
            $self->log->fail("baseURL supplied '$burl' does not match the baseURL in the ".
                             "Identify response '$baseURL'. The baseURL you enter must EXACTLY ".
                             "match the baseURL returned in the Identify response. It must ".
                             "match in case (http://Wibble.org/ does not match http://wibble.org/) ".
                             "and include any trailing slashes etc.");
            $cantContinue++;
        }
    }

    # For Version 2.0, Check for seconds granularity
    if ($self->protocol_version eq '2.0') {
        my $gran_el = $self->doc->getElementsByTagName('granularity');
        if ($self->parse_granularity($gran_el)) {
            $self->log->pass("Datestamp granularity is '".$self->granularity."'");
        } else {
            $cantContinue++;
        }
    }

    # For an exception check new to Version 2.0, extract the earliest date
    # and also check that its granularity is right
    if (my $err=$self->get_earliest_datestamp) {
        $self->log->fail("Bad earliestDatestamp: $err");
        $cantContinue++;
    } else {
        $self->log->pass("Extracted earliestDatestamp ".$self->earliest_datestamp);
    }

    # Check for OAI-identifier. If already in use by another base URL, bump
    # the error count to avoid having this URL register.
    #
    my $oaiIds = $self->doc->getElementsByTagName('oai-identifier');
    if ($oaiIds and $oaiIds->getLength()>0) {
        if ($oaiIds->getLength()>1) {
            $self->log->fail("Found more than one oai-identifier element. The intention ".
                             "is that this declaration only be used by repositories ".
                             "declaring the use of a single identifier namespace.");
            $cantContinue++;
        } else {
            $oaiIds=$oaiIds->item(0);

            # Now find out if this is v1.1 or v2.0
            my $oai_id_version='2.0';
            if (my $xmlns=$oaiIds->getAttribute('xmlns')) { #FIXME this requires default namespace to be set to oai-id
                # $xmlns is known to be true in here, so anything that is
                # not one of the two recognized namespaces is unrecognized.
                if ($xmlns eq 'http://www.openarchives.org/OAI/2.0/oai-identifier') {
                    $oai_id_version='2.0';
                    $self->log->pass("oai-identifier description for version $oai_id_version is being used");
                } elsif ($xmlns eq 'http://www.openarchives.org/OAI/1.1/oai-identifier') {
                    $oai_id_version='1.1';
                    $self->log->pass("oai-identifier description for version $oai_id_version is being used");
                } else {
                    $self->log->fail("Unrecognized namespace declaration '$xmlns' for ".
                                     "oai-identifier, expected ".
                                     "http://www.openarchives.org/OAI/2.0/oai-identifier ".
                                     "(for v2.0) or ".
                                     "http://www.openarchives.org/OAI/1.1/oai-identifier ".
                                     "(for v1.1). Assuming version $oai_id_version.");
                }
            } else {
                # NOTE(review): the text after "This must be added as " looks
                # incomplete (the declaration to add is not named) - confirm
                # the intended wording.
                $self->log->fail("Can't find namespace declaration for the oai-identifier description. ".
                                 "This must be added as ".
                                 "(or 1.1), there will likely also be schema validation errors. Will ".
                                 "assume that the oai-identifier is version $oai_id_version for ".
                                 "later tests");
            }
            my $repoIds = $oaiIds->getElementsByTagName('repositoryIdentifier');
            if ($repoIds) {
                my $temp = $repoIds->item(0);
                if (!defined($temp)) {
                    $self->log->fail("No namespace-identifier (repositoryIdentifier element) in ".
                                     "the oai-identifier block of the Identify description");
                    return;
                }
                my $nsel = $temp->getFirstChild;
                unless ( $nsel ) {
                    # Empty repositoryIdentifier element, squawk loudly
                    $self->log->fail("Empty namespace-identifier (repositoryIdentifier element) in ".
                                     "the oai-identifier block of the Identify description");
                    return;
                }
                my $namespace_id = $nsel->getData;
                # Validate the namespace-identifier against the pattern for
                # the oai-identifier version determined above. (summary()
                # later tells v1.1 from v2.0 by whether the value has a '.'.)
                if ($oai_id_version eq '2.0') {
                    # v2.0: dot-separated labels, each starting with a letter
                    unless ($namespace_id=~/^[a-z][a-z0-9\-]*(\.[a-z][a-z0-9\-]+)+$/i) {
                        $self->log->fail("Bad namespace-identifier (repositoryIdentifier element) ".
                                         "'$namespace_id' in oai-identifier declaration. See section ".
                                         "2.1 of the OAI Identifier specification for details ".
                                         "(http://www.openarchives.org/OAI/2.0/guidelines-oai-identifier.htm).");
                        $cantContinue++;
                    } else {
                        $self->log->pass("namespace-identifier (repositoryIdentifier element) in oai-identifier ".
                                         "declaration is $namespace_id");
                        $self->namespace_id( $namespace_id );
                    }
                } else {
                    # v1.1: letters and digits only
                    unless ($namespace_id=~/^[a-z0-9]+$/i) {
                        $self->log->fail("Bad namespace-identifier (repositoryIdentifier element) ".
                                         "'$namespace_id' in oai-identifier declaration. See section ".
                                         "2.1 of the OAI Identifier specification for details ".
                                         "(http://www.openarchives.org/OAI/1.1/guidelines-oai-identifier.htm).");
                        $cantContinue++;
                    } else {
                        $self->log->pass("namespace-identifier (repositoryIdentifier element) in oai-identifier ".
                                         "declaration is $namespace_id");
                        $self->namespace_id( $namespace_id );
                    }
                }
            }
        }
    }
    return(not $cantContinue);
}
|
498
|
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
=head3 test_list_sets() |
|
501
|
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
Check response to the ListSets verb. |
|
503
|
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
Save the setSpecs for later use. |
|
505
|
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
Note that any set might be empty. So if test_list_identifiers doesn't |
|
507
|
|
|
|
|
|
|
get a match, we need to try the second set identifier, and so on. |
|
508
|
|
|
|
|
|
|
So keep a list of the setSpec elements. |
|
509
|
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
=cut |
|
511
|
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
sub test_list_sets {
    my $self=shift;

    # Issue a ListSets request, store every setSpec value found in
    # $self->set_names and store "&set=<first setSpec>" in
    # $self->example_set_spec for use by later tests. If there are no
    # setSpec elements the response is expected to be a noSetHierarchy
    # error; anything else is logged as a failure.

    $self->log->start("Checking ListSets response");
    my $req=$self->base_url."?verb=ListSets";
    my $response = $self->make_request_and_validate("ListSets", $req);
    unless ($response) {
        $self->log->fail("Can't check set names");
        return;
    }

    unless ($self->parse_response($req,$response)) {
        $self->log->fail("Can't parse response");
        $self->abort("failed to parse response to ListSets");
    }

    # Reset state before extracting anything from this response
    $self->set_names( [] );
    $self->example_set_spec( '' );
    my $set_elements=$self->doc->getElementsByTagName('setSpec');
    if (not defined($set_elements) or ($set_elements->getLength<1)) {
        # No setSpec elements, so there should be an error element
        my $details={};
        if ($self->is_error_response($details)) {
            if ($details->{'noSetHierarchy'}) {
                $self->log->pass("Repository does not support sets and this is correctly reported with a ".
                                 "noSetHierarchy exception in the ListSets response");
            } else {
                $self->log->fail("Failed to extract any setSpec elements from ListSets ".
                                 "but did not find a noSetHierarchy exception. Found instead a '".
                                 join(', ',keys %{$details})."' exception(s). See <".
                                 $self->protocol."#ListSets>.");
            }
        } else {
            $self->log->fail("Failed to extract any setSpec elements from ListSets but did not ".
                             "find an exception message. If sets are not supported by the ".
                             "repository then the ListSets response must be the noSetHierarchy ".
                             "error. See <".$self->protocol."#ListSets>.");
        }
    } else {
        # Have setSpec elements, record all set names and pick an example set spec
        my $num_elements=$set_elements->getLength;
        for (my $j=0; $j<$num_elements; $j++) {
            # An empty <setSpec/> has no child text node; skip it rather than
            # dying on an undefined value. The count check below reports it.
            my $text_node=$set_elements->item($j)->getFirstChild;
            next unless ($text_node);
            ##FIXME - should validate each set name
            push(@{$self->set_names},$text_node->getData);
        }
        # Sanity check, did we get the number we expected?
        my $num_sets=scalar(@{$self->set_names});
        if ($num_sets!=$num_elements) {
            $self->log->fail("Failed to extract the expected number of set names (got ".
                             "$num_sets, expected ".$num_elements.")");
        }
        if ($num_sets>0) {
            $self->example_set_spec( "&set=".$self->set_names->[0] );
            # Show at most the first three set names in the log message
            my $msg='';
            for (my $j=0; $j<$num_sets and $j<3; $j++) { $msg.=" ".$self->set_names->[$j]; }
            $msg.=" ..." if ($num_sets>3);
            $self->log->pass("Extracted $num_sets set names: {$msg }, will use setSpec ".
                             $self->example_set_spec." in tests");
        }
    }
}
|
573
|
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
=head3 test_list_identifiers() |
|
576
|
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
Check response to ListIdentifiers and record an example record id in |
|
578
|
|
|
|
|
|
|
$self->example_record_id to be used in other tests. |
|
579
|
|
|
|
|
|
|
|
|
580
|
|
|
|
|
|
|
If there are no identifiers, but the response is legal, stop the test with |
|
581
|
|
|
|
|
|
|
errors=0, number of verbs checked is three. |
|
582
|
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
As of version 2.0, a metadataPrefix argument is required. Unfortunately |
|
584
|
|
|
|
|
|
|
we need to call test_list_identifiers first in order to get an id for |
|
585
|
|
|
|
|
|
|
GetRecord, so we simply use oai_dc. |
|
586
|
|
|
|
|
|
|
|
|
587
|
|
|
|
|
|
|
=cut |
|
588
|
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
sub test_list_identifiers {
    my $self=shift;

    # Issue ListIdentifiers (metadataPrefix=oai_dc), first restricted to
    # $self->example_set_spec, then to each remaining entry of
    # $self->set_names, and finally with no set restriction. The identifier
    # of the first header not marked status="deleted" is stored in
    # $self->example_record_id. Calls $self->abort if no usable response or
    # identifier is obtained.

    $self->log->start("Checking ListIdentifiers response");

    ### FIXME -- skip the set= restriction because this code doesn't
    ### FIXME work right for set hierarchies - 2002-10-17
    ### FIXME 2015-01-02 - put/left in, is it OK?
    my $set_spec = $self->example_set_spec;
    my $req = $self->base_url."?verb=ListIdentifiers&metadataPrefix=oai_dc".$set_spec;
    my $response = $self->make_request_and_validate("ListIdentifiers", $req);

    # Note: $response will come back null if an error code was returned
    # An error code of "noRecordsMatch" comes back if specified set is
    # empty. In that case we should drop the set and try again.
    # NOTE(review): when $response is true but is_no_records_match is also
    # true, the loop below does not run because it only retries while
    # $response is false - confirm whether that case can occur.
    if ( $set_spec and (! $response or $self->is_no_records_match ) ) {
        $self->log->note("Empty set made ListIdentifiers fail - trying other sets...");
        my $m = scalar(@{$self->set_names});
        # Start at index 1: index 0 is the set used for example_set_spec,
        # which has just been tried. The index must advance on every pass,
        # otherwise a second failing set would be retried forever.
        for (my $i=1; $i<$m and not $response; $i++) {
            $set_spec = "&set=".$self->set_names->[$i];
            $req = $self->base_url."?verb=ListIdentifiers&metadataPrefix=oai_dc".$set_spec;
            $response = $self->make_request_and_validate("ListIdentifiers", $req);
            $self->log->note("Trying set ".$set_spec);
        }
        # If we were successful then set the example_set_spec for any future tests
        if ($response) {
            $self->example_set_spec( $set_spec );
        }
    }

    # None of the sets had any identifiers in them. Try the whole entire
    # list of identifiers.
    if ( $set_spec and !$response ) {
        $self->log->note("Last attempt is without any sets...");
        $req = $self->base_url."?verb=ListIdentifiers&metadataPrefix=oai_dc";
        $response = $self->make_request_and_validate("ListIdentifiers",$req);
    }

    # Now we are for real in trouble if $response is null
    unless ($response) {
        $self->log->fail("No ListIdentifiers response with content");
        $self->log->note("The base URL did not respond to the ListIdentifiers verb. ".
                         "Without that, we cannot proceed with the validation test. Exiting.");
        $self->abort("The base URL did not respond to the ListIdentifiers verb. Without that, we cannot proceed with the validation test. Exiting.");
    }

    # Grab the first identifier for later use
    unless ($self->parse_response($req,$response)) {
        $self->log->fail("Can't parse ListIdentifiers response");
        $self->abort("unable to parse response");
    }
    #
    # Now look for the identifier of a non-deleted record
    # If there are no identifiers to be harvested, we cannot complete the
    # validation test.
    #
    # FIXME - this still doesn't solve the problem that there may be no
    # non-deleted items listed in the particular response or partial
    # response that we are looking at [Simeon/2005-07-20]
    #
    my $headers = $self->doc->getElementsByTagName('header');
    my $h;
    my $record_id;
    for ($h=0; $h<$headers->getLength(); $h++) {
        my $hdnode=$headers->item($h);
        my $idnode=$hdnode->getElementsByTagName('identifier',0);
        next unless ($idnode and $idnode->getLength()==1);
        # An empty <identifier/> has no child text node; skip that header
        # instead of dying on an undefined value.
        my $idtext=$idnode->item(0)->getFirstChild;
        next unless ($idtext);
        $record_id=$idtext->getData;
        last unless ($hdnode->getAttribute('status') and $hdnode->getAttribute('status') eq 'deleted');
        $self->log->warn("Identifier ".($h+1).", '$record_id', is for a deleted record, skipping");
    }
    # $h only reaches the header count if the loop never hit "last", i.e.
    # no header yielded the identifier of a non-deleted record.
    if ($h==$headers->getLength()) {
        # No identifiers were in the ListIdentifiers response. Further testing
        # is not possible.
        $self->log->fail("The response to the ListIdentifiers verb with metadataPrefix oai_dc ".
                         "contained no identifiers. Without at least one identifier, we cannot ".
                         "proceed with the validation tests.");
        $self->abort("No identifiers in response to ListIdentifiers. Without an identifier ".
                     "we cannot proceed with validation tests.");
    }
    $self->log->pass("Good ListIdentifiers response, extracted id '$record_id' for use in future tests.");
    $self->example_record_id( $record_id );
}
|
673
|
|
|
|
|
|
|
|
|
674
|
|
|
|
|
|
|
|
|
675
|
|
|
|
|
|
|
=head3 test_list_metadata_formats() |
|
676
|
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
Vet the verb as usual, and then make sure that Dublin Core is included. |
|
678
|
|
|
|
|
|
|
In particular, we will check the metadata formats available for "record_id", |
|
679
|
|
|
|
|
|
|
obtained from checking the ListIdentifier verb. |
|
680
|
|
|
|
|
|
|
Side effect: save available formats for later use (global "formats"). |
|
681
|
|
|
|
|
|
|
NOTE: if there are no formats, the error will be picked up by getRecord. |
|
682
|
|
|
|
|
|
|
|
|
683
|
|
|
|
|
|
|
=cut |
|
684
|
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
sub test_list_metadata_formats {
    my $self=shift;

    # Issue ListMetadataFormats for $self->example_record_id and look for
    # oai_dc among the metadataPrefix elements of the response.
    #
    # Returns ($formats, $gotDC): the list of metadataPrefix elements and a
    # flag that is 1 if oai_dc was among them, else 0. Returns nothing if
    # there is no example record id, no response, an unparsable response or
    # no metadataPrefix elements.

    $self->log->start("Checking ListMetadataFormats response");

    # Do we have an example record id to check with?
    my $record_id = $self->example_record_id;
    unless ($record_id) {
        $self->log->fail("Cannot check ListMetadataFormats as we do not have an example id");
        return;
    }

    my $req = $self->base_url."?verb=ListMetadataFormats&identifier=".url_encode($record_id);
    my $response = $self->make_request_and_validate("ListMetadataFormats",$req);
    unless ($response) {
        $self->log->fail("Can't check metadataFormats available for item $record_id, no ".
                         "response to ListMetadataFormats request.");
        return;
    }

    # Check for Dublin Core
    unless ($self->parse_response($req,$response)) {
        $self->log->fail("Can't parse response to ListMetadataFormats request for item $record_id.");
        return;
    }

    my $formats = $self->doc->getElementsByTagName('metadataPrefix');
    unless ($formats->getLength() > 0) {
        $self->log->fail("No metadata formats are listed in the response to a ListMetadataFormats ".
                         "request for item $record_id.");
        return;
    }

    if ($self->debug) {
        $self->log->note("debug: ".$formats->getLength()." formats supported for identifier '$record_id'");
    }
    my $gotDC=0;
    for my $i (0..$formats->getLength()-1) {
        # Assume the node has a single child holding the prefix text. An
        # empty <metadataPrefix/> has no child at all, so skip it rather
        # than dying on an undefined value.
        my $prefix_text = $formats->item($i)->getFirstChild;
        next unless ($prefix_text);
        if ( $prefix_text->getData =~ /^\s*oai_dc\s*$/ ) {
            $gotDC = 1;
            last;
        }
    }
    if ($gotDC) {
        $self->log->pass("Good ListMetadataFormats response, includes oai_dc");
    } else {
        # NOTE(review): a missing oai_dc is logged as a pass here; the caller
        # receives $gotDC=0 and presumably reports the problem - confirm.
        $self->log->pass("Good ListMetadataFormats response, BUT DID NOT FIND oai_dc");
    }
    return($formats, $gotDC);
}
|
737
|
|
|
|
|
|
|
|
|
738
|
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
=head3 test_get_record($record_id, $format) |
|
740
|
|
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
Try to get record $record_id in $format. |
|
742
|
|
|
|
|
|
|
|
|
743
|
|
|
|
|
|
|
If either $record_id or $format are undef then we have an error |
|
744
|
|
|
|
|
|
|
right off the bat. Else make the request and return the |
|
745
|
|
|
|
|
|
|
datestamp of the record. |
|
746
|
|
|
|
|
|
|
|
|
747
|
|
|
|
|
|
|
=cut |
|
748
|
|
|
|
|
|
|
|
|
749
|
|
|
|
|
|
|
sub test_get_record {
    my $self=shift;
    my ($record_id, $format)=@_;

    # Issue GetRecord for $record_id in metadata format $format, check the
    # form and granularity of the record datestamp and, if
    # $self->example_set_spec is set, check that the response includes that
    # setSpec or a descendant of it.
    #
    # Returns the datestamp string (undef if it could not be extracted).
    # Returns nothing if $format or $record_id is undefined. Calls
    # $self->abort if there is no response, it cannot be parsed, it is an
    # OAI exception, or it has no datestamp element.

    $self->log->start("Checking GetRecord response");

    unless (defined $format) {
        $self->log->fail("Skipping GetRecord test as no metadata format is listed as being available.");
        return;
    }
    unless (defined $record_id) {
        $self->log->fail("Skipping GetRecord test as no items are listed as having metadata available.");
        return;
    }

    my $numerr=0; #count up non-fatal errors

    my $req = $self->base_url."?verb=GetRecord&identifier=".url_encode($record_id)."&metadataPrefix=".url_encode($format);
    my $response = $self->make_request_and_validate("GetRecord", $req);
    unless ($response) {
        $self->log->fail("Can't complete datestamp check for GetRecord");
        $self->abort("Can't complete datestamp check for GetRecord");
    }

    # Save the datestamp for later use by ListRecords
    # As of version 2.0, Identify response can have a granularity and the
    # datestamp MUST be in the finest granularity supported by the repository
    unless ($self->parse_response($req,$response)) {
        $self->log->fail("Can't parse response");
        $self->abort("Unable to parse response from GetRecord");
    }

    if (my $msg=$self->is_error_response) {
        $self->log->fail("The response to the GetRecord verb was the OAI exception $msg. ".
                         "It is thus not possible to extract a valid datestamp for remaining tests");
        $self->abort("Unexpected OAI exception response");
    }

    my $datestamps = $self->doc->getElementsByTagName('datestamp');
    # If there is no record there is no datestamp ... but there should be a record
    unless ( $datestamps->getLength() > 0 ) {
        $self->log->fail("The response to the GetRecord verb did not have a datestamp, which is ".
                         "needed to continue checking verbs.");
        $self->abort("No datestamp in the response for GetRecord");
    }

    # The eval absorbs the die from an empty <datestamp/> (no child node),
    # leaving $datestamp undefined for the check below.
    my $datestamp=undef;
    eval {
        $datestamp = $datestamps->item(0)->getFirstChild->getData;
    };
    if (not defined($datestamp)) {
        $self->log->fail("Failed to extract datestamp from the GetRecord response. See <".
                         $self->protocol."#Dates>.");
        $numerr++;
    } elsif ( my $granularity=$self->get_datestamp_granularity($datestamp) ) {
        $self->log->pass("Datestamp in GetRecord response ($datestamp) has the correct form for ".
                         "$granularity granularity.");
        if ( $granularity eq $self->granularity ) {
            # The granularity in v2.0 must match the finest granularity supported (see sec3.3.2)
            $self->log->pass("Datestamp in GetRecord response ($datestamp) matched the ".
                             $self->granularity." granularity specified in the Identify response. ");
        } else {
            $self->log->fail("Datestamp in GetRecord response ($datestamp) is not consistent ".
                             "with the ".$self->granularity." granularity specified in the ".
                             "Identify response");
            $numerr++;
        }
    } else {
        $self->log->fail("Datestamp in GetRecord response ($datestamp) is not valid. See <".
                         $self->protocol."#Dates>.");
        $numerr++;
    }

    # As of OAI-PMH Version 2.0, GetRecord must return a set spec if the
    # repository supports sets and the item is in a set
    if (not $self->example_set_spec) {
        $self->log->pass("Valid GetRecord response") unless ($numerr>0);
        return($datestamp);
    }

    my $set_list = $self->doc->getElementsByTagName('setSpec');
    # example_set_spec is stored as the query fragment "&set=<setSpec>"
    my $set_value = $self->example_set_spec;
    $set_value =~ s/&set=//;
    $self->log->note("Looking for set '".$set_value."' or a descendant set.") if $self->debug;
    my $i;
    my $subset_str = '';
    for ($i=0; $i<$set_list->getLength; $i++) {
        # Skip an empty <setSpec/> rather than dying on an undefined value
        my $spec_text = $set_list->item($i)->getFirstChild;
        next unless ($spec_text);
        my $s = $spec_text->getData;
        last if ($s eq $set_value);
        # A descendant set is "<set_value>:<more>". The set name is quoted
        # with \Q..\E because setSpec values may contain regex
        # metacharacters such as . * ( ) which must match literally.
        if ($s =~ m/^\Q${set_value}\E:/) {
            $subset_str = " (implied by a descendant setSpec)";
            last;
        }
    }
    # $i only reaches the list length if no setSpec matched
    if ($i==$set_list->getLength) { # error
        $self->log->fail("Expected setSpec was missing from the response. The GetRecord ".
                         "response for identifier $record_id did not contain a set ".
                         "specification for $set_value");
    } else {
        $self->log->pass("Expected setSpec was returned in the response".$subset_str);
    }
    return($datestamp);
}
|
852
|
|
|
|
|
|
|
|
|
853
|
|
|
|
|
|
|
|
|
854
|
|
|
|
|
|
|
=head3 test_list_records($datestamp,$metadata_prefix) |
|
855
|
|
|
|
|
|
|
|
|
856
|
|
|
|
|
|
|
Test the response for the ListRecords verb. In addition, if there is |
|
857
|
|
|
|
|
|
|
no Dublin Core available for this repository, this is an error. |
|
858
|
|
|
|
|
|
|
(And the error has already been counted in test_get_record) |
|
859
|
|
|
|
|
|
|
We can still test the verb, however, with one of the available |
|
860
|
|
|
|
|
|
|
formats found by test_list_metadata_formats. Since the output of |
|
861
|
|
|
|
|
|
|
ListRecords is likely to be large, use the datestamp of the one |
|
862
|
|
|
|
|
|
|
record we did retrieve to limit the output. |
|
863
|
|
|
|
|
|
|
|
|
864
|
|
|
|
|
|
|
=cut |
|
865
|
|
|
|
|
|
|
|
|
866
|
|
|
|
|
|
|
sub test_list_records {
    my $self=shift;
    my ($datestamp,$metadata_prefix)=@_;

    # Issue ListRecords for $metadata_prefix with from and until both set
    # to $datestamp (omitted if $datestamp is false) and check that the
    # record $self->example_record_id is included, following
    # resumptionTokens until it is found or the list ends.
    #
    # Returns early, with no value, if a request gets no response, a
    # response cannot be parsed, or there is no example record id.

    $self->log->start("Checking ListRecords response");

    my $req = $self->base_url."?verb=ListRecords";
    if ($datestamp) {
        $req.="&from=".$datestamp."&until=".$datestamp;
    } else {
        $self->log->warn("Omitting datestamp parameter as none was obtained from earlier test");
    }
    # Encode the prefix as test_get_record does for its metadataPrefix; an
    # undefined prefix still yields an empty value as before.
    $req.="&metadataPrefix=".(defined($metadata_prefix) ? url_encode($metadata_prefix) : '');
    my $list_not_complete=1;

    while ($list_not_complete) {
        $list_not_complete=0;
        my $response = $self->make_request_and_validate("ListRecords", $req);
        unless ($response) {
            #Nothing else to say since we don't do other tests
            return;
        }

        if ($self->parse_response($req,$response)) {
            $self->log->pass("Response is well formed");
        } else {
            $self->log->fail("The ListRecords response was not well formed XML");
            # Nothing further can be checked without a parsed document, so
            # stop here as the other tests do on a parse failure.
            return;
        }

        # Now check to make sure that we got back the record for the identifier
        # $self->example_record_id if there is one specified, else fail that
        # test.
        my $record_id=$self->example_record_id;
        unless ($record_id) {
            $self->log->fail("Cannot check for correct record inclusion without an example record id");
            return;
        }
        my $details={};
        if ($self->is_error_response($details)) {
            if ($details->{'noRecordsMatch'}) {
                $self->log->fail("ListRecords response gave a noRecordsMatch error when it should ".
                                 "have included at least the record with identifier $record_id. ".
                                 "The from and until parameters of the request were set to the ".
                                 "datestamp of this record ($datestamp). The from and until parameters ".
                                 "are inclusive, see protocol spec section 2.7.1. The message ".
                                 "included in the error response was: '".
                                 $details->{'noRecordsMatch'}."'");
            } else {
                my @txt=();
                foreach my $k (keys %$details) {
                    push(@txt,"$k (".$details->{$k}.")");
                }
                $self->log->fail("ListRecords gave an unexpected error response to a request using ".
                                 "from and until datestamps taken from the previous GetRecord response: ".
                                 join(', ',@txt));
            }
        } else {
            my $id_list = $self->doc->getElementsByTagName('identifier');
            my $i;
            my $badly_formed=0;
            # Leave the loop early on a match or on an empty identifier
            # element; $i then indexes the element concerned.
            for ($i=0; $i<$id_list->getLength; $i++) {
                if (my $child=$id_list->item($i)->getFirstChild()) {
                    last if ($child->getData eq $record_id);
                } else {
                    $badly_formed++;
                    last;
                }
            }
            if ($badly_formed) {
                $self->log->fail("ListRecords response badly formed, identifier element for record ".
                                 ($i+1)." is empty");
            } elsif ($i<$id_list->getLength) {
                $self->log->pass("ListRecords response correctly included record with identifier $record_id");
            } elsif (my $token=$self->get_resumption_token) {
                # More responses to come, may just not have got to the
                # record yet... roll around for more:
                $self->log->pass("ListRecords response includes resumptionToken. Haven't found ".
                                 "record with identifier $record_id yet, will continue with resumptionToken...");
                $list_not_complete=1;
                $req = $self->base_url."?verb=ListRecords&resumptionToken=".url_encode($token);
            } else {
                $self->log->fail("ListRecords response did not include the identifier $record_id ".
                                 "which should have been included because both the from and until ".
                                 "parameters were set to the datestamp of this record ($datestamp). ".
                                 "The from and until parameters are inclusive, see protocol spec ".
                                 "section 2.7.1");
            }
        }
    }
}
|
956
|
|
|
|
|
|
|
|
|
957
|
|
|
|
|
|
|
|
|
958
|
|
|
|
|
|
|
=head3 test_resumption_tokens() |
|
959
|
|
|
|
|
|
|
|
|
960
|
|
|
|
|
|
|
Request an unlimited ListRecords. If there is a resumption token, see |
|
961
|
|
|
|
|
|
|
if it works. It is an error if resumption is there but doesn't work. |
|
962
|
|
|
|
|
|
|
Empty resumption tokens are OK -- this ends the list. |
|
963
|
|
|
|
|
|
|
|
|
964
|
|
|
|
|
|
|
CGI takes care of URL-encoding the resumption token. |
|
965
|
|
|
|
|
|
|
|
|
966
|
|
|
|
|
|
|
=cut |
|
967
|
|
|
|
|
|
|
|
|
968
|
|
|
|
|
|
|
sub test_resumption_tokens {
    my $self=shift;

    # Issue an unrestricted ListRecords (metadataPrefix=oai_dc). If the
    # response carries a resumptionToken, check that any cursor attribute
    # is zero and that a follow-up request using the token gets a parsable
    # response with no error element. Each outcome is logged as a pass or
    # fail; no value is returned on the early exits.

    $self->log->start("Checking for correct use of resumptionToken (if used)");

    my $req = $self->base_url."?verb=ListRecords&metadataPrefix=oai_dc";
    my $response = $self->make_request($req);
    # Check the first response in the same way as the follow-up request below
    unless ( $response ) {
        $self->log->fail("Site failed to respond to request: $req");
        return;
    }

    # was there a resumption token?
    unless ($self->parse_response($req,$response)) {
        $self->log->fail("Can't parse malformed XML in response to ListRecords request. ".
                         "Cannot complete test for correct use of resumptionToken (if used)");
        return;
    }

    my $tokenList = $self->doc->getElementsByTagName('resumptionToken');
    if ( !$tokenList or $tokenList->getLength()==0 ) {
        $self->log->pass("resumptionToken not used");
        return;
    }
    if ( $tokenList->getLength()>1 ) {
        $self->log->fail("More than one resumptionToken in response!");
        return;
    }

    # Dig out the resumption token from the document
    my $tokenElement = $tokenList->item(0);

    # Try getting the resumption token, $token will be undefined
    # unless the element has content
    my $token = $tokenElement->getFirstChild;
    my $tokenString;
    if ($token) {
        $tokenString = $token->getData;
    }
    unless ($tokenString) {
        $self->log->fail("Empty resumption token in response to $req. There should never ".
                         "be an empty resumptionToken in response to a request without a ".
                         "resumptionToken argument");
        return;
    }

    # If there is a 'cursor' value given then check that it is
    # correct. It must have the value 0 in the first response.
    # The attribute is tested for presence (defined and non-empty) rather
    # than truth, because the correct value "0" is false in Perl and would
    # otherwise be skipped as if no cursor had been supplied.
    my $usingCursor=0;
    my $cursor=$tokenElement->getAttribute('cursor');
    if (defined($cursor) and $cursor ne '') {
        $usingCursor=1;
        # Match zero as an integer string so that a non-numeric value is
        # reported as wrong instead of comparing numerically equal to 0.
        if ($cursor=~/^\s*[+-]?0+\s*$/) {
            $self->log->pass("A cursor value was supplied with the resumptionToken and it ".
                             "correctly had the value zero in the first response");
        } else {
            $self->log->fail("A cursor value was supplied with the resumptionToken but it ".
                             "did not have the correct value zero for the first response. ".
                             "The value was '$cursor'.");
        }
    }

    $self->log->note("Got resumptionToken ".$tokenString);

    # Try using the resumption token. Before including a resumptionToken in
    # the URL of a subsequent request, we must encode all special characters
    # getData in this version of XML::DOM expands entities
    $req = $self->base_url."?verb=ListRecords&resumptionToken=".url_encode($tokenString);
    $response = $self->make_request($req);
    unless ( $response ) {
        $self->log->fail("Site failed to respond to request using resumptionToken: $req");
        return;
    }
    unless ( $self->parse_response($req,$response)) {
        $self->log->fail("Response to request using resumptionToken is not valid XML: $req");
        return;
    }

    my $errorList = $self->doc->getElementsByTagName('error');
    if ( $errorList and $errorList->getLength() > 0 ) {
        $self->log->fail("Response to request using resumptionToken was an error code: $req");
        return;
    }

    ###FIXME: put in test for cursor again, should be number of items returned in the
    ###FIXME: first response [Simeon/2005-10-11]

    $self->log->pass("Resumption tokens appear to work");
}
|
1052
|
|
|
|
|
|
|
|
|
1053
|
|
|
|
|
|
|
|
|
1054
|
|
|
|
|
|
|
=head2 METHODS CHECKING ERRORS AND EXCEPTIONS |
|
1055
|
|
|
|
|
|
|
|
|
1056
|
|
|
|
|
|
|
=head3 test_expected_errors($record_id) |
|
1057
|
|
|
|
|
|
|
|
|
1058
|
|
|
|
|
|
|
Each one of these requests should get a 400 response in OAI-PMH v1.1,
|
1059
|
|
|
|
|
|
|
or a 200 response in 2.0, along with a Reason_Phrase. Bump error_count |
|
1060
|
|
|
|
|
|
|
if this does not hold. Return the number of erroneous responses.
|
1061
|
|
|
|
|
|
|
|
|
1062
|
|
|
|
|
|
|
$record_id is a valid record identifier to be used in tests that require |
|
1063
|
|
|
|
|
|
|
one. |
|
1064
|
|
|
|
|
|
|
|
|
1065
|
|
|
|
|
|
|
=cut |
|
1066
|
|
|
|
|
|
|
|
|
1067
|
|
|
|
|
|
|
# Send a series of deliberately invalid requests and check that each one is
# answered with an appropriate error.
#
# $record_id - record identifier used to build the GetRecord request that
#              lacks a metadataPrefix. It is URL-encoded before being placed
#              in the query string, since identifiers may contain characters
#              that are reserved in a URL.
#
# For protocol version 1.1 every request must be answered with HTTP status
# 400. For 2.0 the response must parse and include at least one of the
# acceptable error codes. Any other protocol version is logged as a failure
# and $self->abort is called.
#
# Returns the number of requests that were not handled as expected.
sub test_expected_errors {
    my $self=shift;
    my ($record_id)=@_;

    $self->log->start("Checking exception handling (errors)");

    # An undefined identifier is treated as the empty string, as plain
    # concatenation would have done.
    my $encoded_id = defined($record_id) ? url_encode($record_id) : '';

    # format of entries:
    #   [ request_string, [acceptable_error_codes], xml_reason, reason ]
    # xml_reason is reported if the response cannot be parsed; reason is
    # reported if none of the acceptable error codes is present.
    my @request_list = (
        [ 'junk', [ 'badVerb' ], '', '' ],
        [ 'verb=junk', [ 'badVerb' ], '', '' ],
        [ 'verb=GetRecord&metadataPrefix=oai_dc', [ 'badArgument' ], '', '' ],
        [ 'verb=GetRecord&identifier='.$encoded_id, [ 'badArgument' ], '', '' ],
        [ 'verb=GetRecord&identifier=invalid"id&metadataPrefix=oai_dc', [ 'badArgument','idDoesNotExist' ], 'An XML parsing error may be due to incorrectly including the invalid identifier in the element of your XML error response; only valid arguments should be included. A response that includes ..baseURL.. is not well-formed XML because of the quotation mark (") in the identifier.', 'Either the badArgument or idDoesNotExist error codes would be appropriate to report this case.' ],
        [ 'verb=ListIdentifiers&until=junk', [ 'badArgument' ], '', '' ],
        [ 'verb=ListIdentifiers&from=junk', [ 'badArgument' ], '', '' ],
        [ 'verb=ListIdentifiers&resumptionToken=junk&until=2000-02-05', [ 'badArgument','badResumptionToken' ], '', 'Either the badArgument and/or badResumptionToken error codes may be reported in this case. If only one is reported then the badArgument error is to be preferred because the resumptionToken and until parameters are exclusive.' ],
        [ 'verb=ListRecords&metadataPrefix=oai_dc&from=junk', [ 'badArgument' ], '', '' ],
        [ 'verb=ListRecords&resumptionToken=junk', [ 'badResumptionToken' ], '', '' ],
        [ 'verb=ListRecords&metadataPrefix=oai_dc&resumptionToken=junk&until=1990-01-10', [ 'badArgument','badResumptionToken' ] , '', 'Either the badArgument and/or badResumptionToken error codes may be reported in this case. If only one is reported then the badArgument error is to be preferred because the resumptionToken and until parameters are exclusive.' ],
        [ 'verb=ListRecords&metadataPrefix=oai_dc&until=junk', [ 'badArgument' ], '', '' ],
        [ 'verb=ListRecords', [ 'badArgument' ], '', '' ]
    );

    my $n=0;   # number of requests that were NOT handled correctly
    foreach my $rrr ( @request_list ) {
        my ($request_string, $error_codes, $xml_reason, $reason)=@$rrr;
        my $req = $self->base_url.'?'.$request_string;
        my $ok_errors=join(' or ',@$error_codes);

        my $response=$self->make_request($req);

        # TBD: $response->status_line should also be checked? see output from
        # physnet.uni-oldenburg.de/oai/oai.php
        if ($self->protocol_version eq "1.1") {
            # A missing response is counted as a failure instead of dying
            # on a method call on an undefined value.
            if (!$response or $response->code ne "400") {
                $self->log->note("Invalid requests which failed to return 400:") if $n == 0;
                $n++;
                $self->log->fail("Expected 400 from: $request_string");
            }
        } elsif ($self->protocol_version eq "2.0") {
            # The document must contain the proper error code
            unless ($self->parse_response($req,$response,$xml_reason)) {
                $self->log->fail("Can't parse malformed response. ".html_escape($xml_reason));
                $n++;
                next;
            }
            # check that the error code is in the error_list
            my $error_elements = $self->doc->getElementsByTagName('error');
            if (my $matching_code=$self->error_elements_include($error_elements, $error_codes)) {
                $self->log->pass("Error response correctly includes error code '$matching_code'");
            } else {
                $self->log->fail("Exception/error response did not contain error code ".
                                 "'$ok_errors' ".html_escape($reason));
                $n++;
                next;
            }
        } else {
            $self->log->fail("Invalid protocol version returned");
            $self->abort("test_expected_errors - invalid protocol version");
        }
    }
    my $total = scalar @request_list;
    if ($n==0) {
        $self->log->pass("All $total error requests properly handled");
    } else {
        $self->log->warn("Only ".($total-$n)." out of $total error requests properly handled");
    }
    return($n);
}
|
1135
|
|
|
|
|
|
|
|
|
1136
|
|
|
|
|
|
|
|
|
1137
|
|
|
|
|
|
|
=head3 test_expected_v2_errors($earliest_datestamp,$metadata_prefix) |
|
1138
|
|
|
|
|
|
|
|
|
1139
|
|
|
|
|
|
|
There are some additional exception tests for OAI-PMH version 2.0. |
|
1140
|
|
|
|
|
|
|
|
|
1141
|
|
|
|
|
|
|
=cut |
|
1142
|
|
|
|
|
|
|
|
|
1143
|
|
|
|
|
|
|
# Run the exception tests that only apply to OAI-PMH 2.0.
#
# $earliest_datestamp - earliest datestamp of the repository; a date one
#                       year before it is used for the noRecordsMatch test
# $metadata_prefix    - metadataPrefix used in the ListRecords requests
#
# Each outcome is written to the log; nothing is returned.
sub test_expected_v2_errors {
    my ($self, $earliest_datestamp, $metadata_prefix) = @_;

    $self->log->start("Checking for version 2.0 specific exceptions");

    my $too_early_date = one_year_before($earliest_datestamp);

    # Each case: the query string to send, the error codes that are
    # acceptable in the response, and the reason an error is expected.
    my @cases = (
        {
            query  => "verb=ListRecords&metadataPrefix=".url_encode($metadata_prefix).
                      "&from=2002-02-05&until=2002-02-06T05:35:00Z",
            codes  => ['badArgument'],
            reason => 'The request has different granularities for the from and until parameters.',
        },
        {
            query  => "verb=ListRecords&metadataPrefix=".url_encode($metadata_prefix).
                      "&until=$too_early_date",
            codes  => ['noRecordsMatch'],
            reason => 'The request specified a date one year before the earliestDatestamp given in the Identify response. '.
                      'There should therefore not be any records with datestamps on or before this date and a noRecordsMatch '.
                      'error code should be returned.',
        },
    );

    foreach my $case (@cases) {
        my $request_string = $case->{query};
        my $error_codes    = $case->{codes};
        my $reason         = $case->{reason};

        my $req = $self->base_url."?".$request_string;
        my $response = $self->make_request($req);

        # the response has to parse before it can be searched for error codes
        if (! $self->parse_response($req,$response)) {
            $self->log->fail("Error in parsing XML response to exception request: $request_string");
            next;
        }

        my $ok_errors = join(' or ', @$error_codes);
        my $error_elements = $self->doc->getElementsByTagName('error');

        # no error elements at all
        unless ($error_elements and $error_elements->getLength != 0) {
            $self->log->fail("Failed to extract error code from the response to request: ".
                             $request_string." ".$reason);
            next;
        }

        # at least one error element: is one of the acceptable codes there?
        my $matching_code = $self->error_elements_include($error_elements, $error_codes);
        if ($matching_code) {
            $self->log->pass("Error response correctly includes error code '$matching_code'");
        } else {
            $self->log->fail("Error code $ok_errors not found in response but should be given ".
                             "to the request: ".$request_string." ".$reason);
        }
    }
    return;
}
|
1186
|
|
|
|
|
|
|
|
|
1187
|
|
|
|
|
|
|
|
|
1188
|
|
|
|
|
|
|
=head2 METHODS TO TEST USE OF HTTP POST |
|
1189
|
|
|
|
|
|
|
|
|
1190
|
|
|
|
|
|
|
=head3 test_post_requests($metadata_prefix)
|
1191
|
|
|
|
|
|
|
|
|
1192
|
|
|
|
|
|
|
Test responses to POST requests. Do both the simplest possible -- the Identify |
|
1193
|
|
|
|
|
|
|
verb -- and a GetRecord request which uses two additional parameters. |
|
1194
|
|
|
|
|
|
|
|
|
1195
|
|
|
|
|
|
|
=cut |
|
1196
|
|
|
|
|
|
|
|
|
1197
|
|
|
|
|
|
|
# Exercise the repository via HTTP POST: first a bare Identify request, then
# a GetRecord request for the example record using $metadata_prefix. If no
# example record identifier is available the GetRecord test is logged as a
# failure instead.
sub test_post_requests {
    my ($self, $metadata_prefix) = @_;

    $self->log->start("Checking that HTTP POST requests are handled correctly");

    # Test 1: simplest possible request
    my %identify = ( verb => "Identify" );
    $self->test_post_request(1, \%identify);

    # Test 2: request with two additional parameters
    my $record_id = $self->example_record_id;
    if (!$record_id) {
        $self->log->fail("Cannot test GetRecord via POST without and example record identifier");
    } else {
        my %get_record = (
            'verb'           => "GetRecord",
            'identifier'     => $record_id,
            'metadataPrefix' => $metadata_prefix,
        );
        $self->test_post_request(2, \%get_record);
    }
}
|
1212
|
|
|
|
|
|
|
|
|
1213
|
|
|
|
|
|
|
|
|
1214
|
|
|
|
|
|
|
# Helper for test_post_requests: POST $post_data to the base URL and log the
# outcome as test number $num.
#
# An unsuccessful HTTP response is logged as a failure with its status line.
# A successful one passes only if it is a response for the verb in
# $post_data->{verb}; otherwise a failure is logged, with the message
# depending on whether the body is an OAI error response or not.
sub test_post_request {
    my ($self, $num, $post_data) = @_;

    my $response = $self->make_request($self->base_url, $post_data);
    if (!$response->is_success) {
        $self->log->fail("POST test $num was unsuccessful. Server returned HTTP Status: '".
                         $response->status_line."'");
    } else {
        my $verb = $post_data->{verb};
        my $outcome = "POST test $num for $verb was";
        if ( $self->is_verb_response($response,$verb) ) {
            $self->log->pass("$outcome successful");
        } elsif ( $self->check_error_response($response) ) {
            $self->log->fail("$outcome unsuccessful, an OAI error response was received");
        } else {
            $self->log->fail("$outcome unsuccessful, got neither a valid response nor an error");
        }
    }
}
|
1236
|
|
|
|
|
|
|
|
|
1237
|
|
|
|
|
|
|
|
|
1238
|
|
|
|
|
|
|
=head2 METHODS CHECKING ELEMENTS WITHIN VERB AND ERROR RESPONSES |
|
1239
|
|
|
|
|
|
|
|
|
1240
|
|
|
|
|
|
|
=head3 check_response_date($req, $doc) |
|
1241
|
|
|
|
|
|
|
|
|
1242
|
|
|
|
|
|
|
Check responseDate for being in UTC format |
|
1243
|
|
|
|
|
|
|
(should perhaps also check that it is at least the current day?) |
|
1244
|
|
|
|
|
|
|
|
|
1245
|
|
|
|
|
|
|
=cut |
|
1246
|
|
|
|
|
|
|
|
|
1247
|
|
|
|
|
|
|
# Check that the first responseDate element of the current document
# ($self->doc) holds a UTC datetime of the form YYYY-MM-DDThh:mm:ssZ and log
# a pass or fail accordingly. A failure is also logged if the element or
# its content cannot be found.
#
# $req and $doc are accepted for interface compatibility but are not used;
# the document examined is always $self->doc.
sub check_response_date {
    my $self=shift;
    my ($req, $doc) = @_;

    my $elements = $self->doc->getElementsByTagName('responseDate');
    # assume rest of validity already checked, just take first
    my $item;
    my $child;
    if ($elements and $item=$elements->item(0) and $child=$item->getFirstChild()) {
        my $date = $child->getData();
        # The pattern is anchored so that extra text before or after the
        # datetime is rejected; only surrounding whitespace is tolerated.
        if ($date=~/^\s*\d{4}-\d\d-\d\dT\d\d:\d\d:\d\dZ\s*$/) {
            $self->log->pass("responseDate has correct format: $date");
        } else {
            $self->log->fail("Bad responseDate of $date, this is not in UTC DateTime ".
                             "(YYYY-MM-DDThh:mm:ssZ) format");
        }
    } else {
        $self->log->fail("Failed to extract responseDate");
    }
}
|
1267
|
|
|
|
|
|
|
|
|
1268
|
|
|
|
|
|
|
|
|
1269
|
|
|
|
|
|
|
=head3 check_schema_name($req, $doc) |
|
1270
|
|
|
|
|
|
|
|
|
1271
|
|
|
|
|
|
|
Given the response to one of the OAI verbs, make sure that it is
|
1272
|
|
|
|
|
|
|
going to be validated against the "official" OAI schema, and not |
|
1273
|
|
|
|
|
|
|
one that the repository made up for itself. If the response can't |
|
1274
|
|
|
|
|
|
|
be parsed, or if there is no OAI-PMH element, or if the schema is |
|
1275
|
|
|
|
|
|
|
incorrect, print an error message and bump the error_count. |
|
1276
|
|
|
|
|
|
|
|
|
1277
|
|
|
|
|
|
|
Return true if the schema name and date check out, else return undef |
|
1278
|
|
|
|
|
|
|
|
|
1279
|
|
|
|
|
|
|
=cut |
|
1280
|
|
|
|
|
|
|
|
|
1281
|
|
|
|
|
|
|
# Check that the OAI-PMH root element of the current document ($self->doc)
# has an xsi:schemaLocation attribute that pairs the OAI-PMH 2.0 namespace
# with the official schema location.
#
# $req - request URL, used only in the failure message
# $doc - accepted for interface compatibility, not used ($self->doc is read)
#
# Returns 1 if the pairing is present, otherwise logs a failure and
# returns 0.
sub check_schema_name {
    my $self=shift;
    my ($req, $doc) = @_;

    my $namespace = 'http://www.openarchives.org/OAI/2.0/';
    my $location = 'http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd';

    my $elements = $self->doc->getElementsByTagName('OAI-PMH'); #NodeList
    unless ( $elements and $elements->getLength() > 0 ) {
        $self->log->fail("Response to $req did not contain a OAI-PMH element");
        return(0);
    }
    my $attributes = $elements->item(0)->getAttributes; #Node->NamedNodeMap
    my $attr = $attributes ? $attributes->getNamedItem('xsi:schemaLocation') : undef; #Node
    unless ( $attr ) {
        $self->log->fail("No xsi:schemaLocation attribute for the OAI-PMH element was ".
                         "found, expected xsi:schemaLocation=\"$namespace $location\"");
        return(0);
    }
    my $pair = $attr->getNodeValue(); # must pair OAI namespace with schema
    # \Q..\E makes the URLs match literally (an unescaped '.' would match any
    # character). The two URLs must be separated by whitespace, and the
    # location must end at whitespace or at the end of the value. Further
    # namespace/location pairs may follow.
    unless ( $pair =~ /^\s*\Q$namespace\E\s+\Q$location\E(?:\s|\z)/ ) {
        $self->log->fail("Error in pairing OAI namespace with schema location, expected: ".
                         "xsi:schemaLocation=\"$namespace $location\" but got $pair");
        return(0);
    }
    return(1);
}
|
1309
|
|
|
|
|
|
|
|
|
1310
|
|
|
|
|
|
|
|
|
1311
|
|
|
|
|
|
|
=head3 check_protocol_version |
|
1312
|
|
|
|
|
|
|
|
|
1313
|
|
|
|
|
|
|
Extract the protocol version being used from the Identify response, check that it is |
|
1314
|
|
|
|
|
|
|
valid and then abort unless 2.0. |
|
1315
|
|
|
|
|
|
|
|
|
1316
|
|
|
|
|
|
|
=cut |
|
1317
|
|
|
|
|
|
|
|
|
1318
|
|
|
|
|
|
|
# Read the protocolVersion element from the current document ($self->doc),
# store it in $self->protocol_version and log a pass if it is 2.0.
#
# $self->abort is called when the element is missing or empty, when the
# value is not one of 1.0, 1.1 or 2.0, and when the value is valid but not
# 2.0 (only 2.0 repositories are validated).
sub check_protocol_version {
    my $self=shift;
    # Extract the version number of the validator to run
    my $x = $self->doc->getElementsByTagName('protocolVersion');
    # A node list object is true even when it is empty, so the element and
    # its text content must be checked explicitly.
    my $element = ($x and $x->getLength() > 0) ? $x->item(0) : undef;
    my $text = $element ? $element->getFirstChild : undef;
    unless ($text) {
        $self->abort("Unknown protocol version, failed to extract protocolVersion element from Identify response");
        return; # nothing to examine, in case abort() returns
    }
    my $protocol_version = $text->getData;
    if ($protocol_version ne '2.0' and
        $protocol_version ne '1.1' and
        $protocol_version ne '1.0') {
        $self->abort("Invalid protocol version ($protocol_version)");
    }
    $self->protocol_version( $protocol_version );
    if ($protocol_version ne '2.0') {
        $self->abort("Repository reports OAI-PMH protocol version $protocol_version and will not be validated. Guidelines for upgrading to 2.0 can be found at http://www.openarchives.org/OAI/2.0/migration.htm\n\n");
    }
    $self->log->pass("Correctly reports OAI-PMH protocol version 2.0");
}
|
1338
|
|
|
|
|
|
|
|
|
1339
|
|
|
|
|
|
|
|
|
1340
|
|
|
|
|
|
|
=head2 is_verb_response($response,$verb)
|
1341
|
|
|
|
|
|
|
|
|
1342
|
|
|
|
|
|
|
Return true if $response is a response for the specified $verb. |
|
1343
|
|
|
|
|
|
|
|
|
1344
|
|
|
|
|
|
|
FIXME -- need better checks! |
|
1345
|
|
|
|
|
|
|
|
|
1346
|
|
|
|
|
|
|
=cut |
|
1347
|
|
|
|
|
|
|
|
|
1348
|
|
|
|
|
|
|
# Decide whether $response looks like a response to the OAI-PMH verb $verb.
#
# The response content is parsed with $self->parser; a parse failure is
# swallowed and simply means "no". Returns 1 if the parsed document holds
# exactly one element named $verb, otherwise returns nothing.
sub is_verb_response {
    my ($self, $response, $verb) = @_;

    # parse errors are deliberately ignored: unparseable means not valid
    my $parsed = eval { $self->parser->parse($response->content) };
    return unless $parsed;

    my $matches = $parsed->getElementsByTagName($verb);
    return unless $matches;            # no node list at all
    return unless $matches->getLength == 1; # not the one element we expected
    return(1);
}
|
1358
|
|
|
|
|
|
|
|
|
1359
|
|
|
|
|
|
|
|
|
1360
|
|
|
|
|
|
|
=head3 error_elements_include($error_elements,$error_codes) |
|
1361
|
|
|
|
|
|
|
|
|
1362
|
|
|
|
|
|
|
Determine whether the list of error elements ($error_elements) includes at least |
|
1363
|
|
|
|
|
|
|
one of the desired codes. Return string with first matching error code, else |
|
1364
|
|
|
|
|
|
|
return false/nothing. |
|
1365
|
|
|
|
|
|
|
|
|
1366
|
|
|
|
|
|
|
Does a sanity check on $error_elements to check that it is set and has length>0
|
1367
|
|
|
|
|
|
|
before trying to match, so code calling it can simply do a
|
1368
|
|
|
|
|
|
|
getElementsByTagName or similar before calling.
|
1369
|
|
|
|
|
|
|
|
|
1370
|
|
|
|
|
|
|
=cut |
|
1371
|
|
|
|
|
|
|
|
|
1372
|
|
|
|
|
|
|
# Look for one of the codes in @$error_codes among the 'code' attributes of
# the elements in the node list $error_elements.
#
# Elements are examined in document order and, for each element, the codes
# in the order given; the first code that matches is returned. An element
# without a code attribute is treated as having code 'no-code-attribute'.
# Returns nothing if the list is unset, empty, or has no matching code.
# Each comparison is written to the log when $self->debug is true.
sub error_elements_include {
    my ($self, $error_elements, $error_codes) = @_;

    # sanity check
    return unless $error_elements;
    return if $error_elements->getLength == 0;

    my $idx = 0;
    while ($idx < $error_elements->getLength) {
        for my $wanted (@$error_codes) {
            my $code = $error_elements->item($idx)->getAttribute('code') || 'no-code-attribute';
            if ($self->debug) {
                $self->log->note("$code =? $wanted");
            }
            if ($code eq $wanted) {
                return($wanted);
            }
        }
        $idx++;
    }
    return;
}
|
1386
|
|
|
|
|
|
|
|
|
1387
|
|
|
|
|
|
|
|
|
1388
|
|
|
|
|
|
|
|
|
1389
|
|
|
|
|
|
|
=head3 check_error_response($response) |
|
1390
|
|
|
|
|
|
|
|
|
1391
|
|
|
|
|
|
|
Given the response to an HTTP request, make sure it is not an
|
1392
|
|
|
|
|
|
|
OAI-PMH error message. The $response is a success. If it is an
|
1393
|
|
|
|
|
|
|
OAI error message, return 1; if the response cannot be parsed, or
|
1394
|
|
|
|
|
|
|
contains no error elements, return nothing (it is not an error response).
|
1395
|
|
|
|
|
|
|
|
|
1396
|
|
|
|
|
|
|
FIXME -- need better checks! |
|
1397
|
|
|
|
|
|
|
|
|
1398
|
|
|
|
|
|
|
FIXME -- need to merge this functionality in with is_error_response
|
1399
|
|
|
|
|
|
|
|
|
1400
|
|
|
|
|
|
|
=cut |
|
1401
|
|
|
|
|
|
|
|
|
1402
|
|
|
|
|
|
|
# Decide whether $response is an OAI-PMH error response.
#
# The response content is parsed with $self->parser; a parse failure is
# swallowed. Returns 1 if the parsed document contains at least one 'error'
# element. Returns nothing if the content cannot be parsed or holds no
# error elements.
sub check_error_response {
    my ($self, $response) = @_;

    # parse errors are deliberately ignored: unparseable means not a valid error
    my $parsed = eval { $self->parser->parse($response->content) };
    return unless $parsed;

    my $errors = $parsed->getElementsByTagName('error');
    if ($errors and $errors->getLength() > 0) {
        return(1);
    }
    return; # no error codes so not an error response
}
|
1412
|
|
|
|
|
|
|
|
|
1413
|
|
|
|
|
|
|
|
|
1414
|
|
|
|
|
|
|
=head3 get_earliest_datestamp() |
|
1415
|
|
|
|
|
|
|
|
|
1416
|
|
|
|
|
|
|
A new exception check for Version 2.0 raises noRecordsMatch errorcode |
|
1417
|
|
|
|
|
|
|
if the set of records returned by ListRecords is empty. This requires |
|
1418
|
|
|
|
|
|
|
that we know the earliest date in the repository. Also check that the |
|
1419
|
|
|
|
|
|
|
earliest date matches the specified granularity. |
|
1420
|
|
|
|
|
|
|
|
|
1421
|
|
|
|
|
|
|
Called only for version 2.0 or greater. |
|
1422
|
|
|
|
|
|
|
|
|
1423
|
|
|
|
|
|
|
Since the Identify response has already been validated, we know |
|
1424
|
|
|
|
|
|
|
there is exactly one earliestDatestamp element in the current document. |
|
1425
|
|
|
|
|
|
|
Extract this value, check it, and if it looks good then set |
|
1426
|
|
|
|
|
|
|
$self->earliest_datestamp and return false. |
|
1427
|
|
|
|
|
|
|
|
|
1428
|
|
|
|
|
|
|
If there is an error then return string explaining that. |
|
1429
|
|
|
|
|
|
|
|
|
1430
|
|
|
|
|
|
|
=cut |
|
1431
|
|
|
|
|
|
|
|
|
1432
|
|
|
|
|
|
|
# Extract the earliestDatestamp value from the current document
# ($self->doc), check that it is a date of the form YYYY-MM-DD[...] and that
# it agrees with $self->granularity ('days' or 'seconds').
#
# On success the value is stored via $self->earliest_datestamp and nothing
# is returned. On failure a string describing the problem is returned and
# $self->earliest_datestamp is left unchanged.
sub get_earliest_datestamp {
    my $self=shift;

    my $earliest = $self->doc->getElementsByTagName('earliestDatestamp');
    my $el = $earliest ? $earliest->item(0) : undef;
    return("Can't get earliestDatestamp element from Identify response.") unless ($el);
    my $text = $el->getFirstChild;
    return("earliestDatestamp element is empty in Identify response.") unless ($text);

    my $error='';
    my $earliest_datestamp = $text->getData;
    $self->log->note("Earliest datestamp in repository is $earliest_datestamp") if $self->debug;

    # Capture into lexicals from the match itself. On a failed match the
    # list is empty, so no stale $1..$4 left over from an earlier successful
    # match can be mistaken for parts of this datestamp.
    my ($year, $month, $day, $time) =
        $earliest_datestamp =~ /^([0-9]{4})-([0-9][0-9])-([0-9][0-9])(.*)$/;
    if (!defined($year)) {
        $error="is not in ISO8601 format";
    } elsif ( $time eq '' and $self->granularity eq 'seconds') {
        $error="must have seconds granularity (format YYYY-MM-DDThh:mm:ssZ) to match ".
               "the granularity for the repository. The granularity has been set to seconds ".
               "by the granularity element of the Identify response.\n";
    } elsif ( $time ne '' and $self->granularity eq 'days') {
        $error="must have days granularity (format YYYY-MM-DD) to match the granularity for ".
               "the repository. The granularity has been set to days by the granularity ".
               "element of the Identify response (or that element is bad/missing).\n";
    } elsif ( $self->granularity eq 'seconds' and $time !~ /^T\d\d:\d\d:\d\d(\.\d+)?Z$/ ) {
        $error="does not have the correct format for the time part of the UTCdatetime. The ".
               "overall format must be YYYY-MM-DDThh:mm:ssZ.\n";
    }
    if ($error) {
        # Sanitize for error message
        return("The earliestDatestamp in the identify response (".
               sanitize($earliest_datestamp).") $error");
    } else {
        $self->earliest_datestamp($earliest_datestamp);
        return;
    }
}
|
1468
|
|
|
|
|
|
|
|
|
1469
|
|
|
|
|
|
|
|
|
1470
|
|
|
|
|
|
|
=head3 parse_granularity($granularity_element) |
|
1471
|
|
|
|
|
|
|
|
|
1472
|
|
|
|
|
|
|
Parse contents of the granularity element of the Identify response. Returns either |
|
1473
|
|
|
|
|
|
|
'days', 'seconds' or nothing on failure. Sets $self->granularity if valid, otherwise |
|
1474
|
|
|
|
|
|
|
does not change setting. |
|
1475
|
|
|
|
|
|
|
|
|
1476
|
|
|
|
|
|
|
As of v2.0 the granularity element is mandatory, see: |
|
1477
|
|
|
|
|
|
|
http://www.openarchives.org/OAI/openarchivesprotocol.html#Identify |
|
1478
|
|
|
|
|
|
|
|
|
1479
|
|
|
|
|
|
|
=cut |
|
1480
|
|
|
|
|
|
|
|
|
1481
|
|
|
|
|
|
|
# Interpret the granularity element(s) of an Identify response.
#
# $gran - node list of granularity elements
#
# If the list holds exactly one element whose text is 'YYYY-MM-DD' or
# 'YYYY-MM-DDThh:mm:ssZ', $self->granularity is set to 'days' or 'seconds'
# respectively and that value is returned. In every other case a failure is
# logged, $self->granularity is left untouched and nothing is returned.
sub parse_granularity {
    my ($self, $gran) = @_;

    # exactly one granularity element is required
    if (!$gran or $gran->getLength==0) {
        $self->log->fail("Missing granularity element");
        return;
    }
    if ($gran->getLength>1) {
        $self->log->fail("Multiple granularity elements");
        return;
    }

    #schema validation guarantees that there is a spec here
    my $spec = $gran->item(0)->getFirstChild->getData;

    my %granularity_for = (
        'YYYY-MM-DD'           => 'days',
        'YYYY-MM-DDThh:mm:ssZ' => 'seconds',
    );
    if (exists $granularity_for{$spec}) {
        $self->granularity($granularity_for{$spec});
        return($self->granularity);
    }

    $self->log->fail("Bad value for the granularity element '$spec', must be either ".
                     "YYYY-MM-DD or YYYY-MM-DDThh:mm:ssZ");
    return;
}
|
1505
|
|
|
|
|
|
|
|
|
1506
|
|
|
|
|
|
|
|
|
1507
|
|
|
|
|
|
|
=head3 get_datestamp_granularity($datestamp) |
|
1508
|
|
|
|
|
|
|
|
|
1509
|
|
|
|
|
|
|
Parse the datestamp supplied and return 'days' if it is valid with granularity |
|
1510
|
|
|
|
|
|
|
of days, 'seconds' if it is valid for seconds granularity, and nothing if it is not |
|
1511
|
|
|
|
|
|
|
valid. |
|
1512
|
|
|
|
|
|
|
|
|
1513
|
|
|
|
|
|
|
# FIXME - should add more validation |
|
1514
|
|
|
|
|
|
|
|
|
1515
|
|
|
|
|
|
|
=cut |
|
1516
|
|
|
|
|
|
|
|
|
1517
|
|
|
|
|
|
|
# Classify $datestamp by granularity.
#
# Returns 'days' for a valid YYYY-MM-DD value, 'seconds' for a valid
# YYYY-MM-DDThh:mm:ss[.fraction]Z value and nothing otherwise. Only field
# ranges are checked, not calendar validity (e.g. 02-31 is accepted).
sub get_datestamp_granularity {
    my $self=shift;
    my ($datestamp)=@_;
    return unless (defined $datestamp);
    # \A...\z rather than ^...$ so that a trailing newline is not accepted,
    # and [0-9] rather than \d so that only ASCII digits match.
    if ($datestamp=~/\A([0-9]{4})-([0-9]{2})-([0-9]{2})\z/) {
        my ($month,$day)=($2,$3);
        return 'days' if ($month>=1 and $month<=12 and $day>=1 and $day<=31);
    } elsif ($datestamp=~/\A([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2}):([0-9]{2})(\.[0-9]+)?Z\z/) {
        my ($month,$day,$hour,$min,$sec)=($2,$3,$4,$5,$6);
        # Seconds value 60 is tolerated to allow for a leap second
        return 'seconds' if ($month>=1 and $month<=12 and $day>=1 and $day<=31 and
                             $hour<24 and $min<60 and $sec<=60);
    }
    return;
}
|
1527
|
|
|
|
|
|
|
|
|
1528
|
|
|
|
|
|
|
|
|
1529
|
|
|
|
|
|
|
=head3 is_no_records_match |
|
1530
|
|
|
|
|
|
|
|
|
1531
|
|
|
|
|
|
|
Returns true if the current document contains an error code element with the code "noRecordsMatch" |
|
1532
|
|
|
|
|
|
|
|
|
1533
|
|
|
|
|
|
|
### FIXME - should be merged into an extended is_error_response |
|
1534
|
|
|
|
|
|
|
|
|
1535
|
|
|
|
|
|
|
=cut |
|
1536
|
|
|
|
|
|
|
|
|
1537
|
|
|
|
|
|
|
# Look up all 'error' elements in the current document ($self->doc) and
# hand them to error_elements_include together with the single code
# 'noRecordsMatch'; whatever that method returns is returned unchanged.
sub is_no_records_match {
    my ($self)=@_;
    my @wanted_codes=('noRecordsMatch');
    my $errors=$self->doc->getElementsByTagName('error');
    return($self->error_elements_include($errors,\@wanted_codes));
}
|
1542
|
|
|
|
|
|
|
|
|
1543
|
|
|
|
|
|
|
|
|
1544
|
|
|
|
|
|
|
=head3 get_resumption_token() |
|
1545
|
|
|
|
|
|
|
|
|
1546
|
|
|
|
|
|
|
See if there is a resumptionToken with this response, return |
|
1547
|
|
|
|
|
|
|
value if present, empty if not or if there is some other error. |
|
1548
|
|
|
|
|
|
|
|
|
1549
|
|
|
|
|
|
|
=cut |
|
1550
|
|
|
|
|
|
|
|
|
1551
|
|
|
|
|
|
|
# Return the content of the first resumptionToken element in $self->doc.
#
# Returns nothing when the document has no resumptionToken element or
# when the first such element has no content.
sub get_resumption_token {
    my ($self)=@_;

    my $tokens=$self->doc->getElementsByTagName('resumptionToken');
    # No list at all, or an empty list, means no resumptionToken
    return unless ($tokens and $tokens->getLength()!=0);

    # The first child is undefined unless the element has content
    my $content=$tokens->item(0)->getFirstChild();
    return unless ($content);
    return($content->getData());
}
|
1569
|
|
|
|
|
|
|
|
|
1570
|
|
|
|
|
|
|
|
|
1571
|
|
|
|
|
|
|
=head3 is_error_response($details) |
|
1572
|
|
|
|
|
|
|
|
|
1573
|
|
|
|
|
|
|
Look at the parsed response in $self->doc to see if it is an error response, |
|
1574
|
|
|
|
|
|
|
parse data and return true if it is. |
|
1575
|
|
|
|
|
|
|
|
|
1576
|
|
|
|
|
|
|
Returns true (a printable string containing the error messages) if response was a valid |
|
1577
|
|
|
|
|
|
|
OAI_PMH error response, codes in %$details if a hash reference is passed in. |
|
1578
|
|
|
|
|
|
|
|
|
1579
|
|
|
|
|
|
|
=cut |
|
1580
|
|
|
|
|
|
|
|
|
1581
|
|
|
|
|
|
|
# Check whether the parsed response in $self->doc contains 'error'
# elements.
#
# $details - optional hash reference; for every error found an entry
#            code => message is stored in it ('[NO MESSAGE RETURNED]'
#            when the element has no content). Anything that is not a
#            plain hash reference is ignored.
#
# Returns a printable string listing all the errors (one "[code: message] "
# or "[code] " chunk per element) when there is at least one error
# element, otherwise nothing.
sub is_error_response {
    my ($self,$details)=@_;
    $details={} if (ref($details) ne 'HASH'); #dummy hash unless one supplied

    my $errors=$self->doc->getElementsByTagName('error');
    return unless ($errors and $errors->getLength()>=1);

    my $summary='';
    for my $idx (0 .. $errors->getLength() - 1) {
        my $element=$errors->item($idx);
        my $code=$element->getAttribute("code");
        my $text_node=$element->getFirstChild();
        if ($text_node) {
            $details->{$code}=$text_node->getData();
            $summary.="[$code: $details->{$code}] ";
            next;
        }
        # No content: warn, except for noSetHierarchy where the error code
        # alone really is sufficient
        if ($code ne 'noSetHierarchy') {
            $self->log->warn("No human readable message included in error element for ".
                             "$code error, this is discouraged");
        }
        $details->{$code}='[NO MESSAGE RETURNED]';
        $summary.="[$code] ";
    }
    return($summary);
}
|
1611
|
|
|
|
|
|
|
|
|
1612
|
|
|
|
|
|
|
|
|
1613
|
|
|
|
|
|
|
=head3 get_admin_email() |
|
1614
|
|
|
|
|
|
|
|
|
1615
|
|
|
|
|
|
|
Extract admin email from a parsed Identify response (in $self->doc). |
|
1616
|
|
|
|
|
|
|
Also note that the email target may have been set via form option |
|
1617
|
|
|
|
|
|
|
|
|
1618
|
|
|
|
|
|
|
Returns the pair of ($email,$error) where $email is the combined |
|
1619
|
|
|
|
|
|
|
set of email addresses (comma separated). $error will be undef |
|
1620
|
|
|
|
|
|
|
or a string with error message to users. |
|
1621
|
|
|
|
|
|
|
|
|
1622
|
|
|
|
|
|
|
=cut |
|
1623
|
|
|
|
|
|
|
|
|
1624
|
|
|
|
|
|
|
# Extract the administrator email address(es) from the adminEmail
# elements of the parsed Identify response in $self->doc.
#
# Returns the comma separated list of addresses on success. Returns undef
# after logging a failure when there is no adminEmail element or when any
# adminEmail element is empty. Returns (undef,$msg) when
# $self->bad_admin_email rejects one of the addresses. A 'mailto:' prefix
# is stripped from addresses, with a warning.
sub get_admin_email {
    my $self=shift;

    my $adminEmailElements = $self->doc->getElementsByTagName('adminEmail');
    my $n = ($adminEmailElements ? $adminEmailElements->getLength : 0);
    if ($n == 0) {
        $self->log->fail("No adminEmail element!");
        return(undef);
    }

    my @emails=();
    for my $i (0 .. $n-1) {
        # Every element is checked for content, not just the first, so
        # that an empty later element is reported rather than causing a
        # method call on undef.
        my $text_node=$adminEmailElements->item($i)->getFirstChild;
        unless ($text_node) {
            $self->log->fail("adminEmail element is empty!");
            return(undef);
        }
        my $e=$text_node->getData;
        if ($e=~s/mailto://g) {
            $self->log->warn("Stripped mailto: prefix from adminEmail address, this ".
                             "should not be included.");
        }
        if (my $msg=$self->bad_admin_email($e)) {
            return(undef,$msg);
        }
        push(@emails,$e);
    }

    my $email=join(',',@emails);
    $self->log->pass("Administrator email address is '$email'");
    return($email);
}
|
1656
|
|
|
|
|
|
|
|
|
1657
|
|
|
|
|
|
|
|
|
1658
|
|
|
|
|
|
|
=head3 bad_admin_email($admin_email) |
|
1659
|
|
|
|
|
|
|
|
|
1660
|
|
|
|
|
|
|
Check for some stupid email addresses to avoid so much bounced email. |
|
1661
|
|
|
|
|
|
|
Returns a string (True) if bad, else nothing. |
|
1662
|
|
|
|
|
|
|
|
|
1663
|
|
|
|
|
|
|
=cut |
|
1664
|
|
|
|
|
|
|
|
|
1665
|
|
|
|
|
|
|
# Reject obviously unusable administrator email addresses.
#
# Returns a short (true) description string and logs a failure when
# $admin_email is at localhost or does not look like user@host.domain.
# Returns nothing when the address looks acceptable.
sub bad_admin_email {
    my $self=shift;
    my ($admin_email)=@_;
    if ($admin_email=~/\@localhost\z/i) {
        $self->log->fail("adminEmail '$admin_email' is local. This must be corrected to a ".
                         "valid globally resolvable email address before tests can continue");
        return("local adminEmail");
    # The local part may be a single character ([\w\-\.]* not +), the top
    # level domain may be in either case, and \z (not $) means a trailing
    # newline is not silently accepted.
    } elsif ($admin_email!~/\A\w[\w\-\.]*\@[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,}\z/) {
        $self->log->fail("adminEmail '$admin_email' looks bogus. This must be corrected to ".
                         "a valid email address before tests can continue");
        return("looks like bogus adminEmail");
    }
    return;
}
|
1679
|
|
|
|
|
|
|
|
|
1680
|
|
|
|
|
|
|
|
|
1681
|
|
|
|
|
|
|
=head2 METHODS FOR MAKING REQUESTS AND PARSING RESPONSES |
|
1682
|
|
|
|
|
|
|
|
|
1683
|
|
|
|
|
|
|
=head3 make_request_and_validate($verb, $req) |
|
1684
|
|
|
|
|
|
|
|
|
1685
|
|
|
|
|
|
|
Given the base URL that we are validating, the Verb that we are checking |
|
1686
|
|
|
|
|
|
|
and the complete query to be sent to the OAI server, get the response to |
|
1687
|
|
|
|
|
|
|
the verb. Validation has already been done, so we need only do some |
|
1688
|
|
|
|
|
|
|
special checks here. Return the response to the OAI verb, |
|
1689
|
|
|
|
|
|
|
or undef if the OAI server failed to respond to that verb. |
|
1690
|
|
|
|
|
|
|
|
|
1691
|
|
|
|
|
|
|
Side effects: errors may be printed and error_count bumped. |
|
1692
|
|
|
|
|
|
|
If the verb involved is "Identify" then set the version number and the |
|
1693
|
|
|
|
|
|
|
email address, assuming that some response has been obtained. |
|
1694
|
|
|
|
|
|
|
|
|
1695
|
|
|
|
|
|
|
Simple well-formedness is checked by this routine. An undef exit means |
|
1696
|
|
|
|
|
|
|
that any calling code should fail the test but need not report 'no response'. |
|
1697
|
|
|
|
|
|
|
|
|
1698
|
|
|
|
|
|
|
If the response is true then $self->doc contains a parsed XML |
|
1699
|
|
|
|
|
|
|
document. |
|
1700
|
|
|
|
|
|
|
|
|
1701
|
|
|
|
|
|
|
This is the usual way we make requests with integrated parsing and error |
|
1702
|
|
|
|
|
|
|
checking. This method is built around calls to L and |
|
1703
|
|
|
|
|
|
|
L. |
|
1704
|
|
|
|
|
|
|
|
|
1705
|
|
|
|
|
|
|
=cut |
|
1706
|
|
|
|
|
|
|
|
|
1707
|
|
|
|
|
|
|
# Make the request $req via $self->make_request, parse the response via
# $self->parse_response and run the responseDate and schema name checks.
#
# $verb is used only in the failure message.
#
# Returns the response object on success. Logs a failure and returns
# nothing when the HTTP response is not a success or when it cannot be
# parsed.
sub make_request_and_validate {
    my ($self,$verb,$req)=@_;

    my $response=$self->make_request($req);

    if (not $response->is_success) {
        # Collect the header derived values that go into the report
        my @http_values=(
            $response->status_line,
            $response->current_age,
            $response->freshness_lifetime,
            $response->is_fresh,
        );
        $self->log->fail(sprintf("Server failed to respond to the %s request (HTTP header ".
                                 "values: status=%s, age=%s, lifetime=%s, ".
                                 "is fresh:=%s)",$verb,@http_values));
        return;
    }

    if (not $self->parse_response($req,$response)) {
        $self->log->fail("Failed to parse response");
        return;
    }

    # responseDate must be in UTC format, and the response must refer to
    # the "official" OAI schema
    $self->check_response_date($req,$self->doc);
    $self->check_schema_name($req,$self->doc);

    return($response);
}
|
1736
|
|
|
|
|
|
|
|
|
1737
|
|
|
|
|
|
|
|
|
1738
|
|
|
|
|
|
|
=head3 make_request($url,$post_data) |
|
1739
|
|
|
|
|
|
|
|
|
1740
|
|
|
|
|
|
|
Routine to GET or POST a request, handle 503's, and return the response |
|
1741
|
|
|
|
|
|
|
|
|
1742
|
|
|
|
|
|
|
Second parameter, $post_data, must be a hashref of POST data to indicate that |
|
1743
|
|
|
|
|
|
|
the request should be an HTTP POST request instead of a GET. |
|
1744
|
|
|
|
|
|
|
|
|
1745
|
|
|
|
|
|
|
=cut |
|
1746
|
|
|
|
|
|
|
|
|
1747
|
|
|
|
|
|
|
# GET $url, or POST to it when $post_data (a hash reference of form data)
# is supplied, and return the final response object.
#
# The request is repeated while the server answers 503 (after sleeping)
# or 302 (with the request rebuilt for the new Location). $self->abort is
# called for disallowed https URIs, for too many attempts, for an
# excessive Retry-After, for a bad redirect URI and for a 501 response.
# When $self->save_all_responses is set each response body is written to
# a file under /tmp. A response carrying an X-Content-Range header is
# reported as oversize and has its content emptied.
sub make_request {
    my ($self,$url,$post_data)=@_;

    # Is this https and do we allow that?
    if (is_https_uri($url)) {
        $self->uses_https(1);
        unless ($self->allow_https) {
            $self->abort("URI $url is https. Use of https URIs is not allowed ".
                         "by the OAI-PMH v2.0 specification");
        }
    }

    my $request;
    if ($post_data) {
        # Nice string to report, keys in alpha order for consistent behavior
        my $content_msg=join('',map { "$_:$post_data->{$_} " } sort keys(%$post_data));
        $self->log->request($url,'POST',$content_msg);
        $request = POST($url,'Content'=>$post_data);
    } else {
        $self->log->request($url,'GET');
        $request = GET($url);
    }

    my $response;
    my $tries=0;
    ATTEMPT: while (1) {
        $response = $self->ua->request($request);

        # Write response if requested
        if ($self->save_all_responses) {
            my $response_file="/tmp/".$self->run_id.".".$self->response_number;
            open(my $fh,'>',$response_file) or $self->abort("Can't write response $response_file: $!");
            print {$fh} $response->content();
            $self->log->note("Response saved as $response_file") if ($self->debug);
            close($fh);
            $self->{response_number}++;
        }

        $tries++;
        if ($tries > $self->max_retries) {
            $self->abort("Too many 503 Retry-After or 302 Redirect responses received in a row");
        }

        # Only 503, 302 and 501 need special treatment, any other status
        # ends the loop
        my $status=$response->code;
        if ($status eq '503') {
            # 503 (Retry-After), expect to get a time too
            $self->uses_503(1);
            my $retryAfter=$response->header("Retry-After");
            if (not defined $retryAfter) {
                $self->log->warn("503 response without Retry-After time, will wait 10s");
                sleep 10;
            } elsif ($retryAfter!~/^\d+$/) {
                $self->log->fail("503 response with bad (non-numeric) Retry-After time, ".
                                 "will wait 10s");
                sleep 10;
            } elsif ($retryAfter>3600) {
                $self->abort("503 response with Retry-After > 1hour (3600s), aborting");
            } else {
                ###FIXME: Should check the Retry-After value carefully and barf if bad
                my $sleep_time = 1 + $retryAfter;
                $self->log->note("Status: ".$status.
                                 " -- going to sleep for $sleep_time seconds.");
                sleep $sleep_time;
            }
        } elsif ($status eq '302') {
            # 302 (Found) redirect
            my $loc=$response->header('Location');
            if ($loc!~m%^http://([^\?&]+)%) {
                # Not plain http: acceptable only if https and that is allowed
                if (is_https_uri($loc)) {
                    $self->uses_https(1);
                    unless ($self->allow_https) {
                        $self->abort("Redirect URI specified in 302 response is https. Use of ".
                                     "https URIs is not allowed by the OAI-PMH v2.0 specification");
                    }
                } else {
                    $self->abort("Bad redirect URI specified in 302 response");
                }
            }
            # Make new request, don't do POST if new Location includes ?
            my $repeat_post=($post_data and $loc!~/\?/);
            $request = ($repeat_post ? POST($loc,'Content'=>$post_data) : GET($loc));
        } elsif ($status eq '501') {
            $self->abort("Got 501 Not Implemented response which may either have come from ".
                         "the server or have been generated within the validator because the ".
                         "request type (perhaps https) is not supported.");
        } else {
            last ATTEMPT;
        }
    }

    # Check for oversize limit (indicated by X-Content-Range header)
    if (defined $response->header('X-Content-Range')) {
        $self->log->fail("Response to <$url> exceeds maximum size limit (".$self->max_size." bytes), discarded. ".
                         "While this limit is set only in this validation program you should not use excessively ".
                         "large responses as service providers will likely have problems parsing the XML. You ".
                         "should split the responses using the resumptionToken mechanism. (X-Content-Range: '".
                         $response->header('X-Content-Range')."' Content-Length: '".$response->content_length."')\n");
        $response->content('');
    }
    return $response;
}
|
1859
|
|
|
|
|
|
|
|
|
1860
|
|
|
|
|
|
|
|
|
1861
|
|
|
|
|
|
|
=head3 parse_response($request_url,$response,$xml_reason) |
|
1862
|
|
|
|
|
|
|
|
|
1863
|
|
|
|
|
|
|
Attempt to parse the HTTP response $response, examining both the response code |
|
1864
|
|
|
|
|
|
|
and then attempting to parse the content as XML. |
|
1865
|
|
|
|
|
|
|
|
|
1866
|
|
|
|
|
|
|
If $xml_reason is specified then this is added to the failure message, if |
|
1867
|
|
|
|
|
|
|
nothing is specified then a standard message about UTF-8 issues is |
|
1868
|
|
|
|
|
|
|
added. |
|
1869
|
|
|
|
|
|
|
|
|
1870
|
|
|
|
|
|
|
Returns true on success and sets $self->doc with the parsed XML document. |
|
1871
|
|
|
|
|
|
|
If unsuccessful, log an error message, bump the error count, and |
|
1872
|
|
|
|
|
|
|
return false. |
|
1873
|
|
|
|
|
|
|
|
|
1874
|
|
|
|
|
|
|
=cut |
|
1875
|
|
|
|
|
|
|
|
|
1876
|
|
|
|
|
|
|
# Examine the HTTP response object $response and try to parse its content
# as XML with $self->parser.
#
# $request_url - when false, a parse failure is not logged
# $response    - response object (code and content methods are used)
# $xml_reason  - optional text appended to the malformed response
#                warning; a standard note about UTF-8 problems is used
#                when it is empty or not given
#
# Returns 1 and stores the parsed document via $self->doc on success.
# Returns nothing (after logging a warning) when $response is undefined
# or not a reference, when the status code starts with 4 or 5, or when
# the content cannot be parsed.
sub parse_response {
    my ($self,$request_url,$response,$xml_reason)=@_;
    $xml_reason='' if (not defined $xml_reason);

    # Fail if response is undef or a plain string rather than an object
    unless (defined($response) and ref($response)) {
        $self->log->warn("Bad response from server");
        return;
    }

    # Unpack the bits we want from response object
    my $code=$response->code;
    my $content=$response->content;

    # Check return code (if given)
    if ($code and $code=~/^[45]/) {
        $self->log->warn("Bad HTTP status code from server: $code");
        return;
    }

    # Check content; a parser exception leaves $doc undefined and $@ set
    my $doc=eval { $self->parser->parse($content) };
    if ($doc) {
        # Set parsed document
        $self->doc( $doc );
        return(1);
    }

    my $err=$@;
    $err=~s/^\s+//;
    $err=~s%at\s+/usr/lib/perl.*%%i; #trim stuff about our perl installation
    if ($request_url) {
        if (not $xml_reason) {
            $xml_reason="The most common reason for malformed responses is illegal bytes in ".
                        "UTF-8 streams (e.g. the inclusion of Latin1 characters with codes>127 ".
                        "without creating proper UTF-8 mutli-byte sequences). You might find ".
                        "the utf8conditioner, found on the OAI tools page helpful for debugging.";
        }
        $self->log->warn("Malformed response: $err. $xml_reason");
    }
    return;
}
|
1918
|
|
|
|
|
|
|
|
|
1919
|
|
|
|
|
|
|
|
|
1920
|
|
|
|
|
|
|
=head2 UTILITY FUNCTIONS |
|
1921
|
|
|
|
|
|
|
|
|
1922
|
|
|
|
|
|
|
=head3 html_escape($str) |
|
1923
|
|
|
|
|
|
|
|
|
1924
|
|
|
|
|
|
|
Escapes characters which have special meanings in HTML |
|
1925
|
|
|
|
|
|
|
|
|
1926
|
|
|
|
|
|
|
=cut |
|
1927
|
|
|
|
|
|
|
|
|
1928
|
|
|
|
|
|
|
# Return a copy of the string argument with the characters that have
# special meaning in HTML (& < > " ') replaced by entity references.
#
# All five characters are replaced in a single pass so that the
# ampersands of the inserted entities are never escaped a second time.
sub html_escape {
    my $string = shift;
    my %entity_for = (
        '&' => '&amp;',
        '<' => '&lt;',
        '>' => '&gt;',
        '"' => '&quot;',
        "'" => '&#39;',
    );
    $string =~ s/([&<>"'])/$entity_for{$1}/g;
    return $string;
}
|
1937
|
|
|
|
|
|
|
|
|
1938
|
|
|
|
|
|
|
=head3 one_year_before($date) |
|
1939
|
|
|
|
|
|
|
|
|
1940
|
|
|
|
|
|
|
Assumes properly formatted date, decrements year by one |
|
1941
|
|
|
|
|
|
|
via string manipulation and returns date. |
|
1942
|
|
|
|
|
|
|
|
|
1943
|
|
|
|
|
|
|
=cut |
|
1944
|
|
|
|
|
|
|
|
|
1945
|
|
|
|
|
|
|
# Return $date with its leading four digit year decremented by one,
# keeping leading zeros. Everything after the year is left as it is.
#
# A value that does not start with four digits (or is undefined) is
# returned unchanged, rather than being run through a substitution built
# from an undefined year.
sub one_year_before {
    my ($date)=@_;
    return($date) unless (defined $date and $date =~ /^([0-9]{4})/);
    my $year_minus_one = sprintf('%04d',($1 - 1)); #make sure we get leading zeros
    # The match above guarantees that the first four characters are the year
    substr($date,0,4,$year_minus_one);
    return($date);
}
|
1952
|
|
|
|
|
|
|
|
|
1953
|
|
|
|
|
|
|
=head3 url_encode($str) |
|
1954
|
|
|
|
|
|
|
|
|
1955
|
|
|
|
|
|
|
Escape/encode any characters that aren't in the small safe set for URLs |
|
1956
|
|
|
|
|
|
|
|
|
1957
|
|
|
|
|
|
|
=cut |
|
1958
|
|
|
|
|
|
|
|
|
1959
|
|
|
|
|
|
|
# Return the string argument encoded for use in a URL: every character
# other than word characters, '/', ',', '-' and space becomes a %XX hex
# escape, and spaces are then turned into '+'.
sub url_encode {
    my ($str)=@_;
    my $hex_escape=sub { return sprintf("%%%02X",ord($_[0])) };
    $str =~ s/([^\w\/\,\- ])/$hex_escape->($1)/eg;
    # Spaces were deliberately left alone above
    $str =~ s/ /+/g;
    return($str);
}
|
1965
|
|
|
|
|
|
|
|
|
1966
|
|
|
|
|
|
|
|
|
1967
|
|
|
|
|
|
|
=head3 is_https_uri($uri) |
|
1968
|
|
|
|
|
|
|
|
|
1969
|
|
|
|
|
|
|
Return true if the URI is an https URI, false otherwise. |
|
1970
|
|
|
|
|
|
|
|
|
1971
|
|
|
|
|
|
|
=cut |
|
1972
|
|
|
|
|
|
|
|
|
1973
|
|
|
|
|
|
|
# Return true if $uri starts with the https scheme, false otherwise.
#
# The scheme is matched without regard to case (URI schemes are case
# insensitive), so 'HTTPS://...' is recognized as well. An undefined
# $uri is simply not https.
sub is_https_uri {
    my $uri=shift;
    return unless (defined $uri);
    return($uri=~m%\Ahttps:%i);
}
|
1977
|
|
|
|
|
|
|
|
|
1978
|
|
|
|
|
|
|
|
|
1979
|
|
|
|
|
|
|
=head3 sanitize($str) |
|
1980
|
|
|
|
|
|
|
|
|
1981
|
|
|
|
|
|
|
Return a sanitized version of $str that doesn't contain odd |
|
1982
|
|
|
|
|
|
|
characters and is not over 80 chars long. Will have the |
|
1983
|
|
|
|
|
|
|
string '(sanitized)' appended if changed. |
|
1984
|
|
|
|
|
|
|
|
|
1985
|
|
|
|
|
|
|
=cut |
|
1986
|
|
|
|
|
|
|
|
|
1987
|
|
|
|
|
|
|
# Return a version of $str that is safe to show in a message: every
# character outside the allowed set is replaced by '_' and the result is
# cut to at most 80 characters. '(sanitized)' is appended when the result
# differs from the input.
sub sanitize {
    my ($str)=@_;
    (my $clean=$str)=~s/[^\w\-:;.!@#%^*\(\) ]/_/g;
    $clean=substr($clean,0,80);
    return($clean eq $str ? $clean : $clean.'(sanitized)');
}
|
1997
|
|
|
|
|
|
|
|
|
1998
|
|
|
|
|
|
|
|
|
1999
|
|
|
|
|
|
|
=head1 SUPPORT |
|
2000
|
|
|
|
|
|
|
|
|
2001
|
|
|
|
|
|
|
Please report any bugs or questions about validation via the |
|
2002
|
|
|
|
|
|
|
OAI-PMH discussion list at L. |
|
2003
|
|
|
|
|
|
|
Be sure to make it clear that you are talking about the |
|
2004
|
|
|
|
|
|
|
HTTP::OAIPMH::Validator module. |
|
2005
|
|
|
|
|
|
|
|
|
2006
|
|
|
|
|
|
|
=head1 AUTHORS |
|
2007
|
|
|
|
|
|
|
|
|
2008
|
|
|
|
|
|
|
Simeon Warner, Donna Bergmark |
|
2009
|
|
|
|
|
|
|
|
|
2010
|
|
|
|
|
|
|
=head1 HISTORY |
|
2011
|
|
|
|
|
|
|
|
|
2012
|
|
|
|
|
|
|
This module is based on an OAI-PMH validator first written by Donna Bergmark |
|
2013
|
|
|
|
|
|
|
(Cornell University) in 2001-01 for the OAI-PMH validation and registration |
|
2014
|
|
|
|
|
|
|
service (L). |
|
2015
|
|
|
|
|
|
|
Simeon Warner (Cornell University) took over the validator and operation of |
|
2016
|
|
|
|
|
|
|
the registration service in 2004-01, and then did a significant tidy/rework |
|
2017
|
|
|
|
|
|
|
of the code. That code ran the validation and registration service with |
|
2018
|
|
|
|
|
|
|
few changes through 2015-01. Some of the early work on the OAI-PMH validation |
|
2019
|
|
|
|
|
|
|
service was supported through NSF award number 0127308. |
|
2020
|
|
|
|
|
|
|
|
|
2021
|
|
|
|
|
|
|
Code was abstracted into this module 2015-01 by Simeon Warner and is |
|
2022
|
|
|
|
|
|
|
used for the OAI-PMH validation and registration service on |
|
2023
|
|
|
|
|
|
|
L. |
|
2024
|
|
|
|
|
|
|
|
|
2025
|
|
|
|
|
|
|
=head1 COPYRIGHT |
|
2026
|
|
|
|
|
|
|
|
|
2027
|
|
|
|
|
|
|
Copyright 2001..2017 by Simeon Warner, Donna Bergmark. |
|
2028
|
|
|
|
|
|
|
|
|
2029
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify it under |
|
2030
|
|
|
|
|
|
|
the same terms as Perl itself. |
|
2031
|
|
|
|
|
|
|
|
|
2032
|
|
|
|
|
|
|
=cut |
|
2033
|
|
|
|
|
|
|
|
|
2034
|
|
|
|
|
|
|
1; |