| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package HTTP::OAI::UserAgent; |
|
2
|
|
|
|
|
|
|
|
|
3
|
11
|
|
|
11
|
|
83
|
use strict; |
|
|
11
|
|
|
|
|
20
|
|
|
|
11
|
|
|
|
|
302
|
|
|
4
|
11
|
|
|
11
|
|
65
|
use warnings; |
|
|
11
|
|
|
|
|
22
|
|
|
|
11
|
|
|
|
|
314
|
|
|
5
|
|
|
|
|
|
|
|
|
6
|
11
|
|
|
11
|
|
56
|
use vars qw(@ISA $ACCEPT); |
|
|
11
|
|
|
|
|
25
|
|
|
|
11
|
|
|
|
|
1006
|
|
|
7
|
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
our $VERSION = '4.13'; |
|
9
|
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
# Do not use eval() |
|
11
|
|
|
|
|
|
|
our $USE_EVAL = 1; |
|
12
|
|
|
|
|
|
|
# Ignore bad utf8 characters |
|
13
|
|
|
|
|
|
|
our $IGNORE_BAD_CHARS = 1; |
|
14
|
|
|
|
|
|
|
# Silence bad utf8 warnings |
|
15
|
|
|
|
|
|
|
our $SILENT_BAD_CHARS = 0; |
|
16
|
|
|
|
|
|
|
|
|
17
|
11
|
|
|
11
|
|
87
|
use constant MAX_UTF8_BYTES => 4; |
|
|
11
|
|
|
|
|
18
|
|
|
|
11
|
|
|
|
|
14662
|
|
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
require LWP::UserAgent; |
|
20
|
|
|
|
|
|
|
@ISA = qw(LWP::UserAgent); |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
unless( $@ ) { |
|
23
|
|
|
|
|
|
|
$ACCEPT = "gzip"; |
|
24
|
|
|
|
|
|
|
} |
|
25
|
|
|
|
|
|
|
|
|
26
|
1
|
|
|
1
|
1
|
4
|
sub delay { shift->_elem( "delay", @_ ) } |
|
27
|
1
|
|
|
1
|
0
|
7
|
sub last_request_completed { shift->_elem( "last_request_completed", @_ ) } |
|
28
|
|
|
|
|
|
|
|
|
29
|
0
|
|
|
0
|
1
|
0
|
sub redirect_ok { 1 } |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
sub _oai { |
|
32
|
10
|
|
|
10
|
|
37
|
my( $self, @args ) = @_; |
|
33
|
10
|
50
|
|
|
|
43
|
my $cb = ref($args[0]) eq "CODE" ? shift @args : undef; |
|
34
|
10
|
|
|
|
|
33
|
my %args = @args; |
|
35
|
10
|
|
33
|
|
|
77
|
$cb = delete $args{onRecord} || $cb || $self->{onRecord}; |
|
36
|
|
|
|
|
|
|
|
|
37
|
10
|
|
50
|
|
|
50
|
my $handlers = delete $args{handlers} || {}; |
|
38
|
|
|
|
|
|
|
|
|
39
|
10
|
50
|
66
|
|
|
56
|
if( !$args{force} && (my @errors = HTTP::OAI::Repository::validate_request(%args)) ) { |
|
40
|
0
|
|
|
|
|
0
|
return new HTTP::OAI::Response( |
|
41
|
|
|
|
|
|
|
code=>503, |
|
42
|
|
|
|
|
|
|
message=>'Invalid Request (use \'force\' to force a non-conformant request): ' . $errors[0]->toString, |
|
43
|
|
|
|
|
|
|
errors=>\@errors |
|
44
|
|
|
|
|
|
|
); |
|
45
|
|
|
|
|
|
|
} |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
# Get rid of any empty arguments |
|
48
|
10
|
|
|
|
|
29
|
for( keys %args ) { |
|
49
|
20
|
50
|
33
|
|
|
76
|
delete $args{$_} if !defined($args{$_}) || !length($args{$_}); |
|
50
|
|
|
|
|
|
|
} |
|
51
|
|
|
|
|
|
|
|
|
52
|
10
|
|
|
|
|
43
|
my $request = HTTP::Request->new( GET => $self->_buildurl(%args) ); |
|
53
|
|
|
|
|
|
|
|
|
54
|
10
|
|
|
|
|
1209
|
delete $args{force}; |
|
55
|
|
|
|
|
|
|
|
|
56
|
10
|
|
|
|
|
82
|
my $response = HTTP::OAI::Response->new( |
|
57
|
|
|
|
|
|
|
%args, |
|
58
|
|
|
|
|
|
|
handlers => $handlers, |
|
59
|
|
|
|
|
|
|
onRecord => $cb, |
|
60
|
|
|
|
|
|
|
); |
|
61
|
10
|
|
|
|
|
50
|
$response->request( $request ); |
|
62
|
10
|
|
|
|
|
155
|
my $parser = XML::LibXML->new( |
|
63
|
|
|
|
|
|
|
Handler => HTTP::OAI::SAX::Trace->new( |
|
64
|
|
|
|
|
|
|
Handler => HTTP::OAI::SAX::Text->new( |
|
65
|
|
|
|
|
|
|
Handler => $response |
|
66
|
|
|
|
|
|
|
) ) ); |
|
67
|
10
|
|
|
|
|
832
|
$parser->{content_length} = 0; |
|
68
|
10
|
|
|
|
|
85
|
$parser->{content_buffer} = Encode::encode('UTF-8',''); |
|
69
|
|
|
|
|
|
|
|
|
70
|
10
|
|
|
|
|
800
|
HTTP::OAI::Debug::trace( $args{verb} . " " . ref($parser) . "->parse_chunk()" ); |
|
71
|
10
|
|
|
|
|
19
|
my $r; |
|
72
|
|
|
|
|
|
|
{ |
|
73
|
10
|
|
|
|
|
16
|
local $SIG{__DIE__}; |
|
|
10
|
|
|
|
|
33
|
|
|
74
|
|
|
|
|
|
|
$r = $self->SUPER::request($request,sub { |
|
75
|
14
|
|
|
14
|
|
34324
|
$self->lwp_callback( $parser, @_ ) |
|
76
|
10
|
|
|
|
|
76
|
}); |
|
77
|
10
|
100
|
100
|
|
|
57346
|
if( $r->is_success && !defined $r->headers->header( 'Client-Aborted' ) ) |
|
78
|
|
|
|
|
|
|
{ |
|
79
|
1
|
|
|
|
|
60
|
eval { $self->lwp_endparse( $parser ) }; |
|
|
1
|
|
|
|
|
13
|
|
|
80
|
1
|
50
|
|
|
|
87
|
if( $@ ) |
|
81
|
|
|
|
|
|
|
{ |
|
82
|
0
|
|
|
|
|
0
|
$r->headers->header( 'Client-Aborted', 'die' ); |
|
83
|
0
|
|
|
|
|
0
|
$r->headers->header( 'X-Died', $@ ); |
|
84
|
|
|
|
|
|
|
} |
|
85
|
|
|
|
|
|
|
} |
|
86
|
|
|
|
|
|
|
} |
|
87
|
10
|
100
|
66
|
|
|
451
|
if( defined($r->headers->header( 'Client-Aborted' )) && $r->headers->header( 'Client-Aborted' ) eq 'die' ) |
|
88
|
|
|
|
|
|
|
{ |
|
89
|
8
|
|
|
|
|
623
|
my $err = $r->headers->header( 'X-Died' ); |
|
90
|
8
|
50
|
|
|
|
302
|
if( $err eq "done" ) |
|
91
|
|
|
|
|
|
|
{ |
|
92
|
8
|
|
|
|
|
26
|
$r->code(200); |
|
93
|
8
|
|
|
|
|
83
|
$r->message("OK"); |
|
94
|
|
|
|
|
|
|
} |
|
95
|
|
|
|
|
|
|
else |
|
96
|
|
|
|
|
|
|
{ |
|
97
|
0
|
|
|
|
|
0
|
$r->code(500); |
|
98
|
0
|
|
|
|
|
0
|
$r->message( 'An error occurred while parsing: ' . $err ); |
|
99
|
|
|
|
|
|
|
} |
|
100
|
|
|
|
|
|
|
} |
|
101
|
|
|
|
|
|
|
|
|
102
|
10
|
|
|
|
|
195
|
my $cnt_len = $parser->{content_length}; |
|
103
|
10
|
|
|
|
|
58
|
undef $parser; |
|
104
|
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
# OAI retry-after |
|
106
|
10
|
50
|
33
|
|
|
63
|
if( defined($r) && ( $r->code == 503 || $r->code == 429 ) && defined(my $timeout = $r->headers->header('Retry-After')) ) { |
|
|
|
50
|
33
|
|
|
|
|
|
|
|
100
|
33
|
|
|
|
|
|
|
|
50
|
66
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
107
|
0
|
0
|
|
|
|
0
|
if( $self->{recursion}++ > 10 ) { |
|
108
|
0
|
|
|
|
|
0
|
$r->code(500); |
|
109
|
0
|
|
|
|
|
0
|
$r->message("Server did not give a response after 10 retries"); |
|
110
|
0
|
|
|
|
|
0
|
return $r; |
|
111
|
|
|
|
|
|
|
} |
|
112
|
0
|
0
|
0
|
|
|
0
|
if( !$timeout or $timeout =~ /\D/ or $timeout < 0 or $timeout > 86400 ) { |
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
113
|
0
|
|
|
|
|
0
|
$r->code(500); |
|
114
|
0
|
|
0
|
|
|
0
|
$r->message("Server specified an unsupported duration to wait (\"".($timeout||'null')."\""); |
|
115
|
0
|
|
|
|
|
0
|
return $r; |
|
116
|
|
|
|
|
|
|
} |
|
117
|
0
|
|
|
|
|
0
|
HTTP::OAI::Debug::trace( "Waiting $timeout seconds" ); |
|
118
|
0
|
|
|
|
|
0
|
sleep($timeout+10); # We wait an extra 10 secs for safety |
|
119
|
0
|
|
|
|
|
0
|
return $self->_oai(@args); |
|
120
|
|
|
|
|
|
|
# Got an empty response |
|
121
|
|
|
|
|
|
|
} elsif( defined($r) && $r->is_success && $cnt_len == 0 ) { |
|
122
|
0
|
0
|
|
|
|
0
|
if( $self->{recursion}++ > 10 ) { |
|
123
|
0
|
|
|
|
|
0
|
$r->code(500); |
|
124
|
0
|
|
|
|
|
0
|
$r->message("No content in server response"); |
|
125
|
0
|
|
|
|
|
0
|
return $r; |
|
126
|
|
|
|
|
|
|
} |
|
127
|
0
|
|
|
|
|
0
|
HTTP::OAI::Debug::trace( "Retrying on empty response" ); |
|
128
|
0
|
|
|
|
|
0
|
sleep(5); |
|
129
|
0
|
|
|
|
|
0
|
return $self->_oai(@args); |
|
130
|
|
|
|
|
|
|
# An HTTP error occurred |
|
131
|
|
|
|
|
|
|
} elsif( $r->is_error ) { |
|
132
|
1
|
|
|
|
|
83
|
return $r; |
|
133
|
|
|
|
|
|
|
# An error occurred during parsing |
|
134
|
|
|
|
|
|
|
} elsif( $@ ) { |
|
135
|
0
|
0
|
|
|
|
0
|
$r->code(my $code = $@ =~ /read timeout/ ? 504 : 600); |
|
136
|
0
|
|
|
|
|
0
|
$r->message($@); |
|
137
|
0
|
|
|
|
|
0
|
return $r; |
|
138
|
|
|
|
|
|
|
} |
|
139
|
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
# access the original response via previous |
|
141
|
9
|
|
|
|
|
372
|
$response->previous($r); |
|
142
|
|
|
|
|
|
|
|
|
143
|
9
|
|
|
|
|
130
|
return $response; |
|
144
|
|
|
|
|
|
|
} |
|
145
|
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
sub request |
|
147
|
|
|
|
|
|
|
{ |
|
148
|
1
|
|
|
1
|
1
|
142
|
my( $self, @args ) = @_; |
|
149
|
|
|
|
|
|
|
|
|
150
|
1
|
|
|
|
|
6
|
my $delay = $self->delay; |
|
151
|
1
|
50
|
|
|
|
13
|
if( defined $delay ) |
|
152
|
|
|
|
|
|
|
{ |
|
153
|
0
|
0
|
|
|
|
0
|
if( ref($delay) eq "CODE" ) |
|
154
|
|
|
|
|
|
|
{ |
|
155
|
0
|
|
|
|
|
0
|
$delay = &$delay( $self->last_request_completed ); |
|
156
|
|
|
|
|
|
|
} |
|
157
|
0
|
0
|
|
|
|
0
|
select(undef,undef,undef,$delay) if $delay > 0; |
|
158
|
|
|
|
|
|
|
} |
|
159
|
|
|
|
|
|
|
|
|
160
|
1
|
|
|
|
|
9
|
my $r = $self->SUPER::request( @args ); |
|
161
|
|
|
|
|
|
|
|
|
162
|
1
|
|
|
|
|
23282
|
$self->last_request_completed( time ); |
|
163
|
|
|
|
|
|
|
|
|
164
|
1
|
|
|
|
|
24
|
return $r; |
|
165
|
|
|
|
|
|
|
} |
|
166
|
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
sub lwp_badchar |
|
168
|
|
|
|
|
|
|
{ |
|
169
|
0
|
|
|
0
|
0
|
0
|
my $codepoint = sprintf('U+%04x', ord($_[2])); |
|
170
|
0
|
0
|
|
|
|
0
|
unless( $SILENT_BAD_CHARS ) |
|
171
|
|
|
|
|
|
|
{ |
|
172
|
0
|
|
|
|
|
0
|
warn "Bad Unicode character $codepoint at byte offset ".$_[1]->{content_length}." from ".$_[1]->{request}->uri."\n"; |
|
173
|
|
|
|
|
|
|
} |
|
174
|
0
|
|
|
|
|
0
|
return $codepoint; |
|
175
|
|
|
|
|
|
|
} |
|
176
|
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
sub lwp_endparse |
|
178
|
|
|
|
|
|
|
{ |
|
179
|
1
|
|
|
1
|
0
|
4
|
my( $self, $parser ) = @_; |
|
180
|
|
|
|
|
|
|
|
|
181
|
1
|
|
|
|
|
3
|
my $utf8 = $parser->{content_buffer}; |
|
182
|
|
|
|
|
|
|
# Replace bad chars with '?' |
|
183
|
1
|
50
|
33
|
|
|
16
|
if( $IGNORE_BAD_CHARS and length($utf8) ) { |
|
184
|
0
|
|
|
0
|
|
0
|
$utf8 = Encode::decode('UTF-8', $utf8, sub { $self->lwp_badchar($parser, @_) }); |
|
|
0
|
|
|
|
|
0
|
|
|
185
|
|
|
|
|
|
|
} |
|
186
|
1
|
50
|
|
|
|
5
|
if( length($utf8) > 0 ) |
|
187
|
|
|
|
|
|
|
{ |
|
188
|
0
|
|
|
|
|
0
|
_ccchars($utf8); # Fix control chars |
|
189
|
0
|
|
|
|
|
0
|
$parser->{content_length} += length($utf8); |
|
190
|
0
|
|
|
|
|
0
|
$parser->parse_chunk($utf8); |
|
191
|
|
|
|
|
|
|
} |
|
192
|
1
|
|
|
|
|
4
|
delete($parser->{content_buffer}); |
|
193
|
1
|
|
|
|
|
5
|
$parser->parse_chunk('', 1); |
|
194
|
|
|
|
|
|
|
} |
|
195
|
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
sub lwp_callback |
|
197
|
|
|
|
|
|
|
{ |
|
198
|
14
|
|
|
14
|
0
|
36
|
my( $self, $parser ) = @_; |
|
199
|
|
|
|
|
|
|
|
|
200
|
11
|
|
|
11
|
|
86
|
use bytes; # fixing utf-8 will need byte semantics |
|
|
11
|
|
|
|
|
44
|
|
|
|
11
|
|
|
|
|
64
|
|
|
201
|
|
|
|
|
|
|
|
|
202
|
14
|
|
|
|
|
87
|
$parser->{content_buffer} .= $_[2]; |
|
203
|
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
do |
|
205
|
|
|
|
|
|
|
{ |
|
206
|
|
|
|
|
|
|
# FB_QUIET won't split multi-byte chars on input |
|
207
|
14
|
|
|
|
|
87
|
my $utf8 = Encode::decode('UTF-8', $parser->{content_buffer}, Encode::FB_QUIET); |
|
208
|
|
|
|
|
|
|
|
|
209
|
14
|
50
|
|
|
|
744
|
if( length($utf8) > 0 ) |
|
210
|
|
|
|
|
|
|
{ |
|
211
|
11
|
|
|
11
|
|
7502
|
use utf8; |
|
|
11
|
|
|
|
|
159
|
|
|
|
11
|
|
|
|
|
57
|
|
|
212
|
14
|
|
|
|
|
41
|
_ccchars($utf8); # Fix control chars |
|
213
|
14
|
|
|
|
|
37
|
$parser->{content_length} += length($utf8); |
|
214
|
14
|
|
|
|
|
50
|
$parser->parse_chunk($utf8); |
|
215
|
|
|
|
|
|
|
} |
|
216
|
|
|
|
|
|
|
|
|
217
|
6
|
50
|
|
|
|
80
|
if( length($parser->{content_buffer}) > MAX_UTF8_BYTES ) |
|
218
|
|
|
|
|
|
|
{ |
|
219
|
0
|
|
|
|
|
0
|
$parser->{content_buffer} =~ s/^([\x80-\xff]{1,4})//s; |
|
220
|
0
|
|
|
|
|
0
|
my $badbytes = $1; |
|
221
|
0
|
0
|
|
|
|
0
|
if( length($badbytes) == 0 ) |
|
222
|
|
|
|
|
|
|
{ |
|
223
|
0
|
|
|
|
|
0
|
Carp::confess "Internal error - bad bytes but not in 0x80-0xff range???"; |
|
224
|
|
|
|
|
|
|
} |
|
225
|
0
|
0
|
|
|
|
0
|
if( $IGNORE_BAD_CHARS ) |
|
226
|
|
|
|
|
|
|
{ |
|
227
|
|
|
|
|
|
|
$badbytes = join('', map { |
|
228
|
0
|
|
|
|
|
0
|
$self->lwp_badchar($parser, $_) |
|
|
0
|
|
|
|
|
0
|
|
|
229
|
|
|
|
|
|
|
} split //, $badbytes); |
|
230
|
|
|
|
|
|
|
} |
|
231
|
0
|
|
|
|
|
0
|
$parser->parse_chunk( $badbytes ); |
|
232
|
|
|
|
|
|
|
} |
|
233
|
14
|
|
|
|
|
25
|
} while( length($parser->{content_buffer}) > MAX_UTF8_BYTES ); |
|
234
|
|
|
|
|
|
|
} |
|
235
|
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
sub _ccchars { |
|
237
|
14
|
|
|
14
|
|
802
|
$_[0] =~ s/([\x00-\x08\x0b-\x0c\x0e-\x1f])/sprintf("\\%04d",ord($1))/seg; |
|
|
0
|
|
|
|
|
0
|
|
|
238
|
|
|
|
|
|
|
} |
|
239
|
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
sub _buildurl { |
|
241
|
10
|
|
|
10
|
|
29
|
my( $self, %args ) = @_; |
|
242
|
|
|
|
|
|
|
|
|
243
|
10
|
50
|
|
|
|
25
|
Carp::confess "Requires verb parameter" unless $args{'verb'}; |
|
244
|
|
|
|
|
|
|
|
|
245
|
10
|
|
|
|
|
36
|
my $uri = URI->new( $self->baseURL ); |
|
246
|
10
|
100
|
|
|
|
678
|
return $uri->as_string if $uri->scheme eq "file"; |
|
247
|
|
|
|
|
|
|
|
|
248
|
1
|
50
|
33
|
|
|
31
|
if( defined($args{resumptionToken}) && !$args{force} ) { |
|
249
|
0
|
|
|
|
|
0
|
$uri->query_form(verb=>$args{'verb'},resumptionToken=>$args{'resumptionToken'}); |
|
250
|
|
|
|
|
|
|
} else { |
|
251
|
1
|
|
|
|
|
3
|
delete $args{force}; |
|
252
|
|
|
|
|
|
|
# http://www.cshc.ubc.ca/oai/ breaks if verb isn't first, doh |
|
253
|
1
|
|
|
|
|
6
|
$uri->query_form(verb=>delete($args{'verb'}),%args); |
|
254
|
|
|
|
|
|
|
} |
|
255
|
|
|
|
|
|
|
|
|
256
|
1
|
|
|
|
|
98
|
return $uri->as_string; |
|
257
|
|
|
|
|
|
|
} |
|
258
|
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
sub decompress { |
|
260
|
0
|
|
|
0
|
0
|
|
my ($response) = @_; |
|
261
|
0
|
|
|
|
|
|
my $type = $response->headers->header("Content-Encoding"); |
|
262
|
0
|
0
|
|
|
|
|
return $response->{_content_filename} unless defined($type); |
|
263
|
0
|
0
|
|
|
|
|
if( $type eq 'gzip' ) { |
|
264
|
0
|
|
|
|
|
|
my $filename = File::Temp->new( UNLINK => 1 ); |
|
265
|
0
|
0
|
|
|
|
|
my $gz = Compress::Zlib::gzopen($response->{_content_filename}, "r") or die $!; |
|
266
|
0
|
|
|
|
|
|
my ($buffer,$c); |
|
267
|
0
|
|
|
|
|
|
my $fh = IO::File->new($filename,"w"); |
|
268
|
0
|
|
|
|
|
|
binmode($fh,":utf8"); |
|
269
|
0
|
|
|
|
|
|
while( ($c = $gz->gzread($buffer)) > 0 ) { |
|
270
|
0
|
|
|
|
|
|
print $fh $buffer; |
|
271
|
|
|
|
|
|
|
} |
|
272
|
0
|
|
|
|
|
|
$fh->close(); |
|
273
|
0
|
|
|
|
|
|
$gz->gzclose(); |
|
274
|
0
|
0
|
|
|
|
|
die "Error decompressing gziped response: " . $gz->gzerror() if -1 == $c; |
|
275
|
0
|
|
|
|
|
|
return $response->{_content_filename} = $filename; |
|
276
|
|
|
|
|
|
|
} else { |
|
277
|
0
|
|
|
|
|
|
die "Unsupported compression returned: $type\n"; |
|
278
|
|
|
|
|
|
|
} |
|
279
|
|
|
|
|
|
|
} |
|
280
|
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
1; |
|
282
|
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
__END__ |