line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Catmandu::Importer::Pure; |
2
|
|
|
|
|
|
|
|
3
|
2
|
|
|
2
|
|
221295
|
use Catmandu::Sane; |
|
2
|
|
|
|
|
404771
|
|
|
2
|
|
|
|
|
16
|
|
4
|
2
|
|
|
2
|
|
648
|
use Catmandu::Util qw(:is); |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
538
|
|
5
|
2
|
|
|
2
|
|
1071
|
use URI::Escape; |
|
2
|
|
|
|
|
3121
|
|
|
2
|
|
|
|
|
119
|
|
6
|
2
|
|
|
2
|
|
991
|
use MIME::Base64; |
|
2
|
|
|
|
|
1447
|
|
|
2
|
|
|
|
|
114
|
|
7
|
2
|
|
|
2
|
|
954
|
use Furl; |
|
2
|
|
|
|
|
49616
|
|
|
2
|
|
|
|
|
64
|
|
8
|
2
|
|
|
2
|
|
15
|
use Moo; |
|
2
|
|
|
|
|
6
|
|
|
2
|
|
|
|
|
16
|
|
9
|
2
|
|
|
2
|
|
964
|
use Carp; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
125
|
|
10
|
2
|
|
|
2
|
|
1392
|
use XML::LibXML; |
|
2
|
|
|
|
|
93779
|
|
|
2
|
|
|
|
|
16
|
|
11
|
2
|
|
|
2
|
|
324
|
use XML::LibXML::XPathContext; |
|
2
|
|
|
|
|
6
|
|
|
2
|
|
|
|
|
49
|
|
12
|
2
|
|
|
2
|
|
1052
|
use Data::Validate::URI qw(is_web_uri); |
|
2
|
|
|
|
|
86001
|
|
|
2
|
|
|
|
|
151
|
|
13
|
2
|
|
|
2
|
|
21
|
use Scalar::Util qw(blessed); |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
6084
|
|
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
our $VERSION = '0.03'; |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
with 'Catmandu::Importer'; |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
has base => ( is => 'ro' ); |
20
|
|
|
|
|
|
|
has endpoint => ( is => 'ro' ); |
21
|
|
|
|
|
|
|
has path => ( is => 'ro' ); |
22
|
|
|
|
|
|
|
has apiKey => ( is => 'ro' ); |
23
|
|
|
|
|
|
|
has user => ( is => 'ro' ); |
24
|
|
|
|
|
|
|
has password => ( is => 'ro' ); |
25
|
|
|
|
|
|
|
has post_xml => ( is => 'ro' ); |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
has handler => |
28
|
|
|
|
|
|
|
( is => 'ro', default => sub { 'simple' }, coerce => \&_coerce_handler ); |
29
|
|
|
|
|
|
|
has options => |
30
|
|
|
|
|
|
|
( is => 'ro', default => sub { +{} }, coerce => \&_coerce_options ); |
31
|
|
|
|
|
|
|
has fullResponse => ( is => 'ro', default => sub { 0 } ); |
32
|
|
|
|
|
|
|
has trim_text => ( is => 'ro', default => sub { 0 } ); |
33
|
|
|
|
|
|
|
has filter => ( is => 'ro' ); |
34
|
|
|
|
|
|
|
has userAgent => ( is => 'ro', default => sub { 'Mozilla/5.0' } ); |
35
|
|
|
|
|
|
|
has timeout => ( is => 'ro', default => sub { 50 } ); |
36
|
|
|
|
|
|
|
has furl => ( |
37
|
|
|
|
|
|
|
is => 'ro', |
38
|
|
|
|
|
|
|
isa => sub { |
39
|
|
|
|
|
|
|
Catmandu::BadVal->throw("Invalid furl, should be compatible with Furl") |
40
|
|
|
|
|
|
|
unless is_maybe_able( $_[0], 'get' ); |
41
|
|
|
|
|
|
|
}, |
42
|
|
|
|
|
|
|
lazy => 1, |
43
|
|
|
|
|
|
|
builder => \&_build_furl |
44
|
|
|
|
|
|
|
); |
45
|
|
|
|
|
|
|
has max_retries => ( is => 'ro', default => sub { 0 } ); |
46
|
|
|
|
|
|
|
has _currentRecordSet => ( is => 'ro' ); |
47
|
|
|
|
|
|
|
has _n => ( is => 'ro', default => sub { 0 } ); |
48
|
|
|
|
|
|
|
has _start => ( is => 'ro', default => sub { 0 } ); |
49
|
|
|
|
|
|
|
has _rs_size => ( is => 'ro', default => sub { 0 } ); |
50
|
|
|
|
|
|
|
has _total_size => ( is => 'ro', default => sub { 0 } ); |
51
|
|
|
|
|
|
|
has _next_url => ( is => 'ro'); |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
sub BUILD { |
55
|
24
|
|
|
24
|
0
|
122
|
my $self = shift; |
56
|
|
|
|
|
|
|
|
57
|
24
|
100
|
100
|
|
|
211
|
Catmandu::BadVal->throw("Base URL, endpoint and apiKey are required") |
|
|
|
100
|
|
|
|
|
58
|
|
|
|
|
|
|
unless $self->base && $self->endpoint && $self->apiKey; |
59
|
|
|
|
|
|
|
|
60
|
21
|
100
|
100
|
|
|
79
|
Catmandu::BadVal->throw( "Password is needed for user " . $self->user ) |
61
|
|
|
|
|
|
|
if $self->user && !$self->password; |
62
|
|
|
|
|
|
|
|
63
|
20
|
100
|
100
|
|
|
71
|
Catmandu::BadVal->throw("Invalid filter, filter should be a CODE ref") |
64
|
|
|
|
|
|
|
if $self->filter && !is_code_ref( $self->filter ); |
65
|
|
|
|
|
|
|
|
66
|
19
|
100
|
|
|
|
63
|
Catmandu::BadVal->throw( |
67
|
|
|
|
|
|
|
"Invalid value for timeout, should be non negative integer") |
68
|
|
|
|
|
|
|
if !is_natural( $self->timeout ); |
69
|
|
|
|
|
|
|
|
70
|
18
|
50
|
|
|
|
182
|
Catmandu::BadVal->throw( |
71
|
|
|
|
|
|
|
"Invalid value for max_retries, should be non negative integer") |
72
|
|
|
|
|
|
|
if !is_natural( $self->max_retries ); |
73
|
|
|
|
|
|
|
|
74
|
18
|
|
|
|
|
119
|
my $url = $self->base; |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
# remove first any username password: |
77
|
18
|
|
|
|
|
79
|
$url =~ s|^(\w+://)[^\@/]+[:][^\@/]*\@|$1|; |
78
|
18
|
100
|
|
|
|
419
|
if ( !is_web_uri($url) ) { |
79
|
2
|
|
|
|
|
497
|
Catmandu::BadVal->throw( "Invalid base URL: " . $self->base ); |
80
|
|
|
|
|
|
|
} |
81
|
|
|
|
|
|
|
|
82
|
16
|
|
|
|
|
7509
|
my $options = $self->options; |
83
|
|
|
|
|
|
|
|
84
|
16
|
50
|
33
|
|
|
84
|
if ( !$self->fullResponse && $self->post_xml ) { |
85
|
0
|
0
|
0
|
|
|
0
|
if ( $options->{offset} || $options->{page} || |
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
86
|
|
|
|
|
|
|
(defined $options->{size} && $options->{size}==0) ) { |
87
|
0
|
|
|
|
|
0
|
$self->{fullResponse} = 1; |
88
|
|
|
|
|
|
|
} |
89
|
|
|
|
|
|
|
} |
90
|
16
|
100
|
66
|
|
|
168
|
if ( !$self->fullResponse && $options->{offset} ) { |
91
|
1
|
|
|
|
|
14
|
$self->{_start} = $options->{offset}; |
92
|
|
|
|
|
|
|
} |
93
|
|
|
|
|
|
|
} |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
sub _build_furl { |
96
|
0
|
|
|
0
|
|
0
|
my ( $user, $password, $apiKey ) = ( $_[0]->user, $_[0]->password, $_[0]->apiKey ); |
97
|
0
|
|
|
|
|
0
|
my @headers; |
98
|
|
|
|
|
|
|
|
99
|
0
|
0
|
|
|
|
0
|
push @headers, |
100
|
|
|
|
|
|
|
( 'Authorization' => ( 'Basic ' . encode_base64("$user:$password") ) ) |
101
|
|
|
|
|
|
|
if $user; |
102
|
0
|
0
|
|
|
|
0
|
push @headers, ( 'api-key' => $apiKey ) |
103
|
|
|
|
|
|
|
if $apiKey; |
104
|
0
|
|
|
|
|
0
|
Furl->new( |
105
|
|
|
|
|
|
|
agent => $_[0]->userAgent, |
106
|
|
|
|
|
|
|
timeout => $_[0]->timeout, |
107
|
|
|
|
|
|
|
headers => \@headers |
108
|
|
|
|
|
|
|
); |
109
|
|
|
|
|
|
|
} |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
sub _coerce_handler { |
112
|
27
|
|
|
27
|
|
152
|
my ($handler) = @_; |
113
|
|
|
|
|
|
|
|
114
|
27
|
100
|
100
|
|
|
224
|
return $handler if is_invocant($handler) or is_code_ref($handler); |
115
|
|
|
|
|
|
|
|
116
|
25
|
100
|
100
|
|
|
137
|
if ( is_string($handler) && !is_number($handler) ) { |
117
|
23
|
100
|
|
|
|
88
|
my $class = |
118
|
|
|
|
|
|
|
$handler =~ /^\+(.+)/ |
119
|
|
|
|
|
|
|
? $1 |
120
|
|
|
|
|
|
|
: "Catmandu::Importer::Pure::Parser::$handler"; |
121
|
|
|
|
|
|
|
|
122
|
23
|
|
|
|
|
35
|
my $handler; |
123
|
23
|
|
|
|
|
37
|
eval { $handler = Catmandu::Util::require_package($class)->new; }; |
|
23
|
|
|
|
|
74
|
|
124
|
23
|
100
|
|
|
|
6034
|
if ($@) { |
125
|
1
|
|
|
|
|
874
|
Catmandu::Error->throw("Unable to load handler $class: $@"); |
126
|
|
|
|
|
|
|
} else { |
127
|
22
|
|
|
|
|
397
|
return $handler; |
128
|
|
|
|
|
|
|
} |
129
|
|
|
|
|
|
|
} |
130
|
|
|
|
|
|
|
|
131
|
2
|
|
50
|
|
|
6
|
$handler ||= ''; |
132
|
2
|
|
|
|
|
16
|
Catmandu::BadVal->throw("Invalid handler: '$handler'"); |
133
|
|
|
|
|
|
|
} |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
sub _coerce_options { |
136
|
24
|
|
|
24
|
|
69
|
my ($options) = @_; |
137
|
|
|
|
|
|
|
|
138
|
24
|
100
|
|
|
|
352
|
return $options if !%$options; |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
return { # arrays to comman separated values |
141
|
3
|
50
|
|
|
|
13
|
map { $_ => (ref $options->{$_} eq 'ARRAY' ? join (',', @{$options->{$_}}) : $options->{$_})} |
|
3
|
|
|
|
|
61
|
|
|
0
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
keys %$options |
143
|
|
|
|
|
|
|
}; |
144
|
|
|
|
|
|
|
} |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
sub _request { |
147
|
0
|
|
|
0
|
|
|
my ( $self, $url, $rcontent ) = @_; |
148
|
|
|
|
|
|
|
|
149
|
0
|
|
|
|
|
|
$self->log->debug("requesting $url\n"); |
150
|
|
|
|
|
|
|
|
151
|
0
|
|
|
|
|
|
my $res; |
152
|
0
|
|
|
|
|
|
my $tries = $self->max_retries; |
153
|
|
|
|
|
|
|
try { |
154
|
0
|
|
0
|
0
|
|
|
do { |
|
|
|
0
|
|
|
|
|
155
|
|
|
|
|
|
|
|
156
|
0
|
0
|
|
|
|
|
$res = $rcontent |
157
|
|
|
|
|
|
|
? $self->furl->post($url, ['Content-Type' => 'application/xml'], $$rcontent) |
158
|
|
|
|
|
|
|
: $self->furl->get($url); |
159
|
|
|
|
|
|
|
} while ( $res->status >= 500 && $tries-- && sleep(10) ) |
160
|
|
|
|
|
|
|
; # Retry on server error; |
161
|
0
|
0
|
0
|
|
|
|
die( $res->status_line ) |
|
|
|
0
|
|
|
|
|
162
|
|
|
|
|
|
|
unless $res->is_success |
163
|
|
|
|
|
|
|
|| ( $res->content && $res->content =~ m|<\?xml| ); |
164
|
0
|
|
|
|
|
|
return $res->content; |
165
|
|
|
|
|
|
|
} |
166
|
|
|
|
|
|
|
catch { |
167
|
0
|
|
|
0
|
|
|
Catmandu::Error->throw( |
168
|
|
|
|
|
|
|
"Requested '$url'\nStatus code: " . $res->status_line ); |
169
|
0
|
|
|
|
|
|
}; |
170
|
|
|
|
|
|
|
} |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
sub _url { |
173
|
0
|
|
|
0
|
|
|
my ( $self, $options ) = @_; |
174
|
|
|
|
|
|
|
|
175
|
0
|
0
|
|
|
|
|
my $url = $self->base . '/' . $self->endpoint |
176
|
|
|
|
|
|
|
. ($self->path ? '/' . $self->path : ''); |
177
|
|
|
|
|
|
|
|
178
|
0
|
0
|
0
|
|
|
|
if ($options && %$options) { |
179
|
|
|
|
|
|
|
$url .= '?' . join '&', |
180
|
0
|
|
|
|
|
|
map { "$_=" . uri_escape( $options->{$_}, "^A-Za-z0-9\-\._~," ) } |
181
|
0
|
|
|
|
|
|
sort keys %{$options}; |
|
0
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
} |
183
|
0
|
|
|
|
|
|
return $url; |
184
|
|
|
|
|
|
|
} |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
sub _nextRecordSet { |
187
|
0
|
|
|
0
|
|
|
my ($self) = @_; |
188
|
|
|
|
|
|
|
|
189
|
0
|
|
|
|
|
|
my %options = %{ $self->options }; |
|
0
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
|
191
|
0
|
0
|
0
|
|
|
|
if (!$self->fullResponse && $self->post_xml) { |
192
|
0
|
|
|
|
|
|
$options{offset} = $self->_start; |
193
|
|
|
|
|
|
|
} |
194
|
|
|
|
|
|
|
|
195
|
0
|
|
0
|
|
|
|
my $url = $self->_next_url || $self->_url( \%options ); |
196
|
|
|
|
|
|
|
|
197
|
0
|
0
|
|
|
|
|
my $xml = $self->_request( $url, ($self->post_xml ? \$self->post_xml : undef) ); |
198
|
|
|
|
|
|
|
|
199
|
0
|
0
|
|
|
|
|
if ( $self->filter ) { |
200
|
0
|
|
|
|
|
|
&{ $self->filter }( \$xml ); |
|
0
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
} |
202
|
|
|
|
|
|
|
|
203
|
0
|
|
|
|
|
|
my $hash = $self->_hashify($xml); |
204
|
|
|
|
|
|
|
|
205
|
0
|
0
|
|
|
|
|
if ( exists $hash->{'error'} ) { |
206
|
|
|
|
|
|
|
Catmandu::Error->throw( |
207
|
|
|
|
|
|
|
"Requested '$url'\nPure REST Error ($hash->{error}{code}): " |
208
|
|
|
|
|
|
|
. $hash->{error}{title} |
209
|
|
|
|
|
|
|
. ( |
210
|
|
|
|
|
|
|
$hash->{'error'}{'description'} |
211
|
|
|
|
|
|
|
? "\nDescription:\n" . $hash->{error}{description} |
212
|
0
|
0
|
|
|
|
|
: '' |
213
|
|
|
|
|
|
|
) |
214
|
|
|
|
|
|
|
); |
215
|
|
|
|
|
|
|
} |
216
|
|
|
|
|
|
|
|
217
|
0
|
0
|
|
|
|
|
if ( $self->fullResponse ) { |
218
|
0
|
|
|
|
|
|
$self->{_rs_size} = 1; |
219
|
0
|
|
|
|
|
|
$self->{_total_size} = 1; |
220
|
0
|
|
|
|
|
|
return $hash->{results}; #check |
221
|
|
|
|
|
|
|
} |
222
|
|
|
|
|
|
|
|
223
|
0
|
|
|
|
|
|
$self->{_next_url} = $hash->{next_url}; #only GET requests |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
# get total number of results |
226
|
0
|
|
|
|
|
|
$self->{_total_size} = $hash->{count}; |
227
|
|
|
|
|
|
|
|
228
|
0
|
|
|
|
|
|
my $set = $hash->{results}; |
229
|
|
|
|
|
|
|
|
230
|
0
|
|
|
|
|
|
$self->{_rs_size} = scalar(@$set); |
231
|
|
|
|
|
|
|
|
232
|
0
|
|
|
|
|
|
return $set; |
233
|
|
|
|
|
|
|
} |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
# Internal: gets the next record from our current resultset. |
236
|
|
|
|
|
|
|
# Returns a hash representation of the next record. |
237
|
|
|
|
|
|
|
sub _nextRecord { |
238
|
0
|
|
|
0
|
|
|
my ($self) = @_; |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
# fetch recordset if we don't have one yet. |
241
|
0
|
|
0
|
|
|
|
$self->{_currentRecordSet} ||= $self->_nextRecordSet || return; |
|
|
|
0
|
|
|
|
|
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
return |
244
|
0
|
0
|
0
|
|
|
|
if (!$self->_next_url ) && $self->_total_size |
|
|
|
0
|
|
|
|
|
245
|
|
|
|
|
|
|
&& ( $self->_start + $self->_n ) >= |
246
|
|
|
|
|
|
|
$self->_total_size; # no more results |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
# check for a exhausted recordset. |
249
|
0
|
0
|
|
|
|
|
if ( $self->_n >= $self->_rs_size ) { |
250
|
0
|
|
|
|
|
|
$self->{_start} += $self->_rs_size; |
251
|
0
|
|
|
|
|
|
$self->{_n} = 0; |
252
|
0
|
|
|
|
|
|
$self->{_currentRecordSet} = $self->_nextRecordSet; |
253
|
|
|
|
|
|
|
} |
254
|
|
|
|
|
|
|
|
255
|
0
|
|
|
|
|
|
my $record_dom = $self->_currentRecordSet->[ $self->{_n}++ ]; |
256
|
|
|
|
|
|
|
|
257
|
0
|
|
|
|
|
|
return $self->_handle_record($record_dom); |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
} |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
# Internal: Converts XML to a perl hash. |
262
|
|
|
|
|
|
|
# $in - the raw XML input. |
263
|
|
|
|
|
|
|
# Returns a hash representation of the given XML. |
264
|
|
|
|
|
|
|
sub _hashify { |
265
|
0
|
|
|
0
|
|
|
my ( $self, $in ) = @_; |
266
|
|
|
|
|
|
|
|
267
|
0
|
|
|
|
|
|
my $parser = XML::LibXML->new(); |
268
|
0
|
|
|
|
|
|
my $doc = $parser->load_xml( string => $in ); |
269
|
0
|
|
|
|
|
|
my $root = $doc->documentElement; |
270
|
0
|
|
|
|
|
|
my $xc = XML::LibXML::XPathContext->new($root); |
271
|
|
|
|
|
|
|
|
272
|
0
|
0
|
|
|
|
|
if ( $self->trim_text ) { |
273
|
0
|
|
|
|
|
|
my $all_text_nodes = $doc->findnodes('//text()'); |
274
|
|
|
|
|
|
|
$all_text_nodes->foreach( |
275
|
|
|
|
|
|
|
sub { |
276
|
0
|
|
|
0
|
|
|
my $node = shift; |
277
|
0
|
|
|
|
|
|
my $t = $node->data; |
278
|
0
|
|
0
|
|
|
|
my $subs_done = |
|
|
|
0
|
|
|
|
|
279
|
|
|
|
|
|
|
( $t =~ s/\A\s+// || 0 ) + ( $t =~ s/\s+\Z// || 0 ); |
280
|
0
|
0
|
|
|
|
|
$node->setData($t) if $subs_done; |
281
|
|
|
|
|
|
|
} |
282
|
0
|
|
|
|
|
|
); |
283
|
|
|
|
|
|
|
} |
284
|
|
|
|
|
|
|
|
285
|
0
|
|
|
|
|
|
my $out; |
286
|
|
|
|
|
|
|
|
287
|
0
|
0
|
|
|
|
|
if ( $xc->exists('/error') ) { |
288
|
0
|
|
|
|
|
|
my $code = $xc->findvalue('/error/code'); |
289
|
0
|
|
|
|
|
|
my $title = $xc->findvalue('/error/title'); |
290
|
0
|
|
|
|
|
|
my $description = $xc->findvalue('/error/description'); |
291
|
|
|
|
|
|
|
|
292
|
0
|
|
|
|
|
|
$out->{error} = { code => $code, title => $title, description => $description }; |
293
|
0
|
|
|
|
|
|
return $out; |
294
|
|
|
|
|
|
|
} |
295
|
|
|
|
|
|
|
|
296
|
0
|
|
|
|
|
|
my $next_url = $xc->findvalue('/result/navigationLink[@ref="next"]/@href|/result/navigationLinks/navigationLink[@ref="next"]/@href'); |
297
|
0
|
0
|
|
|
|
|
$next_url =~ s/&/&/g if $next_url; |
298
|
0
|
0
|
|
|
|
|
$out->{next_url} = $next_url if $next_url; |
299
|
|
|
|
|
|
|
|
300
|
0
|
|
|
|
|
|
$out->{count} = $xc->findvalue("/result/count"); |
301
|
|
|
|
|
|
|
|
302
|
0
|
|
|
|
|
|
my @result_nodes; |
303
|
|
|
|
|
|
|
|
304
|
0
|
0
|
|
|
|
|
if ( $xc->exists('/result/items') ) { |
|
|
0
|
|
|
|
|
|
305
|
0
|
|
|
|
|
|
@result_nodes = $xc->findnodes('/result/items/*'); |
306
|
|
|
|
|
|
|
} elsif ($self->endpoint eq 'changes') { |
307
|
0
|
|
|
|
|
|
@result_nodes = $xc->findnodes('/result/contentChange'); |
308
|
|
|
|
|
|
|
} else { |
309
|
0
|
|
|
|
|
|
@result_nodes = $xc->findnodes('/result/*[@uuid]'); |
310
|
|
|
|
|
|
|
}; |
311
|
|
|
|
|
|
|
|
312
|
0
|
0
|
|
|
|
|
if ( $self->fullResponse ) { |
313
|
0
|
|
|
|
|
|
$out->{results} = [$root]; |
314
|
0
|
|
|
|
|
|
return $out; |
315
|
|
|
|
|
|
|
} |
316
|
|
|
|
|
|
|
|
317
|
0
|
|
|
|
|
|
$out->{results} = \@result_nodes; |
318
|
|
|
|
|
|
|
|
319
|
0
|
|
|
|
|
|
return $out; |
320
|
|
|
|
|
|
|
} |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
sub _handle_record { |
323
|
0
|
|
|
0
|
|
|
my ( $self, $dom ) = @_; |
324
|
0
|
0
|
|
|
|
|
return unless $dom; |
325
|
|
|
|
|
|
|
|
326
|
0
|
0
|
|
|
|
|
return blessed( $self->handler ) |
327
|
|
|
|
|
|
|
? $self->handler->parse($dom) |
328
|
|
|
|
|
|
|
: $self->handler->($dom); |
329
|
|
|
|
|
|
|
} |
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
# Public Methods. -------------------------------------------------------------- |
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
sub url { |
335
|
0
|
|
|
0
|
1
|
|
my ($self) = @_; |
336
|
0
|
|
|
|
|
|
return $self->_url( $self->options ) |
337
|
|
|
|
|
|
|
; |
338
|
|
|
|
|
|
|
} |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
sub generator { |
341
|
|
|
|
|
|
|
my ($self) = @_; |
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
return sub { |
344
|
|
|
|
|
|
|
$self->_nextRecord; |
345
|
|
|
|
|
|
|
}; |
346
|
|
|
|
|
|
|
} |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
1; |
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
=head1 NAME |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
Catmandu::Importer::Pure - Package that imports Pure data. |
353
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
=head1 SYNOPSIS |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
# From the command line |
357
|
|
|
|
|
|
|
$ catmandu convert Pure \ |
358
|
|
|
|
|
|
|
--base https://host/ws/api/... \ |
359
|
|
|
|
|
|
|
--endpoint research-outputs \ |
360
|
|
|
|
|
|
|
--apiKey "..." |
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
# In Perl |
363
|
|
|
|
|
|
|
use Catmandu; |
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
my %attrs = ( |
366
|
|
|
|
|
|
|
base => 'http://host/path', |
367
|
|
|
|
|
|
|
endpoint => 'research-outputs', |
368
|
|
|
|
|
|
|
apiKey => '...', |
369
|
|
|
|
|
|
|
); |
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
TODO: add options |
372
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
my $importer = Catmandu->importer('Pure', %attrs); |
375
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
my $n = $importer->each(sub { |
377
|
|
|
|
|
|
|
my $hashref = $_[0]; |
378
|
|
|
|
|
|
|
# ... |
379
|
|
|
|
|
|
|
}); |
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
# get number of valid and approved publications |
382
|
|
|
|
|
|
|
my $count = Catmandu->importer( |
383
|
|
|
|
|
|
|
'Pure', |
384
|
|
|
|
|
|
|
base => base, |
385
|
|
|
|
|
|
|
endpoint => 'research-outputs', |
386
|
|
|
|
|
|
|
apiKey => '...', |
387
|
|
|
|
|
|
|
fullResponse => 1, |
388
|
|
|
|
|
|
|
post_xml => '<?xml version="1.0" encoding="utf-8"?>' |
389
|
|
|
|
|
|
|
. '<researchOutputsQuery>', |
390
|
|
|
|
|
|
|
. '<workflowSteps>approved</workflowSteps>', |
391
|
|
|
|
|
|
|
. '</researchOutputsQuery>', |
392
|
|
|
|
|
|
|
)->first->{result}{count}; |
393
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
=head1 DESCRIPTION |
395
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
Catmandu::Importer::Pure is a Catmandu package that seamlessly imports data from Elsevier's Pure system using its REST service. |
397
|
|
|
|
|
|
|
In order to use the Pure Web Service you need an API key. List of all available endpoints and further documentation can currently |
398
|
|
|
|
|
|
|
be found under /ws on a webserver that is running Pure. Note that this version of the importer is tested with Pure API version |
399
|
|
|
|
|
|
|
5.18 and might not work with later versions. |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
=head1 CONFIGURATION |
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
=over |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
=item base |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
Base URL for the REST service is required, for example 'http://purehost.com/ws/api/518' |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
=item endpoint |
410
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
Valid endpoint is required, like 'research-outputs' |
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
=item apiKey |
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
Valid API key is required for access |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
=item path |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
Path after the endpoint |
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
=item user |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
User name if basic authentication is used |
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
=item password |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
Password if basic authentication is used |
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
=item options |
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
Options passed as parameters to the REST service, for example: |
432
|
|
|
|
|
|
|
{ |
433
|
|
|
|
|
|
|
'size' => 20, |
434
|
|
|
|
|
|
|
'fields' => 'title,type,authors.*' |
435
|
|
|
|
|
|
|
} |
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
=item post_xml |
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
xml containing a query that will be submitted with a POST request |
441
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
=item fullResponse |
443
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
Optional flag. If true delivers the complete results as a single item (record), corresponding to the |
445
|
|
|
|
|
|
|
XML response received. Only one request to the REST service is made in this case. Default is false. |
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
If the flag is false then the items are set to child |
448
|
|
|
|
|
|
|
elements of the element 'result' or in case the 'result' element does not exist they are set to child elements |
449
|
|
|
|
|
|
|
of the root element for each response. |
450
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
=item handler( sub {} | $object | 'NAME' | '+NAME' ) |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
Handler to transform each record from XML DOM (L<XML::LibXML::Element>) into |
454
|
|
|
|
|
|
|
Perl hash. |
455
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
Handlers can be provided as function reference, an instance of a Perl |
457
|
|
|
|
|
|
|
package that implements 'parse', or by a package NAME. Package names should |
458
|
|
|
|
|
|
|
be prepended by C<+> or prefixed with C<Catmandu::Importer::Pure::Parser>. E.g |
459
|
|
|
|
|
|
|
C<foobar> will create a C<Catmandu::Importer::Pure::Parser::foobar> instance. |
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
By default the handler L<Catmandu::Importer::Pure::Parser::simple> is used. |
462
|
|
|
|
|
|
|
It provides a simple XML parsing, using XML::LibXML::Simple, |
463
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
Other possible values are L<Catmandu::Importer::Pure::Parser::struct> for XML::Struct |
465
|
|
|
|
|
|
|
based structure that preserves order and L<Catmandu::Importer::Pure::Parser::raw> that |
466
|
|
|
|
|
|
|
returns the XML as it is. |
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
=item userAgent |
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
HTTP user agent string, set to C<Mozilla/5.0> by default. |
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
=item furl |
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
Instance of L<Furl> or compatible class to fetch URLs with. |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
=item timeout |
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
Timeout for HTTP requests in seonds. Defaults to 50. |
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
=item trim_text |
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
Optional flag. If true then all text nodes in the REST response are trimmed so that any leading and trailing whitespace is removed before parsing. |
483
|
|
|
|
|
|
|
This is useful if you don't want to risk getting leading and trailing whitespace in your data, since Pure doesn't currently clean leading/trailing white space from |
484
|
|
|
|
|
|
|
user input. Note that there is a small performance penalty when using this option. Default is false. |
485
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
=item filter( sub {} ) |
487
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
Optional reference to function that processes the XML response before it is parsed. The argument to the function is a reference to the XML text, |
489
|
|
|
|
|
|
|
which is then used to modify it. This is option is normally not needed but can helpful if there is a problem parsing the response due to a bug |
490
|
|
|
|
|
|
|
in the REST service. |
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
=back |
493
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
=head1 METHODS |
495
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
In addition to methods inherited from Catmandu::Iterable, this module provides the following public methods: |
497
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
=over |
499
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
=item B<url > |
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
Return the current Pure REST request URL (useful for debugging). |
503
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
=back |
505
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
=head1 SEE ALSO |
507
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
L<Catmandu::Importer> |
509
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
L<Catmandu::Iterable> |
511
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
Furl |
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
http://librecat.org |
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
=head1 AUTHOR |
517
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
Snorri Briem E<lt>briem@cpan.orgE<gt> |
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
=head1 COPYRIGHT |
521
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
Copyright 2017- Lund University Library |
523
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
=head1 LICENSE |
525
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. |
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
=cut |