| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Catmandu::Importer::Pure; |
|
2
|
|
|
|
|
|
|
|
|
3
|
2
|
|
|
2
|
|
225699
|
use Catmandu::Sane; |
|
|
2
|
|
|
|
|
412196
|
|
|
|
2
|
|
|
|
|
16
|
|
|
4
|
2
|
|
|
2
|
|
739
|
use Catmandu::Util qw(:is); |
|
|
2
|
|
|
|
|
6
|
|
|
|
2
|
|
|
|
|
596
|
|
|
5
|
2
|
|
|
2
|
|
1133
|
use URI::Escape; |
|
|
2
|
|
|
|
|
3261
|
|
|
|
2
|
|
|
|
|
127
|
|
|
6
|
2
|
|
|
2
|
|
1014
|
use MIME::Base64; |
|
|
2
|
|
|
|
|
1494
|
|
|
|
2
|
|
|
|
|
153
|
|
|
7
|
2
|
|
|
2
|
|
989
|
use Furl; |
|
|
2
|
|
|
|
|
51580
|
|
|
|
2
|
|
|
|
|
77
|
|
|
8
|
2
|
|
|
2
|
|
19
|
use Moo; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
17
|
|
|
9
|
2
|
|
|
2
|
|
1061
|
use Carp; |
|
|
2
|
|
|
|
|
5
|
|
|
|
2
|
|
|
|
|
137
|
|
|
10
|
2
|
|
|
2
|
|
1433
|
use XML::LibXML; |
|
|
2
|
|
|
|
|
96618
|
|
|
|
2
|
|
|
|
|
14
|
|
|
11
|
2
|
|
|
2
|
|
355
|
use XML::LibXML::XPathContext; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
56
|
|
|
12
|
2
|
|
|
2
|
|
1115
|
use Data::Validate::URI qw(is_web_uri); |
|
|
2
|
|
|
|
|
88082
|
|
|
|
2
|
|
|
|
|
153
|
|
|
13
|
2
|
|
|
2
|
|
19
|
use Scalar::Util qw(blessed); |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
6145
|
|
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
our $VERSION = '0.05'; |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
with 'Catmandu::Importer'; |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
has base => ( is => 'ro' ); |
|
20
|
|
|
|
|
|
|
has endpoint => ( is => 'ro' ); |
|
21
|
|
|
|
|
|
|
has path => ( is => 'ro' ); |
|
22
|
|
|
|
|
|
|
has apiKey => ( is => 'ro' ); |
|
23
|
|
|
|
|
|
|
has user => ( is => 'ro' ); |
|
24
|
|
|
|
|
|
|
has password => ( is => 'ro' ); |
|
25
|
|
|
|
|
|
|
has post_xml => ( is => 'ro' ); |
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
has handler => |
|
28
|
|
|
|
|
|
|
( is => 'ro', default => sub { 'simple' }, coerce => \&_coerce_handler ); |
|
29
|
|
|
|
|
|
|
has options => |
|
30
|
|
|
|
|
|
|
( is => 'ro', default => sub { +{} }, coerce => \&_coerce_options ); |
|
31
|
|
|
|
|
|
|
has fullResponse => ( is => 'ro', default => sub { 0 } ); |
|
32
|
|
|
|
|
|
|
has trim_text => ( is => 'ro', default => sub { 0 } ); |
|
33
|
|
|
|
|
|
|
has filter => ( is => 'ro' ); |
|
34
|
|
|
|
|
|
|
has userAgent => ( is => 'ro', default => sub { 'Mozilla/5.0' } ); |
|
35
|
|
|
|
|
|
|
has timeout => ( is => 'ro', default => sub { 50 } ); |
|
36
|
|
|
|
|
|
|
has furl => ( |
|
37
|
|
|
|
|
|
|
is => 'ro', |
|
38
|
|
|
|
|
|
|
isa => sub { |
|
39
|
|
|
|
|
|
|
Catmandu::BadVal->throw("Invalid furl, should be compatible with Furl") |
|
40
|
|
|
|
|
|
|
unless is_maybe_able( $_[0], 'get' ); |
|
41
|
|
|
|
|
|
|
}, |
|
42
|
|
|
|
|
|
|
lazy => 1, |
|
43
|
|
|
|
|
|
|
builder => \&_build_furl |
|
44
|
|
|
|
|
|
|
); |
|
45
|
|
|
|
|
|
|
has max_retries => ( is => 'ro', default => sub { 0 } ); |
|
46
|
|
|
|
|
|
|
has _currentRecordSet => ( is => 'ro' ); |
|
47
|
|
|
|
|
|
|
has _n => ( is => 'ro', default => sub { 0 } ); |
|
48
|
|
|
|
|
|
|
has _start => ( is => 'ro', default => sub { 0 } ); |
|
49
|
|
|
|
|
|
|
has _rs_size => ( is => 'ro', default => sub { 0 } ); |
|
50
|
|
|
|
|
|
|
has _total_size => ( is => 'ro', default => sub { 0 } ); |
|
51
|
|
|
|
|
|
|
has _next_url => ( is => 'ro'); |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
sub BUILD { |
|
55
|
24
|
|
|
24
|
0
|
126
|
my $self = shift; |
|
56
|
|
|
|
|
|
|
|
|
57
|
24
|
100
|
100
|
|
|
232
|
Catmandu::BadVal->throw("Base URL, endpoint and apiKey are required") |
|
|
|
|
100
|
|
|
|
|
|
58
|
|
|
|
|
|
|
unless $self->base && $self->endpoint && $self->apiKey; |
|
59
|
|
|
|
|
|
|
|
|
60
|
21
|
100
|
100
|
|
|
101
|
Catmandu::BadVal->throw( "Password is needed for user " . $self->user ) |
|
61
|
|
|
|
|
|
|
if $self->user && !$self->password; |
|
62
|
|
|
|
|
|
|
|
|
63
|
20
|
100
|
100
|
|
|
80
|
Catmandu::BadVal->throw("Invalid filter, filter should be a CODE ref") |
|
64
|
|
|
|
|
|
|
if $self->filter && !is_code_ref( $self->filter ); |
|
65
|
|
|
|
|
|
|
|
|
66
|
19
|
100
|
|
|
|
68
|
Catmandu::BadVal->throw( |
|
67
|
|
|
|
|
|
|
"Invalid value for timeout, should be non negative integer") |
|
68
|
|
|
|
|
|
|
if !is_natural( $self->timeout ); |
|
69
|
|
|
|
|
|
|
|
|
70
|
18
|
50
|
|
|
|
237
|
Catmandu::BadVal->throw( |
|
71
|
|
|
|
|
|
|
"Invalid value for max_retries, should be non negative integer") |
|
72
|
|
|
|
|
|
|
if !is_natural( $self->max_retries ); |
|
73
|
|
|
|
|
|
|
|
|
74
|
18
|
|
|
|
|
122
|
my $url = $self->base; |
|
75
|
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
# remove first any username password: |
|
77
|
18
|
|
|
|
|
81
|
$url =~ s|^(\w+://)[^\@/]+[:][^\@/]*\@|$1|; |
|
78
|
18
|
100
|
|
|
|
421
|
if ( !is_web_uri($url) ) { |
|
79
|
2
|
|
|
|
|
545
|
Catmandu::BadVal->throw( "Invalid base URL: " . $self->base ); |
|
80
|
|
|
|
|
|
|
} |
|
81
|
|
|
|
|
|
|
|
|
82
|
16
|
|
|
|
|
7815
|
my $options = $self->options; |
|
83
|
|
|
|
|
|
|
|
|
84
|
16
|
50
|
33
|
|
|
86
|
if ( !$self->fullResponse && $self->post_xml ) { |
|
85
|
0
|
0
|
0
|
|
|
0
|
if ( $options->{offset} || $options->{page} || |
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
86
|
|
|
|
|
|
|
(defined $options->{size} && $options->{size}==0) ) { |
|
87
|
0
|
|
|
|
|
0
|
$self->{fullResponse} = 1; |
|
88
|
|
|
|
|
|
|
} |
|
89
|
|
|
|
|
|
|
} |
|
90
|
16
|
100
|
66
|
|
|
178
|
if ( !$self->fullResponse && $options->{offset} ) { |
|
91
|
1
|
|
|
|
|
15
|
$self->{_start} = $options->{offset}; |
|
92
|
|
|
|
|
|
|
} |
|
93
|
|
|
|
|
|
|
} |
|
94
|
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
sub _build_furl { |
|
96
|
0
|
|
|
0
|
|
0
|
my ( $user, $password, $apiKey ) = ( $_[0]->user, $_[0]->password, $_[0]->apiKey ); |
|
97
|
0
|
|
|
|
|
0
|
my @headers; |
|
98
|
|
|
|
|
|
|
|
|
99
|
0
|
0
|
|
|
|
0
|
push @headers, |
|
100
|
|
|
|
|
|
|
( 'Authorization' => ( 'Basic ' . encode_base64("$user:$password") ) ) |
|
101
|
|
|
|
|
|
|
if $user; |
|
102
|
0
|
0
|
|
|
|
0
|
push @headers, ( 'api-key' => $apiKey ) |
|
103
|
|
|
|
|
|
|
if $apiKey; |
|
104
|
0
|
|
|
|
|
0
|
Furl->new( |
|
105
|
|
|
|
|
|
|
agent => $_[0]->userAgent, |
|
106
|
|
|
|
|
|
|
timeout => $_[0]->timeout, |
|
107
|
|
|
|
|
|
|
headers => \@headers |
|
108
|
|
|
|
|
|
|
); |
|
109
|
|
|
|
|
|
|
} |
|
110
|
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
sub _coerce_handler { |
|
112
|
27
|
|
|
27
|
|
147
|
my ($handler) = @_; |
|
113
|
|
|
|
|
|
|
|
|
114
|
27
|
100
|
100
|
|
|
249
|
return $handler if is_invocant($handler) or is_code_ref($handler); |
|
115
|
|
|
|
|
|
|
|
|
116
|
25
|
100
|
100
|
|
|
147
|
if ( is_string($handler) && !is_number($handler) ) { |
|
117
|
23
|
100
|
|
|
|
86
|
my $class = |
|
118
|
|
|
|
|
|
|
$handler =~ /^\+(.+)/ |
|
119
|
|
|
|
|
|
|
? $1 |
|
120
|
|
|
|
|
|
|
: "Catmandu::Importer::Pure::Parser::$handler"; |
|
121
|
|
|
|
|
|
|
|
|
122
|
23
|
|
|
|
|
35
|
my $handler; |
|
123
|
23
|
|
|
|
|
36
|
eval { $handler = Catmandu::Util::require_package($class)->new; }; |
|
|
23
|
|
|
|
|
76
|
|
|
124
|
23
|
100
|
|
|
|
6598
|
if ($@) { |
|
125
|
1
|
|
|
|
|
908
|
Catmandu::Error->throw("Unable to load handler $class: $@"); |
|
126
|
|
|
|
|
|
|
} else { |
|
127
|
22
|
|
|
|
|
396
|
return $handler; |
|
128
|
|
|
|
|
|
|
} |
|
129
|
|
|
|
|
|
|
} |
|
130
|
|
|
|
|
|
|
|
|
131
|
2
|
|
50
|
|
|
10
|
$handler ||= ''; |
|
132
|
2
|
|
|
|
|
19
|
Catmandu::BadVal->throw("Invalid handler: '$handler'"); |
|
133
|
|
|
|
|
|
|
} |
|
134
|
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
sub _coerce_options { |
|
136
|
24
|
|
|
24
|
|
75
|
my ($options) = @_; |
|
137
|
|
|
|
|
|
|
|
|
138
|
24
|
100
|
|
|
|
357
|
return $options if !%$options; |
|
139
|
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
return { # arrays to comman separated values |
|
141
|
3
|
50
|
|
|
|
14
|
map { $_ => (ref $options->{$_} eq 'ARRAY' ? join (',', @{$options->{$_}}) : $options->{$_})} |
|
|
3
|
|
|
|
|
67
|
|
|
|
0
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
keys %$options |
|
143
|
|
|
|
|
|
|
}; |
|
144
|
|
|
|
|
|
|
} |
|
145
|
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
sub _request { |
|
147
|
0
|
|
|
0
|
|
|
my ( $self, $url, $rcontent ) = @_; |
|
148
|
|
|
|
|
|
|
|
|
149
|
0
|
|
|
|
|
|
$self->log->debug("requesting $url\n"); |
|
150
|
|
|
|
|
|
|
|
|
151
|
0
|
|
|
|
|
|
my $res; |
|
152
|
0
|
|
|
|
|
|
my $tries = $self->max_retries; |
|
153
|
|
|
|
|
|
|
try { |
|
154
|
0
|
|
0
|
0
|
|
|
do { |
|
|
|
|
0
|
|
|
|
|
|
155
|
|
|
|
|
|
|
|
|
156
|
0
|
0
|
|
|
|
|
$res = $rcontent |
|
157
|
|
|
|
|
|
|
? $self->furl->post($url, ['Content-Type' => 'application/xml'], $$rcontent) |
|
158
|
|
|
|
|
|
|
: $self->furl->get($url); |
|
159
|
|
|
|
|
|
|
} while ( $res->status >= 500 && $tries-- && sleep(10) ) |
|
160
|
|
|
|
|
|
|
; # Retry on server error; |
|
161
|
0
|
0
|
0
|
|
|
|
die( $res->status_line ) |
|
|
|
|
0
|
|
|
|
|
|
162
|
|
|
|
|
|
|
unless $res->is_success |
|
163
|
|
|
|
|
|
|
|| ( $res->content && $res->content =~ m|<\?xml| ); |
|
164
|
0
|
|
|
|
|
|
return $res->content; |
|
165
|
|
|
|
|
|
|
} |
|
166
|
|
|
|
|
|
|
catch { |
|
167
|
0
|
|
|
0
|
|
|
Catmandu::Error->throw( |
|
168
|
|
|
|
|
|
|
"Requested '$url'\nStatus code: " . $res->status_line ); |
|
169
|
0
|
|
|
|
|
|
}; |
|
170
|
|
|
|
|
|
|
} |
|
171
|
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
sub _url { |
|
173
|
0
|
|
|
0
|
|
|
my ( $self, $options ) = @_; |
|
174
|
|
|
|
|
|
|
|
|
175
|
0
|
0
|
|
|
|
|
my $url = $self->base . '/' . $self->endpoint |
|
176
|
|
|
|
|
|
|
. ($self->path ? '/' . $self->path : ''); |
|
177
|
|
|
|
|
|
|
|
|
178
|
0
|
0
|
0
|
|
|
|
if ($options && %$options) { |
|
179
|
|
|
|
|
|
|
$url .= '?' . join '&', |
|
180
|
0
|
|
|
|
|
|
map { "$_=" . uri_escape( $options->{$_}, "^A-Za-z0-9\-\._~," ) } |
|
181
|
0
|
|
|
|
|
|
sort keys %{$options}; |
|
|
0
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
} |
|
183
|
0
|
|
|
|
|
|
return $url; |
|
184
|
|
|
|
|
|
|
} |
|
185
|
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
sub _nextRecordSet { |
|
187
|
0
|
|
|
0
|
|
|
my ($self) = @_; |
|
188
|
|
|
|
|
|
|
|
|
189
|
0
|
|
|
|
|
|
my %options = %{ $self->options }; |
|
|
0
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
|
|
191
|
0
|
0
|
0
|
|
|
|
if (!$self->fullResponse && $self->post_xml) { |
|
192
|
0
|
|
|
|
|
|
$options{offset} = $self->_start; |
|
193
|
|
|
|
|
|
|
} |
|
194
|
|
|
|
|
|
|
|
|
195
|
0
|
|
0
|
|
|
|
my $url = $self->_next_url || $self->_url( \%options ); |
|
196
|
|
|
|
|
|
|
|
|
197
|
0
|
0
|
|
|
|
|
my $xml = $self->_request( $url, ($self->post_xml ? \$self->post_xml : undef) ); |
|
198
|
|
|
|
|
|
|
|
|
199
|
0
|
0
|
|
|
|
|
if ( $self->filter ) { |
|
200
|
0
|
|
|
|
|
|
&{ $self->filter }( \$xml ); |
|
|
0
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
} |
|
202
|
|
|
|
|
|
|
|
|
203
|
0
|
|
|
|
|
|
my $hash = $self->_hashify($xml); |
|
204
|
|
|
|
|
|
|
|
|
205
|
0
|
0
|
|
|
|
|
if ( exists $hash->{'error'} ) { |
|
206
|
|
|
|
|
|
|
Catmandu::Error->throw( |
|
207
|
|
|
|
|
|
|
"Requested '$url'\nPure REST Error ($hash->{error}{code}): " |
|
208
|
|
|
|
|
|
|
. $hash->{error}{title} |
|
209
|
|
|
|
|
|
|
. ( |
|
210
|
|
|
|
|
|
|
$hash->{'error'}{'description'} |
|
211
|
|
|
|
|
|
|
? "\nDescription:\n" . $hash->{error}{description} |
|
212
|
0
|
0
|
|
|
|
|
: '' |
|
213
|
|
|
|
|
|
|
) |
|
214
|
|
|
|
|
|
|
); |
|
215
|
|
|
|
|
|
|
} |
|
216
|
|
|
|
|
|
|
|
|
217
|
0
|
0
|
|
|
|
|
if ( $self->fullResponse ) { |
|
218
|
0
|
|
|
|
|
|
$self->{_rs_size} = 1; |
|
219
|
0
|
|
|
|
|
|
$self->{_total_size} = 1; |
|
220
|
0
|
|
|
|
|
|
return $hash->{results}; #check |
|
221
|
|
|
|
|
|
|
} |
|
222
|
|
|
|
|
|
|
|
|
223
|
0
|
|
|
|
|
|
$self->{_next_url} = $hash->{next_url}; #only GET requests |
|
224
|
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
# get total number of results |
|
226
|
0
|
|
|
|
|
|
$self->{_total_size} = $hash->{count}; |
|
227
|
|
|
|
|
|
|
|
|
228
|
0
|
|
|
|
|
|
my $set = $hash->{results}; |
|
229
|
|
|
|
|
|
|
|
|
230
|
0
|
|
|
|
|
|
$self->{_rs_size} = scalar(@$set); |
|
231
|
|
|
|
|
|
|
|
|
232
|
0
|
|
|
|
|
|
return $set; |
|
233
|
|
|
|
|
|
|
} |
|
234
|
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
# Internal: gets the next record from our current resultset. |
|
236
|
|
|
|
|
|
|
# Returns a hash representation of the next record. |
|
237
|
|
|
|
|
|
|
sub _nextRecord { |
|
238
|
0
|
|
|
0
|
|
|
my ($self) = @_; |
|
239
|
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
# fetch recordset if we don't have one yet. |
|
241
|
0
|
|
0
|
|
|
|
$self->{_currentRecordSet} ||= $self->_nextRecordSet || return; |
|
|
|
|
0
|
|
|
|
|
|
242
|
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
return |
|
244
|
0
|
0
|
0
|
|
|
|
if (!$self->_next_url ) && $self->_total_size |
|
|
|
|
0
|
|
|
|
|
|
245
|
|
|
|
|
|
|
&& ( $self->_start + $self->_n ) >= |
|
246
|
|
|
|
|
|
|
$self->_total_size; # no more results |
|
247
|
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
# check for a exhausted recordset. |
|
249
|
0
|
0
|
|
|
|
|
if ( $self->_n >= $self->_rs_size ) { |
|
250
|
0
|
|
|
|
|
|
$self->{_start} += $self->_rs_size; |
|
251
|
0
|
|
|
|
|
|
$self->{_n} = 0; |
|
252
|
0
|
|
|
|
|
|
$self->{_currentRecordSet} = $self->_nextRecordSet; |
|
253
|
|
|
|
|
|
|
} |
|
254
|
|
|
|
|
|
|
|
|
255
|
0
|
|
|
|
|
|
my $record_dom = $self->_currentRecordSet->[ $self->{_n}++ ]; |
|
256
|
|
|
|
|
|
|
|
|
257
|
0
|
|
|
|
|
|
return $self->_handle_record($record_dom); |
|
258
|
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
} |
|
260
|
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
# Internal: Converts XML to a perl hash. |
|
262
|
|
|
|
|
|
|
# $in - the raw XML input. |
|
263
|
|
|
|
|
|
|
# Returns a hash representation of the given XML. |
|
264
|
|
|
|
|
|
|
sub _hashify { |
|
265
|
0
|
|
|
0
|
|
|
my ( $self, $in ) = @_; |
|
266
|
|
|
|
|
|
|
|
|
267
|
0
|
|
|
|
|
|
my $parser = XML::LibXML->new(); |
|
268
|
0
|
|
|
|
|
|
my $doc = $parser->load_xml( string => $in ); |
|
269
|
0
|
|
|
|
|
|
my $root = $doc->documentElement; |
|
270
|
0
|
|
|
|
|
|
my $xc = XML::LibXML::XPathContext->new($root); |
|
271
|
|
|
|
|
|
|
|
|
272
|
0
|
0
|
|
|
|
|
if ( $self->trim_text ) { |
|
273
|
0
|
|
|
|
|
|
my $all_text_nodes = $doc->findnodes('//text()'); |
|
274
|
|
|
|
|
|
|
$all_text_nodes->foreach( |
|
275
|
|
|
|
|
|
|
sub { |
|
276
|
0
|
|
|
0
|
|
|
my $node = shift; |
|
277
|
0
|
|
|
|
|
|
my $t = $node->data; |
|
278
|
0
|
|
0
|
|
|
|
my $subs_done = |
|
|
|
|
0
|
|
|
|
|
|
279
|
|
|
|
|
|
|
( $t =~ s/\A\s+// || 0 ) + ( $t =~ s/\s+\Z// || 0 ); |
|
280
|
0
|
0
|
|
|
|
|
$node->setData($t) if $subs_done; |
|
281
|
|
|
|
|
|
|
} |
|
282
|
0
|
|
|
|
|
|
); |
|
283
|
|
|
|
|
|
|
} |
|
284
|
|
|
|
|
|
|
|
|
285
|
0
|
|
|
|
|
|
my $out; |
|
286
|
|
|
|
|
|
|
|
|
287
|
0
|
0
|
|
|
|
|
if ( $xc->exists('/error') ) { |
|
288
|
0
|
|
|
|
|
|
my $code = $xc->findvalue('/error/code'); |
|
289
|
0
|
|
|
|
|
|
my $title = $xc->findvalue('/error/title'); |
|
290
|
0
|
|
|
|
|
|
my $description = $xc->findvalue('/error/description'); |
|
291
|
|
|
|
|
|
|
|
|
292
|
0
|
|
|
|
|
|
$out->{error} = { code => $code, title => $title, description => $description }; |
|
293
|
0
|
|
|
|
|
|
return $out; |
|
294
|
|
|
|
|
|
|
} |
|
295
|
|
|
|
|
|
|
|
|
296
|
0
|
|
|
|
|
|
my $next_url = $xc->findvalue('/result/navigationLink[@ref="next"]/@href|/result/navigationLinks/navigationLink[@ref="next"]/@href'); |
|
297
|
0
|
0
|
|
|
|
|
$next_url =~ s/&/&/g if $next_url; |
|
298
|
0
|
0
|
|
|
|
|
$out->{next_url} = $next_url if $next_url; |
|
299
|
|
|
|
|
|
|
|
|
300
|
0
|
|
|
|
|
|
$out->{count} = $xc->findvalue("/result/count"); |
|
301
|
|
|
|
|
|
|
|
|
302
|
0
|
0
|
|
|
|
|
if ( $self->fullResponse ) { |
|
303
|
0
|
|
|
|
|
|
$out->{results} = [$root]; |
|
304
|
0
|
|
|
|
|
|
return $out; |
|
305
|
|
|
|
|
|
|
} |
|
306
|
|
|
|
|
|
|
|
|
307
|
0
|
|
|
|
|
|
my @result_nodes; |
|
308
|
|
|
|
|
|
|
|
|
309
|
0
|
0
|
|
|
|
|
if ( $xc->exists('/result/items') ) { |
|
|
|
0
|
|
|
|
|
|
|
310
|
0
|
|
|
|
|
|
@result_nodes = $xc->findnodes('/result/items/*'); |
|
311
|
|
|
|
|
|
|
} elsif ($self->endpoint eq 'changes') { |
|
312
|
0
|
|
|
|
|
|
@result_nodes = $xc->findnodes('/result/contentChange'); |
|
313
|
|
|
|
|
|
|
} else { |
|
314
|
0
|
|
|
|
|
|
@result_nodes = $xc->findnodes('/result/*[@uuid]'); |
|
315
|
|
|
|
|
|
|
}; |
|
316
|
|
|
|
|
|
|
|
|
317
|
0
|
|
|
|
|
|
$out->{results} = \@result_nodes; |
|
318
|
|
|
|
|
|
|
|
|
319
|
0
|
|
|
|
|
|
return $out; |
|
320
|
|
|
|
|
|
|
} |
|
321
|
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
sub _handle_record { |
|
323
|
0
|
|
|
0
|
|
|
my ( $self, $dom ) = @_; |
|
324
|
0
|
0
|
|
|
|
|
return unless $dom; |
|
325
|
|
|
|
|
|
|
|
|
326
|
0
|
0
|
|
|
|
|
return blessed( $self->handler ) |
|
327
|
|
|
|
|
|
|
? $self->handler->parse($dom) |
|
328
|
|
|
|
|
|
|
: $self->handler->($dom); |
|
329
|
|
|
|
|
|
|
} |
|
330
|
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
# Public Methods. -------------------------------------------------------------- |
|
333
|
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
sub url { |
|
335
|
0
|
|
|
0
|
1
|
|
my ($self) = @_; |
|
336
|
0
|
|
|
|
|
|
return $self->_url( $self->options ) |
|
337
|
|
|
|
|
|
|
; |
|
338
|
|
|
|
|
|
|
} |
|
339
|
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
sub generator { |
|
341
|
|
|
|
|
|
|
my ($self) = @_; |
|
342
|
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
return sub { |
|
344
|
|
|
|
|
|
|
$self->_nextRecord; |
|
345
|
|
|
|
|
|
|
}; |
|
346
|
|
|
|
|
|
|
} |
|
347
|
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
1; |
|
349
|
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
=head1 NAME |
|
351
|
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
Catmandu::Importer::Pure - Package that imports Pure data. |
|
353
|
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
355
|
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
# From the command line |
|
357
|
|
|
|
|
|
|
$ catmandu convert Pure \ |
|
358
|
|
|
|
|
|
|
--base https://host/ws/api/... \ |
|
359
|
|
|
|
|
|
|
--endpoint research-outputs \ |
|
360
|
|
|
|
|
|
|
--apiKey "..." |
|
361
|
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
# In Perl |
|
363
|
|
|
|
|
|
|
use Catmandu; |
|
364
|
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
my %attrs = ( |
|
366
|
|
|
|
|
|
|
base => 'https://host/path', |
|
367
|
|
|
|
|
|
|
endpoint => 'research-outputs', |
|
368
|
|
|
|
|
|
|
apiKey => '...', |
|
369
|
|
|
|
|
|
|
options => { 'fields' => 'title,type,authors.*' } |
|
370
|
|
|
|
|
|
|
); |
|
371
|
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
my $importer = Catmandu->importer('Pure', %attrs); |
|
373
|
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
my $n = $importer->each(sub { |
|
375
|
|
|
|
|
|
|
my $hashref = $_[0]; |
|
376
|
|
|
|
|
|
|
# ... |
|
377
|
|
|
|
|
|
|
}); |
|
378
|
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
# get number of validated and approved publications |
|
380
|
|
|
|
|
|
|
my $count = Catmandu->importer( |
|
381
|
|
|
|
|
|
|
'Pure', |
|
382
|
|
|
|
|
|
|
base => 'https://host/path', |
|
383
|
|
|
|
|
|
|
endpoint => 'research-outputs', |
|
384
|
|
|
|
|
|
|
apiKey => '...', |
|
385
|
|
|
|
|
|
|
fullResponse => 1, |
|
386
|
|
|
|
|
|
|
post_xml => '<?xml version="1.0" encoding="utf-8"?>' |
|
387
|
|
|
|
|
|
|
. '<researchOutputsQuery>' |
|
388
|
|
|
|
|
|
|
. '<size>0</size>' |
|
389
|
|
|
|
|
|
|
. '<workflowSteps>' |
|
390
|
|
|
|
|
|
|
. ' <workflowStep>approved</workflowStep>' |
|
391
|
|
|
|
|
|
|
. ' <workflowStep>validated</workflowStep>' |
|
392
|
|
|
|
|
|
|
. '</workflowSteps>' |
|
393
|
|
|
|
|
|
|
. '</researchOutputsQuery>' |
|
394
|
|
|
|
|
|
|
)->first->{count}; |
|
395
|
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
397
|
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
Catmandu::Importer::Pure is a Catmandu package that seamlessly imports data from Elsevier's Pure system using its REST service. |
|
399
|
|
|
|
|
|
|
In order to use the Pure Web Service you need an API key. List of all available endpoints and further documentation can currently |
|
400
|
|
|
|
|
|
|
be found under /ws on a webserver that is running Pure. Note that this version of the importer is tested with Pure API version |
|
401
|
|
|
|
|
|
|
5.18 and might not work with later versions. |
|
402
|
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
=head1 CONFIGURATION |
|
404
|
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
=over |
|
406
|
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
=item base |
|
408
|
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
Base URL for the REST service is required, for example 'http://purehost.com/ws/api/518' |
|
410
|
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
=item endpoint |
|
412
|
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
Valid endpoint is required, like 'research-outputs' |
|
414
|
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
=item apiKey |
|
416
|
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
Valid API key is required for access |
|
418
|
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
=item path |
|
420
|
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
Path after the endpoint |
|
422
|
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
=item user |
|
424
|
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
User name if basic authentication is used |
|
426
|
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
=item password |
|
428
|
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
Password if basic authentication is used |
|
430
|
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
=item options |
|
432
|
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
Options passed as parameters to the REST service, for example: |
|
434
|
|
|
|
|
|
|
{ |
|
435
|
|
|
|
|
|
|
'size' => 20, |
|
436
|
|
|
|
|
|
|
'fields' => 'title,type,authors.*' |
|
437
|
|
|
|
|
|
|
} |
|
438
|
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
=item post_xml |
|
441
|
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
xml containing a query that will be submitted with a POST request |
|
443
|
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
=item fullResponse |
|
445
|
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
Optional flag. If true delivers the complete results as a single item (record), corresponding to the |
|
447
|
|
|
|
|
|
|
XML response received. Only one request to the REST service is made in this case. Default is false. |
|
448
|
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
If the flag is false then the items are set to child |
|
450
|
|
|
|
|
|
|
elements of the element 'result' or in case the 'result' element does not exist they are set to child elements |
|
451
|
|
|
|
|
|
|
of the root element for each response. |
|
452
|
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
=item handler( sub {} | $object | 'NAME' | '+NAME' ) |
|
454
|
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
Handler to transform each record from XML DOM (L<XML::LibXML::Element>) into |
|
456
|
|
|
|
|
|
|
Perl hash. |
|
457
|
|
|
|
|
|
|
|
|
458
|
|
|
|
|
|
|
Handlers can be provided as function reference, an instance of a Perl |
|
459
|
|
|
|
|
|
|
package that implements 'parse', or by a package NAME. Package names should |
|
460
|
|
|
|
|
|
|
be prepended by C<+> or prefixed with C<Catmandu::Importer::Pure::Parser>. E.g |
|
461
|
|
|
|
|
|
|
C<foobar> will create a C<Catmandu::Importer::Pure::Parser::foobar> instance. |
|
462
|
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
By default the handler L<Catmandu::Importer::Pure::Parser::simple> is used. |
|
464
|
|
|
|
|
|
|
It provides a simple XML parsing, using XML::LibXML::Simple, |
|
465
|
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
Other possible values are L<Catmandu::Importer::Pure::Parser::struct> for XML::Struct |
|
467
|
|
|
|
|
|
|
based structure that preserves order and L<Catmandu::Importer::Pure::Parser::raw> that |
|
468
|
|
|
|
|
|
|
returns the XML as it is. |
|
469
|
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
=item userAgent |
|
471
|
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
HTTP user agent string, set to C<Mozilla/5.0> by default. |
|
473
|
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
=item furl |
|
475
|
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
Instance of L<Furl> or compatible class to fetch URLs with. |
|
477
|
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
=item timeout |
|
479
|
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
Timeout for HTTP requests in seonds. Defaults to 50. |
|
481
|
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
=item trim_text |
|
483
|
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
Optional flag. If true then all text nodes in the REST response are trimmed so that any leading and trailing whitespace is removed before parsing. |
|
485
|
|
|
|
|
|
|
This is useful if you don't want to risk getting leading and trailing whitespace in your data, since Pure doesn't currently clean leading/trailing white space from |
|
486
|
|
|
|
|
|
|
user input. Note that there is a small performance penalty when using this option. Default is false. |
|
487
|
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
=item filter( sub {} ) |
|
489
|
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
Optional reference to function that processes the XML response before it is parsed. The argument to the function is a reference to the XML text, |
|
491
|
|
|
|
|
|
|
which is then used to modify it. This option is normally not needed but can helpful if there is a problem parsing the response due to a bug |
|
492
|
|
|
|
|
|
|
in the REST service. |
|
493
|
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
=back |
|
495
|
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
=head1 METHODS |
|
497
|
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
In addition to methods inherited from Catmandu::Iterable, this module provides the following public methods: |
|
499
|
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
=over |
|
501
|
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
=item B<url > |
|
503
|
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
Return the current Pure REST request URL (useful for debugging). |
|
505
|
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
=back |
|
507
|
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
509
|
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
L<Catmandu> |
|
511
|
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
L<Catmandu::Importer> |
|
513
|
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
L<Catmandu::Iterable> |
|
515
|
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
L<Furl> |
|
517
|
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
=head1 AUTHOR |
|
519
|
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
Snorri Briem E<lt>briem@cpan.orgE<gt> |
|
521
|
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
=head1 COPYRIGHT |
|
523
|
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
Copyright 2017- Lund University Library |
|
525
|
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
=head1 LICENSE |
|
527
|
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. |
|
529
|
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
=cut |