| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Catmandu::Importer::getJSON; |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
our $VERSION = '0.52'; |
|
4
|
|
|
|
|
|
|
our $CACHE; |
|
5
|
|
|
|
|
|
|
|
|
6
|
4
|
|
|
4
|
|
147367
|
use Catmandu::Sane; |
|
|
4
|
|
|
|
|
459785
|
|
|
|
4
|
|
|
|
|
29
|
|
|
7
|
4
|
|
|
4
|
|
995
|
use Moo; |
|
|
4
|
|
|
|
|
11
|
|
|
|
4
|
|
|
|
|
22
|
|
|
8
|
4
|
|
|
4
|
|
3872
|
use JSON; |
|
|
4
|
|
|
|
|
27567
|
|
|
|
4
|
|
|
|
|
22
|
|
|
9
|
4
|
|
|
4
|
|
1873
|
use Furl; |
|
|
4
|
|
|
|
|
75432
|
|
|
|
4
|
|
|
|
|
135
|
|
|
10
|
4
|
|
|
4
|
|
29
|
use Scalar::Util qw(blessed); |
|
|
4
|
|
|
|
|
9
|
|
|
|
4
|
|
|
|
|
186
|
|
|
11
|
4
|
|
|
4
|
|
1661
|
use URI::Template; |
|
|
4
|
|
|
|
|
37714
|
|
|
|
4
|
|
|
|
|
1721
|
|
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
with 'Catmandu::Importer'; |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
has url => ( |
|
16
|
|
|
|
|
|
|
is => 'rw', |
|
17
|
|
|
|
|
|
|
trigger => sub { |
|
18
|
|
|
|
|
|
|
$_[0]->{url} = _url_template_or_url( $_[1] ); |
|
19
|
|
|
|
|
|
|
} |
|
20
|
|
|
|
|
|
|
); |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
has from => ( is => 'ro' ); |
|
23
|
|
|
|
|
|
|
has timeout => ( is => 'ro', default => sub { 10 } ); |
|
24
|
|
|
|
|
|
|
has agent => ( is => 'ro' ); |
|
25
|
|
|
|
|
|
|
has proxy => ( is => 'ro' ); |
|
26
|
|
|
|
|
|
|
has dry => ( is => 'ro' ); |
|
27
|
|
|
|
|
|
|
has headers => ( |
|
28
|
|
|
|
|
|
|
is => 'ro', |
|
29
|
|
|
|
|
|
|
default => sub { [ 'Accept' => 'application/json' ] } |
|
30
|
|
|
|
|
|
|
); |
|
31
|
|
|
|
|
|
|
has wait => ( is => 'ro' ); |
|
32
|
|
|
|
|
|
|
has cache => ( is => 'ro', trigger => 1 ); |
|
33
|
|
|
|
|
|
|
has client => ( |
|
34
|
|
|
|
|
|
|
is => 'ro', |
|
35
|
|
|
|
|
|
|
lazy => 1, |
|
36
|
|
|
|
|
|
|
builder => sub { |
|
37
|
|
|
|
|
|
|
Furl->new( |
|
38
|
0
|
|
|
0
|
|
0
|
map { $_ => $_[0]->{$_} } grep { defined $_[0]->{$_} } |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
39
|
|
|
|
|
|
|
qw(timeout agent proxy), |
|
40
|
|
|
|
|
|
|
); |
|
41
|
|
|
|
|
|
|
} |
|
42
|
|
|
|
|
|
|
); |
|
43
|
|
|
|
|
|
|
has json => ( is => 'ro', default => sub { JSON->new->utf8(1) } ); |
|
44
|
|
|
|
|
|
|
has time => ( is => 'rw' ); |
|
45
|
|
|
|
|
|
|
has warn => ( is => 'ro', default => sub { 0 } ); |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
sub _url_template_or_url { |
|
48
|
11
|
|
|
11
|
|
20
|
my ($url) = @_; |
|
49
|
|
|
|
|
|
|
|
|
50
|
11
|
50
|
|
|
|
34
|
if ( !blessed $url) { |
|
51
|
11
|
|
|
|
|
47
|
$url = URI::Template->new($url); |
|
52
|
|
|
|
|
|
|
} |
|
53
|
|
|
|
|
|
|
|
|
54
|
11
|
50
|
|
|
|
777
|
if ( $url->isa('URI::Template') ) { |
|
55
|
11
|
100
|
|
|
|
29
|
unless ( my @variables = $url->variables ) { |
|
56
|
5
|
|
|
|
|
47
|
$url = URI->new("$url"); |
|
57
|
|
|
|
|
|
|
} |
|
58
|
|
|
|
|
|
|
} |
|
59
|
11
|
|
|
|
|
6539
|
return $url; |
|
60
|
|
|
|
|
|
|
} |
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
{ |
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
package Importer::getJSON::MemoryCache; |
|
65
|
4
|
|
|
4
|
|
57
|
use JSON; |
|
|
4
|
|
|
|
|
9
|
|
|
|
4
|
|
|
|
|
48
|
|
|
66
|
|
|
|
|
|
|
our $JSON = JSON->new->utf8; |
|
67
|
4
|
|
|
4
|
|
14
|
sub new { bless {}, $_[0] } |
|
68
|
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
sub get { |
|
70
|
6
|
|
|
6
|
|
10
|
eval { $JSON->decode( $_[0]->{ $_[1] } ) }; |
|
|
6
|
|
|
|
|
15
|
|
|
71
|
|
|
|
|
|
|
} |
|
72
|
2
|
50
|
|
2
|
|
26
|
sub set { $_[0]->{ $_[1] } = ref $_[2] ? $JSON->encode( $_[2] ) : '' } |
|
73
|
|
|
|
|
|
|
} |
|
74
|
|
|
|
|
|
|
$CACHE = Importer::getJSON::MemoryCache->new; |
|
75
|
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
{ |
|
77
|
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
package Importer::getJSON::FileCache; |
|
79
|
4
|
|
|
4
|
|
1190
|
use JSON; |
|
|
4
|
|
|
|
|
7
|
|
|
|
4
|
|
|
|
|
15
|
|
|
80
|
4
|
|
|
4
|
|
384
|
use Catmandu::Util qw(read_json); |
|
|
4
|
|
|
|
|
10
|
|
|
|
4
|
|
|
|
|
210
|
|
|
81
|
4
|
|
|
4
|
|
24
|
use Digest::MD5 qw(md5_hex); |
|
|
4
|
|
|
|
|
4
|
|
|
|
4
|
|
|
|
|
5627
|
|
|
82
|
|
|
|
|
|
|
our $JSON = JSON->new->utf8; |
|
83
|
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
sub new { |
|
85
|
1
|
|
|
1
|
|
3
|
my ( $class, $dir ) = @_; |
|
86
|
1
|
|
|
|
|
3
|
$dir =~ s{/$}{}; |
|
87
|
1
|
|
|
|
|
3
|
bless { dir => $dir }, $class; |
|
88
|
|
|
|
|
|
|
} |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
sub file { |
|
91
|
5
|
|
|
5
|
|
10
|
my ( $self, $url ) = @_; |
|
92
|
5
|
|
|
|
|
23
|
$self->{dir} . '/' . md5_hex( $url->as_string ) . '.json'; |
|
93
|
|
|
|
|
|
|
} |
|
94
|
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
sub get { |
|
96
|
3
|
|
|
3
|
|
6
|
eval { read_json( $_[0]->file( $_[1] ) ) }; |
|
|
3
|
|
|
|
|
7
|
|
|
97
|
|
|
|
|
|
|
} |
|
98
|
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
sub set { |
|
100
|
2
|
|
|
2
|
|
5
|
my ( $self, $url, $data ) = @_; |
|
101
|
2
|
|
|
|
|
10
|
open my $fh, ">", $self->file($url); |
|
102
|
2
|
50
|
|
|
|
229
|
print $fh ( ref $data ? $JSON->encode($data) : '' ); |
|
103
|
|
|
|
|
|
|
} |
|
104
|
|
|
|
|
|
|
} |
|
105
|
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
sub _trigger_cache { |
|
107
|
15
|
|
|
15
|
|
9076
|
my ( $self, $cache ) = @_; |
|
108
|
|
|
|
|
|
|
|
|
109
|
15
|
50
|
33
|
|
|
136
|
if ( blessed $cache and $cache->can('get') and $cache->can('set') ) { |
|
|
|
100
|
33
|
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
110
|
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
# use cache object |
|
112
|
|
|
|
|
|
|
} |
|
113
|
|
|
|
|
|
|
elsif ( $cache and -d $cache ) { |
|
114
|
1
|
|
|
|
|
9
|
$cache = Importer::getJSON::FileCache->new($cache); |
|
115
|
|
|
|
|
|
|
} |
|
116
|
|
|
|
|
|
|
elsif ($cache) { |
|
117
|
2
|
|
|
|
|
6
|
$cache = $CACHE; |
|
118
|
|
|
|
|
|
|
} |
|
119
|
|
|
|
|
|
|
|
|
120
|
15
|
|
|
|
|
243
|
$self->{cache} = $cache; |
|
121
|
|
|
|
|
|
|
} |
|
122
|
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
sub generator { |
|
124
|
|
|
|
|
|
|
my ($self) = @_; |
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
if ( $self->from ) { |
|
127
|
|
|
|
|
|
|
return sub { |
|
128
|
|
|
|
|
|
|
state $data = do { |
|
129
|
|
|
|
|
|
|
my $r = $self->request( $self->from ); |
|
130
|
|
|
|
|
|
|
( ref $r // '' ) eq 'ARRAY' ? $r : [$r]; |
|
131
|
|
|
|
|
|
|
}; |
|
132
|
|
|
|
|
|
|
return shift @$data; |
|
133
|
|
|
|
|
|
|
} |
|
134
|
|
|
|
|
|
|
} |
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
sub { |
|
137
|
|
|
|
|
|
|
state $fh = $self->fh; |
|
138
|
|
|
|
|
|
|
state $data; |
|
139
|
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
if ( $data and ref $data eq 'ARRAY' and @$data ) { |
|
141
|
|
|
|
|
|
|
return shift @$data; |
|
142
|
|
|
|
|
|
|
} |
|
143
|
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
my $url; |
|
145
|
|
|
|
|
|
|
until ($url) { |
|
146
|
|
|
|
|
|
|
my $line = <$fh> // return; |
|
147
|
|
|
|
|
|
|
chomp $line; |
|
148
|
|
|
|
|
|
|
$line =~ s/^\s+|\s+$//g; |
|
149
|
|
|
|
|
|
|
next if $line eq ''; # ignore empty lines |
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
my $request = eval { $self->request_hook($line) }; |
|
152
|
|
|
|
|
|
|
$url = $self->construct_url($request); |
|
153
|
|
|
|
|
|
|
if ( !$url ) { |
|
154
|
|
|
|
|
|
|
warn "failed to construct URL: $line\n" if $self->warn; |
|
155
|
|
|
|
|
|
|
$self->log->warn("failed to construct URL: $line"); |
|
156
|
|
|
|
|
|
|
} |
|
157
|
|
|
|
|
|
|
} |
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
$data = $self->request($url); |
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
return ( ref $data // '' ) eq 'ARRAY' ? shift @$data : $data; |
|
162
|
|
|
|
|
|
|
} |
|
163
|
|
|
|
|
|
|
} |
|
164
|
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
sub request_hook { |
|
166
|
22
|
|
|
22
|
1
|
48
|
my ( $self, $line ) = @_; |
|
167
|
22
|
100
|
|
|
|
106
|
return $line =~ /^\s*{/ ? $self->json->decode($line) : $line; |
|
168
|
|
|
|
|
|
|
} |
|
169
|
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
sub construct_url { |
|
171
|
29
|
|
|
29
|
1
|
3603
|
my $self = shift; |
|
172
|
29
|
100
|
|
|
|
449
|
my $url = @_ > 1 ? _url_template_or_url(shift) : $self->url; |
|
173
|
29
|
|
|
|
|
131
|
my $request = shift; |
|
174
|
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
# Template or query variables |
|
176
|
29
|
100
|
66
|
|
|
199
|
if ( ref $request and not blessed $request) { |
|
|
|
50
|
33
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
177
|
9
|
50
|
|
|
|
33
|
return unless blessed $url; |
|
178
|
9
|
100
|
|
|
|
48
|
if ( $url->isa('URI::Template') ) { |
|
179
|
5
|
|
|
|
|
17
|
$url = $url->process($request); |
|
180
|
|
|
|
|
|
|
} |
|
181
|
|
|
|
|
|
|
else { |
|
182
|
4
|
|
|
|
|
19
|
$url = $url->clone; |
|
183
|
4
|
|
|
|
|
79
|
$url->query_form($request); |
|
184
|
|
|
|
|
|
|
} |
|
185
|
9
|
|
|
|
|
7469
|
return $url; |
|
186
|
|
|
|
|
|
|
} |
|
187
|
|
|
|
|
|
|
elsif ( blessed $request and $request->isa('URI::URL') ) { |
|
188
|
0
|
|
|
|
|
0
|
return $request; |
|
189
|
|
|
|
|
|
|
} |
|
190
|
|
|
|
|
|
|
elsif ( $request =~ /^https?:\/\// ) { # plain URL |
|
191
|
15
|
|
|
|
|
59
|
return URI->new($request); |
|
192
|
|
|
|
|
|
|
} |
|
193
|
|
|
|
|
|
|
elsif ( $request =~ /^\// ) { # URL path (and optional query) |
|
194
|
4
|
|
|
|
|
14
|
$url = "$url"; |
|
195
|
4
|
|
|
|
|
29
|
$url =~ s{/$}{}; |
|
196
|
4
|
|
|
|
|
8
|
$request =~ s{\s+$}{}; |
|
197
|
4
|
|
|
|
|
19
|
return URI->new( $url . $request ); |
|
198
|
|
|
|
|
|
|
} |
|
199
|
|
|
|
|
|
|
|
|
200
|
1
|
|
|
|
|
3
|
return; |
|
201
|
|
|
|
|
|
|
} |
|
202
|
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
sub request { |
|
204
|
34
|
|
|
34
|
1
|
4395
|
my ( $self, $url ) = @_; |
|
205
|
|
|
|
|
|
|
|
|
206
|
34
|
|
|
|
|
522
|
$self->log->debug($url); |
|
207
|
|
|
|
|
|
|
|
|
208
|
34
|
|
|
|
|
4584
|
my $json = ''; |
|
209
|
|
|
|
|
|
|
|
|
210
|
34
|
100
|
|
|
|
104
|
if ( $self->dry ) { |
|
211
|
13
|
|
|
|
|
88
|
return { url => "$url" }; |
|
212
|
|
|
|
|
|
|
} |
|
213
|
|
|
|
|
|
|
|
|
214
|
21
|
100
|
|
|
|
58
|
if ( $self->cache ) { |
|
215
|
9
|
|
|
|
|
24
|
$json = $self->cache->get($url); |
|
216
|
9
|
100
|
|
|
|
10555
|
if ( defined $json ) { |
|
217
|
5
|
50
|
|
|
|
13
|
return ref $json ? $json : undef; |
|
218
|
|
|
|
|
|
|
} |
|
219
|
|
|
|
|
|
|
} |
|
220
|
|
|
|
|
|
|
|
|
221
|
16
|
50
|
33
|
|
|
46
|
if ( $self->wait and $self->time ) { |
|
222
|
0
|
|
0
|
|
|
0
|
my $elapsed = ( $self->time // time ) - time; |
|
223
|
0
|
|
|
|
|
0
|
sleep( $self->wait - $elapsed ); |
|
224
|
|
|
|
|
|
|
} |
|
225
|
16
|
|
|
|
|
50
|
$self->time(time); |
|
226
|
|
|
|
|
|
|
|
|
227
|
16
|
|
|
|
|
260
|
my $response = $self->client->get( $url, $self->headers ); |
|
228
|
16
|
50
|
|
|
|
218
|
if ( $response->is_success ) { |
|
229
|
16
|
|
|
|
|
63
|
my $content = $response->decoded_content; |
|
230
|
16
|
|
|
|
|
184
|
my $data = $self->json->decode($content); |
|
231
|
16
|
|
|
|
|
40
|
$json = $self->response_hook($data); |
|
232
|
|
|
|
|
|
|
} |
|
233
|
|
|
|
|
|
|
else { |
|
234
|
0
|
0
|
|
|
|
0
|
warn "request failed: $url\n" if $self->warn; |
|
235
|
0
|
|
|
|
|
0
|
$self->log->warn("request failed: $url"); |
|
236
|
0
|
0
|
|
|
|
0
|
if ( $response->status =~ /^4/ ) { |
|
237
|
0
|
|
|
|
|
0
|
$json = ''; |
|
238
|
|
|
|
|
|
|
} |
|
239
|
|
|
|
|
|
|
else { |
|
240
|
0
|
|
|
|
|
0
|
return; |
|
241
|
|
|
|
|
|
|
} |
|
242
|
|
|
|
|
|
|
} |
|
243
|
|
|
|
|
|
|
|
|
244
|
16
|
100
|
|
|
|
40
|
if ( $self->cache ) { |
|
245
|
4
|
|
|
|
|
14
|
$self->cache->set( $url, $json ); |
|
246
|
|
|
|
|
|
|
} |
|
247
|
|
|
|
|
|
|
|
|
248
|
16
|
50
|
|
|
|
55
|
return ref $json ? $json : undef; |
|
249
|
|
|
|
|
|
|
} |
|
250
|
|
|
|
|
|
|
|
|
251
|
16
|
|
|
16
|
1
|
29
|
sub response_hook { $_[1] } |
|
252
|
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
1; |
|
254
|
|
|
|
|
|
|
__END__ |
|
255
|
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
=head1 NAME |
|
257
|
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
Catmandu::Importer::getJSON - load JSON-encoded data from a server using a GET HTTP request |
|
259
|
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
=begin markdown |
|
261
|
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
# STATUS |
|
263
|
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
[](https://travis-ci.org/nichtich/Catmandu-Importer-getJSON) |
|
265
|
|
|
|
|
|
|
[](https://coveralls.io/r/nichtich/Catmandu-Importer-getJSON) |
|
266
|
|
|
|
|
|
|
[](http://cpants.cpanauthors.org/dist/Catmandu-Importer-getJSON) |
|
267
|
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
=end markdown |
|
269
|
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
271
|
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
The following three examples are equivalent: |
|
273
|
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
Catmandu::Importer::getJSON->new( |
|
275
|
|
|
|
|
|
|
file => \"http://example.org/alice.json\nhttp://example.org/bob.json" |
|
276
|
|
|
|
|
|
|
)->each(sub { my ($record) = @_; ... ); |
|
277
|
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
Catmandu::Importer::getJSON->new( |
|
279
|
|
|
|
|
|
|
url => "http://example.org", |
|
280
|
|
|
|
|
|
|
file => \"/alice.json\n/bob.json" |
|
281
|
|
|
|
|
|
|
)->each(sub { my ($record) = @_; ... ); |
|
282
|
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
Catmandu::Importer::getJSON->new( |
|
284
|
|
|
|
|
|
|
url => "http://example.org/{name}.json", |
|
285
|
|
|
|
|
|
|
file => \"{\"name\":\"alice\"}\n{\"name\":\"bob\"}" |
|
286
|
|
|
|
|
|
|
)->each(sub { my ($record) = @_; ... ); |
|
287
|
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
For more convenience the L<catmandu> command line client can be used: |
|
289
|
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
echo http://example.org/alice.json | catmandu convert getJSON to YAML |
|
291
|
|
|
|
|
|
|
catmandu convert getJSON --from http://example.org/alice.json to YAML |
|
292
|
|
|
|
|
|
|
catmandu convert getJSON --dry 1 --url http://{domain}/robots.txt < domains |
|
293
|
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
295
|
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
This L<Catmandu::Importer> performs a HTTP GET request to load JSON-encoded |
|
297
|
|
|
|
|
|
|
data from a server. The importer expects a line-separated input. Each line |
|
298
|
|
|
|
|
|
|
corresponds to a HTTP request that is mapped to a JSON-record on success. The |
|
299
|
|
|
|
|
|
|
following input formats are accepted: |
|
300
|
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
=over |
|
302
|
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
=item plain URL |
|
304
|
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
A line that starts with "C<http://>" or "C<https://>" is used as plain URL. |
|
306
|
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
=item URL path |
|
308
|
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
A line that starts with "C</>" is appended to the configured B<url> parameter. |
|
310
|
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
=item variables |
|
312
|
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
A JSON object with variables to be used with an URL template or as HTTP query |
|
314
|
|
|
|
|
|
|
parameters. For instance the input line C<< {"name":"Karl Marx"} >> with URL |
|
315
|
|
|
|
|
|
|
C<http://api.lobid.org/person> or the input line |
|
316
|
|
|
|
|
|
|
C<< {"entity":"person","name":"Karl Marx"} >> with URL template |
|
317
|
|
|
|
|
|
|
C<http://api.lobid.org/{entity}{?id}{?name}{?q}> are both expanded to |
|
318
|
|
|
|
|
|
|
L<http://api.lobid.org/person?name=Karl+Marx>. |
|
319
|
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
=back |
|
321
|
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
If the JSON data returned in a HTTP response is a JSON array, its elements are |
|
323
|
|
|
|
|
|
|
imported as multiple items. If a JSON object is returned, it is imported as one |
|
324
|
|
|
|
|
|
|
item. |
|
325
|
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
=head1 CONFIGURATION |
|
327
|
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
=over |
|
329
|
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
=item url |
|
331
|
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
An L<URI> or an URI templates (L<URI::Template>) as defined by |
|
333
|
|
|
|
|
|
|
L<RFC 6570|http://tools.ietf.org/html/rfc6570> to load JSON from. If no B<url> |
|
334
|
|
|
|
|
|
|
is configured, plain URLs must be provided as input or option C<from> must be |
|
335
|
|
|
|
|
|
|
used instead. |
|
336
|
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
=item from |
|
338
|
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
A plain URL to load JSON without reading any input lines. |
|
340
|
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
=item timeout / agent / proxy / headers |
|
342
|
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
Optional HTTP client settings. |
|
344
|
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
=item client |
|
346
|
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
Instance of a L<Furl> HTTP client to perform requests with. |
|
348
|
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
=item dry |
|
350
|
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
Don't do any HTTP requests but return URLs that data would be queried from. |
|
352
|
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
=item file / fh |
|
354
|
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
Input to read lines from (see L<Catmandu::Importer>). Defaults to STDIN. |
|
356
|
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
=item fix |
|
358
|
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
An optional fix to be applied on every item (see L<Catmandu::Fix>). |
|
360
|
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
=item wait |
|
362
|
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
Number of seconds to wait between requests. |
|
364
|
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
=item cache |
|
366
|
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
Cache JSON response of URLs to not request the same URL twice. HTTP error |
|
368
|
|
|
|
|
|
|
codes in the 4xx range (e.g. 404) are also cached but 5xx errors are not. |
|
369
|
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
The value of this option can be any objects that implements method C<get> and |
|
371
|
|
|
|
|
|
|
C<set> (e.g. C<CHI>), an existing directory for file caching, a true value to |
|
372
|
|
|
|
|
|
|
enable global in-memory-caching, or a false value to disable caching (default). |
|
373
|
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
File caching uses file names based on MD5 of an URL so for instance |
|
375
|
|
|
|
|
|
|
C<http://example.org/> is cached as C<4389382917e51695b759543fdfd5f690.json>. |
|
376
|
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
=item warn |
|
378
|
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
Show error messages on the standard error. |
|
380
|
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
=back |
|
382
|
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
=head1 METHODS |
|
384
|
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
=head2 time |
|
386
|
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
Returns the UNIX timestamp right before the last request. This can be used for |
|
388
|
|
|
|
|
|
|
instance to add timestamps or the measure how fast requests were responded. |
|
389
|
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
=head2 construct_url( [ $base_url, ] $vars_url_or_path ) |
|
391
|
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
Returns an URL given a hash reference with variables, a plain URL or an URL |
|
393
|
|
|
|
|
|
|
path. The optional first argument can be used to override option C<url>. |
|
394
|
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
$importer->construct_url( %query_vars ) |
|
396
|
|
|
|
|
|
|
$importer->construct_url( $importer->url, %query_vars ) # equivalent |
|
397
|
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
=head2 request($url) |
|
399
|
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
Perform a HTTP GET request of a given URL including logging, caching, request |
|
401
|
|
|
|
|
|
|
hook etc. Returns a hash/array reference or C<undef>. |
|
402
|
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
=head1 EXTENDING |
|
404
|
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
This importer provides two methods to filter requests and responses, |
|
406
|
|
|
|
|
|
|
respectively. See L<Catmandu::Importer::Wikidata> for an example. |
|
407
|
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
=head2 request_hook |
|
409
|
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
Gets a whitespace-trimmed input line and is expected to return an unblessed |
|
411
|
|
|
|
|
|
|
hash reference, an URL, or undef. Errors are catched and treated equal to |
|
412
|
|
|
|
|
|
|
undef. |
|
413
|
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
=head2 response_hook |
|
415
|
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
Gets the queried response object and is expected to return an object. |
|
417
|
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
=head1 LOGGING |
|
419
|
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
URLs are emitted before each request on DEBUG log level. |
|
421
|
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
=head1 LIMITATIONS |
|
423
|
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
Future versions of this module may also support asynchronous HTTP fetching |
|
425
|
|
|
|
|
|
|
modules such as L<HTTP::Async>, for retrieving multiple URLs at the same time. |
|
426
|
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
428
|
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
L<Catmandu::Fix::get_json> provides this importer as fix function. |
|
430
|
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
=encoding utf8 |
|
432
|
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
|
434
|
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
Copyright Jakob VoÃ, 2014- |
|
436
|
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify it under |
|
438
|
|
|
|
|
|
|
the same terms as Perl itself. |
|
439
|
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
=cut |