line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Catmandu::Importer::getJSON; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
our $VERSION = '0.51'; |
4
|
|
|
|
|
|
|
our $CACHE; |
5
|
|
|
|
|
|
|
|
6
|
4
|
|
|
4
|
|
176909
|
use Catmandu::Sane; |
|
4
|
|
|
|
|
562412
|
|
|
4
|
|
|
|
|
30
|
|
7
|
4
|
|
|
4
|
|
1090
|
use Moo; |
|
4
|
|
|
|
|
8
|
|
|
4
|
|
|
|
|
20
|
|
8
|
4
|
|
|
4
|
|
3914
|
use JSON; |
|
4
|
|
|
|
|
33012
|
|
|
4
|
|
|
|
|
27
|
|
9
|
4
|
|
|
4
|
|
2234
|
use Furl; |
|
4
|
|
|
|
|
90420
|
|
|
4
|
|
|
|
|
144
|
|
10
|
4
|
|
|
4
|
|
33
|
use Scalar::Util qw(blessed); |
|
4
|
|
|
|
|
8
|
|
|
4
|
|
|
|
|
209
|
|
11
|
4
|
|
|
4
|
|
1943
|
use URI::Template; |
|
4
|
|
|
|
|
44702
|
|
|
4
|
|
|
|
|
2218
|
|
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
with 'Catmandu::Importer'; |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
has url => ( |
16
|
|
|
|
|
|
|
is => 'rw', |
17
|
|
|
|
|
|
|
trigger => sub { |
18
|
|
|
|
|
|
|
$_[0]->{url} = _url_template_or_url( $_[1] ); |
19
|
|
|
|
|
|
|
} |
20
|
|
|
|
|
|
|
); |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
has from => ( is => 'ro' ); |
23
|
|
|
|
|
|
|
has timeout => ( is => 'ro', default => sub { 10 } ); |
24
|
|
|
|
|
|
|
has agent => ( is => 'ro' ); |
25
|
|
|
|
|
|
|
has proxy => ( is => 'ro' ); |
26
|
|
|
|
|
|
|
has dry => ( is => 'ro' ); |
27
|
|
|
|
|
|
|
has headers => ( |
28
|
|
|
|
|
|
|
is => 'ro', |
29
|
|
|
|
|
|
|
default => sub { [ 'Accept' => 'application/json' ] } |
30
|
|
|
|
|
|
|
); |
31
|
|
|
|
|
|
|
has wait => ( is => 'ro' ); |
32
|
|
|
|
|
|
|
has cache => ( is => 'ro', trigger => 1 ); |
33
|
|
|
|
|
|
|
has client => ( |
34
|
|
|
|
|
|
|
is => 'ro', |
35
|
|
|
|
|
|
|
lazy => 1, |
36
|
|
|
|
|
|
|
builder => sub { |
37
|
|
|
|
|
|
|
Furl->new( |
38
|
0
|
|
|
0
|
|
0
|
map { $_ => $_[0]->{$_} } grep { defined $_[0]->{$_} } |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
39
|
|
|
|
|
|
|
qw(timeout agent proxy), |
40
|
|
|
|
|
|
|
); |
41
|
|
|
|
|
|
|
} |
42
|
|
|
|
|
|
|
); |
43
|
|
|
|
|
|
|
has json => ( is => 'ro', default => sub { JSON->new->utf8(1) } ); |
44
|
|
|
|
|
|
|
has time => ( is => 'rw' ); |
45
|
|
|
|
|
|
|
has warn => ( is => 'ro', default => sub { 0 } ); |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
sub _url_template_or_url { |
48
|
11
|
|
|
11
|
|
28
|
my ($url) = @_; |
49
|
|
|
|
|
|
|
|
50
|
11
|
50
|
|
|
|
43
|
if ( !blessed $url) { |
51
|
11
|
|
|
|
|
52
|
$url = URI::Template->new($url); |
52
|
|
|
|
|
|
|
} |
53
|
|
|
|
|
|
|
|
54
|
11
|
50
|
|
|
|
877
|
if ( $url->isa('URI::Template') ) { |
55
|
11
|
100
|
|
|
|
32
|
unless ( my @variables = $url->variables ) { |
56
|
5
|
|
|
|
|
54
|
$url = URI->new("$url"); |
57
|
|
|
|
|
|
|
} |
58
|
|
|
|
|
|
|
} |
59
|
11
|
|
|
|
|
7637
|
return $url; |
60
|
|
|
|
|
|
|
} |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
{ |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
package Importer::getJSON::MemoryCache; |
65
|
4
|
|
|
4
|
|
63
|
use JSON; |
|
4
|
|
|
|
|
11
|
|
|
4
|
|
|
|
|
30
|
|
66
|
|
|
|
|
|
|
our $JSON = JSON->new->utf8; |
67
|
4
|
|
|
4
|
|
16
|
sub new { bless {}, $_[0] } |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
sub get { |
70
|
6
|
|
|
6
|
|
21
|
eval { $JSON->decode( $_[0]->{ $_[1] } ) }; |
|
6
|
|
|
|
|
22
|
|
71
|
|
|
|
|
|
|
} |
72
|
2
|
50
|
|
2
|
|
30
|
sub set { $_[0]->{ $_[1] } = ref $_[2] ? $JSON->encode( $_[2] ) : '' } |
73
|
|
|
|
|
|
|
} |
74
|
|
|
|
|
|
|
$CACHE = Importer::getJSON::MemoryCache->new; |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
{ |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
package Importer::getJSON::FileCache; |
79
|
4
|
|
|
4
|
|
1385
|
use JSON; |
|
4
|
|
|
|
|
9
|
|
|
4
|
|
|
|
|
18
|
|
80
|
4
|
|
|
4
|
|
495
|
use Catmandu::Util qw(read_json); |
|
4
|
|
|
|
|
8
|
|
|
4
|
|
|
|
|
235
|
|
81
|
4
|
|
|
4
|
|
28
|
use Digest::MD5 qw(md5_hex); |
|
4
|
|
|
|
|
7
|
|
|
4
|
|
|
|
|
6947
|
|
82
|
|
|
|
|
|
|
our $JSON = JSON->new->utf8; |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
sub new { |
85
|
1
|
|
|
1
|
|
4
|
my ( $class, $dir ) = @_; |
86
|
1
|
|
|
|
|
3
|
$dir =~ s{/$}{}; |
87
|
1
|
|
|
|
|
4
|
bless { dir => $dir }, $class; |
88
|
|
|
|
|
|
|
} |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
sub file { |
91
|
5
|
|
|
5
|
|
11
|
my ( $self, $url ) = @_; |
92
|
5
|
|
|
|
|
277
|
$self->{dir} . '/' . md5_hex($url) . '.json'; |
93
|
|
|
|
|
|
|
} |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
sub get { |
96
|
3
|
|
|
3
|
|
7
|
eval { read_json( $_[0]->file( $_[1] ) ) }; |
|
3
|
|
|
|
|
8
|
|
97
|
|
|
|
|
|
|
} |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
sub set { |
100
|
2
|
|
|
2
|
|
6
|
my ( $self, $url, $data ) = @_; |
101
|
2
|
|
|
|
|
11
|
open my $fh, ">", $self->file($url); |
102
|
2
|
50
|
|
|
|
275
|
print $fh ( ref $data ? $JSON->encode($data) : '' ); |
103
|
|
|
|
|
|
|
} |
104
|
|
|
|
|
|
|
} |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
sub _trigger_cache { |
107
|
15
|
|
|
15
|
|
10958
|
my ( $self, $cache ) = @_; |
108
|
|
|
|
|
|
|
|
109
|
15
|
50
|
33
|
|
|
158
|
if ( blessed $cache and $cache->can('get') and $cache->can('set') ) { |
|
|
100
|
33
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
# use cache object |
112
|
|
|
|
|
|
|
} |
113
|
|
|
|
|
|
|
elsif ( $cache and -d $cache ) { |
114
|
1
|
|
|
|
|
9
|
$cache = Importer::getJSON::FileCache->new($cache); |
115
|
|
|
|
|
|
|
} |
116
|
|
|
|
|
|
|
elsif ($cache) { |
117
|
2
|
|
|
|
|
7
|
$cache = $CACHE; |
118
|
|
|
|
|
|
|
} |
119
|
|
|
|
|
|
|
|
120
|
15
|
|
|
|
|
339
|
$self->{cache} = $cache; |
121
|
|
|
|
|
|
|
} |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
sub generator { |
124
|
|
|
|
|
|
|
my ($self) = @_; |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
if ( $self->from ) { |
127
|
|
|
|
|
|
|
return sub { |
128
|
|
|
|
|
|
|
state $data = do { |
129
|
|
|
|
|
|
|
my $r = $self->request( $self->from ); |
130
|
|
|
|
|
|
|
( ref $r // '' ) eq 'ARRAY' ? $r : [$r]; |
131
|
|
|
|
|
|
|
}; |
132
|
|
|
|
|
|
|
return shift @$data; |
133
|
|
|
|
|
|
|
} |
134
|
|
|
|
|
|
|
} |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
sub { |
137
|
|
|
|
|
|
|
state $fh = $self->fh; |
138
|
|
|
|
|
|
|
state $data; |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
if ( $data and ref $data eq 'ARRAY' and @$data ) { |
141
|
|
|
|
|
|
|
return shift @$data; |
142
|
|
|
|
|
|
|
} |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
my $url; |
145
|
|
|
|
|
|
|
until ($url) { |
146
|
|
|
|
|
|
|
my $line = <$fh> // return; |
147
|
|
|
|
|
|
|
chomp $line; |
148
|
|
|
|
|
|
|
$line =~ s/^\s+|\s+$//g; |
149
|
|
|
|
|
|
|
next if $line eq ''; # ignore empty lines |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
my $request = eval { $self->request_hook($line) }; |
152
|
|
|
|
|
|
|
$url = $self->construct_url($request); |
153
|
|
|
|
|
|
|
if ( !$url ) { |
154
|
|
|
|
|
|
|
warn "failed to construct URL: $line\n" if $self->warn; |
155
|
|
|
|
|
|
|
$self->log->warn("failed to construct URL: $line"); |
156
|
|
|
|
|
|
|
} |
157
|
|
|
|
|
|
|
} |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
$data = $self->request($url); |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
return ( ref $data // '' ) eq 'ARRAY' ? shift @$data : $data; |
162
|
|
|
|
|
|
|
} |
163
|
|
|
|
|
|
|
} |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
sub request_hook { |
166
|
22
|
|
|
22
|
1
|
58
|
my ( $self, $line ) = @_; |
167
|
22
|
100
|
|
|
|
128
|
return $line =~ /^\s*{/ ? $self->json->decode($line) : $line; |
168
|
|
|
|
|
|
|
} |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
sub construct_url { |
171
|
29
|
|
|
29
|
1
|
4659
|
my $self = shift; |
172
|
29
|
100
|
|
|
|
619
|
my $url = @_ > 1 ? _url_template_or_url(shift) : $self->url; |
173
|
29
|
|
|
|
|
168
|
my $request = shift; |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
# Template or query variables |
176
|
29
|
100
|
66
|
|
|
214
|
if ( ref $request and not blessed $request) { |
|
|
50
|
33
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
177
|
9
|
50
|
|
|
|
36
|
return unless blessed $url; |
178
|
9
|
100
|
|
|
|
55
|
if ( $url->isa('URI::Template') ) { |
179
|
5
|
|
|
|
|
21
|
$url = $url->process($request); |
180
|
|
|
|
|
|
|
} |
181
|
|
|
|
|
|
|
else { |
182
|
4
|
|
|
|
|
25
|
$url = $url->clone; |
183
|
4
|
|
|
|
|
94
|
$url->query_form($request); |
184
|
|
|
|
|
|
|
} |
185
|
9
|
|
|
|
|
8811
|
return $url; |
186
|
|
|
|
|
|
|
} |
187
|
|
|
|
|
|
|
elsif ( blessed $request and $request->isa('URI::URL') ) { |
188
|
0
|
|
|
|
|
0
|
return $request; |
189
|
|
|
|
|
|
|
} |
190
|
|
|
|
|
|
|
elsif ( $request =~ /^https?:\/\// ) { # plain URL |
191
|
15
|
|
|
|
|
75
|
return URI->new($request); |
192
|
|
|
|
|
|
|
} |
193
|
|
|
|
|
|
|
elsif ( $request =~ /^\// ) { # URL path (and optional query) |
194
|
4
|
|
|
|
|
16
|
$url = "$url"; |
195
|
4
|
|
|
|
|
34
|
$url =~ s{/$}{}; |
196
|
4
|
|
|
|
|
13
|
$request =~ s{\s+$}{}; |
197
|
4
|
|
|
|
|
19
|
return URI->new( $url . $request ); |
198
|
|
|
|
|
|
|
} |
199
|
|
|
|
|
|
|
|
200
|
1
|
|
|
|
|
3
|
return; |
201
|
|
|
|
|
|
|
} |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
sub request { |
204
|
35
|
|
|
35
|
1
|
5208
|
my ( $self, $url ) = @_; |
205
|
|
|
|
|
|
|
|
206
|
35
|
|
|
|
|
679
|
$self->log->debug($url); |
207
|
|
|
|
|
|
|
|
208
|
35
|
|
|
|
|
5533
|
my $json = ''; |
209
|
|
|
|
|
|
|
|
210
|
35
|
100
|
|
|
|
117
|
if ( $self->dry ) { |
211
|
13
|
|
|
|
|
108
|
return { url => "$url" }; |
212
|
|
|
|
|
|
|
} |
213
|
|
|
|
|
|
|
|
214
|
22
|
100
|
|
|
|
68
|
if ( $self->cache ) { |
215
|
9
|
|
|
|
|
26
|
$json = $self->cache->get($url); |
216
|
9
|
100
|
|
|
|
12234
|
if ( defined $json ) { |
217
|
5
|
50
|
|
|
|
18
|
return ref $json ? $json : undef; |
218
|
|
|
|
|
|
|
} |
219
|
|
|
|
|
|
|
} |
220
|
|
|
|
|
|
|
|
221
|
17
|
50
|
33
|
|
|
53
|
if ( $self->wait and $self->time ) { |
222
|
0
|
|
0
|
|
|
0
|
my $elapsed = ( $self->time // time ) - time; |
223
|
0
|
|
|
|
|
0
|
sleep( $self->wait - $elapsed ); |
224
|
|
|
|
|
|
|
} |
225
|
17
|
|
|
|
|
75
|
$self->time(time); |
226
|
|
|
|
|
|
|
|
227
|
17
|
|
|
|
|
320
|
my $response = $self->client->get( $url, $self->headers ); |
228
|
17
|
50
|
|
|
|
259
|
if ( $response->is_success ) { |
229
|
17
|
|
|
|
|
70
|
my $content = $response->decoded_content; |
230
|
17
|
|
|
|
|
224
|
my $data = $self->json->decode($content); |
231
|
17
|
|
|
|
|
48
|
$json = $self->response_hook($data); |
232
|
|
|
|
|
|
|
} |
233
|
|
|
|
|
|
|
else { |
234
|
0
|
0
|
|
|
|
0
|
warn "request failed: $url\n" if $self->warn; |
235
|
0
|
|
|
|
|
0
|
$self->log->warn("request failed: $url"); |
236
|
0
|
0
|
|
|
|
0
|
if ( $response->status =~ /^4/ ) { |
237
|
0
|
|
|
|
|
0
|
$json = ''; |
238
|
|
|
|
|
|
|
} |
239
|
|
|
|
|
|
|
else { |
240
|
0
|
|
|
|
|
0
|
return; |
241
|
|
|
|
|
|
|
} |
242
|
|
|
|
|
|
|
} |
243
|
|
|
|
|
|
|
|
244
|
17
|
100
|
|
|
|
50
|
if ( $self->cache ) { |
245
|
4
|
|
|
|
|
12
|
$self->cache->set( $url, $json ); |
246
|
|
|
|
|
|
|
} |
247
|
|
|
|
|
|
|
|
248
|
17
|
50
|
|
|
|
70
|
return ref $json ? $json : undef; |
249
|
|
|
|
|
|
|
} |
250
|
|
|
|
|
|
|
|
251
|
17
|
|
|
17
|
1
|
35
|
sub response_hook { $_[1] } |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
1; |
254
|
|
|
|
|
|
|
__END__ |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
=head1 NAME |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
Catmandu::Importer::getJSON - load JSON-encoded data from a server using a GET HTTP request |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
=begin markdown |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
# STATUS |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
[![Build Status](https://travis-ci.org/nichtich/Catmandu-Importer-getJSON.png)](https://travis-ci.org/nichtich/Catmandu-Importer-getJSON) |
265
|
|
|
|
|
|
|
[![Coverage Status](https://coveralls.io/repos/nichtich/Catmandu-Importer-getJSON/badge.png)](https://coveralls.io/r/nichtich/Catmandu-Importer-getJSON) |
266
|
|
|
|
|
|
|
[![Kwalitee Score](http://cpants.cpanauthors.org/dist/Catmandu-Importer-getJSON.png)](http://cpants.cpanauthors.org/dist/Catmandu-Importer-getJSON) |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
=end markdown |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
=head1 SYNOPSIS |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
The following three examples are equivalent: |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
Catmandu::Importer::getJSON->new( |
275
|
|
|
|
|
|
|
file => \"http://example.org/alice.json\nhttp://example.org/bob.json" |
276
|
|
|
|
|
|
|
)->each(sub { my ($record) = @_; ... ); |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
Catmandu::Importer::getJSON->new( |
279
|
|
|
|
|
|
|
url => "http://example.org", |
280
|
|
|
|
|
|
|
file => \"/alice.json\n/bob.json" |
281
|
|
|
|
|
|
|
)->each(sub { my ($record) = @_; ... ); |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
Catmandu::Importer::getJSON->new( |
284
|
|
|
|
|
|
|
url => "http://example.org/{name}.json", |
285
|
|
|
|
|
|
|
file => \"{\"name\":\"alice\"}\n{\"name\":\"bob\"}" |
286
|
|
|
|
|
|
|
)->each(sub { my ($record) = @_; ... ); |
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
For more convenience the L<catmandu> command line client can be used: |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
echo http://example.org/alice.json | catmandu convert getJSON to YAML |
291
|
|
|
|
|
|
|
catmandu convert getJSON --from http://example.org/alice.json to YAML |
292
|
|
|
|
|
|
|
catmandu convert getJSON --dry 1 --url http://{domain}/robots.txt < domains |
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
=head1 DESCRIPTION |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
This L<Catmandu::Importer> performs a HTTP GET request to load JSON-encoded |
297
|
|
|
|
|
|
|
data from a server. The importer expects a line-separated input. Each line |
298
|
|
|
|
|
|
|
corresponds to a HTTP request that is mapped to a JSON-record on success. The |
299
|
|
|
|
|
|
|
following input formats are accepted: |
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
=over |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
=item plain URL |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
A line that starts with "C<http://>" or "C<https://>" is used as plain URL. |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
=item URL path |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
A line that starts with "C</>" is appended to the configured B<url> parameter. |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
=item variables |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
A JSON object with variables to be used with an URL template or as HTTP query |
314
|
|
|
|
|
|
|
parameters. For instance the input line C<< {"name":"Karl Marx"} >> with URL |
315
|
|
|
|
|
|
|
C<http://api.lobid.org/person> or the input line |
316
|
|
|
|
|
|
|
C<< {"entity":"person","name":"Karl Marx"} >> with URL template |
317
|
|
|
|
|
|
|
C<http://api.lobid.org/{entity}{?id}{?name}{?q}> are both expanded to |
318
|
|
|
|
|
|
|
L<http://api.lobid.org/person?name=Karl+Marx>. |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
=back |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
If the JSON data returned in a HTTP response is a JSON array, its elements are |
323
|
|
|
|
|
|
|
imported as multiple items. If a JSON object is returned, it is imported as one |
324
|
|
|
|
|
|
|
item. |
325
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
=head1 CONFIGURATION |
327
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
=over |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
=item url |
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
An L<URI> or an URI templates (L<URI::Template>) as defined by |
333
|
|
|
|
|
|
|
L<RFC 6570|http://tools.ietf.org/html/rfc6570> to load JSON from. If no B<url> |
334
|
|
|
|
|
|
|
is configured, plain URLs must be provided as input or option C<from> must be |
335
|
|
|
|
|
|
|
used instead. |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
=item from |
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
A plain URL to load JSON without reading any input lines. |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
=item timeout / agent / proxy / headers |
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
Optional HTTP client settings. |
344
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
=item client |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
Instance of a L<Furl> HTTP client to perform requests with. |
348
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
=item dry |
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
Don't do any HTTP requests but return URLs that data would be queried from. |
352
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
=item file / fh |
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
Input to read lines from (see L<Catmandu::Importer>). Defaults to STDIN. |
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
=item fix |
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
An optional fix to be applied on every item (see L<Catmandu::Fix>). |
360
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
=item wait |
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
Number of seconds to wait between requests. |
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
=item cache |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
Cache JSON response of URLs to not request the same URL twice. HTTP error |
368
|
|
|
|
|
|
|
codes in the 4xx range (e.g. 404) are also cached but 5xx errors are not. |
369
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
The value of this option can be any objects that implements method C<get> and |
371
|
|
|
|
|
|
|
C<set> (e.g. C<CHI>), an existing directory for file caching, a true value to |
372
|
|
|
|
|
|
|
enable global in-memory-caching, or a false value to disable caching (default). |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
File caching uses file names based on MD5 of an URL so for instance |
375
|
|
|
|
|
|
|
C<http://example.org/> is cached as C<4389382917e51695b759543fdfd5f690.json>. |
376
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
=item warn |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
Show error messages on the standard error. |
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
=back |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
=head1 METHODS |
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
=head2 time |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
Returns the UNIX timestamp right before the last request. This can be used for |
388
|
|
|
|
|
|
|
instance to add timestamps or the measure how fast requests were responded. |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
=head2 construct_url( [ $base_url, ] $vars_url_or_path ) |
391
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
Returns an URL given a hash reference with variables, a plain URL or an URL |
393
|
|
|
|
|
|
|
path. The optional first argument can be used to override option C<url>. |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
$importer->construct_url( %query_vars ) |
396
|
|
|
|
|
|
|
$importer->construct_url( $importer->url, %query_vars ) # equivalent |
397
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
=head2 request($url) |
399
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
Perform a HTTP GET request of a given URL including logging, caching, request |
401
|
|
|
|
|
|
|
hook etc. Returns a hash/array reference or C<undef>. |
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
=head1 EXTENDING |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
This importer provides two methods to filter requests and responses, |
406
|
|
|
|
|
|
|
respectively. See L<Catmandu::Importer::Wikidata> for an example. |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
=head2 request_hook |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
Gets a whitespace-trimmed input line and is expected to return an unblessed |
411
|
|
|
|
|
|
|
hash reference, an URL, or undef. Errors are catched and treated equal to |
412
|
|
|
|
|
|
|
undef. |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
=head2 response_hook |
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
Gets the queried response object and is expected to return an object. |
417
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
=head1 LOGGING |
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
URLs are emitted before each request on DEBUG log level. |
421
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
=head1 LIMITATIONS |
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
Future versions of this module may also support asynchronous HTTP fetching |
425
|
|
|
|
|
|
|
modules such as L<HTTP::Async>, for retrieving multiple URLs at the same time. |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
=head1 SEE ALSO |
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
L<Catmandu::Fix::get_json> provides this importer as fix function. |
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
=encoding utf8 |
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
Copyright Jakob VoÃ, 2014- |
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify it under |
438
|
|
|
|
|
|
|
the same terms as Perl itself. |
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
=cut |