File Coverage

blib/lib/REST/Client/CrossRef.pm
Criterion Covered Total %
statement 246 404 60.8
branch 80 180 44.4
condition 10 27 37.0
subroutine 34 47 72.3
pod 15 17 88.2
total 385 675 57.0


line stmt bran cond sub pod time code
1             package REST::Client::CrossRef;
2 1     1   174988 use strict;
  1         2  
  1         33  
3 1     1   6 use warnings;
  1         2  
  1         26  
4 1     1   617 use Moo;
  1         11402  
  1         6  
5            
6 1     1   2175 use JSON;
  1         10206  
  1         6  
7 1     1   602 use URI::Escape;
  1         1278  
  1         61  
8 1     1   463 use REST::Client;
  1         48234  
  1         41  
9            
10             #use Data::Dumper;
11 1     1   11 use Carp;
  1         2  
  1         58  
12 1     1   607 use Log::Any;
  1         8808  
  1         5  
13 1     1   553 use HTTP::Cache::Transparent;
  1         15021  
  1         8  
14            
15             #use JSON::MultiValueOrdered;
16             #use YAML;
17 1     1   569 use JSON::Path;
  1         53068  
  1         11  
18            
19 1     1   629 use namespace::clean;
  1         9634  
  1         6  
20            
21             =head1 NAME
22            
23             REST::Client::CrossRef - Read data from CrossRef using its REST API
24            
25             =cut
26            
27             our $VERSION = '0.007';
28            
29             =head1 VERSION
30            
31             Version 0.007
32            
33             =cut
34            
35             =head1 DESCRIPTION
36            
37             This module use L to read the data from the CrossRef repository.
38            
39             =cut
40            
41             =head1 SYNOPSIS
42            
43             use Log::Any::Adapter( 'File', './log.txt', 'log_level'=> 'info');
44             use REST::Client::CrossRef;
45            
46             #the mail address is added in the request's header
47             #return the data without transformation
48            
49             my $cr = REST::Client::CrossRef->new(
50             mailto => 'you@somewhre.com',
51             spit_raw_data => 1,
52             );
53            
54             #cache the data with HTTP::Cache::Transparent
55             $cr->init_cache(
56             { BasePath => ".\cache",
57             NoUpdate => 60 * 60,
58             verbose => 0
59             });
60            
61             my $data = $cr->journal_from_doi('10.1088/0004-637X/722/2/971');
62            
63             print Dumper($data), "\n"; #$data is a hash ref of the json data converted to perl
64            
65             #unfold the data to something like
66             # field1/subfield1/subfield2 : value
67             #add an undef value after each item fields
68             #output only the fields given with keys_to_keep, with the same ordering
69            
70             my $cr = REST::Client::CrossRef->new(
71             mailto => 'you@somewhere.com',
72             add_end_flag => 1,
73             keys_to_keep => [
74             ['author'], ['title'], ['container-title'],
75             ['volume'],['issue'], ['page'],['issued/date-parts'], ['published-print/date-parts']
76             ],);
77            
78             my $data = $cr->article_from_doi('10.1088/0004-637X/722/2/971');
79            
80             for my $row (@$data) {
81             if (! $row) {
82             print "\n";
83             next;
84             }
85             while ( my ($f, $v) = each %$row) {
86             print "$f : $v \n";
87             }
88             }
89            
90            
91             #display the item's fields in alphabetic order
92             #add 'end of data' field after each item
93            
94             my $cr = REST::Client::CrossRef->new(
95             mailto => 'you@somewhre.com',
96             add_end_flag => 1,
97             sort_output => 1,
98             );
99            
100             $cr->init_cache(
101             { BasePath => "C:\\Windows\\Temp\\perl",
102             NoUpdate => 60 * 60,
103             verbose => 0
104             });
105            
106             my @fields = (qw/author title/);
107             my @values = (qw/allan electron/);
108            
109             #return 100 items by page
110            
111             $cr->rows(100);
112             my $data = $cr->query_articles( \@fields, \@values );
113             while () {
114             last unless $data;
115            
116             for my $row (@$data) {
117             print "\n" unless ($row);
118             for my $field (keys %$row) {
119             print $field, ": ", $row->{$field}. "\n";
120             }
121             }
122             $data = $cr->get_next();
123             }
124            
125             Example output:
126            
127             author : Wilke, Ingrid;
128             MacLeod, Allan M.;
129             Gillespie, William A.;
130             Berden, Giel;
131             Knippels, Guido M. H.;
132             van der Meer, Alexander F. G.;
133             container-title : Optics and Photonics News
134             issue : 12
135             issued/date-parts : 2002, 12, 1,
136             page : 16
137             published-online/date-parts : 2002, 12, 1,
138             published-print/date-parts : 2002, 12, 1,
139             title : Detectors: Time-Domain Terahertz Science Improves Relativistic Electron-Beam Diagnostics
140             volume : 13
141            
142             my $cr = REST::Client::CrossRef->new(
143             mailto => 'dokpe@unifr.ch',
144             spit_raw_data => 0,
145             add_end_flag => 1,
146             json_path => [
147             ['$.author[*]'],
148             ['$.title'],
149             ['$.container-title'],
150             ['$.volume'], ['$.issue'], ['$.page'],
151             ['$.issued..date-parts'],
152             ['$.published-print..date-parts']
153             ],
154             json_path_callback => { '$.items[*].author[*]' => \&unfold_authors },
155             );
156            
157             sub unfold_authors {
158             my ($data_ar) = @_;
159             my @res;
160             for my $aut (@$data_ar) {
161             my $line;
162             if ( $aut->{affiliation} ) {
163             my @aff;
164             for my $hr ( @{$aut->{affiliation}} ) {
165             my @aff = values %$hr;
166             $aff[0] =~ s/\r/ /g;
167             $line .= " " . $aff[0];
168             }
169             }
170             my $fn = (defined $aut->{given}) ?( ", " . $aut->{given} . "; " ): "; ";
171             push @res, $aut->{family} . $fn . ($line // "");
172             }
173             return \@res;
174             }
175            
176             my $data = $cr->article_from_doi($doi);
177             next unless $data;
178             for my $row (@$data) {
179             if ( !$row ) {
180             print "\n";
181             next;
182             }
183             while ( my ( $f, $v ) = each %$row ) {
184             print "$f : $v \n";
185             }
186             }
187            
188             Example of output:
189             $.author[*] : Pelloni, Michelle; University of Basel, Department of Chemistry, Mattenstrasse 24a, BPR 1096, CH 4002 Basel, Switzerland
190             Cote, Paul; School of Chemistry and Biochemistry, University of Geneva, Quai Ernest Ansermet 30, CH-1211 Geneva, Switzerland
191             ....
192             Warding, Tom.; University of Basel, Department of Chemistry, Mattenstrasse 24a, BPR 1096, CH 4002 Basel, Switzerland
193             $.title : Chimeric Artifact for Artificial Metalloenzymes
194             $.container-title : ACS Catalysis
195             $.volume : 8
196             $.issue : 2
197             $.page : 14-18
198             $.issued..date-parts : 2018, 1, 24
199             $.published-print..date-parts : 2018, 2, 2
200            
201             my $cr = REST::Client::CrossRef->new( mailto => 'you@somewher.com'
202             ,keys_to_keep => [["breakdowns/id", "id"], ["location"], [ "primary-name", "breakdowns/primary-name", "name" ]],
203             );
204            
205             $cr->init_cache(
206             { BasePath => "C:\\Windows\\Temp\\perl",
207             NoUpdate => 60 * 60,
208             verbose => 0
209             });
210            
211             $cr->rows(100);
212            
213             my $rs_ar = $cr->get_members;
214            
215             while () {
216             last unless $rs_ar;
217             for my $row_hr (@$rs_ar) {
218             for my $k (keys %$row_hr) {
219             print $k . " : " . $row_hr->{$k} . "\n";
220             }
221             }
222             $rs_ar = $cr->get_next();
223             }
224            
225             Example of items in the output above
226            
227             id : 5007
228             location : W. Struve 1 Tartu 50091 Estonia
229             primary-name : University of Tartu Press
230            
231             id : 310
232             location : 23 Millig Street Helensburgh Helensburgh Argyll G84 9LD United Kingdom
233             primary-name : Westburn Publishers
234            
235             id : 183
236             location : 9650 Rockville Pike Attn: Lynn Willis Bethesda MD 20814 United States
237             primary-name : Society for Leukocyte Biology
238            
239             =cut
240            
241             has baseurl => ( is => 'ro', default => sub {'https://api.crossref.org'} );
242             has modified_since => ( is => 'ro' );
243            
244             #has version => (is => 'ro', default => sub {'v1'} );
245            
246             has rows => (
247             is => 'rw',
248             default => sub {0},
249             isa => sub { croak "rows must be under 1000" unless $_[0] < 1000 }
250             );
251             has code => ( is => 'rw' );
252             has sleep => ( is => 'rw', default => sub {0} );
253             has log => ( is => 'lazy' );
254             has client => ( is => 'lazy' );
255             has decoder => ( is => 'lazy' );
256            
257             =head2 C<$cr = REST::Client::CrossRef-Enew( ... mailto =E your@email.here, ...)>
258            
259             The email address is placed in the header of the page.
260             See L
261            
262             =cut
263            
264             has mailto => ( is => 'lazy', default => sub {0} );
265            
266             =head2 C<$cr = REST::Client::CrossRef-Enew( ... sort_output =E1, ...)>
267            
268             Rows can be sorted using the key name with sort_ouput => 1.
269             Default to 0.
270             In effect only if C is false.
271            
272             =cut
273            
274             has sort_output => ( is => 'lazy', default => sub {0} );
275             has test_data => ( is => 'lazy', default => sub {0} );
276            
277             =head2 C<$cr = REST::Client::CrossRef-Enew( ... spit_raw_data =E1, ...)>
278            
279             Display the data as a hashref if 0 or as an array ref of hasref,
280             where each hashref is a row of key => value that can be sorted with sort_ouput => 1.
281             C default to 0.
282            
283             =cut
284            
285             has spit_raw_data => ( is => 'lazy', default => sub {0} );
286             has cursor => ( is => 'rw' );
287             has page_start_at => ( is => 'rw', default => sub {0} );
288            
289             =head2 C<$cr = REST::Client::CrossRef-Enew( ... add_end_flag =E1, ...)>
290            
291             Add undef after an item's fields.
292             Default to 1.
293            
294             =cut
295            
296             has add_end_flag => ( is => 'lazy', default => sub {1} );
297            
298             =head2 C<$cr = REST::Client::CrossRef-Enew( ... keys_to_keep =E [[key1, key1a, ...], [key2], ... ], ...)>
299            
300             An array ref of array ref, the inner array ref give a key name and the possible alternative keys for the same value,
301             for example [ "primary-name", "breakdowns/primary-name", "name" ] in the member road (url ending with /members).
302             The keys enumeration starts below C, or C - C if the result is a list.
303             This filters the values that are returned and preserves the ordering of the array ref given in argument.
304             The ouput is an array ref of hash ref, each hash having the key and the values.
305             Values are flattened as string. In effect only if spit_raw_data is false.
306            
307             =cut
308            
309             has keys_to_keep => ( is => 'lazy' );
310            
311             =head2 C<$cr = REST::Client::CrossRef-Enew( ... json_path =E [[$path1, path1a, ...], [path2], ... ], ...)>
312            
313             An array ref of array ref, the inner array refs give a L
314             and the possible alternative path for the same value. See also L.
315             The json path starts below C, or C - C if the result is a list.
316             The output, ordering, filtering and flattening is as above. In effect only if spit_raw_data is false.
317            
318             =cut
319            
320             has json_path => ( is => 'lazy' );
321            
322             =head2 C<$cr = REST::Client::CrossRef-Enew( ... json_path_callback =E {$path =E \&some_function }>
323            
324             An hash ref that associates a JSON path and a function that will be run on the data return by C<$jpath-Evalues($json_data)>.
325             The function must accept an array ref as first argument and must return an array ref.
326            
327             =cut
328            
329             has json_path_callback => ( is => 'lazy' );
330            
331             =head2 C<$cr = REST::Client::CrossRef-Enew( ... json_path_safe =E "0", ... )>
332            
333             To turn off the message C set this to 0.
334             Default to 1.
335            
336             =cut
337            
338             has json_path_safe => (is => 'lazy', default=> sub{1});
339            
340             =head2 C<$cr = REST::Client::CrossRef-Enew( ... version =E "v1", ... )>
341            
342             To use a defined version of the api.
343             See L
344            
345             =cut
346            
347             has version => ( is => 'ro' );
348            
349             sub _build_client {
350 1     1   10 my ($self) = @_;
351 1         19 my $client = REST::Client->new();
352            
353             #HTTP::Cache::Transparent::init( { BasePath => './cache', NoUpdate => 15*60, verbose=>1 } );
354             #$self->cache()
355            
356 1 50       5373 if ( $self->version ) {
357 0         0 $self->log->notice( "< Crossref-API-Version: " . $self->version );
358 0         0 $client->addHeader( 'api-version', $self->version );
359             }
360 1 50       21 if ( defined $self->mailto ) {
361            
362             #my $authorization = 'Bearer ' . $self->key;
363 1         18 $self->log->notice( "< Mailto: " . $self->mailto );
364 1         69 $client->addHeader( 'mailto', $self->mailto );
365            
366             }
367            
368 1         30 $client;
369             }
370            
371             sub _build_decoder {
372 3     3   32 my $self = shift;
373 3         262 return JSON->new;
374            
375             #return JSON::MultiValueOrdered->new;
376            
377             }
378            
379             sub _build_log {
380 3     3   28 my ($self) = @_;
381 3         20 Log::Any->get_logger( category => ref($self) );
382             }
383            
384             =head2 C<$cr-Einit_cache( @args )> C<$cr-Einit_cache( $hash_ref )>
385            
386             See L.
387             The array of args is passed to the object constructor.
388             The log file shows if the data has been fetch from the cache and if the server has been queryied to detect any change.
389            
390             =cut
391            
392             sub init_cache {
393 0     0 1 0 my ( $self, @args ) = @_;
394 0         0 my $href;
395 0 0       0 if ( ref $args[0] eq "HASH" ) {
396 0         0 $href = $args[0];
397             }
398             else {
399 0         0 my %h;
400 0         0 %h = @args;
401 0         0 $href = \%h;
402             }
403 0         0 HTTP::Cache::Transparent::init($href);
404             }
405            
406             sub _build_filter {
407 3     3   7 my ( $self, $ar ) = @_;
408            
409             #die Dumper $self->{keys_to_keep};
410             # die "ar:" . Dumper $ar;
411 3         6 my %filter;
412 3         7 for my $filter_name (qw(keys_to_keep json_path)) {
413            
414             # my %keys_to_keep;
415 6 100       46 next if ( !exists $ar->{$filter_name} );
416            
417             #print "_build_filter: $filter_name\n";
418 2         6 my $pos;
419             my %pos_seen;
420 2         0 my %key_seen;
421            
422 2         5 for my $ar ( @{ $self->{$filter_name} } ) {
  2         5  
423 3         7 $pos++;
424 3         5 for my $k (@$ar) {
425 3         9 $filter{$k} = $pos - 1;
426 3         8 $pos_seen{ $pos - 1 } = 0;
427 3         9 $key_seen{ $pos - 1 } = $k;
428             }
429            
430             }
431 2         5 $self->{pos_seen} = \%pos_seen;
432 2         4 $self->{key_seen} = \%key_seen;
433 2         6 $self->{$filter_name} = \%filter;
434             }
435            
436             }
437            
438             sub BUILD {
439            
440 3     3 0 25 my ( $self, $ar ) = @_;
441             croak "Can't use both keys_to_keep and json_path"
442 3 0 33     10 if ( $ar->{json_path} && $ar->{keys_to_keep} );
443 3         9 $self->_build_filter($ar);
444             }
445            
446             sub _crossref_get_request {
447 3     3   12 my ( $self, $path, $query_ar, %param ) = @_;
448 3 100       49 return 1 if ( $self->test_data() );
449 1 50       20 my $url = sprintf "%s%s%s", $self->baseurl,
450             $self->version ? "/" . $self->version : "", $path;
451            
452 1         3 my @params = ();
453            
454 1 50       4 if ($query_ar) {
455            
456 0         0 for my $p (@$query_ar) {
457            
458             #print "$p\n";
459 0         0 push @params, $p;
460            
461             }
462             }
463 1         5 for my $name ( keys %param ) {
464 2         3 my $value = $param{$name};
465            
466             #location:United Kingdom space is uri_escape twice
467             #push @params, uri_escape($name) . "=" . uri_escape($value) if ($value);
468 2 50       7 push @params, $name . "=" . $value if ($value);
469            
470             }
471 1 50       19 push @params, "rows=" . $self->rows if ( $self->rows );
472 1 50       11 push @params, "cursor=" . $self->cursor if ( $self->cursor );
473            
474             #if the first url as &offset=1 we missed the first item
475             #1 in page_start_at means "paginate with offset"
476             #offset : page_start_at -1
477 1 50 33     6 push @params, "offset=" . ( $self->page_start_at - 1 )
478             if ( $self->page_start_at && $self->page_start_at > $self->rows );
479 1 50       5 $url .= '?' . join( "&", @params ) if @params > 0;
480            
481             #die Dumper @params;
482             # The server asked us to sleep..
483 1 50       6 if ( $self->sleep > 0 ) {
484 0         0 $self->log->notice( "sleeping: " . $self->sleep . " seconds" );
485 0         0 sleep $self->sleep;
486 0         0 $self->sleep(0);
487             }
488 1         17 $self->log->notice(" ");
489 1         24 $self->log->notice("requesting: $url");
490 1         27 my $response = $self->client->GET($url);
491 1         721408 my $val = $response->responseHeader('Backoff');
492 1 50       61 my $backoff = defined $val ? $val : 0;
493 1         5 $val = $response->responseHeader('Retry-After');
494 1 50       47 my $retryAfter = defined $val ? $val : 0;
495 1         6 my $code = $response->responseCode();
496            
497 1         65 $self->log->notice("> Code: $code");
498 1         38 $self->log->notice("> Backoff: $backoff");
499 1         35 $self->log->notice("> Retry-After: $retryAfter");
500 1         14 for my $k (qw/X-Cached X-Content-Unchanged X-No-Server-Contact/) {
501 3 50       145 $self->log->notice( "> $k: " . $response->responseHeader($k) )
502             if $response->responseHeader($k);
503             }
504            
505 1 50 33     66 if ( $backoff > 0 ) {
    50          
506 0         0 $self->sleep($backoff);
507             }
508             elsif ( $code eq '429' || $code eq '503' ) {
509 0 0       0 $self->sleep( defined $retryAfter ? $retryAfter : 60 );
510 0         0 return;
511             }
512            
513 1         27 $self->log->notice( "> Content: " . $response->responseContent );
514            
515 1         399 $self->code($code);
516            
517 1 50       5 return unless $code eq '200';
518            
519 1         7 $response;
520             }
521            
522             sub _get_metadata {
523 2     2   7 my ( $self, $path, $query_ar, $filter, $select ) = @_;
524 2 100       44 $self->log->notice( "test_data: ",
525             ( $self->test_data() ? " 1 " : " 0 " ) );
526 2         2283 $self->page_start_at(0);
527 2         8 my $response =
528             $self->_crossref_get_request( $path,
529             $query_ar, ( filter => $filter, select => $select ) );
530            
531             # print Dumper $response;
532 2 50       18 return unless $response;
533            
534             #my $hr = decode_json $response->responseContent;
535            
536 2 100       43 my $hr =
537             $self->test_data()
538             ? $self->_decode_json( $self->test_data() )
539             : $self->_decode_json( $response->responseContent );
540 2         8 my $res_count = $hr->{message}->{'total-results'};
541            
542             #print $res_count;
543 2 100 66     14 if ( defined $res_count && $res_count > 0 ) {
544            
545             # my $keys;
546             # my $data_ar = $hr->{message}->{items};
547 1         2 my $returned_items = @{ $hr->{message}->{items} };
  1         4  
548 1         5 $self->_set_cursor( $hr->{message}, $returned_items );
549            
550             #push @$keys, "items";
551             #die $self->spit_raw_data;
552             #$self->_display_data($hr);
553             }
554 2         74 return $self->_display_data($hr);
555             }
556            
557             sub _get_page_metadata {
558 1     1   4 my ( $self, $path, $param_ar ) = @_;
559            
560 1         2 my $response;
561             my $out;
562 1         5 $self->cursor(undef);
563 1 50       4 if ($param_ar) {
564 0         0 my $filter = join( ",", @$param_ar );
565            
566 0         0 $response =
567             $self->_crossref_get_request( $path, undef,
568             ( filter => $filter ) );
569             }
570             else {
571 1         3 $response = $self->_crossref_get_request($path);
572             }
573            
574 1 50       11 if ($response) {
575            
576             #my $hr = decode_json $response->responseContent;
577 1 50       16 my $hr =
578             $self->test_data()
579             ? $self->_decode_json( $self->test_data() )
580             : $self->_decode_json( $response->responseContent );
581 1         3 my $res_count = $hr->{message}->{'total-results'};
582            
583 1 50       4 if ( defined $res_count ) {
584            
585             # print "from metadata: ", $res_count, "\n";
586 0 0       0 if ( $res_count > 0 ) {
587            
588             #die Dumper $hr->{message}->{items};
589 0         0 my $returned_items_count = @{ $hr->{message}->{items} };
  0         0  
590            
591 0         0 $self->{last_page_items_count} = $returned_items_count;
592 0         0 $out = $self->_display_data($hr);
593            
594             }
595             }
596             else { #singleton
597 1         3 $out = $self->_display_data($hr);
598            
599             }
600             }
601            
602 1         3 return $out;
603            
604             }
605            
606             sub _display_data {
607 3     3   13 my ( $self, $hr ) = @_;
608            
609 3 100       75 return $hr if ( $self->spit_raw_data );
610 2         28 my $formatter = REST::Client::CrossRef::Unfolder->new();
611            
612 2         4 my $data_ar;
613 2 50       18 if ( $hr->{message}->{items} ) {
614 0         0 $data_ar = $hr->{message}->{items};
615             }
616             else {
617 2         6 $data_ar = [ $hr->{message} ];
618             }
619            
620 2         4 my @data;
621 2 50       42 if ( defined $self->{json_path} ) {
    50          
622            
623 0         0 my %result;
624 0         0 my %keys = %{ $self->{json_path} };
  0         0  
625 0         0 my %selectors;
626 0         0 $JSON::Path::Safe=$self->json_path_safe;
627 0         0 for my $path ( keys %keys ) {
628            
629             #print $path, "\n";
630 0         0 $selectors{$path} = JSON::Path->new($path);
631             }
632            
633 0         0 for my $data_hr (@$data_ar) {
634            
635 0         0 for my $path ( keys %selectors ) {
636            
637             #my @val = $jpath->values( $hr->{message} );
638 0         0 my @val = $selectors{$path}->values($data_hr);
639            
640 0 0 0     0 if ( $self->{json_path_callback}
    0          
641             && $self->{json_path_callback}->{$path} )
642             {
643 0         0 my @data;
644 0         0 my $cb = $self->{json_path_callback}->{$path};
645 0         0 eval { @data = @{ $cb->( \@val ) }; };
  0         0  
  0         0  
646 0 0       0 croak "Json callback failed : $@\n" if ($@);
647 0         0 $result{$path} = join( "\n", @data );
648            
649             }
650             elsif (@val) {
651            
652 0         0 my %res_part;
653             %res_part =
654 0         0 %{ $formatter->_unfold_array( \@val, [$path] ) };
  0         0  
655 0         0 @result{ keys %res_part } = values %res_part;
656             }
657            
658             }
659            
660             push @data,
661 0         0 @{ $self->_sort_output( $self->{json_path}, \%result ) };
  0         0  
662             }
663            
664             }
665             elsif ( defined $self->{keys_to_keep} ) {
666            
667 2         4 my $new_ar;
668            
669             #$data_ar :array ref of rows items
670 2         8 $formatter->set_keys_to_keep( $self->{keys_to_keep} );
671            
672 2         5 for my $data_hr (@$data_ar) {
673            
674             #https://www.perlmonks.org/?node_id=1224994
675             #my $result_hr = {};
676             #$formatter->_unfold_hash($data_hr, undef, $result_hr);
677 2         6 my $result_hr = $formatter->_unfold_hash($data_hr);
678            
679             # $self->log->debug("display_data\n", Dumper $result_hr);
680             push @data,
681 2         19 @{ $self->_sort_output( $self->{keys_to_keep}, $result_hr ) };
  2         8  
682            
683             }
684            
685             }
686             else { #neither json_path nor keys_to_keep defined, spit_raw_data set to 0
687            
688 0         0 for my $data_hr (@$data_ar) {
689 0         0 my $val_hr = $formatter->_unfold_hash($data_hr);
690 0         0 my @keys;
691 0 0       0 if ( $self->{sort_output} ) {
692 0         0 @keys = sort { lc($a) cmp lc($b) } keys %$val_hr;
  0         0  
693             }
694             else {
695 0         0 @keys = keys %$val_hr;
696             }
697 0         0 for my $k (@keys) {
698 0         0 push @data, { $k, $val_hr->{$k} };
699             }
700            
701 0 0       0 push @data, undef if $self->add_end_flag;
702             }
703            
704             }
705 2         52 return \@data;
706             }
707            
708             sub _sort_output {
709 2     2   5 my ( $self, $filter_hr, $result_hr ) = @_;
710            
711 2         4 my @data;
712            
713 2 50       6 if ($filter_hr) {
714            
715 2         4 my %keys_to_keep = %{$filter_hr};
  2         8  
716 2         4 my %pos_seen = %{ $self->{pos_seen} };
  2         7  
717 2         4 my %key_seen = %{ $self->{key_seen} };
  2         6  
718 2         8 $pos_seen{$_} = 0 foreach ( keys %pos_seen );
719            
720 2         4 my @item_data;
721 2         18 for my $key ( keys %$result_hr ) {
722            
723 154         207 my $pos = $keys_to_keep{$key};
724 154 100       270 next unless defined $pos;
725            
726             # print "pos undef for $key\n" unless defined $pos;
727             #$key_unseen{$pos}= $key;
728 3         7 $pos_seen{$pos} = 1;
729             my $value =
730             ( defined $result_hr->{$key} )
731 3 50       8 ? $result_hr->{$key}
732             : "";
733 3         73 $self->log->debug( $key, " - ", $value );
734 3         103 $item_data[$pos] = { $key => $value };
735            
736             }
737            
738 2         13 my @unseen = grep { !$pos_seen{$_} } keys %pos_seen;
  3         9  
739            
740 2         7 for my $pos (@unseen) {
741 0         0 $item_data[$pos] = { $key_seen{$pos}, "" };
742            
743             }
744 2         4 push @data, @item_data;
745 2 100       54 push @data, undef if $self->add_end_flag;
746            
747             }
748             else {
749 0         0 my @keys;
750 0 0       0 if ( $self->{sort_output} ) {
751 0         0 @keys = sort { lc($a) cmp lc($b) } keys %$result_hr;
  0         0  
752             }
753             else {
754 0         0 @keys = keys %$result_hr;
755             }
756 0         0 for my $k (@keys) {
757            
758 0         0 push @data, { $k, $result_hr->{$k} };
759             }
760            
761 0 0       0 push @data, undef if $self->add_end_flag;
762             }
763            
764 2         35 return \@data;
765            
766             }
767            
768             =head2 C<$cr-Erows( $row_value )>
769            
770             Set the rows parameter that determines how many items are returned in one page
771            
772             =cut
773            
774             =head2 C<$cr-Eworks_from_doi( $doi )>
775            
776             Retrive the metadata from the work road (url ending with works) using the article's doi.
777             Return undef if the doi is not found.
778             You may pass a select string with the format "field1,field2,..." to return only these fields.
779             Fields that may be use for selection are (October 2018):
780             abstract, URL, member, posted, score, created, degree, update-policy, short-title, license, ISSN,
781             container-title, issued, update-to, issue, prefix, approved, indexed, article-number, clinical-trial-number,
782             accepted, author, group-title, DOI, is-referenced-by-count, updated-by, event, chair, standards-body, original-title,
783             funder, translator, archive, published-print, alternative-id, subject, subtitle, published-online, publisher-location,
784             content-domain, reference, title, link, type, publisher, volume, references-count, ISBN, issn-type, assertion,
785             deposited, page, content-created, short-container-title, relation, editor.
786             Use keys_to_keep or json_path to define an ordering in the ouptut. Use select to filter the fields to be returned from the server.
787            
788             =cut
789            
790             sub works_from_doi {
791 0     0 1 0 my ( $self, $doi, $select ) = @_;
792 0 0       0 croak "works_from_doi: need doi" unless defined $doi;
793 0         0 $self->_get_metadata( "/works", undef, "doi:$doi", $select );
794             }
795            
796             =head2 C<$cr-Ejournal_from_doi( $doi )>
797            
798             A shortcut for C
799            
800             =cut
801            
802             sub journal_from_doi {
803 0     0 1 0 my ( $self, $doi ) = @_;
804 0 0       0 croak "journal_from_doi: need doi" unless defined $doi;
805 0         0 $self->_get_metadata( "/works", undef, "doi:$doi",
806             "container-title,page,issued,volume,issue" );
807            
808             }
809            
810             =head2 C<$cr-Earticle_from_doi( $doi )>
811            
812             A shortcut for C
813            
814             =cut
815            
816             sub article_from_doi {
817 1     1 1 601 my ( $self, $doi ) = @_;
818 1 50       5 croak "article_from_doi: need doi" unless defined $doi;
819 1         6 $self->_get_metadata( "/works", undef, "doi:$doi",
820             "title,container-title,page,issued,volume,issue,author,published-print,published-online"
821             );
822             }
823            
824             =head2 C<$cr-Earticle_from_funder( $funder_id, {name=E'smith'}, $select )>
825            
826             Retrive the metadata from the works road for a given funder, searched with an author's name or orcid.
827             C<$select> default to "title,container-title,page,issued,volume,issue,published-print,DOI". Use * to retrieve all fields.
828            
829             =cut
830            
831             sub articles_from_funder {
832 0     0 0 0 my ( $self, $id, $href, $select ) = @_;
833            
834 0 0       0 croak "articles_from_funder: need funder id" unless defined $id;
835 0 0       0 $select = (
836             $select
837             ? $select
838             : "title,container-title,page,issued,volume,issue,published-print,DOI"
839             );
840 0 0       0 $self->{select} = $select eq "*" ? undef : $select;
841 0         0 $self->{path} = "/funders/$id/works";
842 0         0 $self->cursor("*");
843 0         0 for my $k ( keys %$href ) {
844 0 0       0 if ( $k eq "name" ) {
    0          
845 0         0 my $query = [ "query.author=" . uri_escape( $href->{$k} ) ];
846 0         0 $self->{param} = $query;
847            
848             return $self->_get_metadata( "/funders/$id/works", $query,
849 0         0 undef, $self->{select} );
850             }
851             elsif ( $k eq "orcid" ) {
852             my $url =
853             $href->{$k} =~ /^https*:\/\/orcid.org\//
854             ? $href->{$k}
855 0 0       0 : "http://orcid.org/" . $href->{$k};
856            
857             # $self->{param} = "orcid:" . uri_escape($url);
858 0         0 $self->{filter} = "orcid:" . uri_escape($url);
859             return $self->_get_metadata( "/funders/$id/works", undef,
860 0         0 $self->{filter}, $self->{select} );
861             }
862 0         0 else { croak "articles_from_funder : unknown key : $k"; }
863             }
864             return $self->_get_metadata( "/funders/$id/works", undef, undef,
865 0         0 $self->{select} );
866            
867             }
868            
869             =head2 C<$cr-Eget_types()>
870            
871             Retrieve all the metadata from the types road.
872            
873             =cut
874            
875             sub get_types {
876 1     1 1 6 my $self = shift;
877 1         3 $self->_get_metadata("/types");
878             }
879            
880             =head2 C<$cr-Eget_members()>
881            
882             Retrieve all the metadata (> 10'000 items) from the members road.
883            
884             =cut
885            
886             sub get_members {
887 0     0 1 0 my $self = shift;
888            
889 0         0 $self->page_start_at(1);
890 0         0 $self->{path} = "/members";
891 0         0 $self->_get_page_metadata("/members");
892            
893             }
894            
895             =head2 C<$cr-Emember_from_id( $member_id )>
896            
897             Retrieve a members from it's ID
898            
899             =cut
900            
901             sub member_from_id {
902 1     1 1 8 my ( $self, $id ) = @_;
903 1 50       4 croak "member_from_id: need id" unless ($id);
904 1         21 my $rows = $self->rows();
905 1         24 $self->rows(0);
906 1         14 my $rs = $self->_get_page_metadata("/members/$id");
907 1         21 $self->rows($rows);
908 1         11 return $rs;
909            
910             }
911            
912             =head2 C<$cr-Eget_journals()>
913            
914             Retrieve all the metadata (> 60'000 items) from the journals road.
915            
916             =cut
917            
918             sub get_journals {
919 0     0 1 0 my $self = shift;
920 0         0 $self->{path} = "/journals";
921 0         0 $self->page_start_at(1);
922 0         0 $self->_get_page_metadata("/journals");
923            
924             }
925            
926             =head2 C<$cr-Eget_licences()>
927            
928             Retrieve all the metadata (> 700 items) from the licenses road.
929            
930             =cut
931            
932             sub get_licences {
933 0     0 1 0 my $self = shift;
934 0         0 $self->{path} = "/licences";
935 0         0 $self->page_start_at(1);
936 0         0 $self->_get_page_metadata("/licences");
937            
938             }
939            
940             =head2 C<$cr-Equery_works( $fields_array_ref, $values_array_ref, $select_string )>
941            
942             See L for the fields that can be searched.
943             You may omit the "query." part in the field name.
944             The corresponding values are passed in a second array, in the same order.
945             Beware that searching with first and family name is treated as an OR not and AND:
946             C will retrieve all the works where and author has Tom in the name field or all works where an author has Smith in the name field.
947             See C above for the fields that can be selected.
948             Use keys_to_keep or json_path to define an ordering in the ouptut. Use select to filter the fields to be returned from the server.
949             =cut
950            
951             sub query_works {
952 0     0 1 0 my ( $self, $field_ar, $value_ar, $select ) = @_;
953 0         0 my $i;
954             my @params;
955 0         0 for my $field (@$field_ar) {
956 0 0       0 croak "unknown field $field"
957             unless ( $field
958             =~ /(?:container-)*title$|author$|editor$|chair$|translator$|contributor$|bibliographic$|affiliation$/
959             );
960 0 0       0 $field = "query." . $field unless ( $field =~ /^query\./ );
961 0         0 push @params, $field . "=" . uri_escape( $value_ar->[ $i++ ] );
962             }
963 0         0 $self->cursor("*");
964 0         0 $self->{path} = "/works";
965 0         0 $self->{param} = \@params;
966 0         0 $self->{select} = $select;
967 0         0 $self->_get_metadata( "/works", \@params, undef, $select );
968            
969             }
970            
971             =head2 C<$cr-Equery_articles( $fields_array_ref, $values_array_ref )>
972            
973             A shortcut for C<$cr-Equery_works($fields_array_ref, $values_array_ref, "title,container-title,page,issued,volume,issue,author,published-print,published-online")>
974            
975             =cut
976            
977             sub query_articles {
978 0     0 1 0 my ( $self, $field_ar, $value_ar ) = @_;
979 0         0 $self->query_works( $field_ar, $value_ar,
980             "title,container-title,page,issued,volume,issue,author,published-print,published-online"
981             );
982             }
983            
984             =head2 C<$cr-Equery_journals( $fields_array_ref, $values_array_ref )>
985            
986             A shortcut for C<$cr-Equery_works($fields_array_ref, $values_array_ref, "container-title,page,issued,volume,issue">
987            
988             =cut
989            
990             sub query_journals {
991 0     0 1 0 my ( $self, $field_ar, $value_ar ) = @_;
992 0         0 $self->query_works( $field_ar, $value_ar,
993             "container-title,page,issued,volume,issue" );
994            
995             }
996            
997             =head2 C<$cr-Eget_next()>
998            
999             Return the next set of data in the /works, /members, /journals, /funders, /licences roads,
1000             Return undef after the last set.
1001            
1002             =cut
1003            
1004             sub get_next {
1005 0     0 1 0 my $self = shift;
1006 0 0       0 $self->log->debug( "get_next cursor: ",
1007             ( defined $self->cursor ? " defined " : " undef" ) );
1008 0         0 $self->log->debug( "get_next page_start_at: ", $self->page_start_at );
1009            
1010 0 0       0 if ( $self->cursor ) {
1011             $self->_get_metadata(
1012             $self->{path}, $self->{param},
1013             $self->{filter}, $self->{select}
1014 0         0 );
1015             }
1016 0         0 my $last_start = $self->page_start_at;
1017            
1018             #as long as the count of items returned is equal to ->rows
1019             #there should be a next page to ask for: increment page_start_at to page_start_at + row
1020 0 0 0     0 if ( $last_start && $self->{last_page_items_count} >= $self->rows ) {
1021 0         0 $self->page_start_at( $last_start + $self->rows );
1022 0         0 $self->_get_page_metadata( $self->{path}, $self->{param} );
1023             }
1024            
1025             }
1026            
1027             =head2 C<$cr-Eagencies_from_dois( $dois_array_ref )>
1028            
1029             Retrieve the Registration agency (CrossRef, mEdra ...) using an array ref of article doi.
1030             L
1031            
1032             =cut
1033            
1034             sub agencies_from_dois {
1035 0     0 1 0 my ( $self, $dois_ar ) = @_;
1036 0         0 my @results;
1037            
1038             # die Dumper $dois_ar;
1039 0         0 my $rows = $self->rows;
1040 0         0 $self->rows(0);
1041 0         0 for my $doi (@$dois_ar) {
1042            
1043             #print "looking for $doi\n";
1044 0         0 my $response =
1045             $self->_crossref_get_request( "/works/" . $doi . "/agency" );
1046 0 0       0 if ($response) {
1047 0         0 my $hr = $self->_decode_json( $response->responseContent );
1048            
1049             # my @items = $hr->{message}->{items};
1050 0         0 my $res = $self->_display_data($hr);
1051 0 0       0 return $res if ($self->spit_raw_data);
1052 0         0 push @results, $res;
1053            
1054             }
1055            
1056             }
1057 0         0 $self->rows($rows);
1058            
1059 0         0 return \@results;
1060             }
1061            
1062             =head2 C<$cr-Efunders_from_location( $a_location_name )>
1063            
1064             Retrieve the funder from a country. Problem is that there is no way of having a list of country name used.
1065             These locations has been succefully tested: United Kingdom, Germany, Japan, Morocco, Switzerland, France.
1066            
1067             =cut
1068            
1069             sub funders_from_location {
1070 0     0 1 0 my ( $self, $loc ) = @_;
1071 0 0       0 croak "funders_from_location : need location" unless $loc;
1072 0         0 my $rows = $self->rows;
1073            
1074             #$self->rows(0);
1075 0         0 my $data;
1076             my @params;
1077 0         0 push @params, "location:" . uri_escape($loc);
1078 0         0 $self->page_start_at(1);
1079 0         0 $self->{path} = "/funders";
1080 0         0 $self->{param} = \@params;
1081 0         0 $self->{select} = undef;
1082 0         0 $self->_get_page_metadata( "/funders", \@params );
1083            
1084             #$self->rows($rows);
1085             #return $data;
1086             }
1087            
1088             sub _set_cursor {
1089 1     1   5 my ( $self, $msg_hr, $n_items ) = @_;
1090 1         7 my %msg = %$msg_hr;
1091 1 50 33     8 if ( exists $msg{'next-cursor'} && $n_items >= $self->rows ) {
1092            
1093             # print "_set_cursor: ", uri_escape( $msg{'next-cursor'} ), "\n";
1094 0         0 $self->cursor( uri_escape( $msg{'next-cursor'} ) );
1095             }
1096             else {
1097             # print "_set_cursor: undef\n";
1098 1         7 $self->cursor(undef);
1099             }
1100             }
1101            
1102             sub _decode_json {
1103 3     3   97 my ( $self, $json ) = @_;
1104 3         83 my $data = $self->decoder->decode($json);
1105 3         9 return $data;
1106            
1107             }
1108            
1109             package REST::Client::CrossRef::Unfolder;
1110            
1111             #use Data::Dumper;
1112 1     1   4813 use Carp;
  1         4  
  1         76  
1113 1     1   8 use Log::Any;
  1         2  
  1         8  
1114            
1115             sub new {
1116 2     2   5 my ($class) = shift;
1117 2         8 my $self = { logger => Log::Any->get_logger( category => "unfolder" ), };
1118 2         202 return bless $self, $class;
1119            
1120             }
1121            
1122             sub log {
1123 495     495   701 my $self = shift;
1124 495         1513 return $self->{logger};
1125             }
1126            
1127             # This setting of the array ref could be removed since the ordering in display_data
1128             # also remove the keys that are not wanted. But the hash builded is smaller
1129             # with adding only the key that are needed.
1130             sub set_keys_to_keep {
1131 2     2   6 my ( $self, $ar_ref ) = @_;
1132 2         9 $self->{keys_to_keep} = $ar_ref;
1133            
1134             }
1135            
1136             sub _unfold_hash {
1137 40     40   73 my ( $self, $raw_hr, $key_ar, $result_hr ) = @_;
1138            
1139 40 100       83 $self->log->debug( "unfold_hash1: ",
1140             ( $result_hr ? scalar %$result_hr : 0 ) );
1141 40         200 for my $k ( keys %$raw_hr ) {
1142            
1143             # $self->log->debug( "key: ", $k );
1144            
1145 214         340 push @$key_ar, $k;
1146            
1147 214 100       497 if ( ref $raw_hr->{$k} eq "HASH" ) {
    100          
1148            
1149             $result_hr =
1150 24         68 $self->_unfold_hash( $raw_hr->{$k}, $key_ar, $result_hr );
1151            
1152 24 50       46 $self->log->debug( "1 size ",
1153             $result_hr ? scalar %$result_hr : 0 );
1154             }
1155             elsif ( ref $raw_hr->{$k} eq "ARRAY" ) {
1156             $result_hr =
1157 30         59 $self->_unfold_array( $raw_hr->{$k}, $key_ar, $result_hr );
1158            
1159 30 50       50 $self->log->debug( "2 size ",
1160             $result_hr ? scalar %$result_hr : 0 );
1161            
1162             $result_hr->{ $key_ar->[$#$key_ar] } =~ s/,\s$//
1163 30 100       131 if ( defined $result_hr->{ $key_ar->[$#$key_ar] } );
1164            
1165             }
1166            
1167             else {
1168            
1169             $self->log->debug( "ref: ", ref $raw_hr->{$k} )
1170 160 100       303 if ( ref $raw_hr->{$k} );
1171 160         352 my $key = join( "/", @$key_ar );
1172            
1173 160 100 66     489 if ( defined $self->{keys_to_keep}
1174             && defined $self->{keys_to_keep}->{$key} )
1175             {
1176 1         3 $result_hr->{$key} = $raw_hr->{$k}
1177            
1178             }
1179             else {
1180             $self->log->debug( "key : ", $key, " value: ",
1181 159         318 $raw_hr->{$k} );
1182 159         610 $result_hr->{$key} = $raw_hr->{$k};
1183             }
1184            
1185             }
1186            
1187 214         484 my $tmp = pop @$key_ar;
1188            
1189             }
1190            
1191 40 50       80 $self->log->debug( "_unfold_hash3: ",
1192             $result_hr ? scalar(%$result_hr) : 0 );
1193 40         149 return $result_hr;
1194             }
1195            
1196             sub _unfold_array {
1197 48     48   92 my ( $self, $ar, $key_ar, $res_hr ) = @_;
1198            
1199 48 50       79 $self->log->debug( "_unfold_array0: ", $res_hr ? scalar(%$res_hr) : 0 );
1200 48         138 my $last_key = join( "/", @{$key_ar} );
  48         97  
1201 48         78 my $key = $key_ar->[$#$key_ar];
1202            
1203 48         107 $self->log->debug( "_unfold array1 key: ", $key );
1204 48 100       152 if ( $key eq "author" ) {
1205 1         29 my @first;
1206             my @groups;
1207 1         0 my $first;
1208 1         0 my @all;
1209 1         4 for my $aut (@$ar) {
1210 2 100       7 if ( $aut->{sequence} eq 'first' ) {
1211 1 50       4 if ( $aut->{family} ) {
    0          
1212             $first =
1213             "\n"
1214             . $aut->{family}
1215             . (
1216             defined $aut->{given} ? ", " . $aut->{given} : " " )
1217 1 50       7 . $self->_unfold_affiliation( $aut->{affiliation} );
1218 1         2 push @first, $first;
1219             }
1220             elsif ( $aut->{name} ) {
1221             $first = "\n"
1222             . $aut->{name}
1223 0         0 . $self->_unfold_affiliation( $aut->{affiliation} );
1224 0         0 push @groups, $first;
1225            
1226             }
1227            
1228             }
1229             else {
1230 1 50       3 if ( $aut->{family} ) {
    0          
1231             push @all,
1232             "\n"
1233             . $aut->{family}
1234             . (
1235             defined $aut->{given} ? ", " . $aut->{given} : " " )
1236 1 50       6 . $self->_unfold_affiliation( $aut->{affiliation} );
1237             }
1238             elsif ( $aut->{name} ) {
1239             push @groups,
1240             "\n"
1241             . $aut->{name}
1242 0         0 . $self->_unfold_affiliation( $aut->{affiliation} );
1243            
1244             }
1245             }
1246            
1247             }
1248            
1249 1         20 unshift @all, @first;
1250 1         4 unshift @all, @groups;
1251 1         5 $res_hr->{$key} = join( "", @all );
1252            
1253             }
1254            
1255             else {
1256            
1257 47         79 for my $val (@$ar) {
1258            
1259 88 100       203 if ( ref $val eq "HASH" ) {
    100          
1260 14         28 $res_hr = $self->_unfold_hash( $val, $key_ar, $res_hr );
1261 14         20 my $last = $#$key_ar;
1262             $res_hr->{ $key_ar->[$last] } =~ s/,\s$//
1263 14 50       31 if ( defined $res_hr->{ $key_ar->[$last] } );
1264            
1265 14 50       28 $self->log->debug( "_unfold_array2: ",
1266             $res_hr ? scalar(%$res_hr) : 0 );
1267             }
1268             elsif ( ref $val eq "ARRAY" ) {
1269 18         59 $res_hr = $self->_unfold_array( $val, $key_ar, $res_hr );
1270            
1271 18 50       32 $self->log->debug( "_unfold_array3: ",
1272             $res_hr ? scalar(%$res_hr) : 0 );
1273            
1274             }
1275             else {
1276            
1277 56 100 66     181 if ( defined $self->{keys_to_keep}
1278             && defined $self->{keys_to_keep}->{$last_key} )
1279             {
1280 2 50       5 if ( defined $val ) {
1281 2         7 $res_hr->{$last_key} .= $val . ", ";
1282             }
1283             else {
1284 0         0 $res_hr->{$last_key} = "";
1285             }
1286            
1287             }
1288             else {
1289 54         129 $res_hr->{$last_key} .= $val;
1290             }
1291            
1292             }
1293             } #for
1294            
1295             }
1296            
1297 48 50       114 $self->log->debug( "_unfold_array4: ", $res_hr ? scalar(%$res_hr) : 0 );
1298 48         184 return $res_hr;
1299             }
1300            
1301             sub _unfold_affiliation {
1302 2     2   5 my ( $self, $ar ) = @_;
1303 2         4 my $line = ";";
1304 2         3 my @aff;
1305 2         5 for my $hr (@$ar) {
1306            
1307             # my @k = keys %$hr;
1308 0         0 my @aff = values %$hr;
1309 0         0 $aff[0] =~ s/\r/ /g;
1310 0         0 $line .= " " . $aff[0];
1311             }
1312            
1313 2         7 return $line;
1314             }
1315            
1316             =head1 INSTALLATION
1317            
1318             To install this module type the following:
1319             perl Makefile.PL
1320             make
1321             make test
1322             make install
1323            
1324             On windows use nmake or dmake instead of make.
1325            
1326             =head1 DEPENDENCIES
1327            
1328             The following modules are required in order to use this one
1329            
1330             Moo => 2,
1331             JSON => 2.90,
1332             URI::Escape => 3.31,
1333             REST::Client => 273,
1334             Log::Any => 1.049,
1335             HTTP::Cache::Transparent => 1.4,
1336             Carp => 1.40,
1337             JSON::Path => 0.420
1338            
1339             =head1 BUGS
1340            
1341             See below.
1342            
1343             =head1 SUPPORT
1344            
1345             Any questions or problems can be posted to me (rappazf) on my gmail account.
1346            
1347             The current state of the source can be extract using Mercurial from
1348             L
1349            
1350             =head1 AUTHOR
1351            
1352             F. Rappaz
1353             CPAN ID: RAPPAZF
1354            
1355             =head1 COPYRIGHT
1356            
1357             This program is free software; you can redistribute
1358             it and/or modify it under the same terms as Perl itself.
1359            
1360             The full text of the license can be found in the
1361             LICENSE file included with this module.
1362            
1363             =head1 SEE ALSO
1364            
1365             L Catmandu is a toolframe, *nix oriented.
1366            
1367             L Import data from CrossRef using the CrossRef search, not the REST Api, and convert the XML result into something simpler.
1368            
1369             =cut
1370            
1371             1;
1372