line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
=pod |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
=head1 NAME |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
ETL::Pipeline::Input::JsonFiles - Process JSON content from individual files |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 SYNOPSIS |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
use ETL::Pipeline; |
10
|
|
|
|
|
|
|
ETL::Pipeline->new( { |
11
|
|
|
|
|
|
|
input => ['JsonFiles', iname => qr/\.json$/i, records_at => '/json'], |
12
|
|
|
|
|
|
|
mapping => {First => '/File/A', Second => '/File/Patient'}, |
13
|
|
|
|
|
|
|
output => ['UnitTest'] |
14
|
|
|
|
|
|
|
} )->process; |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
=head1 DESCRIPTION |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
B<ETL::Pipeline::Input::JsonFiles> defines an input source that reads one or |
19
|
|
|
|
|
|
|
more records from one or more JSON files. Most of the time, there should be one |
20
|
|
|
|
|
|
|
record per file. But the class handles multiple records per file too. |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=cut |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
package ETL::Pipeline::Input::JsonFiles; |
25
|
|
|
|
|
|
|
|
26
|
1
|
|
|
1
|
|
20
|
use 5.014000; |
|
1
|
|
|
|
|
3
|
|
27
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
25
|
|
28
|
|
|
|
|
|
|
|
29
|
1
|
|
|
1
|
|
6
|
use Carp; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
61
|
|
30
|
1
|
|
|
1
|
|
7
|
use Data::DPath qw/dpath/; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
9
|
|
31
|
1
|
|
|
1
|
|
1111
|
use JSON; |
|
1
|
|
|
|
|
8348
|
|
|
1
|
|
|
|
|
5
|
|
32
|
1
|
|
|
1
|
|
133
|
use Moose; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
8
|
|
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
our $VERSION = '2.00'; |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
=head1 METHODS & ATTRIBUTES |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
=head2 Arguments for L<ETL::Pipeline/input> |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
=head3 records_at |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
Optional. The path to the record nodes, such as C</json/Record>. The |
45
|
|
|
|
|
|
|
last item in the list is the name of the root for each individual record. The |
46
|
|
|
|
|
|
|
default is B</> - one record in the file. |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
You might use this attribute in two cases... |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
=over |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
=item 1. Multiple records per file. This is the top of each record, like in L<ETL::Pipeline::Input::Xml>. |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
=item 2. Shorthand to leave off extra nodes from every path. One record per file, but you don't want extra path parts on the beginning of every field. |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
=back |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
This can be any value accepted by L<Data::DPath>. |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
=cut |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
has 'records_at' => ( |
63
|
|
|
|
|
|
|
default => '/', |
64
|
|
|
|
|
|
|
is => 'ro', |
65
|
|
|
|
|
|
|
isa => 'Str', |
66
|
|
|
|
|
|
|
); |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
=head3 skipping |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
Not used. This attribute is ignored. JSON files must follow specific formatting |
72
|
|
|
|
|
|
|
rules. Extra rows are parsed as data. There's nothing to skip. |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
=head2 Methods |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
=head3 run |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
This is the main loop. It opens the file, reads records, and closes it when |
79
|
|
|
|
|
|
|
done. This is the place to look if there are problems. |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
L<ETL::Pipeline> automatically calls this method. |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
=cut |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
sub run { |
86
|
3
|
|
|
3
|
1
|
8
|
my ($self, $etl) = @_; |
87
|
|
|
|
|
|
|
|
88
|
3
|
|
|
|
|
44
|
my $parser = JSON->new->utf8; |
89
|
3
|
|
|
|
|
13
|
while (my $path = $self->next_path( $etl )) { |
90
|
6
|
|
|
|
|
55
|
my $text = $path->slurp; # Force scalar context, otherwise slurp breaks it into lines. |
91
|
6
|
|
|
|
|
1534
|
my $json = $parser->decode( $text ); |
92
|
6
|
50
|
|
|
|
22
|
croak "JSON file '$path', unable to parse" unless defined $json; |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
# Find the node that is an array of records. This comes from the |
95
|
|
|
|
|
|
|
# "records_at" attribute. |
96
|
|
|
|
|
|
|
# |
97
|
|
|
|
|
|
|
# I assume that records are field/value pairs - a Perl hash. So if the |
98
|
|
|
|
|
|
|
# DPath matches an array, it is an array of record. I need to |
99
|
|
|
|
|
|
|
# de-reference that list to get to the actual records. |
100
|
6
|
|
|
|
|
237
|
my @matches = dpath( $self->records_at )->match( $json ); |
101
|
6
|
100
|
66
|
|
|
1242
|
my $list = (scalar( @matches ) == 1 && ref( $matches[0] ) eq 'ARRAY') ? $matches[0] : \@matches; |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
# Process each record. And that's it. The record is a Perl data |
104
|
|
|
|
|
|
|
# structure corresponding with the JSON structure. |
105
|
6
|
|
|
|
|
28
|
$etl->record( $_ ) foreach (@$list); |
106
|
|
|
|
|
|
|
} |
107
|
|
|
|
|
|
|
} |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=head1 SEE ALSO |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
L<ETL::Pipeline>, L<ETL::Pipeline::Input>, L<ETL::Pipeline::Input::File::List>, |
113
|
|
|
|
|
|
|
L<JSON> |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=cut |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
with 'ETL::Pipeline::Input'; |
118
|
|
|
|
|
|
|
with 'ETL::Pipeline::Input::File::List'; |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
=head1 AUTHOR |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
Robert Wohlfarth <robert.j.wohlfarth@vumc.org> |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
=head1 LICENSE |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
Copyright 2021 (c) Vanderbilt University Medical Center |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it under |
130
|
|
|
|
|
|
|
the same terms as Perl itself. |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
=cut |
133
|
|
|
|
|
|
|
|
134
|
1
|
|
|
1
|
|
7576
|
no Moose; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
5
|
|
135
|
|
|
|
|
|
|
__PACKAGE__->meta->make_immutable; |