File Coverage

blib/lib/ETL/Pipeline/Input/JsonFiles.pm

Criterion	Covered	Total	%
statement	29	29	100.0
branch	3	4	75.0
condition	2	3	66.6
subroutine	8	8	100.0
pod	1	1	100.0
total	43	45	95.5

line	stmt	bran	cond	sub	pod	time	code
1							=pod
2
3							=head1 NAME
4
5							ETL::Pipeline::Input::JsonFiles - Process JSON content from individual files
6
7							=head1 SYNOPSIS
8
9							use ETL::Pipeline;
10							ETL::Pipeline->new( {
11							input => ['JsonFiles', iname => qr/\.json$/i, records_at => '/json'],
12							mapping => {First => '/File/A', Second => '/File/Patient'},
13							output => ['UnitTest']
14							} )->process;
15
16							=head1 DESCRIPTION
17
18							B<ETL::Pipeline::Input::JsonFiles> defines an input source that reads one or
19							more records from one or more JSON files. Most of the time, there should be one
20							record per file. But the class handles multiple records per file too.
21
22							=cut
23
24							package ETL::Pipeline::Input::JsonFiles;
25
26	1			1		20	use 5.014000;
	1					3
27	1			1		6	use warnings;
	1					2
	1					25
28
29	1			1		6	use Carp;
	1					1
	1					61
30	1			1		7	use Data::DPath qw/dpath/;
	1					2
	1					9
31	1			1		1111	use JSON;
	1					8348
	1					5
32	1			1		133	use Moose;
	1					3
	1					8
33
34
35							our $VERSION = '2.00';
36
37
38							=head1 METHODS & ATTRIBUTES
39
40							=head2 Arguments for L<ETL::Pipeline/input>
41
42							=head3 records_at
43
44							Optional. The path to the record nodes, such as C</json/Record>. The
45							last item in the list is the name of the root for each individual record. The
46							default is B</> - one record in the file.
47
48							You might use this attribute in two cases...
49
50							=over
51
52							=item 1. Multiple records per file. This is the top of each record, like in L<ETL::Pipeline::Input::Xml>.
53
54							=item 2. Shorthand to leave off extra nodes from every path. One record per file, but you don't want extra path parts on the beginning of every field.
55
56							=back
57
58							This can be any value accepted by L<Data::DPath>.
59
60							=cut
61
62							has 'records_at' => (
63							default => '/',
64							is => 'ro',
65							isa => 'Str',
66							);
67
68
69							=head3 skipping
70
71							Not used. This attribute is ignored. JSON files must follow specific formatting
72							rules. Extra rows are parsed as data. There's nothing to skip.
73
74							=head2 Methods
75
76							=head3 run
77
78							This is the main loop. It opens the file, reads records, and closes it when
79							done. This is the place to look if there are problems.
80
81							L<ETL::Pipeline> automatically calls this method.
82
83							=cut
84
85							sub run {
86	3			3	1	8	my ($self, $etl) = @_;
87
88	3					44	my $parser = JSON->new->utf8;
89	3					13	while (my $path = $self->next_path( $etl )) {
90	6					55	my $text = $path->slurp; # Force scalar context, otherwise slurp breaks it into lines.
91	6					1534	my $json = $parser->decode( $text );
92	6	50				22	croak "JSON file '$path', unable to parse" unless defined $json;
93
94							# Find the node that is an array of records. This comes from the
95							# "records_at" attribute.
96							#
97							# I assume that records are field/value pairs - a Perl hash. So if the
98							# DPath matches an array, it is an array of record. I need to
99							# de-reference that list to get to the actual records.
100	6					237	my @matches = dpath( $self->records_at )->match( $json );
101	6	100	66			1242	my $list = (scalar( @matches ) == 1 && ref( $matches[0] ) eq 'ARRAY') ? $matches[0] : \@matches;
102
103							# Process each record. And that's it. The record is a Perl data
104							# structure corresponding with the JSON structure.
105	6					28	$etl->record( $_ ) foreach (@$list);
106							}
107							}
108
109
110							=head1 SEE ALSO
111
112							L<ETL::Pipeline>, L<ETL::Pipeline::Input>, L<ETL::Pipeline::Input::File::List>,
113							L<JSON>
114
115							=cut
116
117							with 'ETL::Pipeline::Input';
118							with 'ETL::Pipeline::Input::File::List';
119
120
121							=head1 AUTHOR
122
123							Robert Wohlfarth <robert.j.wohlfarth@vumc.org>
124
125							=head1 LICENSE
126
127							Copyright 2021 (c) Vanderbilt University Medical Center
128
129							This program is free software; you can redistribute it and/or modify it under
130							the same terms as Perl itself.
131
132							=cut
133
134	1			1		7576	no Moose;
	1					2
	1					5
135							__PACKAGE__->meta->make_immutable;