File Coverage

blib/lib/ETL/Pipeline/Input/XmlFiles.pm

Criterion	Covered	Total	%
statement	30	30	100.0
branch	1	2	50.0
condition	1	3	33.3
subroutine	8	8	100.0
pod	1	1	100.0
total	41	44	93.1

line	stmt	bran	cond	sub	pod	time	code
1							=pod
2
3							=head1 NAME
4
5							ETL::Pipeline::Input::XmlFiles - Process XML content from individual files
6
7							=head1 SYNOPSIS
8
9							use ETL::Pipeline;
10							ETL::Pipeline->new( {
11							input => ['XmlFiles', iname => qr/\.xlsx$/i, records_at => '/Xml'],
12							mapping => {First => '/File/A', Second => '/File/Patient'},
13							output => ['UnitTest']
14							} )->process;
15
16							=head1 DESCRIPTION
17
18							B<ETL::Pipeline::Input::XmlFiles> defines an input source that reads one or more
19							records from one or more XML files. Most of the time, there should be one record
20							per file. But the class handles multiple records per file too.
21
22							=cut
23
24							package ETL::Pipeline::Input::XmlFiles;
25
26	1			1		33	use 5.014000;
	1					3
27	1			1		6	use warnings;
	1					1
	1					37
28
29	1			1		6	use Carp;
	1					1
	1					87
30	1			1		6	use Data::DPath qw/dpath/;
	1					2
	1					29
31	1			1		977	use XML::Bare;
	1					7022
	1					82
32	1			1		14	use Moose;
	1					2
	1					14
33
34
35							our $VERSION = '2.00';
36
37
38							=head1 METHODS & ATTRIBUTES
39
40							=head2 Arguments for L<ETL::Pipeline/input>
41
42							=head3 records_at
43
44							Optional. The path to the record nodes, such as C</XMLDATA/Root/Record>. The
45							last item in the list is the name of the root for each individual record. The
46							default is B</> - one record in the file.
47
48							You might use this attribute in two cases...
49
50							=over
51
52							=item 1. Multiple records per file. This is the top of each record, like in L<ETL::Pipeline::Input::Xml>.
53
54							=item 2. Shorthand to leave off extra nodes from every path. One record per file, but you don't want extra path parts on the beginning of every field.
55
56							=back
57
58							This can be any value accepted by L<Data::DPath>. Fortunately, L<Data::Dpath>
59							takes paths that look like XPath for XML.
60
61							=cut
62
63							has 'records_at' => (
64							default => '/',
65							is => 'ro',
66							isa => 'Str',
67							);
68
69
70							=head3 skipping
71
72							Not used. This attribute is ignored. XML files must follow specific formatting
73							rules. Extra rows are parsed as data. There's nothing to skip.
74
75							=head2 Methods
76
77							=head3 run
78
79							This is the main loop. It opens the file, reads records, and closes it when
80							done. This is the place to look if there are problems.
81
82							L<ETL::Pipeline> automatically calls this method.
83
84							=cut
85
86							sub run {
87	3			3	1	10	my ($self, $etl) = @_;
88
89	3					19	while (my $path = $self->next_path( $etl )) {
90							# Load the XML file and turn it into a Perl hash.
91	6					47	my $parser = XML::Bare->new( file => "$path" );
92	6					2519	my $xml = $parser->parse;
93
94							# Find the node that is an array of records. dpath should return a list
95							# with one array reference. And that array has the actual records. But I
96							# check, just in case your XML is structured a little differently.
97							#
98							# XML should generate hashes - field/value pairs. In theory, there might
99							# be an XML file that sends back a single record as an array reference.
100							# Not likely when transfering data.
101	6					3275	my @matches = dpath( $self->records_at )->match( $xml );
102	6	50	33			2454	my $list = (scalar( @matches ) == 1 && ref( $matches[0] ) eq 'ARRAY') ? $matches[0] : \@matches;
103
104							# Process each record. And that's it.
105	6					201	my $source = $self->source;
106	6					26	foreach my $record (@$list) {
107	6					206	$self->source( sprintf( '%s character %d', $source, $record->{_pos} ) );
108	6					50	$etl->record( $record );
109							}
110							}
111							}
112
113
114							=head1 SEE ALSO
115
116							L<ETL::Pipeline>, L<ETL::Pipeline::Input>, L<ETL::Pipeline::Input::File::List>,
117							L<XML::Bare>
118
119							=cut
120
121							with 'ETL::Pipeline::Input';
122							with 'ETL::Pipeline::Input::File::List';
123
124
125							=head1 AUTHOR
126
127							Robert Wohlfarth <robert.j.wohlfarth@vumc.org>
128
129							=head1 LICENSE
130
131							Copyright 2021 (c) Vanderbilt University Medical Center
132
133							This program is free software; you can redistribute it and/or modify it under
134							the same terms as Perl itself.
135
136							=cut
137
138	1			1		7448	no Moose;
	1					3
	1					8
139							__PACKAGE__->meta->make_immutable;