File Coverage

blib/lib/ETL/Pipeline/Input/Xml.pm
Criterion Covered Total %
statement 20 20 100.0
branch n/a
condition n/a
subroutine 7 7 100.0
pod n/a
total 27 27 100.0


line stmt bran cond sub pod time code
1             =pod
2              
3             =head1 NAME
4              
5             ETL::Pipeline::Input::Xml - Records from an XML file
6              
7             =head1 SYNOPSIS
8              
9             use ETL::Pipeline;
10             ETL::Pipeline->new( {
11             input => ['Xml', iname => 'Data.xml', root => '/Root'],
12             mapping => {Name => 'Name', Address => 'Address'},
13             output => ['UnitTest']
14             } )->process;
15              
16             =head1 DESCRIPTION
17              
18             B<ETL::Pipeline::Input::Xml> defines an input source that reads multiple records
19             from a single XML file. Individual records are repeating subnodes under
20             L</root>.
21              
22             =cut
23              
24             package ETL::Pipeline::Input::Xml;
25              
26 1     1   30 use 5.014000;
  1         4  
27 1     1   6 use warnings;
  1         3  
  1         36  
28              
29 1     1   5 use Carp;
  1         2  
  1         89  
30 1     1   6 use Data::DPath qw/dpath/;
  1         2  
  1         13  
31 1     1   447 use Moose;
  1         2  
  1         12  
32 1     1   8342 use XML::Bare;
  1         7484  
  1         282  
33              
34              
35             our $VERSION = '3.00';
36              
37              
38             =head1 METHODS & ATTRIBUTES
39              
40             =head2 Arguments for L<ETL::Pipeline/input>
41              
42             =head3 records_at
43              
44             Required. The path to the record nodes, such as C</XMLDATA/Root/Record>. The
45             last item in the list is the name of the root for each individual record. The
46             code loops over all of these nodes.
47              
48             This can be any value accepted by L<Data::DPath>. Fortunately, L<Data::Dpath>
49             takes paths that look like XPath for XML.
50              
51             =cut
52              
53             has 'records_at' => (
54             default => '/',
55             is => 'ro',
56             isa => 'Str',
57             );
58              
59              
60             =head3 skipping
61              
62             Not used. This attribute is ignored. XML files must follow specific formatting
63             rules. Extra rows are parsed as data. There's nothing to skip.
64              
65             =head2 Methods
66              
67             =head3 run
68              
69             This is the main loop. It opens the file, reads records, and closes it when
70             done. This is the place to look if there are problems.
71              
72             L<ETL::Pipeline> automatically calls this method.
73              
74             =cut
75              
76             sub run {
77             my ($self, $etl) = @_;
78              
79             my $path = $self->path;
80              
81             # Load the XML file and turn it into a Perl hash.
82             my $parser = XML::Bare->new( file => "$path" );
83             my $xml = $parser->parse;
84              
85             # Find the node that is an array of records. dpath should return a list with
86             # one array reference. And that array has the actual records. But I check,
87             # just in case your XML is structured a little differently.
88             #
89             # XML should generate hashes - field/value pairs. In theory, there might be
90             # an XML file that sends back a single record as an array reference. Not
91             # likely when transfering data.
92             my @matches = dpath( $self->records_at )->match( $xml );
93             my $list = (scalar( @matches ) == 1 && ref( $matches[0] ) eq 'ARRAY') ? $matches[0] : \@matches;
94              
95             # Process each record. And that's it.
96             my $source = $self->source;
97             foreach my $record (@$list) {
98             $self->source( sprintf( '%s character %d', $source, $record->{_pos} ) );
99             $etl->record( $record );
100             }
101             }
102              
103              
104             =head1 SEE ALSO
105              
106             L<ETL::Pipeline>, L<ETL::Pipeline::Input>, L<ETL::Pipeline::Input::File>,
107             L<XML::Bare>
108              
109             =cut
110              
111             with 'ETL::Pipeline::Input';
112             with 'ETL::Pipeline::Input::File';
113              
114              
115             =head1 AUTHOR
116              
117             Robert Wohlfarth <robert.j.wohlfarth@vumc.org>
118              
119             =head1 LICENSE
120              
121             Copyright 2021 (c) Vanderbilt University Medical Center
122              
123             This program is free software; you can redistribute it and/or modify it under
124             the same terms as Perl itself.
125              
126             =cut
127              
128 1     1   13 no Moose;
  1         3  
  1         13  
129             __PACKAGE__->meta->make_immutable;