line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
=pod |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
=head1 NAME |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
ETL::Pipeline::Input::Xml - Records from an XML file |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 SYNOPSIS |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
use ETL::Pipeline; |
10
|
|
|
|
|
|
|
ETL::Pipeline->new( { |
11
|
|
|
|
|
|
|
input => ['Xml', iname => 'Data.xml', root => '/Root'], |
12
|
|
|
|
|
|
|
mapping => {Name => 'Name', Address => 'Address'}, |
13
|
|
|
|
|
|
|
output => ['UnitTest'] |
14
|
|
|
|
|
|
|
} )->process; |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
=head1 DESCRIPTION |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
B<ETL::Pipeline::Input::Xml> defines an input source that reads multiple records |
19
|
|
|
|
|
|
|
from a single XML file. Individual records are repeating subnodes under |
20
|
|
|
|
|
|
|
L</root>. |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=cut |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
package ETL::Pipeline::Input::Xml; |
25
|
|
|
|
|
|
|
|
26
|
1
|
|
|
1
|
|
30
|
use 5.014000; |
|
1
|
|
|
|
|
4
|
|
27
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
36
|
|
28
|
|
|
|
|
|
|
|
29
|
1
|
|
|
1
|
|
5
|
use Carp; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
89
|
|
30
|
1
|
|
|
1
|
|
6
|
use Data::DPath qw/dpath/; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
13
|
|
31
|
1
|
|
|
1
|
|
447
|
use Moose; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
12
|
|
32
|
1
|
|
|
1
|
|
8342
|
use XML::Bare; |
|
1
|
|
|
|
|
7484
|
|
|
1
|
|
|
|
|
282
|
|
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
our $VERSION = '3.00'; |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
=head1 METHODS & ATTRIBUTES |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
=head2 Arguments for L<ETL::Pipeline/input> |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
=head3 records_at |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
Required. The path to the record nodes, such as C</XMLDATA/Root/Record>. The |
45
|
|
|
|
|
|
|
last item in the list is the name of the root for each individual record. The |
46
|
|
|
|
|
|
|
code loops over all of these nodes. |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
This can be any value accepted by L<Data::DPath>. Fortunately, L<Data::Dpath> |
49
|
|
|
|
|
|
|
takes paths that look like XPath for XML. |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
=cut |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
has 'records_at' => ( |
54
|
|
|
|
|
|
|
default => '/', |
55
|
|
|
|
|
|
|
is => 'ro', |
56
|
|
|
|
|
|
|
isa => 'Str', |
57
|
|
|
|
|
|
|
); |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
=head3 skipping |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
Not used. This attribute is ignored. XML files must follow specific formatting |
63
|
|
|
|
|
|
|
rules. Extra rows are parsed as data. There's nothing to skip. |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
=head2 Methods |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=head3 run |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
This is the main loop. It opens the file, reads records, and closes it when |
70
|
|
|
|
|
|
|
done. This is the place to look if there are problems. |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
L<ETL::Pipeline> automatically calls this method. |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
=cut |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
sub run { |
77
|
|
|
|
|
|
|
my ($self, $etl) = @_; |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
my $path = $self->path; |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
# Load the XML file and turn it into a Perl hash. |
82
|
|
|
|
|
|
|
my $parser = XML::Bare->new( file => "$path" ); |
83
|
|
|
|
|
|
|
my $xml = $parser->parse; |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
# Find the node that is an array of records. dpath should return a list with |
86
|
|
|
|
|
|
|
# one array reference. And that array has the actual records. But I check, |
87
|
|
|
|
|
|
|
# just in case your XML is structured a little differently. |
88
|
|
|
|
|
|
|
# |
89
|
|
|
|
|
|
|
# XML should generate hashes - field/value pairs. In theory, there might be |
90
|
|
|
|
|
|
|
# an XML file that sends back a single record as an array reference. Not |
91
|
|
|
|
|
|
|
# likely when transfering data. |
92
|
|
|
|
|
|
|
my @matches = dpath( $self->records_at )->match( $xml ); |
93
|
|
|
|
|
|
|
my $list = (scalar( @matches ) == 1 && ref( $matches[0] ) eq 'ARRAY') ? $matches[0] : \@matches; |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
# Process each record. And that's it. |
96
|
|
|
|
|
|
|
my $source = $self->source; |
97
|
|
|
|
|
|
|
foreach my $record (@$list) { |
98
|
|
|
|
|
|
|
$self->source( sprintf( '%s character %d', $source, $record->{_pos} ) ); |
99
|
|
|
|
|
|
|
$etl->record( $record ); |
100
|
|
|
|
|
|
|
} |
101
|
|
|
|
|
|
|
} |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
=head1 SEE ALSO |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
L<ETL::Pipeline>, L<ETL::Pipeline::Input>, L<ETL::Pipeline::Input::File>, |
107
|
|
|
|
|
|
|
L<XML::Bare> |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
=cut |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
with 'ETL::Pipeline::Input'; |
112
|
|
|
|
|
|
|
with 'ETL::Pipeline::Input::File'; |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=head1 AUTHOR |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
Robert Wohlfarth <robert.j.wohlfarth@vumc.org> |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
=head1 LICENSE |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
Copyright 2021 (c) Vanderbilt University Medical Center |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it under |
124
|
|
|
|
|
|
|
the same terms as Perl itself. |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
=cut |
127
|
|
|
|
|
|
|
|
128
|
1
|
|
|
1
|
|
13
|
no Moose; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
13
|
|
129
|
|
|
|
|
|
|
__PACKAGE__->meta->make_immutable; |