line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
=pod |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
=head1 NAME |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
ETL::Pipeline::Input::File::List - Role for input sources with multiple files |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 SYNOPSIS |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
# In the input source... |
10
|
|
|
|
|
|
|
use Moose; |
11
|
|
|
|
|
|
|
with 'ETL::Pipeline::Input'; |
12
|
|
|
|
|
|
|
with 'ETL::Pipeline::Input::File::List'; |
13
|
|
|
|
|
|
|
... |
14
|
|
|
|
|
|
|
sub run { |
15
|
|
|
|
|
|
|
my ($self, $etl) = @_; |
16
|
|
|
|
|
|
|
... |
17
|
|
|
|
|
|
|
while (my $path = $self->next_path( $etl )) { |
18
|
|
|
|
|
|
|
... |
19
|
|
|
|
|
|
|
} |
20
|
|
|
|
|
|
|
} |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=head1 DESCRIPTION |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
This is a role used by input sources. It defines everything you need to process |
25
|
|
|
|
|
|
|
multiple input files of the same format. The role uses L<Path::Class::Rule> to |
26
|
|
|
|
|
|
|
locate matching files. |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
Your input source calls the L</next_path> method in a loop. That's it. The role |
29
|
|
|
|
|
|
|
automatically processes constructor arguments that match L<Path::Class::Rule> |
30
|
|
|
|
|
|
|
criteria. It then builds a list of matching files the first time your code calls |
31
|
|
|
|
|
|
|
L</next_path>. |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
=cut |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
package ETL::Pipeline::Input::File::List; |
36
|
|
|
|
|
|
|
|
37
|
4
|
|
|
4
|
|
4718
|
use 5.014000; |
|
4
|
|
|
|
|
13
|
|
38
|
|
|
|
|
|
|
|
39
|
4
|
|
|
4
|
|
32
|
use Carp; |
|
4
|
|
|
|
|
9
|
|
|
4
|
|
|
|
|
282
|
|
40
|
4
|
|
|
4
|
|
21
|
use Moose::Role; |
|
4
|
|
|
|
|
9
|
|
|
4
|
|
|
|
|
45
|
|
41
|
4
|
|
|
4
|
|
19783
|
use MooseX::Types::Path::Class; |
|
4
|
|
|
|
|
11
|
|
|
4
|
|
|
|
|
76
|
|
42
|
4
|
|
|
4
|
|
3725
|
use Path::Class::Rule; |
|
4
|
|
|
|
|
9
|
|
|
4
|
|
|
|
|
1981
|
|
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
our $VERSION = '3.00'; |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
=head1 METHODS & ATTRIBUTES |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
=head2 Arguments for L<ETL::Pipeline/input> |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
B<ETL::Pipeline::Input::File::List> accepts any of the tests provided by |
53
|
|
|
|
|
|
|
L<Path::Iterator::Rule>. The value of the argument is passed directly into the |
54
|
|
|
|
|
|
|
test. For boolean tests (e.g. readable, exists, etc.), pass an C<undef> value. |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
B<ETL::Pipeline::Input::File> automatically applies the C<file> filter. Do not |
57
|
|
|
|
|
|
|
pass C<file> through L<ETL::Pipeline/input>. |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
C<iname> is the most common one that I use. It matches the file name, supports |
60
|
|
|
|
|
|
|
wildcards and regular expressions, and is case insensitive. |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
# Search using a regular expression... |
63
|
|
|
|
|
|
|
$etl->input( 'XmlFiles', iname => qr/\.xml$/ ); |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
# Search using a file glob... |
66
|
|
|
|
|
|
|
$etl->input( 'XmlFiles', iname => '*.xml' ); |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
=cut |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
# BUILD in the consuming class will override this one. I add a fake BUILD in |
71
|
|
|
|
|
|
|
# case the class doesn't have one. The method modifier then runs the code to |
72
|
|
|
|
|
|
|
# extract search criteria from the constructor arguments. The modifier will |
73
|
|
|
|
|
|
|
# run even if the consuming class has its own BUILD. |
74
|
|
|
|
|
|
|
# https://www.perlmonks.org/?node_id=837369 |
75
|
|
|
|
16
|
0
|
|
sub BUILD {} |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
after 'BUILD' => sub { |
78
|
|
|
|
|
|
|
my $self = shift; |
79
|
|
|
|
|
|
|
my $arguments = shift; |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
while (my ($name, $value) = each %$arguments) { |
82
|
|
|
|
|
|
|
$self->_add_criteria( $name, $value ) |
83
|
|
|
|
|
|
|
if $name ne 'file' && Path::Class::Rule->can( $name ); |
84
|
|
|
|
|
|
|
} |
85
|
|
|
|
|
|
|
}; |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
=head3 path |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
L<Path::Class::File> object for the currently selected file. This is first file |
91
|
|
|
|
|
|
|
that matches the criteria. When you call L</next_path>, it finds the next match |
92
|
|
|
|
|
|
|
and sets B<path>. |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
So B<path> always points to the current file. It should be used by your input |
95
|
|
|
|
|
|
|
source class as the file name. |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
# Inside the input source class... |
98
|
|
|
|
|
|
|
while ($self->next_path( $etl )) { |
99
|
|
|
|
|
|
|
open my $io, '<', $self->path; |
100
|
|
|
|
|
|
|
... |
101
|
|
|
|
|
|
|
} |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
C<undef> means no more matches. |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
=cut |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
has 'path' => ( |
108
|
|
|
|
|
|
|
coerce => 1, |
109
|
|
|
|
|
|
|
is => 'ro', |
110
|
|
|
|
|
|
|
isa => 'Path::Class::File|Undef', |
111
|
|
|
|
|
|
|
writer => '_set_path', |
112
|
|
|
|
|
|
|
); |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=head2 Methods |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
=head3 next_path |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
Looks for the next match in the list and sets the L</path> attribute. It also |
120
|
|
|
|
|
|
|
returns the matching path. Your input source class should setup a loop calling |
121
|
|
|
|
|
|
|
this method. Inside the loop, process each file. |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
B<next_path> takes one parameter - the L<ETL::Pipeline> object. The method |
124
|
|
|
|
|
|
|
matches files in L<ETL::Pipeline/data_in>. |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
=cut |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
sub next_path { |
129
|
72
|
|
|
72
|
1
|
840
|
my ($self, $etl) = @_; |
130
|
|
|
|
|
|
|
|
131
|
72
|
100
|
|
|
|
1838
|
if ($self->_list_built) { |
132
|
|
|
|
|
|
|
# Get the next file from the list. We'll return "undef" if you query |
133
|
|
|
|
|
|
|
# beyond the end of the list. |
134
|
57
|
|
|
|
|
1734
|
$self->_next_file; |
135
|
|
|
|
|
|
|
} else { |
136
|
|
|
|
|
|
|
# Build the list the first time through. |
137
|
15
|
|
|
|
|
150
|
my $rule = Path::Class::Rule->new->file; |
138
|
15
|
|
|
|
|
1091
|
foreach my $pair ($self->_search_criteria) { |
139
|
14
|
|
|
|
|
36
|
my $name = $pair->[0]; |
140
|
14
|
|
|
|
|
26
|
my $value = $pair->[1]; |
141
|
|
|
|
|
|
|
|
142
|
14
|
|
|
|
|
948
|
eval "\$rule = \$rule->$name( \$value )"; |
143
|
14
|
50
|
|
|
|
2704
|
croak $@ unless $@ eq ''; |
144
|
|
|
|
|
|
|
} |
145
|
15
|
|
|
|
|
75
|
$self->_matches( $rule->all( $etl->data_in ) ); |
146
|
15
|
|
|
|
|
596
|
$self->_list_built( 1 ); |
147
|
|
|
|
|
|
|
} |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
# Set "position" to something more readable. |
150
|
72
|
|
|
|
|
1697
|
my $file = $self->_set_path( $self->_file( $self->_file_index ) ); |
151
|
|
|
|
|
|
|
|
152
|
72
|
100
|
|
|
|
182
|
if (defined $file) { |
153
|
57
|
|
|
|
|
233
|
$self->source( $file->relative( $etl->work_in )->stringify ); |
154
|
57
|
|
|
|
|
295
|
$etl->status( 'INFO', 'Next file' ); |
155
|
15
|
|
|
|
|
341
|
} else { $self->source( '' ); } |
156
|
|
|
|
|
|
|
|
157
|
72
|
|
|
|
|
369
|
return $file; |
158
|
|
|
|
|
|
|
} |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
162
|
|
|
|
|
|
|
# Internal methods and attributes |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
# Search criteria for the file list. I capture the criteria from the constructor |
165
|
|
|
|
|
|
|
# but don't build the iterator until the loop kicks off. Since the search |
166
|
|
|
|
|
|
|
# depends on "data_in", this allows the user to setup the pipeline in whatever |
167
|
|
|
|
|
|
|
# order they want and it will do the right thing. |
168
|
|
|
|
|
|
|
has '_criteria' => ( |
169
|
|
|
|
|
|
|
default => sub { {} }, |
170
|
|
|
|
|
|
|
handles => {_add_criteria => 'set', _search_criteria => 'kv'}, |
171
|
|
|
|
|
|
|
is => 'ro', |
172
|
|
|
|
|
|
|
isa => 'HashRef[Any]', |
173
|
|
|
|
|
|
|
traits => [qw/Hash/], |
174
|
|
|
|
|
|
|
); |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
# Index into "_file_list" for the current file. This counter is used to loop |
178
|
|
|
|
|
|
|
# through the list by calling "next_path". |
179
|
|
|
|
|
|
|
has '_file_index' => ( |
180
|
|
|
|
|
|
|
default => 0, |
181
|
|
|
|
|
|
|
handles => {_next_file => 'inc'}, |
182
|
|
|
|
|
|
|
is => 'ro', |
183
|
|
|
|
|
|
|
isa => 'Int', |
184
|
|
|
|
|
|
|
traits => [qw/Counter/], |
185
|
|
|
|
|
|
|
); |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
# List of files that match the search criteria. The list is built at the |
189
|
|
|
|
|
|
|
# beginning of the pipeline. So your pipeline can't add files on the fly. |
190
|
|
|
|
|
|
|
has '_file_list' => ( |
191
|
|
|
|
|
|
|
default => sub { [] }, |
192
|
|
|
|
|
|
|
handles => {_file => 'get', _matches => 'push'}, |
193
|
|
|
|
|
|
|
is => 'ro', |
194
|
|
|
|
|
|
|
isa => 'ArrayRef[Any]', |
195
|
|
|
|
|
|
|
traits => [qw/Array/], |
196
|
|
|
|
|
|
|
); |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
# Since the list always exists, I needed a way to tell the difference between |
200
|
|
|
|
|
|
|
# "no matches" and "not built yet". That way, "next_record" can build the list |
201
|
|
|
|
|
|
|
# on the first pass. |
202
|
|
|
|
|
|
|
has '_list_built' => ( |
203
|
|
|
|
|
|
|
default => 0, |
204
|
|
|
|
|
|
|
is => 'rw', |
205
|
|
|
|
|
|
|
isa => 'Bool', |
206
|
|
|
|
|
|
|
); |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
=head1 SEE ALSO |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
L<ETL::Pipeline>, L<ETL::Pipeline::Input>, L<Path::Class::File>, |
212
|
|
|
|
|
|
|
L<Path::Class::Rule>, L<Path::Iterator::Rule> |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
=cut |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
=head1 AUTHOR |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
Robert Wohlfarth <robert.j.wohlfarth@vumc.org> |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
=head1 LICENSE |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
Copyright 2021 (c) Vanderbilt University Medical Center |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it under |
225
|
|
|
|
|
|
|
the same terms as Perl itself. |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
=cut |
228
|
|
|
|
|
|
|
|
229
|
4
|
|
|
4
|
|
33
|
no Moose; |
|
4
|
|
|
|
|
9
|
|
|
4
|
|
|
|
|
36
|
|
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
# Required by Perl to load the module. |
232
|
|
|
|
|
|
|
1; |