| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | =pod | 
| 2 |  |  |  |  |  |  |  | 
| 3 |  |  |  |  |  |  | =head1 NAME | 
| 4 |  |  |  |  |  |  |  | 
| 5 |  |  |  |  |  |  | ETL::Pipeline::Input::File - Role for file based input sources | 
| 6 |  |  |  |  |  |  |  | 
| 7 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 8 |  |  |  |  |  |  |  | 
| 9 |  |  |  |  |  |  | # In the input source... | 
| 10 |  |  |  |  |  |  | use Moose; | 
| 11 |  |  |  |  |  |  | with 'ETL::Pipeline::Input::File'; | 
| 12 |  |  |  |  |  |  | ... | 
| 13 |  |  |  |  |  |  |  | 
| 14 |  |  |  |  |  |  | # In the ETL::Pipeline script... | 
| 15 |  |  |  |  |  |  | ETL::Pipeline->new( { | 
| 16 |  |  |  |  |  |  | work_in   => {search => 'C:\Data', find => qr/Ficticious/}, | 
| 17 |  |  |  |  |  |  | input     => ['Excel', matching => qr/\.xlsx?$/          ], | 
| 18 |  |  |  |  |  |  | mapping   => {Name => 'A', Address => 'B', ID => 'C'     }, | 
| 19 |  |  |  |  |  |  | constants => {Type => 1, Information => 'Demographic'    }, | 
| 20 |  |  |  |  |  |  | output    => ['SQL', table => 'NewData'                  ], | 
| 21 |  |  |  |  |  |  | } )->process; | 
| 22 |  |  |  |  |  |  |  | 
| 23 |  |  |  |  |  |  | # Or with a specific file... | 
| 24 |  |  |  |  |  |  | ETL::Pipeline->new( { | 
| 25 |  |  |  |  |  |  | work_in   => {search => 'C:\Data', find => qr/Ficticious/}, | 
| 26 |  |  |  |  |  |  | input     => ['Excel', file => 'ExportedData.xlsx'       ], | 
| 27 |  |  |  |  |  |  | mapping   => {Name => 'A', Address => 'B', ID => 'C'     }, | 
| 28 |  |  |  |  |  |  | constants => {Type => 1, Information => 'Demographic'    }, | 
| 29 |  |  |  |  |  |  | output    => ['SQL', table => 'NewData'                  ], | 
| 30 |  |  |  |  |  |  | } )->process; | 
| 31 |  |  |  |  |  |  |  | 
| 32 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 33 |  |  |  |  |  |  |  | 
| 34 |  |  |  |  |  |  | B<ETL::Pipeline::Input::File> provides methods and attributes common to | 
| 35 |  |  |  |  |  |  | file based input sources. It makes file searches available for any file | 
| 36 |  |  |  |  |  |  | format. With B<ETL::Pipeline::Input::File>, you can... | 
| 37 |  |  |  |  |  |  |  | 
| 38 |  |  |  |  |  |  | =over | 
| 39 |  |  |  |  |  |  |  | 
| 40 |  |  |  |  |  |  | =item Specify the exact path to the file. | 
| 41 |  |  |  |  |  |  |  | 
| 42 |  |  |  |  |  |  | =item Or search the file system for a matching name. | 
| 43 |  |  |  |  |  |  |  | 
| 44 |  |  |  |  |  |  | =back | 
| 45 |  |  |  |  |  |  |  | 
| 46 |  |  |  |  |  |  | For setting an exact path, see the L</file> attribute. For searches, see the | 
| 47 |  |  |  |  |  |  | L</matching> attribute. | 
| 48 |  |  |  |  |  |  |  | 
| 49 |  |  |  |  |  |  | =head2 File vs. DataFile | 
| 50 |  |  |  |  |  |  |  | 
| 51 |  |  |  |  |  |  | L<ETL::Pipeline::Input::DataFile> extends B<ETL::Pipeline::Input::File>. | 
| 52 |  |  |  |  |  |  | This role, B<ETL::Pipeline::Input::File>, makes no assumptions about the file | 
| 53 |  |  |  |  |  |  | format. It works with CSV text files, MS Access databases, spreadsheets, XML, or | 
| 54 |  |  |  |  |  |  | any other format found on disk. | 
| 55 |  |  |  |  |  |  |  | 
| 56 |  |  |  |  |  |  | L<ETL::Pipeline::Input::DataFile> assumes that each record is stored on one | 
| 57 |  |  |  |  |  |  | row and that the data is divided into fields (columns). | 
| 58 |  |  |  |  |  |  |  | 
| 59 |  |  |  |  |  |  | =cut | 
| 60 |  |  |  |  |  |  |  | 
| 61 |  |  |  |  |  |  | package ETL::Pipeline::Input::File; | 
| 62 | 4 |  |  | 4 |  | 12637 | use Moose::Role; | 
|  | 4 |  |  |  |  | 7 |  | 
|  | 4 |  |  |  |  | 37 |  | 
| 63 |  |  |  |  |  |  |  | 
| 64 | 4 |  |  | 4 |  | 16131 | use 5.014000; | 
|  | 4 |  |  |  |  | 10 |  | 
| 65 | 4 |  |  | 4 |  | 18 | use Carp; | 
|  | 4 |  |  |  |  | 6 |  | 
|  | 4 |  |  |  |  | 323 |  | 
| 66 | 4 |  |  | 4 |  | 21 | use MooseX::Types::Path::Class qw/Dir File/; | 
|  | 4 |  |  |  |  | 4 |  | 
|  | 4 |  |  |  |  | 60 |  | 
| 67 | 4 |  |  | 4 |  | 4373 | use Path::Class::Rule; | 
|  | 4 |  |  |  |  | 6 |  | 
|  | 4 |  |  |  |  | 129 |  | 
| 68 | 4 |  |  | 4 |  | 17 | use String::Util qw/hascontent/; | 
|  | 4 |  |  |  |  | 6 |  | 
|  | 4 |  |  |  |  | 1173 |  | 
| 69 |  |  |  |  |  |  |  | 
| 70 |  |  |  |  |  |  |  | 
| 71 |  |  |  |  |  |  | our $VERSION = '2.00'; | 
| 72 |  |  |  |  |  |  |  | 
| 73 |  |  |  |  |  |  |  | 
| 74 |  |  |  |  |  |  | =head1 METHODS & ATTRIBUTES | 
| 75 |  |  |  |  |  |  |  | 
| 76 |  |  |  |  |  |  | =head2 Arguments for L<ETL::Pipeline/input> | 
| 77 |  |  |  |  |  |  |  | 
| 78 |  |  |  |  |  |  | =head3 matching | 
| 79 |  |  |  |  |  |  |  | 
| 80 |  |  |  |  |  |  | B<matching> locates the first file that matches the given pattern. The | 
| 81 |  |  |  |  |  |  | pattern can be a glob or regular expression. B<matching> sets L</file> | 
| 82 |  |  |  |  |  |  | to the first file that matches. Search patterns are case insensitive. | 
| 83 |  |  |  |  |  |  |  | 
| 84 |  |  |  |  |  |  | # Search using a regular expression... | 
| 85 |  |  |  |  |  |  | $etl->input( 'Excel', matching => qr/\.xlsx$/i ); | 
| 86 |  |  |  |  |  |  |  | 
| 87 |  |  |  |  |  |  | # Search using a file glob... | 
| 88 |  |  |  |  |  |  | $etl->input( 'Excel', matching => '*.xlsx' ); | 
| 89 |  |  |  |  |  |  |  | 
| 90 |  |  |  |  |  |  | For very weird cases, B<matching> also accepts a code reference. | 
| 91 |  |  |  |  |  |  | B<matching> executes the subroutine against the file names. B<matching> | 
| 92 |  |  |  |  |  |  | sets L</file> to the first file where the subroutine returns a true | 
| 93 |  |  |  |  |  |  | value. | 
| 94 |  |  |  |  |  |  |  | 
| 95 |  |  |  |  |  |  | B<matching> passes two parameters into the subroutine... | 
| 96 |  |  |  |  |  |  |  | 
| 97 |  |  |  |  |  |  | =over | 
| 98 |  |  |  |  |  |  |  | 
| 99 |  |  |  |  |  |  | =item The L<ETL::Pipeline> object | 
| 100 |  |  |  |  |  |  |  | 
| 101 |  |  |  |  |  |  | =item The L<Path::Class::File> object | 
| 102 |  |  |  |  |  |  |  | 
| 103 |  |  |  |  |  |  | =back | 
| 104 |  |  |  |  |  |  |  | 
| 105 |  |  |  |  |  |  | # File larger than 2K... | 
| 106 |  |  |  |  |  |  | $etl->input( 'Excel', matching => sub { | 
| 107 |  |  |  |  |  |  | my ($etl, $file) = @_; | 
| 108 |  |  |  |  |  |  | return (!$file->is_dir && $file->size > 2048 ? 1 : 0); | 
| 109 |  |  |  |  |  |  | } ); | 
| 110 |  |  |  |  |  |  |  | 
| 111 |  |  |  |  |  |  | B<matching> searches inside the L<ETL::Pipeline/data_in> directory. | 
| 112 |  |  |  |  |  |  |  | 
| 113 |  |  |  |  |  |  | =cut | 
| 114 |  |  |  |  |  |  |  | 
| 115 |  |  |  |  |  |  | has 'matching' => ( | 
| 116 |  |  |  |  |  |  | is  => 'ro', | 
| 117 |  |  |  |  |  |  | isa => 'Maybe[CodeRef|RegexpRef|Str]', | 
| 118 |  |  |  |  |  |  | ); | 
| 119 |  |  |  |  |  |  |  | 
| 120 |  |  |  |  |  |  |  | 
| 121 |  |  |  |  |  |  | =head3 file | 
| 122 |  |  |  |  |  |  |  | 
| 123 |  |  |  |  |  |  | B<file> holds a L<Path::Class::File> object pointing to the input file. | 
| 124 |  |  |  |  |  |  | If L<ETL::Pipeline/input> does not set B<file>, then the L</matching> | 
| 125 |  |  |  |  |  |  | attribute searches the file system for a match. If | 
| 126 |  |  |  |  |  |  | L<ETL::Pipeline/input> sets B<file>, then L</matching> is ignored. | 
| 127 |  |  |  |  |  |  |  | 
| 128 |  |  |  |  |  |  | B<file> is relative to L<ETL::Pipeline/data_in>, unless you set it to an | 
| 129 |  |  |  |  |  |  | absolute path name. With L</matching>, the search is always limited to | 
| 130 |  |  |  |  |  |  | L<ETL::Pipeline/data_in>. | 
| 131 |  |  |  |  |  |  |  | 
| 132 |  |  |  |  |  |  | # File inside of "data_in"... | 
| 133 |  |  |  |  |  |  | $etl->input( 'Excel', file => 'Data.xlsx' ); | 
| 134 |  |  |  |  |  |  |  | 
| 135 |  |  |  |  |  |  | # Absolute path name... | 
| 136 |  |  |  |  |  |  | $etl->input( 'Excel', file => 'C:\Data.xlsx' ); | 
| 137 |  |  |  |  |  |  |  | 
| 138 |  |  |  |  |  |  | =cut | 
| 139 |  |  |  |  |  |  |  | 
| 140 |  |  |  |  |  |  | has 'file' => ( | 
| 141 |  |  |  |  |  |  | builder => '_build_file', | 
| 142 |  |  |  |  |  |  | coerce  => 1, | 
| 143 |  |  |  |  |  |  | is      => 'ro', | 
| 144 |  |  |  |  |  |  | isa     => File, | 
| 145 |  |  |  |  |  |  | lazy    => 1, | 
| 146 |  |  |  |  |  |  | trigger => \&_trigger_file, | 
| 147 |  |  |  |  |  |  | writer  => '_set_file', | 
| 148 |  |  |  |  |  |  | ); | 
| 149 |  |  |  |  |  |  |  | 
| 150 |  |  |  |  |  |  |  | 
| 151 |  |  |  |  |  |  | sub _build_file { | 
| 152 | 9 |  |  | 9 |  | 10 | my $self = shift; | 
| 153 |  |  |  |  |  |  |  | 
| 154 | 9 |  |  |  |  | 82 | my $rule     = Path::Class::Rule->new; | 
| 155 | 9 |  |  |  |  | 251 | my $pattern  = $self->matching; | 
| 156 | 9 |  |  |  |  | 218 | my $pipeline = $self->pipeline; | 
| 157 |  |  |  |  |  |  |  | 
| 158 | 9 | 100 |  |  |  | 25 | if (ref( $pattern ) eq 'CODE') { | 
| 159 | 1 |  |  |  |  | 3 | my $search = $rule->iter( $pipeline->data_in ); | 
| 160 | 1 |  |  |  |  | 123 | while (my $file = $search->()) { | 
| 161 | 2 | 100 |  |  |  | 787 | return $file if $pipeline->execute_code_ref( $pattern, $file ); | 
| 162 |  |  |  |  |  |  | } | 
| 163 | 0 |  |  |  |  | 0 | croak 'No file matched for "input"'; | 
| 164 | 0 |  |  |  |  | 0 | return undef; | 
| 165 |  |  |  |  |  |  | } else { | 
| 166 | 8 |  |  |  |  | 28 | $rule->file; | 
| 167 | 8 | 50 |  |  |  | 257 | $rule->iname( $pattern ) if defined $pattern; | 
| 168 | 8 |  |  |  |  | 1372 | my $search = $rule->iter( $pipeline->data_in ); | 
| 169 |  |  |  |  |  |  |  | 
| 170 | 8 |  |  |  |  | 1601 | my $file = $search->(); | 
| 171 | 8 | 50 |  |  |  | 10468 | croak 'No file matched for "input"' unless defined $file; | 
| 172 | 8 |  |  |  |  | 404 | return $file; | 
| 173 |  |  |  |  |  |  | } | 
| 174 |  |  |  |  |  |  | } | 
| 175 |  |  |  |  |  |  |  | 
| 176 |  |  |  |  |  |  |  | 
| 177 |  |  |  |  |  |  | sub _trigger_file { | 
| 178 | 2 |  |  | 2 |  | 24 | my ($self, $old, $new) = @_; | 
| 179 | 2 | 50 | 33 |  |  | 48 | $self->_set_file( $new->absolute( $self->pipeline->data_in ) ) | 
| 180 |  |  |  |  |  |  | if defined( $new ) && $new->is_relative; | 
| 181 |  |  |  |  |  |  | } | 
| 182 |  |  |  |  |  |  |  | 
| 183 |  |  |  |  |  |  |  | 
| 184 |  |  |  |  |  |  | =head1 SEE ALSO | 
| 185 |  |  |  |  |  |  |  | 
| 186 |  |  |  |  |  |  | L<ETL::Pipeline>, L<ETL::Pipeline::Input>, L<ETL::Pipeline::Input::TabularFile> | 
| 187 |  |  |  |  |  |  |  | 
| 188 |  |  |  |  |  |  | =head1 AUTHOR | 
| 189 |  |  |  |  |  |  |  | 
| 190 |  |  |  |  |  |  | Robert Wohlfarth <robert.j.wohlfarth@vanderbilt.edu> | 
| 191 |  |  |  |  |  |  |  | 
| 192 |  |  |  |  |  |  | =head1 LICENSE | 
| 193 |  |  |  |  |  |  |  | 
| 194 |  |  |  |  |  |  | Copyright 2016 (c) Vanderbilt University Medical Center | 
| 195 |  |  |  |  |  |  |  | 
| 196 |  |  |  |  |  |  | This program is free software; you can redistribute it and/or modify it under | 
| 197 |  |  |  |  |  |  | the same terms as Perl itself. | 
| 198 |  |  |  |  |  |  |  | 
| 199 |  |  |  |  |  |  | =cut | 
| 200 |  |  |  |  |  |  |  | 
| 201 | 4 |  |  | 4 |  | 20 | no Moose; | 
|  | 4 |  |  |  |  | 4 |  | 
|  | 4 |  |  |  |  | 25 |  | 
| 202 |  |  |  |  |  |  |  | 
| 203 |  |  |  |  |  |  | # Required by Perl to load the module. | 
| 204 |  |  |  |  |  |  | 1; |