File Coverage

blib/lib/Text/CSV/Merge.pm

Criterion	Covered	Total	%
statement	31	68	45.5
branch	1	22	4.5
condition	0	12	0.0
subroutine	10	14	71.4
pod	2	2	100.0
total	44	118	37.2

line	stmt	bran	cond	sub	pod	time	code
1							package Text::CSV::Merge;
2							{
3							$Text::CSV::Merge::VERSION = '0.05';
4							}
5							# ABSTRACT: Fill in gaps in a CSV file from another CSV file
6
7	3			3		75005	use Modern::Perl '2010';
	3					81419
	3					21
8	3			3		60199	use autodie;
	3					68835
	3					20
9	3			3		28793	use utf8;
	3					44
	3					18
10
11	3			3		4967	use Moo 1.001000;
	3					52649
	3					40
12	3			3		7402	use Carp;
	3					7
	3					250
13	3			3		15	use IO::File;
	3					8
	3					484
14	3			3		8657	use Text::CSV_XS;
	3					37682
	3					318
15	3			3		40558	use DBI; # for DBD::CSV
	3					74514
	3					292
16	3			3		4286	use Log::Dispatch;
	3					48265
	3					5053
17
18
19							has +logger => (
20							is => 'lazy',
21							builder => sub {
22	0			0		0	Log::Dispatch->new(
23							outputs => [
24							[ 'File', autoflush => 1, min_level => 'debug', filename => 'merge.log', newline => 1, mode => '>>' ],
25							#[ 'Screen', min_level => 'info', newline => 1 ],
26							],
27							);
28							}
29							);
30
31							has +csv_parser => (
32							is => 'lazy',
33							builder => sub {
34	0	0		0		0	Text::CSV_XS->new({ binary => 1, eol => $/ })
35							or croak("Cannot use module Text::CSV_XS: " . Text::CSV_XS->error_diag());
36							}
37							);
38
39							has +dbh => (
40							is => 'lazy',
41							# must return only a code ref
42							builder => sub {
43	0	0		0		0	DBI->connect("dbi:CSV:", undef, undef, {
44							RaiseError => 1,
45							PrintError => 1,
46							f_ext => ".csv/r",
47							# Better performance with XS
48							csv_class => "Text::CSV_XS",
49							# csv_null => 1,
50							}) or croak("Cannot connect to CSV file via DBI: $DBI::errstr");
51							}
52							);
53
54							has base_file => (
55							is => 'rw',
56							required => 1,
57							#allow external names to be different from class attribute
58							init_arg => 'base',
59							#validate it
60							#isa => sub {},
61							coerce => sub {
62							my $base_fh = IO::File->new( $_[0], '<' ) or croak("Open file: '$_[0]' failed: $!");
63							$base_fh->binmode(":utf8");
64
65							return $base_fh;
66							}
67							);
68
69							has merge_file => (
70							# We use only the raw file name/path and do not create a FH here, unlike base_file().
71							is => 'rw',
72							init_arg => 'merge',
73							required => 1
74							);
75
76							has output_file => (
77							is => 'rw',
78							init_arg => 'output',
79							# an output file name is NOT required
80							required => 0,
81							default => 'merged_output.csv',
82							coerce => sub {
83							my $output_fh = IO::File->new( $_[0], '>' ) or croak("Open file: '$_[0]' failed: $!");
84							$output_fh->binmode(":utf8");
85
86							return $output_fh;
87							}
88							);
89
90							has columns=> (
91							is => 'rw',
92							required => 1,
93							);
94
95							has search_field => (
96							is => 'rw',
97							required => 1,
98							init_arg => 'search'
99							#,
100							#isa => sub {
101							# validate that search_field is one of the columns in the base file
102							#die "Search parameter: '$_[0]' is not one of the columns: @{$self->columns}";
103							# unless ( $_[0] ~~ @{$self->columns} );
104							#}
105							);
106
107							has first_row_is_headers => (
108							is => 'rw',
109							required => 1,
110							#validate it
111							isa => sub {
112							# @TODO: there's got to be a better way to do this!
113							croak("Option 'first_row_is_headers' must be 1 or 0") unless ( $_[0] =~ m{'1'\|'0'}x \|\| $_[0] == 1 \|\| $_[0] == 0 );
114							},
115							);
116
117							sub merge {
118	0			0	1	0	my $self = shift;
119
120							# validate that search_field is one of the columns in the base file
121	0					0	croak("Search parameter: '$self->search_field' is not one of the columns: @{$self->columns}")
	0					0
122	0	0				0	unless ( scalar(grep { $_ eq $self->search_field } @{$self->columns}) );
	0					0
123							# Use scalar() to force grep to return the number of matches;
124							# 0 -> false for the 'unless' statement.
125
126	0					0	$self->csv_parser->column_names( $self->columns );
127
128							# Loop through the base file to find missing data
129							#@TODO: make into $self->rows?
130	0					0	my @rows;
131
132	0					0	while ( my $row = $self->csv_parser->getline_hr( $self->base_file ) ) {
133							# skip column names
134	0	0	0			0	next if ($. == 1 and $self->first_row_is_headers);
135
136	0	0				0	if ( $self->csv_parser->parse($row) ) {
137							# keep a list of null columns in this row
138	0					0	my @nulls;
139
140							# might be slightly more efficient to use while()
141	0					0	foreach my $key ( keys %{$row} ) {
	0					0
142							# which fields is this row missing?
143	0	0	0			0	if ( $row->{$key} eq 'NULL' or $row->{$key} eq "" ) {
144	0					0	push @nulls, $key;
145
146	0					0	$self->logger->info("Missing data in column: $key for '$row->{$self->search_field}'");
147							}
148							}
149
150							# make a hash of arrays
151	0	0				0	if ( @nulls ) {
152							# search $merge_file for the missing data's row
153	0					0	local $" = ','; # reset the list separator for array interpolation to suit SQL
154
155							# To get the original case for the columns, specify the column
156							# names rather than using SELECT *, since it normalizes to
157							# lowercase, per:
158							# http://stackoverflow.com/questions/3350775/dbdcsv-returns-header-in-lower-case
159	0	0				0	my $sth = $self->dbh->prepare(
160	0					0	"select @{$self->columns} from $self->{merge_file} where $self->{search_field} = ?"
161							) or croak("Cannot prepare DBI statement: " . $self->dbh->errstr ());
162
163	0					0	$sth->execute($row->{$self->search_field});
164
165	0					0	while ( my $filler = $sth->fetchrow_hashref() ) {
166	0					0	foreach my $gap ( @nulls ) {
167	0	0	0			0	if (exists $filler->{$gap} and defined $filler->{$gap} and $filler->{$gap} ne "") {
			0
168	0					0	$self->logger->info(
169							"Found Data: '$gap' = '$filler->{$gap}' for '$row->{$self->search_field}'"
170							);
171
172	0					0	$row->{$gap} = $filler->{$gap};
173							} else {
174	0					0	$self->logger->info(
175							"Data not Found for column: '$gap' for '$row->{$self->search_field}' $self->{merge_file}"
176							);
177							}
178							}
179							}
180
181							# Be efficient and neat!
182	0					0	$sth->finish();
183							}
184
185							# insert the updated row as a reference; even if not processed, the
186							# row will still appear in the final output.
187	0					0	push @rows, $row;
188							} else {
189	0					0	my $err = $self->csv_parser->error_input;
190	0					0	$self->logger->error("Failed to parse line: $err");
191							}
192							}
193
194							# Ensure we've processed to the end of the file
195	0	0				0	$self->csv_parser->eof or $self->csv_parser->error_diag();
196
197							# print does NOT want an actual array! Use a hash slice, instead:
198							#$self->csv_parser->print($output_fh, [ @$_{@columns} ]) for @rows;
199							#
200							# Or, here I've switched to Text::CSV_XS's specific print_hr(), which
201							# is simply missing from the PP (Pure Perl) version.
202	0					0	$self->csv_parser->print_hr($self->output_file, $_) for @rows;
203
204	0					0	return 1;
205							};
206
207							sub DEMOLISH {
208	1			1	1	3050	my $self = shift;
209
210							## Clean up!
211	1					8	$self->base_file->close();
212	1	50				674	$self->output_file->close() or croak("Close 'output.csv' failed: $!");
213
214	1					757	return;
215							}
216
217
218							1;
219
220							__END__