File Coverage

blib/lib/App/Fasops/Command/mergecsv.pm

Criterion	Covered	Total	%
statement	79	85	92.9
branch	13	18	72.2
condition			n/a
subroutine	11	11	100.0
pod	6	6	100.0
total	109	120	90.8

line	stmt	bran	sub	pod	time	code
1						package App::Fasops::Command::mergecsv;
2	21		21		15030	use strict;
	21				56
	21				667
3	21		21		116	use warnings;
	21				40
	21				554
4	21		21		105	use autodie;
	21				48
	21				121
5
6	21		21		107514	use App::Fasops -command;
	21				73
	21				212
7	21		21		7763	use App::Fasops::Common;
	21				50
	21				21393
8
9						sub abstract {
10	2		2	1	54	return 'merge csv files based on @fields';
11						}
12
13						sub opt_spec {
14						return (
15	6		6	1	54	[ "outfile\|o=s", "Output filename. [stdout] for screen", { default => "stdout" }, ],
16						[ 'fields\|f=i@', 'fields as identifies, 0 as first column', { default => [0] }, ],
17						[ 'concat\|c', 'do concat other than merge. Keep first ID fields', ],
18						{ show_defaults => 1, }
19						);
20						}
21
22						sub usage_desc {
23	6		6	1	44831	return "fasops mergecsv [options] [more files]";
24						}
25
26						sub description {
27	1		1	1	824	my $desc;
28	1				5	$desc .= ucfirst(abstract) . ".\n";
29	1				3	$desc .= <<'MARKDOWN';
30
31						* Accept one or more csv files
32						* infile == stdin means reading from STDIN
33
34						cat 1.csv 2.csv \| egaz mergecsv -f 0 -f 1
35						egaz mergecsv -f 0 -f 1 1.csv 2.csv
36
37						MARKDOWN
38
39	1				2	return $desc;
40						}
41
42						sub validate_args {
43	5		5	1	4379	my ( $self, $opt, $args ) = @_;
44
45	5	100			8	if ( @{$args} < 1 ) {
	5				14
46	1				2	my $message = "This command need one or more input files.\n\tIt found";
47	1				2	$message .= sprintf " [%s]", $_ for @{$args};
	1				3
48	1				3	$message .= ".\n";
49	1				8	$self->usage_error($message);
50						}
51	4				9	for ( @{$args} ) {
	4				8
52	6	50			133	next if lc $_ eq "stdin";
53	6	100			47	if ( !Path::Tiny::path($_)->is_file ) {
54	2				146	$self->usage_error("The input file [$_] doesn't exist.");
55						}
56						}
57
58						# make array splicing happier
59	2				71	$opt->{fields} = [ sort @{ $opt->{fields} } ];
	2				17
60						}
61
62						sub execute {
63	2		2	1	12	my ( $self, $opt, $args ) = @_;
64
65						#----------------------------#
66						# read
67						#----------------------------#
68	2				3	my $index_of = {}; # index of ids in @lines
69	2				5	my @lines;
70	2				5	my ( $count_all, $index ) = ( 0, 0 );
71
72	2				3	for my $infile ( @{$args} ) {
	2				5
73
74						#@type IO::Handle
75	4				304	my $in_fh;
76	4	50			13	if ( lc $infile eq "stdin" ) {
77	0				0	$in_fh = *STDIN{IO};
78						}
79						else {
80	4				20	$in_fh = IO::Zlib->new( $infile, "rb" );
81						}
82
83	4				5076	while ( !$in_fh->eof ) {
84	20				837	my $line = $in_fh->getline;
85	20				2446	chomp $line;
86	20	50			35	next unless $line;
87
88	20				23	$count_all++;
89	20				24	my $id = join( "_", ( split ",", $line )[ @{ $opt->{fields} } ] );
	20				66
90	20	100			43	if ( exists $index_of->{$id} ) {
91	10	100			34	if ( $opt->{concat} ) {
92	5				6	my $ori_index = $index_of->{$id};
93	5				8	my $ori_line = $lines[$ori_index];
94
95	5				10	my @fs = split ",", $line;
96	5				6	for my $f_idx ( reverse @{ $opt->{fields} } ) {
	5				10
97	5				20	splice @fs, $f_idx, 1;
98						}
99	5				27	$lines[$ori_index] = join ",", $ori_line, @fs;
100						}
101						}
102						else {
103	10				20	$index_of->{$id} = $index;
104	10				15	push @lines, $line;
105	10				38	$index++;
106						}
107						}
108
109	4				501	$in_fh->close;
110						}
111
112						#----------------------------#
113						# check
114						#----------------------------#
115						{
116	2				210	my %seen;
	2				2
117	2				6	for (@lines) {
118	10				17	my $number = scalar split(",");
119	10				15	$seen{$number}++;
120						}
121	2	50			11	if ( keys(%seen) > 1 ) {
122	0				0	Carp::carp "*** Fields not identical, be careful.\n";
123	0				0	Carp::carp YAML::Syck::Dump { fields => \%seen, };
124						}
125						}
126
127						#----------------------------#
128						# write outputs
129						#----------------------------#
130	2				4	my $out_fh;
131	2	50			8	if ( lc( $opt->{outfile} ) eq "stdout" ) {
132	2				5	$out_fh = *STDOUT{IO};
133						}
134						else {
135	0				0	open $out_fh, ">", $opt->{outfile};
136						}
137
138	2				5	for (@lines) {
139	10				91	print {$out_fh} $_ . "\n";
	10				36
140						}
141	2				28	close $out_fh;
142
143	0					printf STDERR "Total lines [%d]; Result lines [%d].\n", $count_all, scalar @lines;
144
145	0					return;
146						}
147
148						1;