File Coverage

blib/lib/App/Fasops/Command/mergecsv.pm

Criterion	Covered	Total	%
statement	79	85	92.9
branch	13	18	72.2
condition			n/a
subroutine	11	11	100.0
pod	6	6	100.0
total	109	120	90.8

line	stmt	bran	sub	pod	time	code
1						package App::Fasops::Command::mergecsv;
2	20		20		13947	use strict;
	20				48
	20				633
3	20		20		105	use warnings;
	20				44
	20				502
4	20		20		106	use autodie;
	20				42
	20				113
5
6	20		20		108752	use App::Fasops -command;
	20				58
	20				199
7	20		20		6735	use App::Fasops::Common;
	20				51
	20				20025
8
9						sub abstract {
10	2		2	1	48	return 'merge csv files based on @fields';
11						}
12
13						sub opt_spec {
14						return (
15	6		6	1	49	[ "outfile\|o=s", "Output filename. [stdout] for screen", { default => "stdout" }, ],
16						[ 'fields\|f=i@', 'fields as identifies, 0 as first column', { default => [0] }, ],
17						[ 'concat\|c', 'do concat other than merge. Keep first ID fields', ],
18						{ show_defaults => 1, }
19						);
20						}
21
22						sub usage_desc {
23	6		6	1	53849	return "fasops mergecsv [options] [more files]";
24						}
25
26						sub description {
27	1		1	1	1006	my $desc;
28	1				4	$desc .= ucfirst(abstract) . ".\n";
29	1				3	$desc .= <<'MARKDOWN';
30
31						* Accept one or more csv files
32						* infile == stdin means reading from STDIN
33
34						cat 1.csv 2.csv \| egaz mergecsv -f 0 -f 1
35						egaz mergecsv -f 0 -f 1 1.csv 2.csv
36
37						MARKDOWN
38
39	1				3	return $desc;
40						}
41
42						sub validate_args {
43	5		5	1	5315	my ( $self, $opt, $args ) = @_;
44
45	5	100			11	if ( @{$args} < 1 ) {
	5				17
46	1				3	my $message = "This command need one or more input files.\n\tIt found";
47	1				2	$message .= sprintf " [%s]", $_ for @{$args};
	1				4
48	1				3	$message .= ".\n";
49	1				10	$self->usage_error($message);
50						}
51	4				6	for ( @{$args} ) {
	4				11
52	6	50			130	next if lc $_ eq "stdin";
53	6	100			20	if ( !Path::Tiny::path($_)->is_file ) {
54	2				171	$self->usage_error("The input file [$_] doesn't exist.");
55						}
56						}
57
58						# make array splicing happier
59	2				97	$opt->{fields} = [ sort @{ $opt->{fields} } ];
	2				20
60						}
61
62						sub execute {
63	2		2	1	13	my ( $self, $opt, $args ) = @_;
64
65						#----------------------------#
66						# read
67						#----------------------------#
68	2				5	my $index_of = {}; # index of ids in @lines
69	2				3	my @lines;
70	2				6	my ( $count_all, $index ) = ( 0, 0 );
71
72	2				5	for my $infile ( @{$args} ) {
	2				4
73
74						#@type IO::Handle
75	4				330	my $in_fh;
76	4	50			13	if ( lc $infile eq "stdin" ) {
77	0				0	$in_fh = *STDIN{IO};
78						}
79						else {
80	4				19	$in_fh = IO::Zlib->new( $infile, "rb" );
81						}
82
83	4				6167	while ( !$in_fh->eof ) {
84	20				973	my $line = $in_fh->getline;
85	20				3009	chomp $line;
86	20	50			42	next unless $line;
87
88	20				31	$count_all++;
89	20				28	my $id = join( "_", ( split ",", $line )[ @{ $opt->{fields} } ] );
	20				76
90	20	100			54	if ( exists $index_of->{$id} ) {
91	10	100			43	if ( $opt->{concat} ) {
92	5				10	my $ori_index = $index_of->{$id};
93	5				9	my $ori_line = $lines[$ori_index];
94
95	5				12	my @fs = split ",", $line;
96	5				9	for my $f_idx ( reverse @{ $opt->{fields} } ) {
	5				12
97	5				12	splice @fs, $f_idx, 1;
98						}
99	5				32	$lines[$ori_index] = join ",", $ori_line, @fs;
100						}
101						}
102						else {
103	10				38	$index_of->{$id} = $index;
104	10				22	push @lines, $line;
105	10				45	$index++;
106						}
107						}
108
109	4				603	$in_fh->close;
110						}
111
112						#----------------------------#
113						# check
114						#----------------------------#
115						{
116	2				257	my %seen;
	2				7
117	2				5	for (@lines) {
118	10				20	my $number = scalar split(",");
119	10				19	$seen{$number}++;
120						}
121	2	50			12	if ( keys(%seen) > 1 ) {
122	0				0	Carp::carp "*** Fields not identical, be careful.\n";
123	0				0	Carp::carp YAML::Syck::Dump { fields => \%seen, };
124						}
125						}
126
127						#----------------------------#
128						# write outputs
129						#----------------------------#
130	2				4	my $out_fh;
131	2	50			8	if ( lc( $opt->{outfile} ) eq "stdout" ) {
132	2				5	$out_fh = *STDOUT{IO};
133						}
134						else {
135	0				0	open $out_fh, ">", $opt->{outfile};
136						}
137
138	2				5	for (@lines) {
139	10				113	print {$out_fh} $_ . "\n";
	10				38
140						}
141	2				38	close $out_fh;
142
143	0					printf STDERR "Total lines [%d]; Result lines [%d].\n", $count_all, scalar @lines;
144
145	0					return;
146						}
147
148						1;