line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Text::CSV::UniqueColumns; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
28522
|
use 5.008008; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
28
|
|
4
|
1
|
|
|
1
|
|
5
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
29
|
|
5
|
1
|
|
|
1
|
|
4
|
use warnings; |
|
1
|
|
|
|
|
6
|
|
|
1
|
|
|
|
|
1475
|
|
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
require Exporter; |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
our $VERSION = '0.3'; |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
my (%headerHash, @cleanup); |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
sub new { |
17
|
0
|
|
|
0
|
0
|
|
my ($class) = shift; |
18
|
0
|
|
0
|
|
|
|
my ($sFile) = shift || die "Please provide csv file as argument\n"; |
19
|
0
|
|
|
|
|
|
my ($sCols) = shift; |
20
|
0
|
0
|
|
|
|
|
die "$sFile not found" if (!-e $sFile); |
21
|
|
|
|
|
|
|
|
22
|
0
|
|
|
|
|
|
my $self = { |
23
|
|
|
|
|
|
|
'_file' => $sFile, |
24
|
|
|
|
|
|
|
'_cols' => $sCols, |
25
|
|
|
|
|
|
|
'_headers' => "" |
26
|
|
|
|
|
|
|
}; |
27
|
|
|
|
|
|
|
|
28
|
0
|
|
|
|
|
|
bless $self, $class; |
29
|
|
|
|
|
|
|
|
30
|
0
|
|
|
|
|
|
getHeaders($self,$sFile); |
31
|
0
|
0
|
|
|
|
|
die "Could not get headers\n" if (!$self->{'_headers'}); |
32
|
|
|
|
|
|
|
|
33
|
0
|
|
|
|
|
|
return $self; |
34
|
|
|
|
|
|
|
} |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
sub checkUniq { |
37
|
0
|
|
|
0
|
0
|
|
my ($self) = shift; |
38
|
0
|
|
0
|
|
|
|
my $sCols = shift || return "Provide cols as arguments\n"; |
39
|
0
|
|
|
|
|
|
$self->{'_cols'} = $sCols; |
40
|
0
|
|
|
|
|
|
my $iCount = 1; |
41
|
0
|
|
|
|
|
|
my $sPasteOutput; |
42
|
0
|
|
|
|
|
|
my $sPasteFiles = " "; |
43
|
0
|
|
|
|
|
|
my @CompositeCols = split(',',$sCols); |
44
|
0
|
|
|
|
|
|
foreach my $sCol (@CompositeCols) { |
45
|
0
|
|
|
|
|
|
chomp($sCol); |
46
|
0
|
0
|
|
|
|
|
if ($headerHash{$sCol}) { |
47
|
0
|
|
|
|
|
|
$headerHash{"Composite$iCount"} = `cut -f$headerHash{$sCol} -d , $self->{'_file'}`; |
48
|
0
|
0
|
|
|
|
|
open (FILE , ">Composite$iCount") or return "Cannot write Composite$iCount $! \n"; |
49
|
0
|
|
|
|
|
|
push (@cleanup, "Composite$iCount"); |
50
|
0
|
|
|
|
|
|
print FILE $headerHash{"Composite$iCount"}; |
51
|
0
|
|
|
|
|
|
$sPasteFiles .= " Composite$iCount"; |
52
|
0
|
|
|
|
|
|
close(FILE); |
53
|
0
|
|
|
|
|
|
$iCount++; |
54
|
|
|
|
|
|
|
} |
55
|
|
|
|
|
|
|
else { |
56
|
0
|
|
|
|
|
|
return "Column - $sCol not found\n INFO - Use \"-l\" option to list columns in file\n"; |
57
|
|
|
|
|
|
|
} |
58
|
|
|
|
|
|
|
} |
59
|
0
|
|
|
|
|
|
my $sCmd = "paste -d , $sPasteFiles > pasteOutput "; |
60
|
0
|
|
|
|
|
|
push (@cleanup, 'pasteOutput'); |
61
|
0
|
|
|
|
|
|
$sPasteOutput = `$sCmd`; |
62
|
0
|
|
|
|
|
|
my $iCount1 = `cat pasteOutput | sed s/' '//g | wc -l`; |
63
|
0
|
|
|
|
|
|
my $iCount2 = `cat pasteOutput | sed s/' '//g | sort | uniq | wc -l`; |
64
|
0
|
|
|
|
|
|
cleanUp(); |
65
|
0
|
0
|
|
|
|
|
if ($iCount1 == $iCount2) { |
66
|
0
|
|
|
|
|
|
return "1"; #unique |
67
|
|
|
|
|
|
|
} |
68
|
|
|
|
|
|
|
else { |
69
|
0
|
|
|
|
|
|
return "0"; |
70
|
|
|
|
|
|
|
} |
71
|
|
|
|
|
|
|
} |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
sub getColumnList { |
74
|
0
|
|
|
0
|
0
|
|
my ($self) = @_; |
75
|
0
|
|
|
|
|
|
foreach (@{$self->{'_headers'}}) { |
|
0
|
|
|
|
|
|
|
76
|
0
|
|
|
|
|
|
return join(',', @{$self->{'_headers'}}); |
|
0
|
|
|
|
|
|
|
77
|
0
|
|
|
|
|
|
print "$_\n"; |
78
|
|
|
|
|
|
|
} |
79
|
|
|
|
|
|
|
} |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
sub getUniqCols { |
83
|
0
|
|
|
0
|
0
|
|
my ($self) = @_; |
84
|
0
|
|
|
|
|
|
my ($sUniqCols) = " "; |
85
|
0
|
|
|
|
|
|
foreach my $iNo ( 0 .. (scalar(@{$self->{'_headers'}}) - 1)) { |
|
0
|
|
|
|
|
|
|
86
|
0
|
|
|
|
|
|
my $iField = $iNo + 1; |
87
|
0
|
|
|
|
|
|
my $sCmd = "cut -f$iField -d , $self->{'_file'} | sed s/' '//g | wc -l;"; |
88
|
0
|
|
|
|
|
|
$sCmd .= "cut -f$iField -d , $self->{'_file'} | sed s/' '//g | sort | uniq | wc -l"; |
89
|
0
|
|
|
|
|
|
my ($iCount1, $iCount2) = split("\n",`$sCmd`); |
90
|
|
|
|
|
|
|
|
91
|
0
|
0
|
|
|
|
|
if ( $iCount1 == $iCount2) { |
92
|
0
|
|
|
|
|
|
$sUniqCols .= $self->{'_headers'}->[$iNo].","; |
93
|
|
|
|
|
|
|
} |
94
|
|
|
|
|
|
|
else { |
95
|
0
|
|
|
|
|
|
next; |
96
|
|
|
|
|
|
|
} |
97
|
|
|
|
|
|
|
} |
98
|
0
|
|
|
|
|
|
chop ($sUniqCols); |
99
|
0
|
|
|
|
|
|
return $sUniqCols; |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
} |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
sub buildHeaderHash { |
104
|
0
|
|
|
0
|
0
|
|
my ($self) = @_; |
105
|
0
|
|
|
|
|
|
my $iColNo = 1; |
106
|
0
|
|
|
|
|
|
foreach my $sCol (@{$self->{'_headers'}}) { |
|
0
|
|
|
|
|
|
|
107
|
0
|
|
|
|
|
|
$sCol =~ s/\s+//g; |
108
|
0
|
|
|
|
|
|
$sCol =~ s/\n//g; |
109
|
0
|
|
|
|
|
|
$headerHash{$sCol} = $iColNo; |
110
|
0
|
|
|
|
|
|
$iColNo++; |
111
|
|
|
|
|
|
|
} |
112
|
|
|
|
|
|
|
} |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
sub getHeaders { |
116
|
0
|
|
|
0
|
0
|
|
my ($self,$sFile) = @_; |
117
|
0
|
|
|
|
|
|
print "file is $sFile \n"; |
118
|
0
|
|
|
|
|
|
my @headers = split(',', `head -1 $sFile`); |
119
|
0
|
|
|
|
|
|
$self->{'_headers'} = \@headers; |
120
|
0
|
|
|
|
|
|
buildHeaderHash($self); |
121
|
|
|
|
|
|
|
} |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
sub cleanUp { |
124
|
0
|
|
|
0
|
0
|
|
foreach my $sFile (@cleanup){ |
125
|
0
|
|
|
|
|
|
`rm -f $sFile`; |
126
|
|
|
|
|
|
|
} |
127
|
|
|
|
|
|
|
} |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
1; |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
__END__ |