| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Text::CSV::UniqueColumns;

# Report which columns (or comma-separated combinations of columns) of a
# simple CSV file hold unique values.
#
# Parsing is deliberately naive: every line is split on ',' and quoted
# fields are NOT understood.  Space characters are removed from every line
# before values are compared, and the header row takes part in the
# comparison like any other row.

use 5.008008;
use strict;
use warnings;

require Exporter;

our @ISA = qw(Exporter);

our $VERSION = '0.3';

# Paths queued for removal by cleanUp().  Nothing in this module creates
# temporary files any more; the list only exists so cleanUp() stays callable.
my @cleanup;

# new($class, $sFile, $sCols)
#
# Build an object for the CSV file $sFile.  $sCols is optional and is only
# stored in the object.
#
# Dies with "Please provide csv file as argument\n" when no file is given,
# "<file> not found" when it does not exist, and "Could not get headers\n"
# when no header row can be read from it.
sub new {
    my ($class, $sFile, $sCols) = @_;

    die "Please provide csv file as argument\n"
        if !defined $sFile || $sFile eq '';
    die "$sFile not found" if !-e $sFile;

    my $self = bless {
        '_file'      => $sFile,
        '_cols'      => $sCols,
        '_headers'   => [],
        '_col_index' => {},
    }, $class;

    $self->getHeaders($sFile);

    # '_headers' is always an array reference, so test the list it holds.
    die "Could not get headers\n" if !@{ $self->{'_headers'} };

    return $self;
}

# checkUniq($self, $sCols)
#
# $sCols is a comma-separated list of header names.  Returns the string "1"
# when the combination of those columns is unique across all lines of the
# file and "0" when at least one combination repeats.  On bad input a
# human-readable message string is returned instead (callers must compare
# against "1"/"0", not test truthiness).
sub checkUniq {
    my ($self, $sCols) = @_;

    return "Provide cols as arguments\n"
        if !defined $sCols || $sCols eq '';
    $self->{'_cols'} = $sCols;

    # Translate the requested names into zero-based field positions.
    my @wanted;
    for my $sCol (split /,/, $sCols) {
        # Header names are stored with all whitespace removed, so the
        # requested name is normalised the same way before the lookup.
        (my $key = $sCol) =~ s/\s+//g;
        if (!exists $self->{'_col_index'}{$key}) {
            chomp $sCol;
            return "Column - $sCol not found\n INFO - Use \"-l\" option to list columns in file\n";
        }
        push @wanted, $self->{'_col_index'}{$key};
    }
    return "Provide cols as arguments\n" if !@wanted;

    open my $fh, '<', $self->{'_file'}
        or return "Cannot read $self->{'_file'} $! \n";

    my %seen;
    my $unique = 1;
    while (my $line = <$fh>) {
        my @fields = _fields($line);

        # Fields missing from a short line compare as empty strings.
        my $key = join ',',
            map { defined $fields[$_] ? $fields[$_] : '' } @wanted;
        if ($seen{$key}++) {
            $unique = 0;
            last;    # one repeat is enough to decide
        }
    }
    close $fh;

    return $unique ? "1" : "0";
}

# getColumnList($self)
#
# Returns the header names joined with ','.
sub getColumnList {
    my ($self) = @_;
    return join(',', @{ $self->{'_headers'} });
}

# getUniqCols($self)
#
# Returns the names of all columns whose values never repeat, joined with
# ',' (an empty string when there are none or the file cannot be read).
sub getUniqCols {
    my ($self) = @_;

    my @headers = @{ $self->{'_headers'} };
    return '' if !@headers;

    open my $fh, '<', $self->{'_file'} or return '';

    my @seen   = map { +{} } @headers;    # values met so far, per column
    my @unique = (1) x @headers;          # still-unique flag, per column

    while (my $line = <$fh>) {
        my @fields = _fields($line);
        for my $i (0 .. $#headers) {
            next if !$unique[$i];
            my $value = defined $fields[$i] ? $fields[$i] : '';
            $unique[$i] = 0 if $seen[$i]{$value}++;
        }
    }
    close $fh;

    return join ',', map { $headers[$_] } grep { $unique[$_] } 0 .. $#headers;
}

# buildHeaderHash($self)
#
# Strips all whitespace from the stored header names (in place) and records
# each name's zero-based position in $self->{'_col_index'}.  When a name
# occurs more than once the last position wins.
sub buildHeaderHash {
    my ($self) = @_;

    my %index;
    my $position = 0;
    for my $name (@{ $self->{'_headers'} }) {
        $name =~ s/\s+//g;    # $name aliases the array element
        $index{$name} = $position++;
    }
    $self->{'_col_index'} = \%index;

    return;
}

# getHeaders($self, $sFile)
#
# Reads the first line of $sFile (default: the object's own file), splits
# it on ',' into $self->{'_headers'} and rebuilds the column index.  An
# unreadable or empty file leaves an empty header list.
sub getHeaders {
    my ($self, $sFile) = @_;
    $sFile = $self->{'_file'} if !defined $sFile;

    my @headers;
    if (open my $fh, '<', $sFile) {
        my $first = <$fh>;
        close $fh;
        if (defined $first) {
            $first =~ s/\r?\n\z//;
            # Limit -1 keeps a trailing empty header ("a,b," has 3 columns).
            @headers = split /,/, $first, -1;
        }
    }

    $self->{'_headers'} = \@headers;
    buildHeaderHash($self);

    return;
}

# cleanUp()
#
# Removes every path queued in @cleanup and empties the queue.  Kept for
# backward compatibility; the queue is normally empty.
sub cleanUp {
    unlink @cleanup if @cleanup;
    @cleanup = ();
    return;
}

# _fields($line)
#
# Private helper: drops the line terminator and every space character from
# $line and returns its comma-separated fields (empty list for an empty
# line).
sub _fields {
    my ($line) = @_;
    $line =~ s/\r?\n\z//;
    $line =~ tr/ //d;
    return split /,/, $line, -1;
}

1;
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
__END__ |