line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
2
|
|
|
|
|
|
|
# Copyright (c) 2002-2003 Vivendi Universal Net USA |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# May be copied under the same terms as perl itself. |
5
|
|
|
|
|
|
|
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
# Database-like operations on tab-delimited files. |
9
|
|
|
|
|
|
|
# |
10
|
|
|
|
|
|
|
# Given two files: |
11
|
|
|
|
|
|
|
# band_data.tab with fields band_id, band_name, and band_status |
12
|
|
|
|
|
|
|
# song_data.tab with fields song_id, band_id, song_title |
13
|
|
|
|
|
|
|
# |
14
|
|
|
|
|
|
|
# The following sequence is more or less equivalent to |
15
|
|
|
|
|
|
|
# |
16
|
|
|
|
|
|
|
# SELECT song_id, band_data.band_id AS band_id, |
17
|
|
|
|
|
|
|
# song_title, band_name, |
18
|
|
|
|
|
|
|
# int(band_id/1000) AS band_dir |
19
|
|
|
|
|
|
|
# FROM song_data INNER JOIN band_data ON song_data.band_id=band_data.band_id |
20
|
|
|
|
|
|
|
# WHERE band_status = 'APPROVED' |
21
|
|
|
|
|
|
|
# ORDER BY band_name |
22
|
|
|
|
|
|
|
# INTO TABLE songband |
23
|
|
|
|
|
|
|
# |
24
|
|
|
|
|
|
|
# |
25
|
|
|
|
|
|
|
# $band_data = Text::TabTable->import_headered("band_data.tab") ; |
26
|
|
|
|
|
|
|
# $song_data = Text::TabTable->import_headered("song_data.tab") ; |
27
|
|
|
|
|
|
|
# $joined = $song_data->join($band_data, "band_id", "band_id", "INNER") ; |
28
|
|
|
|
|
|
|
# $selected = $joined->select( |
29
|
|
|
|
|
|
|
# [ |
30
|
|
|
|
|
|
|
# 'song_id', |
31
|
|
|
|
|
|
|
# ['band_data.band_id', 'band_id'], |
32
|
|
|
|
|
|
|
# 'song_title', |
33
|
|
|
|
|
|
|
# 'band_name', |
34
|
|
|
|
|
|
|
# [ sub { int($_[0] / 1000) }, "band_dir", ["band_id"]], |
35
|
|
|
|
|
|
|
# ], |
36
|
|
|
|
|
|
|
# |
37
|
|
|
|
|
|
|
# sub { $_[0]->band_status eq 'APPROVED' }, |
38
|
|
|
|
|
|
|
# ) ; |
39
|
|
|
|
|
|
|
# $out = $selected->order("band_name") ; |
40
|
|
|
|
|
|
|
# $out->export_headered("songband.tab") ; |
41
|
|
|
|
|
|
|
# |
42
|
|
|
|
|
|
|
############################################################################## |
43
|
|
|
|
|
|
|
# |
44
|
|
|
|
|
|
|
# You can speed up LEFT and INNER joins on primary keys if you create an index |
45
|
|
|
|
|
|
|
# for the primary key column on the *right-side* table using |
46
|
|
|
|
|
|
|
# |
47
|
|
|
|
|
|
|
# $righttable->build_primary_index("band_id") ; |
48
|
|
|
|
|
|
|
# $newtable = $lefttable->join($righttable, "band_id", "band_id", "LEFT") ; |
49
|
|
|
|
|
|
|
# |
50
|
|
|
|
|
|
|
# If both tables are already sorted by the primary key because order() was |
51
|
|
|
|
|
|
|
# previously used, this will be slower. |
52
|
|
|
|
|
|
|
# |
53
|
|
|
|
|
|
|
# The index will not be used for RIGHT joins. |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
package Text::TabTable ; |
56
|
|
|
|
|
|
|
|
57
|
1
|
|
|
1
|
|
749
|
use strict ; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
29
|
|
58
|
1
|
|
|
1
|
|
4
|
use Carp ; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
162
|
|
59
|
1
|
|
|
1
|
|
1122
|
use Data::Dumper ; |
|
1
|
|
|
|
|
11085
|
|
|
1
|
|
|
|
|
68
|
|
60
|
1
|
|
|
1
|
|
10
|
use Fcntl qw(O_WRONLY O_EXCL O_CREAT) ; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
62
|
|
61
|
|
|
|
|
|
|
|
62
|
1
|
|
|
1
|
|
5
|
use vars qw($SORT $JOIN $VERBOSE $TMPDIR $VERSION) ; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
13886
|
|
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
$VERSION = "1.02" ; |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
$TMPDIR = "." ; |
67
|
|
|
|
|
|
|
$SORT = "/bin/sort" ; |
68
|
|
|
|
|
|
|
$JOIN = "/usr/bin/join" ; |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
$VERBOSE=$ENV{TABTABLE_VERBOSE} ; |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
#### |
73
|
|
|
|
|
|
|
# Constructor. Takes a tab delimited file with a field name |
74
|
|
|
|
|
|
|
# header line and returns a TabTable object. Parses the header line |
75
|
|
|
|
|
|
|
# and creates a temporary file without the header line. |
76
|
|
|
|
|
|
|
#### |
77
|
|
|
|
|
|
|
sub import_headered
{
  # Constructor.  Reads a tab-delimited file whose first line holds the
  # field names and copies the remaining lines to a tempfile.
  #
  #   $package - class to bless the new table into
  #   $fname   - path of the headered tab-delimited file
  #
  # Returns the new table object, or undef if the input cannot be opened
  # or the tempfile cannot be written.
  my ($package, $fname) = @_ ;

  carp "importing $fname" if $VERBOSE ;

  # Open the input before asking for a tempfile so that an unreadable
  # input does not leave a stray tempfile behind.  Three-argument open
  # keeps characters such as '>' or '|' in $fname from being treated as
  # an open mode.
  open(my $in, '<', $fname) || return undef ;

  my $newf = _make_tempfile() ;
  my $out ;
  if (!open($out, '>', $newf)) {
    close $in ;
    unlink $newf ;
    return undef ;
  }

  my $header = <$in> ;
  $header = '' if !defined $header ;   # empty file: table with no fields

  # copy the unheadered version of the file to a new file.
  my $copied = 1 ;
  my $buf ;
  while (read($in, $buf, 2048)) {
    if (!print {$out} $buf) {
      $copied = 0 ;
      last ;
    }
  }
  close $in ;
  # Buffered write errors only surface at close, so check it.
  $copied = 0 if !close($out) ;
  if (!$copied) {
    unlink $newf ;
    return undef ;
  }

  chomp $header ;
  my @fieldnames = split(/\t/, $header) ;
  my @fields = map { Text::TabTable::Field->new($_) } @fieldnames ;

  # The table name is the file's basename without extensions.  The path
  # must be removed first; stripping from the first '.' of the whole path
  # would turn "./dir/band.tab" into an empty name.
  my $name = $fname ;
  $name =~ s@.*/@@ ;        # remove path
  $name =~ s/\..*$// ;      # remove extensions

  my $self = {
    filename  => $newf,
    fieldlist => Text::TabTable::FieldList->new(@fields),
    name      => $name,
  } ;
  return bless $self, $package ;
}
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
#### |
112
|
|
|
|
|
|
|
# Alternate constructor. Takes a tab delimited file *without* a field name |
113
|
|
|
|
|
|
|
# header line, plus the field names, and returns a TabTable object. This |
114
|
|
|
|
|
|
|
# saves time because it doesn't require making a tempfile without the header. |
115
|
|
|
|
|
|
|
#### |
116
|
|
|
|
|
|
|
sub import_unheadered
{
  # Alternate constructor.  Wraps an existing tab-delimited file that has
  # no header line; the field names are supplied by the caller.  The file
  # is used in place (no copy is made) and is flagged 'dontdelete'.
  #
  #   $package    - class to bless the new table into
  #   $fname      - path of the unheadered tab-delimited file
  #   @fieldnames - names of the columns, in order
  #
  # Returns the new table object, or undef if $fname is not a readable
  # plain file.
  my ($package, $fname, @fieldnames) = @_ ;

  carp "importing $fname (unheadered)" if $VERBOSE ;
  return undef if !-f $fname || !-r $fname ;

  # No tempfile is requested here: the table reads $fname directly, so a
  # tempfile would never be used or recorded anywhere.

  my @fields = map { Text::TabTable::Field->new($_) } @fieldnames ;

  # The table name is the file's basename without extensions.  The path
  # must be removed first; stripping from the first '.' of the whole path
  # would turn "./dir/band.tab" into an empty name.
  my $name = $fname ;
  $name =~ s@.*/@@ ;        # remove path
  $name =~ s/\..*$// ;      # remove extensions

  my $self = {
    filename   => $fname,
    dontdelete => 1,        # so we know it's not a tempfile.
    fieldlist  => Text::TabTable::FieldList->new(@fields),
    name       => $name,
  } ;
  return bless $self, $package ;
}
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
#### |
141
|
|
|
|
|
|
|
# Undoes the escaping done by MediaExtractor. |
142
|
|
|
|
|
|
|
#### |
143
|
|
|
|
|
|
|
sub unescape
{
  # Undoes the escaping done by escape(): the two-character sequences
  # backslash-backslash, backslash-n and backslash-t become a single
  # backslash, newline and tab.  Any other backslash sequence is left
  # untouched.
  #
  #   $str - escaped string
  #
  # Returns the unescaped string.
  my ($str) = @_ ;

  # Decode in a single left-to-right pass.  A multi-pass approach needs a
  # placeholder character for the doubled backslash, which corrupts any
  # input that already contains that character.
  my %decoded = ('\\' => '\\', 'n' => "\n", 't' => "\t") ;
  $str =~ s/\\([\\nt])/$decoded{$1}/g ;

  return $str ;
}
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
#### |
157
|
|
|
|
|
|
|
# This is the same escaping done by MediaExtractor. |
158
|
|
|
|
|
|
|
#### |
159
|
|
|
|
|
|
|
sub escape
{
  # Escapes a value for storage in a tab-delimited file: backslash
  # becomes two backslashes, tab becomes backslash-t, and newline becomes
  # backslash-n.
  #
  #   $str - raw string
  #
  # Returns the escaped string.
  my ($str) = @_ ;

  # One pass over the string, so backslashes introduced for tabs and
  # newlines are never escaped a second time.
  my %encoded = ("\\" => "\\\\", "\t" => "\\t", "\n" => "\\n") ;
  $str =~ s/([\\\t\n])/$encoded{$1}/g ;

  return $str ;
}
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
#### |
172
|
|
|
|
|
|
|
# Writes out a table as a file with a header. |
173
|
|
|
|
|
|
|
#### |
174
|
|
|
|
|
|
|
sub export_headered
{
  # Writes the table to $filename as a tab-delimited file whose first
  # line is the field-name header.
  #
  #   $self     - the table
  #   $filename - path of the file to create (truncated if it exists)
  #
  # Croaks if either file cannot be opened or the output cannot be
  # written.  Returns 0 on success.
  my ($self, $filename) = @_ ;
  carp "exporting $self->{name} to $filename" if $VERBOSE ;

  # The data is copied with Perl I/O rather than through a shell command,
  # so filenames containing spaces or shell metacharacters are safe and a
  # failed copy is reported instead of leaving a header-only file.
  open(my $in, '<', $self->{filename})
    || croak "$self->{filename}: $!\n" ;
  open(my $out, '>', $filename) || croak "$filename: $!\n" ;

  print {$out} $self->{fieldlist}->as_string(), "\n"
    or croak "$filename: $!\n" ;

  my $buf ;
  while (read($in, $buf, 65536)) {
    print {$out} $buf or croak "$filename: $!\n" ;
  }
  close $in ;
  # Buffered write errors only surface at close.
  close($out) || croak "$filename: $!\n" ;

  return 0 ;
}
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
#### |
186
|
|
|
|
|
|
|
# Writes out a table as a file without a header. |
187
|
|
|
|
|
|
|
#### |
188
|
|
|
|
|
|
|
sub export_unheadered
{
  # Writes the table to $filename as a tab-delimited file without a
  # header line, by copying the table's data file with cp(1).
  #
  #   $self     - the table
  #   $filename - destination path
  #
  # Croaks (after removing any partial output) if the copy fails.
  my ($self, $filename) = @_ ;
  carp "exporting $self->{name} to $filename" if $VERBOSE ;

  # List-form system() bypasses the shell, so filenames with spaces or
  # shell metacharacters are passed to cp unchanged.
  system('cp', $self->{filename}, $filename) ;
  if ($?) {
    # $! is only meaningful when the command could not be started at all
    # ($? == -1); otherwise report cp's wait status.
    my $reason = ($? == -1) ? "$!" : "cp failed (wait status $?)" ;
    unlink $filename ;
    croak "can't export to $filename: $reason" ;
  }
  return ;
}
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
#### |
200
|
|
|
|
|
|
|
# Returns a new table that has only one of each value in the specified |
201
|
|
|
|
|
|
|
# column. |
202
|
|
|
|
|
|
|
#### |
203
|
|
|
|
|
|
|
sub uniq
{
  # Returns a new table holding only the first row for each distinct
  # value of the named column.  The table is sorted by that column first
  # unless it is already recorded as sorted by it.
  #
  #   $table   - the table
  #   $colname - name of the column whose values must be unique
  #
  # Croaks if there is no such column or a file cannot be opened.
  my ($table, $colname) = @_ ;

  my $colnum = $table->{fieldlist}->find_colnum($colname) ;
  # Test the looked-up column number; the name itself is always true.
  croak "no field $colname in table" if !$colnum ;

  # 'sorted_colnum' is the key that order() and join() record; it is
  # what tells us whether equal values are already adjacent.
  if (!$table->{sorted_colnum} || $table->{sorted_colnum} != $colnum) {
    my $name = $table->name() ;
    $table = $table->order($colname) ;
    $table->name($name) ;
  }

  carp "uniquing $table->{name} by $colname" if $VERBOSE ;

  # Open the input before asking for a tempfile so a failure here does
  # not leave a stray tempfile behind.
  open(my $oldfh, '<', $table->{filename})
    || croak "$table->{filename}: $!\n" ;

  my $newf = _make_tempfile() ;
  my $newfh ;
  if (!open($newfh, '>', $newf)) {
    my $err = "$!" ;
    close $oldfh ;
    unlink $newf ;
    croak "$newf: $err\n" ;
  }

  my $oldval = undef ;
  while (defined(my $line = <$oldfh>)) {
    chomp $line ;
    my @f = split(/\t/, $line, -1) ;
    # A short row (trailing empty columns missing) has an empty value.
    my $val = $f[$colnum-1] ;
    $val = '' if !defined $val ;
    if (!defined $oldval || $oldval ne $val) {
      print {$newfh} $line, "\n" ;
      $oldval = $val ;
    }
  }

  close($oldfh) ;
  close($newfh) || croak "$newf: $!\n" ;

  my $newtable = {
    filename      => $newf,
    fieldlist     => $table->{fieldlist}->deepcopy(),
    sorted_colnum => $colnum,
    name          => $table->name(),
  } ;

  if (!defined wantarray) {
    carp "Warning: Useless uniq in void context." ;
  }

  return bless $newtable, ref $table ;
}
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
#### |
251
|
|
|
|
|
|
|
# Export to a cdb file. |
252
|
|
|
|
|
|
|
# There will be a special key "*FIELDNAMES*" whose value is a tab |
253
|
|
|
|
|
|
|
# separated list of the names of the fields. |
254
|
|
|
|
|
|
|
# The rest of the cdb file will be of the form key => tab-delimited-values. |
255
|
|
|
|
|
|
|
# |
256
|
|
|
|
|
|
|
# The key must be unique; however as a special case multiple blank keys |
257
|
|
|
|
|
|
|
# are allowed to be present; only the first one is used. This is a hack, |
258
|
|
|
|
|
|
|
# but is too good an optimization to pass up. |
259
|
|
|
|
|
|
|
#### |
260
|
|
|
|
|
|
|
sub export_cdb
{
  # Exports the table to a cdb file keyed on the named column.  The
  # special key "*FIELDNAMES*" holds the tab-separated field names; every
  # other record maps a key value to the complete tab-delimited row.
  # Only the first row with a blank key is stored.
  #
  #   $self     - the table
  #   $filename - path of the cdb file to create
  #   $colname  - name of the key column
  #
  # Croaks if the column does not exist or the cdb cannot be created;
  # dies if the data file cannot be opened or a row lacks the key column.
  my ($self, $filename, $colname) = @_ ;
  require CDB_File ;
  carp "exporting $self->{name} to cdb $filename" if $VERBOSE ;

  # Validate the column before anything is created on disk.  An unknown
  # column would otherwise silently key the cdb on the first column.
  my $colnum = $self->{fieldlist}->find_colnum($colname) ;
  croak "no field $colname in table" if !$colnum ;

  open(my $in, '<', $self->{filename}) || die "$self->{filename}: $!" ;

  my $t = CDB_File->new($filename, "$filename.new$$")
    or croak "$filename: $!" ;

  $t->insert("*FIELDNAMES*", $self->{fieldlist}->as_string()) ;

  # Create a regex to skip over the columns before the key column and
  # collect the key column in $1.
  my $regex = '^' . ('[^\t]*\t' x ($colnum - 1)) . '([^\t]*)' ;
  $regex = qr/$regex/ ;

  my $didblankkey = 0 ;
  while (defined(my $line = <$in>)) {
    chomp $line ;
    $line =~ $regex
      || die "$self->{filename}: line $. has no column $colnum" ;
    my $key = $1 ;

    # Multiple blank keys are tolerated; only the first is kept.
    next if $key eq '' && $didblankkey++ ;

    $t->insert($key, $line) ;
  }

  $t->finish() ;
  return close($in) ;
}
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
#### |
294
|
|
|
|
|
|
|
# Returns the named column of the table as an array. |
295
|
|
|
|
|
|
|
#### |
296
|
|
|
|
|
|
|
sub export_column
{
  # Returns the values of the named column as a list, one element per
  # row, extracted with cut(1).
  #
  #   $table   - the table
  #   $colname - name of the column to extract
  #
  # Croaks if there is no such column or cut cannot be run.
  my ($table, $colname) = @_ ;
  carp "exporting $table->{name} column $colname" if $VERBOSE ;

  my $colnum = $table->{fieldlist}->find_colnum($colname) ;

  # Column numbers are 1-based, so any false value means "not found".
  if (!$colnum) {
    croak "no column $colname" ;
  }

  # List-form pipe open (Perl 5.8+) runs cut without a shell, so the
  # filename is passed through unchanged.
  open(my $cut, '-|', 'cut', "-f$colnum", $table->{filename})
    || croak "can't run cut: $!" ;

  my @arr ;
  # A lexical line variable keeps the caller's $_ intact.
  while (defined(my $value = <$cut>)) {
    chomp $value ;
    push @arr, $value ;
  }
  close($cut) || croak "cut failed on $table->{filename}" ;

  return @arr ;
}
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
#### |
319
|
|
|
|
|
|
|
# Returns a new TabTable that is sorted by the requested column. Dies |
320
|
|
|
|
|
|
|
# if no such column. You can specify -descending=>1 or -numeric =>1 |
321
|
|
|
|
|
|
|
# after the fieldname. |
322
|
|
|
|
|
|
|
# |
323
|
|
|
|
|
|
|
# The sort is stable, so you can sort on multiple fields by doing |
324
|
|
|
|
|
|
|
# multiple sorts, with the most important one last. |
325
|
|
|
|
|
|
|
#### |
326
|
|
|
|
|
|
|
sub order
{
  # Returns a new table sorted by the named column, using the external
  # sort program in $SORT.  The sort is stable (-s).
  #
  #   $self      - the table
  #   $fieldname - name of the column to sort by
  #   %args      - optional: -descending => 1, -numeric => 1
  #
  # Croaks if there is no such column or the sort program fails.
  my ($self, $fieldname, %args) = @_ ;
  carp "sorting $self->{name} by $fieldname" if $VERBOSE ;

  # Resolve the column before asking for a tempfile, so a bad field name
  # leaves nothing to clean up.
  my $colnum = $self->{fieldlist}->find_colnum($fieldname) ;
  croak "No such field $fieldname" if !$colnum ;

  my $newf = _make_tempfile() ;

  # This is a flag that gets turned off if the sort is not alphabetic
  # and ascending.  In that case, the sort order is not correct for the
  # join() method, and so join() would have to re-sort.
  my $joinable_sort = 1 ;

  my @sortargs = (
    "-s",
    "-T$TMPDIR",
    "-t\t",
    "-k$colnum,$colnum",
    "-o$newf",
    $self->{filename},
  ) ;
  if ($args{-descending}) {
    unshift @sortargs, "-r" ;
    $joinable_sort = 0 ;
  }
  if ($args{-numeric}) {
    unshift @sortargs, "-n" ;
    $joinable_sort = 0 ;
  }

  system($SORT, @sortargs) ;

  if ($?) {
    # $! is only meaningful when the program could not be started.
    my $reason = ($? == -1) ? "$!" : "wait status $?" ;
    unlink $newf ;
    croak "sort error: $SORT ($reason)" ;
  }

  my $newtable = {
    filename      => $newf,
    fieldlist     => $self->{fieldlist}->deepcopy(),
    sorted_colnum => $joinable_sort ? $colnum : undef,
    name          => $self->name(),
  } ;

  if (!defined wantarray) {
    carp "Warning: Useless order in void context." ;
  }

  return bless $newtable, ref $self ;
}
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
#### |
382
|
|
|
|
|
|
|
# Returns a new table with two columns. The first column will contain the |
383
|
|
|
|
|
|
|
# unique values of the specified field, the second column will contain the |
384
|
|
|
|
|
|
|
# number of occurrences of that value. |
385
|
|
|
|
|
|
|
#### |
386
|
|
|
|
|
|
|
sub groupby_and_count
{
  # Returns a new two-column table: the distinct values of the named
  # field, and the number of rows in which each value occurs.
  #
  #   $table        - the table
  #   $fieldname    - name of the column to group by
  #   $newfieldname - name to give the count column
  #   %args         - passed through to order() (-descending, -numeric)
  #
  # Croaks if there is no such column or a file cannot be opened.
  my ($table, $fieldname, $newfieldname, %args) = @_ ;

  my $colnum = $table->{fieldlist}->find_colnum($fieldname) ;
  if (!$colnum) {
    croak "No such field $fieldname" ;
  }

  # Create a temporary table that is sorted by the specified column.
  my $sortedtable = $table->order($fieldname, %args) ;

  # Open the input before asking for a tempfile so a failure here does
  # not leave a stray tempfile behind.
  open(my $oldfh, '<', $sortedtable->{filename})
    || croak "$sortedtable->{filename}: $!\n" ;

  my $newf = _make_tempfile() ;
  my $newfh ;
  if (!open($newfh, '>', $newf)) {
    my $err = "$!" ;
    close $oldfh ;
    unlink $newf ;
    croak "$newf: $err\n" ;
  }

  # One pass through the sorted file, counting each run of equal values
  # and writing one "value<TAB>count" row per run.
  my $count = 0 ;
  my $oldval = undef ;
  while (defined(my $line = <$oldfh>)) {
    chomp $line ;
    my @f = split(/\t/, $line, -1) ;
    # A short row (trailing empty columns missing) counts as an empty
    # value instead of being dropped.
    my $val = $f[$colnum-1] ;
    $val = '' if !defined $val ;

    if (!defined $oldval) {
      $oldval = $val ;
      $count = 1 ;
    } elsif ($oldval ne $val) {
      print {$newfh} $oldval, "\t", $count, "\n" ;
      $oldval = $val ;
      $count = 1 ;
    } else {
      $count++ ;
    }
  }
  if (defined $oldval) {
    print {$newfh} $oldval, "\t", $count, "\n" ;
  }

  close($oldfh) ;
  close($newfh) || croak "$newf: $!\n" ;

  my @newfieldnames = ($fieldname, $newfieldname) ;
  my @newfields = map { Text::TabTable::Field->new($_) } @newfieldnames ;
  my $newtable = {
    filename      => $newf,
    fieldlist     => Text::TabTable::FieldList->new(@newfields),
    # Only claim the output is sorted by column 1 when the sort above was
    # recorded as join-compatible; a -numeric or -descending sort is not.
    sorted_colnum => $sortedtable->{sorted_colnum} ? 1 : undef,
    name          => $table->name(),
  } ;

  return bless $newtable, ref $table ;
}
437
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
#### |
439
|
|
|
|
|
|
|
# Takes a list of pairs of oldname=>newname and changes the names of fields |
440
|
|
|
|
|
|
|
# of the table. This wipes out the old field names entirely. |
441
|
|
|
|
|
|
|
sub rename_fields
{
  # Takes a list of pairs of oldname => newname and changes the names of
  # fields of the table, via set_name() on each field object.
  #
  #   $table   - the table
  #   @renames - flat list: oldname1, newname1, oldname2, newname2, ...
  #              (a trailing unpaired name is ignored)
  #
  # Croaks if an old name is not a field of the table, or if a new name
  # is undefined or empty.
  my ($table, @renames) = @_ ;

  my @fields = $table->{fieldlist}->fields() ;

  # Loop on the number of remaining arguments, not on the truth of the
  # names, so that a field called "0" does not end the loop early.
  while (@renames >= 2) {
    my ($oldname, $newname) = splice(@renames, 0, 2) ;

    if (!defined $newname || $newname eq '') {
      croak "invalid new name for field $oldname" ;
    }

    # An unknown name yields a false column number; indexing with it
    # would address $fields[-1] and rename the last field by mistake.
    my $colnum = $table->{fieldlist}->find_colnum($oldname) ;
    croak "No such field $oldname" if !$colnum ;

    $fields[$colnum-1]->set_name($newname) ;
  }
  return ;
}
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
#### |
455
|
|
|
|
|
|
|
# Gets or sets the name of the table. |
456
|
|
|
|
|
|
|
#### |
457
|
|
|
|
|
|
|
sub name
{
  # Gets or sets the name of the table.
  #
  #   $self - the table
  #   $name - optional new name; ignored when undefined
  #
  # Returns the (possibly updated) name.
  my ($self, $name) = @_ ;

  $self->{name} = $name if defined $name ;

  return $self->{name} ;
}
465
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
#### |
467
|
|
|
|
|
|
|
# takes a table and exports it as a cdb file, creating a primary key |
468
|
|
|
|
|
|
|
# index. This can make joins go faster if this table is on the right side |
469
|
|
|
|
|
|
|
# of the join, since neither table has to be sorted, and building a cdb |
470
|
|
|
|
|
|
|
# is generally faster than sorting (~O(n) instead of O(n log n)). |
471
|
|
|
|
|
|
|
#### |
472
|
|
|
|
|
|
|
sub build_primary_index
{
  # Exports the table to a cdb file keyed on the named column and
  # records that file under $self->{cdb}{<column number>}, where join()
  # looks for an index on the right-side table.
  #
  #   $self    - the table
  #   $colname - name of the primary-key column
  #
  # Croaks if there is no such column.  Returns the cdb filename.
  my ($self, $colname) = @_ ;

  # Resolve the column first: an unknown column must not produce an
  # index, and nothing should be created on disk for it.
  my $colnum = $self->{fieldlist}->find_colnum($colname) ;
  croak "no field $colname in table" if !$colnum ;

  my $newf = _make_tempfile() ;
  $self->export_cdb($newf, $colname) ;

  $self->{cdb}{$colnum} = $newf ;
  return $newf ;
}
482
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
#### |
484
|
|
|
|
|
|
|
# Returns a new table created by joining the two tables on a specified |
485
|
|
|
|
|
|
|
# column. the $side parameter can be specified as LEFT or RIGHT to |
486
|
|
|
|
|
|
|
# create LEFT/RIGHT joins, or can be INNER, OUTER, or undef. |
487
|
|
|
|
|
|
|
# $leftfield/$rightfield are the field names to be used in the two tables |
488
|
|
|
|
|
|
|
# for joining. |
489
|
|
|
|
|
|
|
# |
490
|
|
|
|
|
|
|
# If the right table has a primary index on the join column (created |
491
|
|
|
|
|
|
|
# by build_primary_index()), and it's either a left or inner join, |
492
|
|
|
|
|
|
|
# a simpler join algorithm will be used that does not require sorting. |
493
|
|
|
|
|
|
|
# |
494
|
|
|
|
|
|
|
# Both tables must have names. Tables get names either by setting them |
495
|
|
|
|
|
|
|
# with the name() method, or from the filename in the import_headered |
496
|
|
|
|
|
|
|
# method. |
497
|
|
|
|
|
|
|
#### |
498
|
|
|
|
|
|
|
sub join
{
  # Returns a new table created by joining two tables on one column
  # each, using the external join program in $JOIN (or the cdb index of
  # the right table, when one exists for the join column and the join is
  # INNER or LEFT).
  #
  #   $lefttable, $righttable - the tables; both must have names
  #   $leftfield, $rightfield - names of the join columns
  #   $side                   - 'INNER', 'LEFT', 'RIGHT', 'OUTER' or
  #                             undef (same as 'INNER')
  #
  # Every field of the result also gets a "tablename.fieldname" alias.
  # Croaks on unnamed tables, unknown fields, an invalid $side, or a
  # failure of the join program.
  my ($lefttable, $righttable, $leftfield, $rightfield, $side) = @_ ;

  if (!$lefttable->name() || !$righttable->name()) {
    croak "both tables must have name()s" ;
  }

  # Test the looked-up column numbers; the names themselves are always
  # true and would let an unknown field through.
  my $leftcol = $lefttable->{fieldlist}->find_colnum($leftfield) ;
  croak "no field $leftfield in left table" if !$leftcol ;
  my $rightcol = $righttable->{fieldlist}->find_colnum($rightfield) ;
  croak "no field $rightfield in right table" if !$rightcol ;

  # Validate the join type up front so the index path and the sort path
  # reject the same arguments.  The values are the join(1) options that
  # add unpaired rows.
  my %unpaired_opts_for = (
    INNER => "",
    LEFT  => "-a 1 ",
    RIGHT => "-a 2 ",
    OUTER => "-a 1 -a 2 ",
  ) ;
  my $jointype = defined $side ? $side : 'INNER' ;
  if (!exists $unpaired_opts_for{$jointype}) {
    croak "invalid side argument" ;
  }

  if ($righttable->{cdb}{$rightcol}
      && $jointype ne 'RIGHT' && $jointype ne 'OUTER') {
    if ($VERBOSE) {
      carp "index joining $lefttable->{name} with $righttable->{name}" ;
    }
    return $lefttable->_join_using_index($righttable,
                                         $leftcol, $rightcol, $jointype) ;
  }

  # tables must be sorted by field.
  if (!$lefttable->{sorted_colnum}
      || $lefttable->{sorted_colnum} ne $leftcol) {
    $lefttable = $lefttable->order($leftfield) ;
  }
  if (!$righttable->{sorted_colnum}
      || $righttable->{sorted_colnum} ne $rightcol) {
    $righttable = $righttable->order($rightfield) ;
  }

  carp "joining $lefttable->{name} with $righttable->{name}" if $VERBOSE ;

  # create a format string for join(1).
  # Looks like
  #   1.1,1.2,1.3,1.4, ... ,2.1,2.2,2.3, ...
  # CORE::join is spelled out because this package defines its own join.
  my $format =
    CORE::join(",", map { "1.$_" } 1..$lefttable->{fieldlist}->fieldcount())
    . "," .
    CORE::join(",", map { "2.$_" } 1..$righttable->{fieldlist}->fieldcount()) ;

  # The command goes through the shell for the output redirection, so
  # each filename is single-quoted to survive spaces and metacharacters.
  my $shell_quote = sub {
    my ($word) = @_ ;
    $word =~ s/'/'\\''/g ;
    return "'$word'" ;
  } ;

  my $newf = _make_tempfile() ;

  my $command = "$JOIN -1 $leftcol -2 $rightcol -o $format -t '\t' "
              . $unpaired_opts_for{$jointype}
              . $shell_quote->($lefttable->{filename}) . " "
              . $shell_quote->($righttable->{filename}) . " "
              . "> " . $shell_quote->($newf) ;

  system $command ;

  if ($?) {
    unlink $newf ;
    croak "join failed" ;
  }

  # We've now joined the files, so we just have to create a fieldlist
  # for the new table.
  my $leftlistcopy = $lefttable->{fieldlist}->deepcopy ;
  foreach my $field ($leftlistcopy->fields) {
    $field->add_name( $lefttable->name . "." . $field->name() ) ;
  }
  my $rightlistcopy = $righttable->{fieldlist}->deepcopy ;
  foreach my $field ($rightlistcopy->fields) {
    $field->add_name( $righttable->name . "." . $field->name() ) ;
  }

  # we've now got copies of the two fieldlists, with new aliases for
  # the field names of the form tablename.fieldname.  Construct
  # a final field list from these two lists.
  my @fields = ($leftlistcopy->fields, $rightlistcopy->fields) ;

  # RIGHT and OUTER joins emit right-only rows whose left join column is
  # empty, interleaved in key order, so the result is only sorted by the
  # left join column for INNER and LEFT joins.
  my $left_sorted = ($jointype eq 'INNER' || $jointype eq 'LEFT') ;

  my $newtable = {
    name          => $lefttable->{name},
    filename      => $newf,
    fieldlist     => Text::TabTable::FieldList->new(@fields),
    sorted_colnum => $left_sorted ? $leftcol : undef,
  } ;

  if (!defined wantarray) {
    carp "Warning: Useless join in void context." ;
  }
  return bless $newtable, ref $lefttable ;
}
594
|
|
|
|
|
|
|
|
595
|
|
|
|
|
|
|
#### |
596
|
|
|
|
|
|
|
# called by ->join() to perform a join when there is an appropriate cdb index |
597
|
|
|
|
|
|
|
# present on the right side table and it's not a right join. |
598
|
|
|
|
|
|
|
# |
599
|
|
|
|
|
|
|
# The column numbers passed in are 1-based. |
600
|
|
|
|
|
|
|
#### |
601
|
|
|
|
|
|
|
sub _join_using_index
{
  # Called by ->join() to perform an INNER or LEFT join when the right
  # table has a cdb index on the join column.  Each left row's key is
  # looked up in the index, so neither table needs to be sorted.
  #
  #   $lefttable, $righttable - the tables
  #   $leftcol, $rightcol     - 1-based join column numbers
  #   $side                   - 'LEFT' for a left join; anything else is
  #                             treated as an inner join
  #
  # Returns the joined table (not marked as sorted).  Croaks if a file or
  # the index cannot be opened; dies on a row lacking the join column.
  my ($lefttable, $righttable, $leftcol, $rightcol, $side) = @_ ;

  my $isleftjoin = defined $side && $side eq 'LEFT' ;
  my $emptyright ;
  if ($isleftjoin) {
    # The right-hand half of an unmatched row: all columns empty.
    $emptyright = "\t" x ($righttable->{fieldlist}->fieldcount() - 1) ;
  }

  require CDB_File ;

  # Open the index and the input before asking for a tempfile, so a
  # failure here does not leave a stray tempfile behind.
  my $cdbfile = $righttable->{cdb}{$rightcol} ;
  my %right ;
  tie(%right, 'CDB_File', $cdbfile) || croak "$cdbfile: $!" ;

  open(my $leftfh, '<', $lefttable->{filename})
    || croak "$lefttable->{filename}: $!" ;

  my $newf = _make_tempfile() ;
  my $newfh ;
  if (!open($newfh, '>', $newf)) {
    my $err = "$!" ;
    close $leftfh ;
    untie %right ;
    unlink $newf ;
    croak "$newf: $err" ;
  }

  # create a regex that will extract the join field from a tab delimited
  # line.
  my $regex = '^' . ('[^\t]*\t' x ($leftcol-1)) . '([^\t]*)' ;
  $regex = qr/$regex/ ;

  my $leftfieldcount = $lefttable->{fieldlist}->fieldcount() ;
  my $rightfieldcount = $righttable->{fieldlist}->fieldcount() ;

  # A lexical line variable keeps the caller's $_ intact.
  while (defined(my $line = <$leftfh>)) {
    chomp $line ;
    _add_missing_tabs(\$line, $leftfieldcount) ;
    $line =~ $regex || die "malformed temp file in line $line" ;
    my $key = $1 ;

    # "*FIELDNAMES*" is the index's own metadata record (written by
    # export_cdb), never a data row, so it must not count as a match.
    if ($key ne '*FIELDNAMES*' && exists $right{$key}) {
      # found a match.  Print a complete line.
      my $val = $right{$key} ;
      _add_missing_tabs(\$val, $rightfieldcount) ;
      print {$newfh} CORE::join("\t", $line, $val), "\n" ;
    } elsif ($isleftjoin) {
      # didn't match.  print a line if it's a left join, otherwise skip it.
      print {$newfh} CORE::join("\t", $line, $emptyright), "\n" ;
    }
  }

  untie %right ;

  close $leftfh ;
  close($newfh) || croak "$newf: $!" ;

  # We've now joined the files, so we just have to create a fieldlist
  # for the new table.
  my $leftlistcopy = $lefttable->{fieldlist}->deepcopy ;
  foreach my $field ($leftlistcopy->fields) {
    $field->add_name( $lefttable->name . "." . $field->name() ) ;
  }
  my $rightlistcopy = $righttable->{fieldlist}->deepcopy ;
  foreach my $field ($rightlistcopy->fields) {
    $field->add_name( $righttable->name . "." . $field->name() ) ;
  }

  # we've now got copies of the two fieldlists, with new aliases for
  # the field names of the form tablename.fieldname.  Construct
  # a final field list from these two lists.
  my @fields = ($leftlistcopy->fields, $rightlistcopy->fields) ;

  my $newtable = {
    name      => $lefttable->{name},
    filename  => $newf,
    fieldlist => Text::TabTable::FieldList->new(@fields),
  } ;

  if (!defined wantarray) {
    carp "Warning: Useless join in void context." ;
  }
  return bless $newtable, ref $lefttable ;
}
684
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
# Given a ref to a string that's supposed to have n columns, make sure there are |
686
|
|
|
|
|
|
|
# n-1 tabs by adding more at the end. |
687
|
|
|
|
|
|
|
sub _add_missing_tabs
{
  # Given a ref to a string that's supposed to have $n columns, make sure
  # there are at least $n-1 tabs by appending more at the end.  The
  # string is modified in place; a string that already has enough tabs is
  # left unchanged.
  #
  #   $strref - reference to the tab-delimited line
  #   $n      - expected number of columns
  my ($strref, $n) = @_ ;

  # tr with an empty replacement list counts without modifying.
  my $tabcount = ($$strref =~ tr/\t//) ;
  my $missing = $n - 1 - $tabcount ;

  $$strref .= "\t" x $missing if $missing > 0 ;
  return ;
}
697
|
|
|
|
|
|
|
|
698
|
|
|
|
|
|
|
|
699
|
|
|
|
|
|
|
####
# Processes a table and creates a new one containing the selected fields
# of the rows that pass an optional filter.
#
# parameters:
#   table is a Text::TabTable object.
#
#   fieldspecs is a listref containing items of any of the following forms
#       fieldname                    ( a simple scalar )
#       [fieldname, newfieldname]    ( for "cd_table.id as cd_id")
#       [sub {...}, newfieldname, [list of fieldnames]]
#           (for calculated fields.  The sub receives values for the
#            listed fields as parameters, and returns the new value)
#   fieldspecs can also be a simple "*" (or undef), which returns all
#   fields unchanged.
#
#   wheresub is an optional subref.  It is passed an object with getvalue,
#   setvalue, and autoloaded field-name-named methods to get and set values
#   of fields by name.  It is expected to return a true value if the
#   row should be included in the output.
#
# Returns the new table, blessed into the same class as the input table.
# Croaks on an unknown field name, a malformed fieldspec, or a file error.
####
sub select
{
  my ($table, $fieldspecs, $wheresub) = @_ ;

  carp "selecting from $table->{name}" if $VERBOSE ;

  my $newtable = { name => $table->{name} } ;

  # Cleared when a calculated column is requested.  Together with the
  # column-order check further down it decides whether cut can do the job.
  my $cut_ok = 1 ;

  # Looks up the zero-based column of a field name, croaking if the table
  # has no such field.  (find_colnum returns 1-based column numbers.)
  my $column_of = sub {
    my ($fieldname) = @_ ;
    my $colnum = $table->{fieldlist}->find_colnum($fieldname) ;
    croak "no field $fieldname in table" if !$colnum ;
    return $colnum - 1 ;
  } ;

  # Create a field list for the new table based on the selected fields,
  # and an array saying how to calculate each output field: either a
  # zero-based input column number, or [subref, [input column numbers]].
  my @fieldrules ;
  if (!defined $fieldspecs || (!ref $fieldspecs && $fieldspecs eq '*')) {
    # simple case.  Just copy the fieldlist; no rules are needed because
    # input = output.
    undef $fieldspecs ;
    $newtable->{fieldlist} = $table->{fieldlist}->deepcopy() ;
  } else {
    # @fields is the list of Field objects being built.
    my @fields ;

    foreach my $fieldspec (@$fieldspecs) {
      if (!ref $fieldspec) {
        # a simple scalar, representing a field name.
        push @fieldrules, $column_of->($fieldspec) ;
        push @fields, Text::TabTable::Field->new($fieldspec) ;
      } elsif (ref($fieldspec) ne 'ARRAY') {
        # anything that is not a listref cannot be a valid spec.
        croak "bad fieldspec" ;
      } elsif (@$fieldspec == 2) {
        # a field name to look up and what to call it in the output table.
        push @fieldrules, $column_of->($fieldspec->[0]) ;
        push @fields, Text::TabTable::Field->new($fieldspec->[1]) ;
      } elsif (@$fieldspec == 3) {
        # A subref, a new column name, and a list of columns to pass to
        # the subref.  A calculated column has to be produced by perl
        # rather than cut.
        $cut_ok = 0 ;

        my @paramcols = map { $column_of->($_) } @{$fieldspec->[2]} ;

        # the rule consists of the subref followed by a listref of the
        # columns its parameters come from.
        push @fieldrules, [ $fieldspec->[0], \@paramcols ] ;
        push @fields, Text::TabTable::Field->new($fieldspec->[1]) ;
      } else {
        croak "bad fieldspec" ;
      }
    }

    $newtable->{fieldlist} = Text::TabTable::FieldList->new(@fields) ;
  }

  $newtable->{filename} = _make_tempfile() ;

  # build a hash saying which field is in which position in the input.
  my %fieldloc ;
  $table->_build_fieldloc(\%fieldloc) ;

  # cut neither reorders nor repeats columns, so it is only usable when
  # the requested column numbers are strictly increasing.  (When $cut_ok
  # is still set every rule is a plain column number.)
  if ($cut_ok) {
    foreach my $i (1 .. $#fieldrules) {
      if ($fieldrules[$i] <= $fieldrules[$i - 1]) {
        $cut_ok = 0 ;
        last ;
      }
    }
  }

  # now $newtable->{fieldlist} describes the output columns,
  # $newtable->{filename} names the file to be created,
  # @fieldrules tells us how to create each output column, and
  # %fieldloc says which column number a field name can be found in.

  if ($cut_ok && !$wheresub && $fieldspecs) {
    # there aren't any calculated columns, and there's no where clause,
    # so we can just use cut to pick the columns they wanted.

    # @fieldrules has zero-based column numbers.  Make one-based.
    my @cutfields = map { $_ + 1 } @fieldrules ;

    carp "...selecting using cut" if $VERBOSE ;
    # NOTE(review): the file names are interpolated into a shell command
    # line; this relies on them containing no shell metacharacters.
    system "cut -f" . CORE::join(',', @cutfields) .
           " $table->{filename} > $newtable->{filename}" ;
    if ($?) {
      unlink $newtable->{filename} ;
      croak "cut error in select" ;
    }
  } else {
    # process the file using perl.
    open(my $in, '<', $table->{filename})
      or croak "can't open table file" ;
    open(my $out, '>', $newtable->{filename})
      or croak "can't open output table file" ;

    while (my $line = <$in>) {
      chomp $line ;
      # negative limit: keep trailing empty fields.
      my @values = split(/\t/, $line, -1) ;

      # run the where clause subroutine, if any, and skip if it says to.
      if ($wheresub) {
        my $rowdata = bless([\%fieldloc, \@values], 'Text::TabTable::DataRow') ;
        next if !$wheresub->($rowdata) ;
      }

      if (!$fieldspecs) {
        # select *.  Just print them out.
        print {$out} CORE::join("\t", @values), "\n" ;
        next ;
      }

      # use the @fieldrules to create @outvals from @values.
      my @outvals ;
      foreach my $rule (@fieldrules) {
        if (!ref $rule) {
          push @outvals, $values[$rule] ;
        } else {
          # an arrayref containing a subref and a list of column numbers.
          # Call the subroutine with the values of those columns and use
          # its (scalar) return value as the output field value.
          my ($subref, $paramcols) = @$rule ;
          push @outvals, scalar($subref->(@values[@$paramcols])) ;
        }
      }

      print {$out} CORE::join("\t", @outvals), "\n" ;
    }

    # buffered write errors only surface when the handle is closed.
    close($out) or croak "can't write output table file" ;
    close $in ;
  }

  if (!defined wantarray) {
    carp "Warning: select used in void context." ;
  }

  return bless $newtable, ref $table ;
}
882
|
|
|
|
|
|
|
|
883
|
|
|
|
|
|
|
####
# Populates the hash referenced by $hr_fieldloc with a mapping from every
# name of every field in the table's fieldlist to that field's zero-based
# position.  Names already present in the hash are never overwritten, so
# when a name occurs more than once the leftmost field wins.
####
sub _build_fieldloc
{
  my ($table, $hr_fieldloc) = @_ ;

  my @fields = $table->{fieldlist}->fields() ;
  for my $idx (0 .. $#fields) {
    for my $alias ($fields[$idx]->names()) {
      next if exists $hr_fieldloc->{$alias} ;
      $hr_fieldloc->{$alias} = $idx ;
    }
  }
}
897
|
|
|
|
|
|
|
|
898
|
|
|
|
|
|
|
####
# Runs through the rows of a tab table, calling a subroutine for each
# line.  The subroutine has the same calling convention as the where
# part of a select() call; its return value is ignored.
#
# This is like a select(undef,sub {}) but does not return a new table.
#
# Croaks if no subroutine is supplied or the table file cannot be opened.
####
sub iterate
{
  my ($table, $wheresub) = @_ ;
  carp "iterating over $table->{name}" if $VERBOSE ;

  # validate before opening anything so that no handle is left dangling.
  croak "iterate requires a subroutine reference" if !$wheresub ;

  open(my $in, '<', $table->{filename})
    or croak "can't open table file" ;

  # build a hash saying which field is in which position in the input.
  my %fieldloc ;
  $table->_build_fieldloc(\%fieldloc) ;

  while (my $line = <$in>) {
    chomp $line ;
    # negative limit: keep trailing empty fields.
    my @values = split(/\t/, $line, -1) ;

    my $rowdata = bless([\%fieldloc, \@values], 'Text::TabTable::DataRow') ;
    $wheresub->($rowdata) ;
  }

  close $in ;
}
928
|
|
|
|
|
|
|
|
929
|
|
|
|
|
|
|
####
# Builds and returns an iterator object for this table; the iterator has
# a next() method which gives one row object each time it is called.
#
# With -unescape => 1 a Text::TabTable::Iterator::Unescaping is created
# (it removes the tab/backslash/newline escaping from the data);
# otherwise a plain Text::TabTable::Iterator is created.
####
sub make_iterator
{
  my ($table, %args) = @_ ;
  carp "iterating over $table->{name}" if $VERBOSE ;

  my $class = $args{-unescape}
            ? 'Text::TabTable::Iterator::Unescaping'
            : 'Text::TabTable::Iterator' ;

  return $class->new($table) ;
}
946
|
|
|
|
|
|
|
|
947
|
|
|
|
|
|
|
# Destructor.  Removes the file named by {filename} unless {dontdelete}
# is true, then removes every file listed among the values of the {cdb}
# hash.
sub DESTROY
{
  my ($self) = @_ ;

  unlink $self->{filename} unless $self->{dontdelete} ;

  unlink $_ for values %{$self->{cdb}} ;
}
958
|
|
|
|
|
|
|
|
959
|
|
|
|
|
|
|
#### |
960
|
|
|
|
|
|
|
# Creates a temporary file and returns its filename. |
961
|
|
|
|
|
|
|
#### |
962
|
1
|
|
|
1
|
|
15
|
use vars qw(@TEMPFILES) ; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
388
|
|
963
|
|
|
|
|
|
|
sub _make_tempfile
{
  # Creates an empty file with a fresh name under $TMPDIR, records the
  # name in @TEMPFILES and returns it.  Up to 1000 candidate names of the
  # form "<pid>.<random number>" are tried; dies if none can be created.
  for my $attempt (1 .. 1000) {
    my $fname = "$TMPDIR/$$." . int(rand(9999999)) ;

    # O_EXCL makes the open fail if the name already exists, so a
    # successful sysopen means this call created the file.  Only a handle
    # that was actually opened gets closed.
    sysopen(my $fh, $fname, O_CREAT | O_WRONLY | O_EXCL, 0666) or next ;
    close $fh ;

    push @TEMPFILES, $fname ;
    return $fname ;
  }
  die "couldn't create a temporary file\n" ;
}
977
|
|
|
|
|
|
|
|
978
|
|
|
|
|
|
|
END {
  # Safety net at interpreter exit: remove every temporary file recorded
  # in @TEMPFILES that is still around.  This shouldn't happen.
  unlink $_ for @TEMPFILES ;
}
984
|
|
|
|
|
|
|
|
985
|
|
|
|
|
|
|
|
986
|
|
|
|
|
|
|
############################################################################### |
987
|
|
|
|
|
|
|
# Text::TabTable::Field |
988
|
|
|
|
|
|
|
############################################################################### |
989
|
|
|
|
|
|
|
package Text::TabTable::Field ;

# A field object holds an ordered list of names: the first entry is the
# field's primary name, any further entries are aliases.

####
# Constructor.  Creates a field whose only name is $fieldname.
####
sub new
{
  my ($package, $fieldname) = @_ ;
  return bless { names => [ $fieldname ] }, $package ;
}

####
# Returns the primary (first) name of the field.
####
sub name
{
  my ($self) = @_ ;
  return $self->{names}[0] ;
}

####
# Return all the names for this field.
####
sub names
{
  my ($self) = @_ ;
  return @{ $self->{names} } ;
}

####
# Returns 1 if $name is one of the names of this field, 0 otherwise.
####
sub has_name
{
  my ($self, $name) = @_ ;
  my $matches = grep { $_ eq $name } @{ $self->{names} } ;
  return $matches ? 1 : 0 ;
}

####
# Add one or more alias names to a field.
####
sub add_name
{
  my ($self, @names) = @_ ;
  return push @{ $self->{names} }, @names ;
}

####
# sets the name of the field, wiping out all previous aliases.
####
sub set_name
{
  my ($self, $name) = @_ ;
  return $self->{names} = [ $name ] ;
}
1038
|
|
|
|
|
|
|
|
1039
|
|
|
|
|
|
|
############################################################################### |
1040
|
|
|
|
|
|
|
# Text::TabTable::FieldList |
1041
|
|
|
|
|
|
|
# |
1042
|
|
|
|
|
|
|
# Represents the list of Fields on a table. |
1043
|
|
|
|
|
|
|
############################################################################### |
1044
|
|
|
|
|
|
|
package Text::TabTable::FieldList ;

use Data::Dumper ;
use Storable () ;

####
# Constructor.  Takes the Field objects that make up the list, in column
# order.
####
sub new
{
  my ($package, @fields) = @_ ;

  return bless { fields => \@fields }, $package ;
}

####
# Returns an independent copy of the field list, including copies of all
# the field objects in it.
####
sub deepcopy
{
  my ($self) = @_ ;

  # dclone copies the whole structure and keeps the blessings, without
  # evaluating generated code or depending on Data::Dumper's global
  # settings.
  return Storable::dclone($self) ;
}

####
# Returns a 1-based column number for the given field, or undef
# if not present.
####
sub find_colnum
{
  my ($self, $fieldname) = @_ ;

  my $fields = $self->{fields} ;
  for my $i (0 .. $#$fields) {
    return $i + 1 if $fields->[$i]->has_name($fieldname) ;
  }

  # an explicit undef is kept (rather than a bare return) so that the
  # result is a single value in list context too, as it always was.
  return undef ;
}

####
# Return the number of fields.
####
sub fieldcount
{
  my ($self) = @_ ;
  return scalar(@{ $self->{fields} }) ;
}

####
# Return field names as a tab-delimited string.  Only the primary name
# of each field is used.
####
sub as_string
{
  my ($self) = @_ ;
  return join("\t", map { $_->name } @{ $self->{fields} }) ;
}

####
# Return the list of field objects, in column order.
####
sub fields
{
  my ($self) = @_ ;
  return @{ $self->{fields} } ;
}
1101
|
|
|
|
|
|
|
|
1102
|
|
|
|
|
|
|
############################################################################### |
1103
|
|
|
|
|
|
|
# Text::TabTable::Iterator |
1104
|
|
|
|
|
|
|
# Text::TabTable::Iterator::Unescaping |
1105
|
|
|
|
|
|
|
# |
1106
|
|
|
|
|
|
|
# A thing that returns one row at a time from a table. |
1107
|
|
|
|
|
|
|
# The ::Unescaping version will run Text::TabTable::Unescape on all the |
1108
|
|
|
|
|
|
|
# data first. |
1109
|
|
|
|
|
|
|
############################################################################### |
1110
|
|
|
|
|
|
|
package Text::TabTable::Iterator ;

# The unescaping iterator inherits everything but next() from the plain
# iterator.
@Text::TabTable::Iterator::Unescaping::ISA = ('Text::TabTable::Iterator') ;

use strict ;
use Carp ;

####
# Constructor.  Opens the table's file for reading and remembers the
# table's field-name-to-column mapping.  Croaks if the file cannot be
# opened.
####
sub new
{
  my ($package, $table) = @_ ;

  require IO::File ;

  my %fieldloc ;
  $table->_build_fieldloc(\%fieldloc) ;

  my $fh = IO::File->new($table->{filename}, 'r')
    or croak "can't open table file $table->{filename}: $!" ;

  return bless { fieldloc => \%fieldloc, fh => $fh }, $package ;
}

####
# Returns the next row of the table as a Text::TabTable::DataRow, or
# undef once the file is exhausted.  The file handle is released at end
# of file; further calls keep returning undef.
####
sub next
{
  my ($self) = @_ ;

  # the handle is gone once end of file has been reached.
  my $fh = $self->{fh} ;
  return undef if !$fh ;

  my $line = $fh->getline() ;

  # test definedness, not truth: a final line consisting of "0" without
  # a newline is still data.
  if (!defined $line) {
    delete $self->{fh} ;
    return undef ;
  }
  chomp $line ;
  # negative limit: keep trailing empty fields.
  my @values = split(/\t/, $line, -1) ;

  return bless([$self->{fieldloc}, \@values], 'Text::TabTable::DataRow') ;
}

####
# Like Text::TabTable::Iterator::next, but passes every value of the row
# through Text::TabTable::unescape before returning it.
####
sub Text::TabTable::Iterator::Unescaping::next
{
  my ($self) = @_ ;

  # get a row and unescape the data in it.
  my $row = $self->Text::TabTable::Iterator::next() ;
  return undef if !$row ;

  # the loop variable aliases the array elements, so this updates the
  # row in place.
  foreach my $val (@{$row->[1]}) {
    $val = Text::TabTable::unescape($val) ;
  }

  return $row ;
}
1165
|
|
|
|
|
|
|
|
1166
|
|
|
|
|
|
|
############################################################################### |
1167
|
|
|
|
|
|
|
# Text::TabTable::DataRow |
1168
|
|
|
|
|
|
|
# |
1169
|
|
|
|
|
|
|
# Represents a row of data from a TabTable. |
1170
|
|
|
|
|
|
|
############################################################################### |
1171
|
|
|
|
|
|
|
package Text::TabTable::DataRow ;

use vars qw($AUTOLOAD) ;
use Carp ;

use strict ;

# A row is a two-element array: [0] is a hash mapping field names to
# zero-based column numbers, [1] is the array of the row's values.

####
# Constructor.
# This constructor is not actually used by select(); it blesses the
# right structure itself for speed purposes.
####
sub new
{
  my ($package, $name2colhash, $values) = @_ ;
  return bless [$name2colhash, $values], $package ;
}

####
# Returns the value of the named field.  Croaks if the row has no such
# field (rather than silently falling back to the first column).
####
sub getvalue
{
  my ($self, $name) = @_ ;
  croak "No $name field in table" if !exists $self->[0]{$name} ;
  return $self->[1][ $self->[0]{$name} ] ;
}

####
# Sets the value of the named field and returns the new value.  Croaks
# if the row has no such field (rather than overwriting the first column).
####
sub setvalue
{
  my ($self, $name, $newval) = @_ ;
  croak "No $name field in table" if !exists $self->[0]{$name} ;
  return $self->[1][ $self->[0]{$name} ] = $newval ;
}

# to save work for autoload.
sub DESTROY {} ;

####
# implements field-named methods for getting values.  The first call for
# a given field name installs a real accessor method in this package, so
# later calls no longer go through AUTOLOAD.  Croaks if the row has no
# field of that name.
####
sub AUTOLOAD
{
  my $self = shift ;

  # strip the package qualification from the requested method name.
  my $name = $AUTOLOAD ;
  $name =~ s/.*:// ;

  if (!exists $self->[0]{$name}) {
    croak "No $name field in table" ;
  }

  # create a function to calculate it.  The column is looked up per row
  # at call time, since the accessor is shared by rows of every table.
  my $accessor = sub {
    return $_[0]->[1][ $_[0]->[0]{$name} ] ;
  } ;
  {
    no strict 'refs' ;
    *{ __PACKAGE__ . "::$name" } = $accessor ;
  }

  return $self->$accessor() ;
}
1227
|
|
|
|
|
|
|
|
1228
|
|
|
|
|
|
|
|
1229
|
|
|
|
|
|
|
1; |