| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Bio::Gonzales::Matrix::IO; |
|
2
|
|
|
|
|
|
|
|
|
3
|
8
|
|
|
8
|
|
275175
|
use warnings; |
|
|
8
|
|
|
|
|
36
|
|
|
|
8
|
|
|
|
|
293
|
|
|
4
|
8
|
|
|
8
|
|
49
|
use strict; |
|
|
8
|
|
|
|
|
16
|
|
|
|
8
|
|
|
|
|
157
|
|
|
5
|
8
|
|
|
8
|
|
45
|
use Carp; |
|
|
8
|
|
|
|
|
16
|
|
|
|
8
|
|
|
|
|
417
|
|
|
6
|
8
|
|
|
8
|
|
49
|
use Data::Dumper; |
|
|
8
|
|
|
|
|
13
|
|
|
|
8
|
|
|
|
|
379
|
|
|
7
|
8
|
|
|
8
|
|
1937
|
use List::MoreUtils qw/uniq zip/; |
|
|
8
|
|
|
|
|
45037
|
|
|
|
8
|
|
|
|
|
92
|
|
|
8
|
8
|
|
|
8
|
|
10512
|
use Bio::Gonzales::Util qw/flatten/; |
|
|
8
|
|
|
|
|
20
|
|
|
|
8
|
|
|
|
|
507
|
|
|
9
|
8
|
|
|
8
|
|
9208
|
use Text::CSV_XS qw/csv/; |
|
|
8
|
|
|
|
|
126783
|
|
|
|
8
|
|
|
|
|
569
|
|
|
10
|
|
|
|
|
|
|
|
|
11
|
8
|
|
|
8
|
|
3597
|
use Bio::Gonzales::Matrix::Util qw/uniq_rows/; |
|
|
8
|
|
|
|
|
23
|
|
|
|
8
|
|
|
|
|
517
|
|
|
12
|
|
|
|
|
|
|
|
|
13
|
8
|
|
|
8
|
|
137
|
use 5.010; |
|
|
8
|
|
|
|
|
27
|
|
|
14
|
|
|
|
|
|
|
|
|
15
|
8
|
|
|
8
|
|
57
|
use List::Util qw/max/; |
|
|
8
|
|
|
|
|
18
|
|
|
|
8
|
|
|
|
|
542
|
|
|
16
|
8
|
|
|
8
|
|
2946
|
use Bio::Gonzales::Util::File qw/open_on_demand slurpc expand_home/; |
|
|
8
|
|
|
|
|
31
|
|
|
|
8
|
|
|
|
|
701
|
|
|
17
|
|
|
|
|
|
|
|
|
18
|
8
|
|
|
8
|
|
60
|
use base 'Exporter'; |
|
|
8
|
|
|
|
|
24
|
|
|
|
8
|
|
|
|
|
32952
|
|
|
19
|
|
|
|
|
|
|
our ( @EXPORT, @EXPORT_OK, %EXPORT_TAGS ); |
|
20
|
|
|
|
|
|
|
our $VERSION = '0.083'; # VERSION |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
@EXPORT = qw(mslurp mspew lslurp miterate lspew dict_slurp dict_spew xcsv_slurp miterate_hash); |
|
23
|
|
|
|
|
|
|
%EXPORT_TAGS = (); |
|
24
|
|
|
|
|
|
|
@EXPORT_OK = qw(lspew xlsx_slurp xlsx_spew); |
|
25
|
|
|
|
|
|
|
my $COMMENT_RE = qr/^\s*#/; |
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
sub dict_slurp { |
|
28
|
6
|
|
|
6
|
1
|
5334
|
my ( $src, $cc ) = @_; |
|
29
|
|
|
|
|
|
|
|
|
30
|
6
|
50
|
33
|
|
|
34
|
croak "no config settings given" |
|
31
|
|
|
|
|
|
|
unless ( $cc && ref $cc eq 'HASH' ); |
|
32
|
|
|
|
|
|
|
|
|
33
|
6
|
|
33
|
|
|
17
|
$cc->{key_idx} //= $cc->{key_idcs}; |
|
34
|
6
|
|
33
|
|
|
14
|
$cc->{val_idx} //= $cc->{val_idcs}; |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
croak "you have not specified key_idx" |
|
37
|
6
|
50
|
|
|
|
13
|
unless ( exists( $cc->{key_idx} ) ); |
|
38
|
|
|
|
|
|
|
|
|
39
|
6
|
|
50
|
|
|
13
|
$cc //= {}; |
|
40
|
6
|
|
|
|
|
74
|
my %c = ( |
|
41
|
|
|
|
|
|
|
sep => qr/\t/, |
|
42
|
|
|
|
|
|
|
header => undef, |
|
43
|
|
|
|
|
|
|
skip => -1, |
|
44
|
|
|
|
|
|
|
comment => $COMMENT_RE, |
|
45
|
|
|
|
|
|
|
key_idx => 0, |
|
46
|
|
|
|
|
|
|
record_filter => undef, |
|
47
|
|
|
|
|
|
|
commented_header => undef, |
|
48
|
|
|
|
|
|
|
concat_keys => 1, |
|
49
|
|
|
|
|
|
|
sort_keys => 0, |
|
50
|
|
|
|
|
|
|
strict => 0, |
|
51
|
|
|
|
|
|
|
%$cc |
|
52
|
|
|
|
|
|
|
); |
|
53
|
|
|
|
|
|
|
|
|
54
|
6
|
|
33
|
|
|
41
|
$c{header} //= $c{commented_header}; |
|
55
|
|
|
|
|
|
|
|
|
56
|
6
|
|
|
|
|
10
|
my $is_strict = $c{strict}; |
|
57
|
6
|
|
|
|
|
10
|
my $record_filter = $c{record_filter}; |
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
# concatenate keys to a big string |
|
60
|
6
|
|
|
|
|
8
|
my @kidcs; |
|
61
|
6
|
50
|
33
|
|
|
18
|
if ( $c{concat_keys} || !ref( $c{key_idx} ) ) { |
|
62
|
6
|
|
|
|
|
14
|
@kidcs = ( $c{key_idx} ); |
|
63
|
|
|
|
|
|
|
} else { |
|
64
|
|
|
|
|
|
|
# or treat them separately |
|
65
|
0
|
|
|
|
|
0
|
@kidcs = @{ $c{key_idx} }; |
|
|
0
|
|
|
|
|
0
|
|
|
66
|
|
|
|
|
|
|
} |
|
67
|
|
|
|
|
|
|
|
|
68
|
6
|
|
|
|
|
10
|
my $vidx = $c{val_idx}; |
|
69
|
|
|
|
|
|
|
# make an array from it |
|
70
|
|
|
|
|
|
|
|
|
71
|
6
|
|
66
|
|
|
24
|
my $uniq = $c{uniq} // $c{uniq_vals} // $c{unique} // 0; |
|
|
|
|
33
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
72
|
|
|
|
|
|
|
|
|
73
|
6
|
|
|
|
|
21
|
my ( $fh, $fh_was_open ) = open_on_demand( $src, '<' ); |
|
74
|
|
|
|
|
|
|
|
|
75
|
6
|
|
|
|
|
11
|
my @header; |
|
76
|
6
|
50
|
|
|
|
14
|
if ( $c{header} ) { |
|
77
|
0
|
|
|
|
|
0
|
while ( my $raw_row = <$fh> ) { |
|
78
|
0
|
0
|
0
|
|
|
0
|
if ( $c{comment} && $raw_row =~ /$c{comment}/ ) { |
|
79
|
0
|
0
|
|
|
|
0
|
if ( $c{commented_header} ) { |
|
80
|
0
|
|
|
|
|
0
|
$raw_row =~ s/$c{comment}//; |
|
81
|
|
|
|
|
|
|
} else { |
|
82
|
0
|
|
|
|
|
0
|
next; |
|
83
|
|
|
|
|
|
|
} |
|
84
|
|
|
|
|
|
|
} |
|
85
|
0
|
|
|
|
|
0
|
$raw_row =~ s/\r$//; |
|
86
|
0
|
|
|
|
|
0
|
chomp $raw_row; |
|
87
|
0
|
|
|
|
|
0
|
@header = split /$c{sep}/, $raw_row, -1; |
|
88
|
0
|
0
|
0
|
|
|
0
|
@header = ref $vidx ? @header[@$vidx] : ( $header[$vidx] ) if ( defined $vidx && $vidx ne 'all' ); |
|
|
|
0
|
|
|
|
|
|
|
89
|
0
|
|
|
|
|
0
|
last; |
|
90
|
|
|
|
|
|
|
} |
|
91
|
|
|
|
|
|
|
} |
|
92
|
|
|
|
|
|
|
|
|
93
|
6
|
|
|
|
|
11
|
my %map; |
|
94
|
6
|
|
|
|
|
10
|
my $lnum = 0; |
|
95
|
6
|
|
|
|
|
22
|
while (<$fh>) { |
|
96
|
18
|
50
|
|
|
|
793
|
next if ( $lnum++ <= $c{skip} ); |
|
97
|
18
|
50
|
33
|
|
|
146
|
next if ( $c{comment} && /$c{comment}/ ); |
|
98
|
18
|
|
|
|
|
42
|
s/\r$//; |
|
99
|
18
|
|
|
|
|
32
|
chomp; |
|
100
|
18
|
50
|
|
|
|
64
|
next if (/^\s*$/); |
|
101
|
|
|
|
|
|
|
|
|
102
|
18
|
100
|
100
|
|
|
44
|
next if ( $record_filter && !$record_filter->($_) ); |
|
103
|
|
|
|
|
|
|
|
|
104
|
17
|
|
|
|
|
78
|
my @r = split /$c{sep}/, $_, -1; |
|
105
|
|
|
|
|
|
|
|
|
106
|
17
|
|
|
|
|
36
|
for my $kidx (@kidcs) { |
|
107
|
|
|
|
|
|
|
|
|
108
|
17
|
50
|
|
|
|
77
|
my @k = ( ref $kidx ? @r[@$kidx] : $r[$kidx] ); |
|
109
|
17
|
|
50
|
|
|
34
|
@k = map { $_ // '' } @k; |
|
|
17
|
|
|
|
|
58
|
|
|
110
|
17
|
50
|
|
|
|
38
|
@k = sort @k if ( $c{sort_keys} ); |
|
111
|
17
|
|
50
|
|
|
48
|
my $k = join( $;, @k ) // ''; |
|
112
|
|
|
|
|
|
|
|
|
113
|
17
|
50
|
66
|
|
|
68
|
if ( $uniq && !defined($vidx) ) { |
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
114
|
0
|
|
|
|
|
0
|
$map{$k} = 1; |
|
115
|
|
|
|
|
|
|
} elsif ( not defined $vidx ) { |
|
116
|
0
|
|
|
|
|
0
|
$map{$k}++; |
|
117
|
|
|
|
|
|
|
} elsif ($uniq) { |
|
118
|
13
|
100
|
100
|
|
|
81
|
confess "strict mode: two times the same key $k" if ( $is_strict && defined( $map{$k} ) ); |
|
119
|
12
|
50
|
|
|
|
75
|
$map{$k} = ( ref $vidx ? [ @r[@$vidx] ] : ( $vidx eq 'all' ? \@r : $r[$vidx] ) ); |
|
|
|
100
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
} else { |
|
121
|
4
|
|
50
|
|
|
23
|
$map{$k} //= []; |
|
122
|
4
|
50
|
|
|
|
7
|
push @{ $map{$k} }, ( ref $vidx ? [ @r[@$vidx] ] : ( $vidx eq 'all' ? \@r : $r[$vidx] ) ); |
|
|
4
|
100
|
|
|
|
40
|
|
|
123
|
|
|
|
|
|
|
} |
|
124
|
|
|
|
|
|
|
} |
|
125
|
|
|
|
|
|
|
} |
|
126
|
|
|
|
|
|
|
|
|
127
|
5
|
50
|
|
|
|
85
|
$fh->close unless ($fh_was_open); |
|
128
|
5
|
50
|
|
|
|
30
|
return wantarray ? ( \%map, \@header ) : \%map; |
|
129
|
|
|
|
|
|
|
} |
|
130
|
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
sub dict_spew { |
|
132
|
0
|
|
|
0
|
0
|
0
|
my ( $dest, $m, $c ) = @_; |
|
133
|
|
|
|
|
|
|
|
|
134
|
0
|
|
0
|
|
|
0
|
my $uniq = $c->{uniq} // $c->{uniq_vals} // 0; |
|
|
|
|
0
|
|
|
|
|
|
135
|
|
|
|
|
|
|
|
|
136
|
0
|
|
|
|
|
0
|
my @flat; |
|
137
|
0
|
|
|
|
|
0
|
while ( my ( $k, $vv ) = each %$m ) { |
|
138
|
|
|
|
|
|
|
# $v => [ a, b, c] or $v => a or $v => [ [ a, b ], [c, d], ...] |
|
139
|
0
|
0
|
|
|
|
0
|
$vv = [$vv] unless ( ref $vv ); |
|
140
|
0
|
0
|
|
|
|
0
|
my $vals = [ map { ref $_ ? $_ : [$_] } @$vv ]; |
|
|
0
|
|
|
|
|
0
|
|
|
141
|
0
|
0
|
|
|
|
0
|
$vals = uniq_rows($vals) if ($uniq); |
|
142
|
0
|
|
|
|
|
0
|
for my $v (@$vals) { |
|
143
|
0
|
|
|
|
|
0
|
push @flat, [ $k, @$v ]; |
|
144
|
|
|
|
|
|
|
} |
|
145
|
|
|
|
|
|
|
} |
|
146
|
0
|
|
|
|
|
0
|
return mspew( $dest, \@flat, $c ); |
|
147
|
|
|
|
|
|
|
} |
|
148
|
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
sub mslurp { |
|
150
|
2
|
|
|
2
|
1
|
1211
|
my ( $src, $cc ) = @_; |
|
151
|
2
|
|
|
|
|
4
|
my @m; |
|
152
|
|
|
|
|
|
|
|
|
153
|
2
|
|
|
|
|
7
|
my ( $fh, $fh_was_open ) = open_on_demand( $src, '<' ); |
|
154
|
|
|
|
|
|
|
|
|
155
|
2
|
|
100
|
|
|
11
|
$cc //= {}; |
|
156
|
2
|
|
|
|
|
22
|
my %c = ( |
|
157
|
|
|
|
|
|
|
sep => qr/\t/, |
|
158
|
|
|
|
|
|
|
header => 0, |
|
159
|
|
|
|
|
|
|
skip => -1, |
|
160
|
|
|
|
|
|
|
row_names => 0, |
|
161
|
|
|
|
|
|
|
comment => $COMMENT_RE, |
|
162
|
|
|
|
|
|
|
commented_header => undef, |
|
163
|
|
|
|
|
|
|
record_filter => undef, |
|
164
|
|
|
|
|
|
|
col_idx => undef, |
|
165
|
|
|
|
|
|
|
%$cc |
|
166
|
|
|
|
|
|
|
); |
|
167
|
|
|
|
|
|
|
|
|
168
|
2
|
|
|
|
|
5
|
my $record_filter = $c{record_filter}; |
|
169
|
|
|
|
|
|
|
|
|
170
|
2
|
|
|
|
|
5
|
my @col_idx; |
|
171
|
2
|
100
|
66
|
|
|
12
|
@col_idx = @{ $c{col_idx} } if ( $c{col_idx} && ref $c{col_idx} eq 'ARRAY' ); |
|
|
1
|
|
|
|
|
4
|
|
|
172
|
2
|
|
|
|
|
4
|
my @header; |
|
173
|
|
|
|
|
|
|
my @row_names; |
|
174
|
|
|
|
|
|
|
|
|
175
|
2
|
50
|
|
|
|
8
|
if ( $c{header} ) { |
|
176
|
0
|
|
|
|
|
0
|
while ( my $raw_row = <$fh> ) { |
|
177
|
0
|
0
|
0
|
|
|
0
|
if ( $c{comment} && $raw_row =~ /$c{comment}/ ) { |
|
178
|
0
|
0
|
|
|
|
0
|
if ( $c{commented_header} ) { |
|
179
|
0
|
|
|
|
|
0
|
$raw_row =~ s/$c{comment}//; |
|
180
|
|
|
|
|
|
|
} else { |
|
181
|
0
|
|
|
|
|
0
|
next; |
|
182
|
|
|
|
|
|
|
} |
|
183
|
|
|
|
|
|
|
} |
|
184
|
|
|
|
|
|
|
|
|
185
|
0
|
|
|
|
|
0
|
$raw_row =~ s/\r\n/\n/; |
|
186
|
0
|
|
|
|
|
0
|
chomp $raw_row; |
|
187
|
0
|
|
|
|
|
0
|
@header = split /$c{sep}/, $raw_row, -1; |
|
188
|
0
|
|
|
|
|
0
|
last; |
|
189
|
|
|
|
|
|
|
} |
|
190
|
|
|
|
|
|
|
} |
|
191
|
|
|
|
|
|
|
|
|
192
|
2
|
|
|
|
|
5
|
my $lnum = 0; |
|
193
|
2
|
|
|
|
|
7
|
while (<$fh>) { |
|
194
|
4
|
50
|
|
|
|
201
|
next if ( $lnum++ <= $c{skip} ); |
|
195
|
4
|
50
|
33
|
|
|
27
|
next if ( $c{comment} && /$c{comment}/ ); |
|
196
|
4
|
|
|
|
|
10
|
s/\r\n/\n/; |
|
197
|
4
|
|
|
|
|
8
|
chomp; |
|
198
|
4
|
50
|
|
|
|
29
|
next if (/^\s*$/); |
|
199
|
|
|
|
|
|
|
|
|
200
|
4
|
50
|
33
|
|
|
11
|
next if ( $record_filter && !$record_filter->($_) ); |
|
201
|
|
|
|
|
|
|
|
|
202
|
4
|
|
|
|
|
16
|
my @row = split /$c{sep}/, $_, -1; |
|
203
|
|
|
|
|
|
|
|
|
204
|
4
|
50
|
|
|
|
10
|
push @row_names, shift @row if ( $c{row_names} ); |
|
205
|
4
|
0
|
|
|
|
9
|
@row = map { $_ eq $c{na_value} ? undef : $_ } @row if ( $c{na_value} ); |
|
|
0
|
50
|
|
|
|
0
|
|
|
206
|
|
|
|
|
|
|
|
|
207
|
4
|
100
|
|
|
|
24
|
push @m, ( @col_idx ? [ @row[@col_idx] ] : \@row ); |
|
208
|
|
|
|
|
|
|
} |
|
209
|
2
|
50
|
|
|
|
31
|
$fh->close unless ($fh_was_open); |
|
210
|
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
#remove first empty element of a header if same number of elements as first matrix element. |
|
212
|
2
|
0
|
33
|
|
|
7
|
shift @header if ( $c{row_names} && $c{header} && @m > 0 && @{ $m[0] } == @header && !$header[0] ); |
|
|
0
|
|
0
|
|
|
0
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
213
|
|
|
|
|
|
|
|
|
214
|
2
|
50
|
|
|
|
6
|
if (wantarray) { |
|
215
|
0
|
0
|
|
|
|
0
|
return ( \@m, ( @header ? \@header : undef ), ( @row_names ? \@row_names : undef ) ); |
|
|
|
0
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
} else { |
|
217
|
2
|
|
|
|
|
13
|
return \@m; |
|
218
|
|
|
|
|
|
|
} |
|
219
|
|
|
|
|
|
|
} |
|
220
|
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
sub xcsv_slurp { |
|
222
|
0
|
|
|
0
|
0
|
0
|
my ( $src, $c ) = @_; |
|
223
|
|
|
|
|
|
|
|
|
224
|
0
|
|
|
|
|
0
|
my ( $fh, $fh_was_open ) = open_on_demand( $src, '<' ); |
|
225
|
|
|
|
|
|
|
|
|
226
|
0
|
0
|
|
|
|
0
|
my $data = do { local $/; <$fh> } |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
227
|
|
|
|
|
|
|
or confess "No data to analyze\n"; |
|
228
|
0
|
0
|
|
|
|
0
|
$fh->close unless ($fh_was_open); |
|
229
|
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
$c->{sep} //= |
|
231
|
0
|
0
|
0
|
|
|
0
|
$data =~ m/["\d],["\d,]/ ? "," |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
: $data =~ m/["\d];["\d;]/ ? ";" |
|
233
|
|
|
|
|
|
|
: $data =~ m/["\d]\t["\d]/ ? "\t" |
|
234
|
|
|
|
|
|
|
: |
|
235
|
|
|
|
|
|
|
# If neither, then for unquoted strings |
|
236
|
|
|
|
|
|
|
$data =~ m/\w,[\w,]/ ? "," |
|
237
|
|
|
|
|
|
|
: $data =~ m/\w;[\w;]/ ? ";" |
|
238
|
|
|
|
|
|
|
: $data =~ m/\w\t[\w]/ ? "\t" |
|
239
|
|
|
|
|
|
|
: ","; |
|
240
|
0
|
0
|
|
|
|
0
|
open my $dfh, '<', \$data or die "Can't open filehandle: $!"; |
|
241
|
0
|
|
|
|
|
0
|
my $aoa = csv( in => $dfh, sep_char => $c->{sep}, quote_char => '"', escape_char => '"' ); |
|
242
|
0
|
|
|
|
|
0
|
close $dfh; |
|
243
|
0
|
|
|
|
|
0
|
return $aoa; |
|
244
|
|
|
|
|
|
|
} |
|
245
|
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
sub miterate { |
|
247
|
2
|
|
|
2
|
0
|
329
|
my ( $src, $cc ) = @_; |
|
248
|
2
|
|
|
|
|
10
|
my ( $fh, $fh_was_open ) = open_on_demand( $src, '<' ); |
|
249
|
|
|
|
|
|
|
|
|
250
|
2
|
|
100
|
|
|
11
|
$cc //= {}; |
|
251
|
2
|
|
|
|
|
24
|
my %c = ( |
|
252
|
|
|
|
|
|
|
sep => qr/\t/, |
|
253
|
|
|
|
|
|
|
skip => 0, |
|
254
|
|
|
|
|
|
|
comment => $COMMENT_RE, |
|
255
|
|
|
|
|
|
|
record_filter => undef, |
|
256
|
|
|
|
|
|
|
%$cc |
|
257
|
|
|
|
|
|
|
); |
|
258
|
|
|
|
|
|
|
|
|
259
|
2
|
|
|
|
|
6
|
my $record_filter = $c{record_filter}; |
|
260
|
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
return sub { |
|
262
|
5
|
|
|
5
|
|
30
|
while (<$fh>) { |
|
263
|
|
|
|
|
|
|
|
|
264
|
8
|
100
|
|
|
|
379
|
next if ( --$c{skip} >= 0 ); |
|
265
|
|
|
|
|
|
|
|
|
266
|
7
|
100
|
66
|
|
|
54
|
next if ( $c{comment} && /$c{comment}/ ); |
|
267
|
5
|
|
|
|
|
12
|
s/\r\n/\n/; |
|
268
|
5
|
|
|
|
|
10
|
chomp; |
|
269
|
5
|
100
|
|
|
|
22
|
next if (/^\s*$/); |
|
270
|
3
|
50
|
33
|
|
|
8
|
next if ( $record_filter && !$record_filter->($_) ); |
|
271
|
|
|
|
|
|
|
|
|
272
|
3
|
|
|
|
|
13
|
my @row = split /$c{sep}/, $_, -1; |
|
273
|
3
|
|
|
|
|
23
|
return \@row; |
|
274
|
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
} |
|
276
|
2
|
50
|
|
|
|
39
|
$fh->close unless ($fh_was_open); |
|
277
|
2
|
|
|
|
|
10
|
return; |
|
278
|
2
|
|
|
|
|
17
|
}; |
|
279
|
|
|
|
|
|
|
} |
|
280
|
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
sub miterate_hash { |
|
282
|
0
|
|
|
0
|
0
|
0
|
my $mit = miterate(@_); |
|
283
|
|
|
|
|
|
|
|
|
284
|
0
|
|
|
|
|
0
|
my $header = $mit->(); |
|
285
|
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
return sub { |
|
287
|
0
|
|
|
0
|
|
0
|
my ( $from_idx, $to_idx ) = @_; |
|
288
|
|
|
|
|
|
|
|
|
289
|
0
|
|
|
|
|
0
|
my $row = $mit->(); |
|
290
|
0
|
0
|
|
|
|
0
|
return unless ($row); |
|
291
|
|
|
|
|
|
|
|
|
292
|
0
|
0
|
0
|
|
|
0
|
if ( defined($from_idx) && defined($to_idx) ) { |
|
293
|
0
|
|
|
|
|
0
|
my @h = @{$header}[ $from_idx .. $to_idx ]; |
|
|
0
|
|
|
|
|
0
|
|
|
294
|
0
|
|
|
|
|
0
|
my @r = @{$row}[ $from_idx .. $to_idx ]; |
|
|
0
|
|
|
|
|
0
|
|
|
295
|
0
|
|
|
|
|
0
|
return { zip @h, @r }; |
|
296
|
|
|
|
|
|
|
} else { |
|
297
|
0
|
|
|
|
|
0
|
return { zip @$header, @$row }; |
|
298
|
|
|
|
|
|
|
} |
|
299
|
|
|
|
|
|
|
|
|
300
|
0
|
|
|
|
|
0
|
}, $header; |
|
301
|
|
|
|
|
|
|
} |
|
302
|
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
sub lspew { |
|
304
|
0
|
|
|
0
|
1
|
0
|
my ( $dest, $l, $c ) = @_; |
|
305
|
0
|
|
0
|
|
|
0
|
my $delim = $c->{sep} // $c->{delim} // "\t"; |
|
|
|
|
0
|
|
|
|
|
|
306
|
0
|
|
0
|
|
|
0
|
my $header = $c->{header} // $c->{ids}; |
|
307
|
0
|
|
|
|
|
0
|
my ( $fh, $fh_was_open ) = open_on_demand( $dest, '>' ); |
|
308
|
|
|
|
|
|
|
|
|
309
|
0
|
0
|
0
|
|
|
0
|
say $fh join( $delim, @$header ) if ( $header && @$header > 0 ); |
|
310
|
|
|
|
|
|
|
|
|
311
|
0
|
0
|
|
|
|
0
|
if ( ref $l eq 'HASH' ) { |
|
|
|
0
|
|
|
|
|
|
|
312
|
0
|
|
|
|
|
0
|
while ( my ( $k, $v ) = each %$l ) { |
|
313
|
0
|
0
|
|
|
|
0
|
if ( ref $v eq 'ARRAY' ) { |
|
314
|
0
|
|
|
|
|
0
|
say $fh join $delim, ( $k, @$v ); |
|
315
|
|
|
|
|
|
|
} else { |
|
316
|
0
|
|
|
|
|
0
|
say $fh join $delim, ( $k, $v ); |
|
317
|
|
|
|
|
|
|
} |
|
318
|
|
|
|
|
|
|
} |
|
319
|
|
|
|
|
|
|
} elsif ( ref $l eq 'ARRAY' ) { |
|
320
|
0
|
|
|
|
|
0
|
for my $v (@$l) { |
|
321
|
0
|
0
|
|
|
|
0
|
if ( ref $v eq 'ARRAY' ) { |
|
322
|
0
|
|
|
|
|
0
|
say $fh join $delim, @$v; |
|
323
|
|
|
|
|
|
|
} else { |
|
324
|
0
|
|
|
|
|
0
|
say $fh $v; |
|
325
|
|
|
|
|
|
|
} |
|
326
|
|
|
|
|
|
|
} |
|
327
|
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
} else { |
|
329
|
0
|
|
|
|
|
0
|
confess "need a reference for the list argument"; |
|
330
|
|
|
|
|
|
|
} |
|
331
|
0
|
0
|
|
|
|
0
|
$fh->close unless ($fh_was_open); |
|
332
|
|
|
|
|
|
|
|
|
333
|
0
|
|
|
|
|
0
|
return; |
|
334
|
|
|
|
|
|
|
} |
|
335
|
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
sub lslurp { |
|
337
|
0
|
|
|
0
|
0
|
0
|
my ($file) = @_; |
|
338
|
|
|
|
|
|
|
|
|
339
|
0
|
|
|
|
|
0
|
my @lines = slurpc($file); |
|
340
|
0
|
|
|
|
|
0
|
return \@lines; |
|
341
|
|
|
|
|
|
|
} |
|
342
|
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
sub mspew { |
|
344
|
1
|
|
|
1
|
1
|
1017
|
my ( $dest, $m, $c ) = @_; |
|
345
|
|
|
|
|
|
|
|
|
346
|
1
|
50
|
33
|
|
|
9
|
confess "no matrix, you need to supply a matrix of the form [ [ 1,2,3 ], [ 4,5,6 ], ... ]" |
|
347
|
|
|
|
|
|
|
unless ( $m && ref $m eq 'ARRAY' ); |
|
348
|
|
|
|
|
|
|
|
|
349
|
1
|
|
33
|
|
|
7
|
my $header = $c->{header} // $c->{ids}; |
|
350
|
1
|
|
33
|
|
|
7
|
my $rownames = $c->{row_names} // $c->{rownames}; |
|
351
|
1
|
|
|
|
|
2
|
my $col_data = $c->{col_data}; |
|
352
|
1
|
|
|
|
|
3
|
my $square = $c->{square}; |
|
353
|
1
|
|
50
|
|
|
4
|
my $fill_rows = $c->{fill_rows} // 1; |
|
354
|
1
|
|
50
|
|
|
6
|
my $sep = $c->{sep} // "\t"; |
|
355
|
1
|
|
33
|
|
|
9
|
my $na_value = $c->{missing} // $c->{na_value} // 'NA'; |
|
|
|
|
50
|
|
|
|
|
|
356
|
1
|
|
|
|
|
3
|
my $quote_is_on = $c->{quote}; |
|
357
|
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
# get the number of rows |
|
359
|
1
|
|
|
|
|
3
|
my $num_rows = scalar @$m; |
|
360
|
|
|
|
|
|
|
# find the longest column |
|
361
|
1
|
0
|
|
|
|
5
|
my $num_cols = max - 1, map { defined $_ ? scalar @$_ : -1 } @$m; |
|
|
0
|
|
|
|
|
0
|
|
|
362
|
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
# if the header is longer, it defines the number of cols |
|
364
|
1
|
50
|
33
|
|
|
8
|
$num_cols = max $num_cols, scalar @$header if ( $header && @$header > 0 ); |
|
365
|
|
|
|
|
|
|
|
|
366
|
1
|
0
|
33
|
|
|
20
|
$rownames = $header |
|
|
|
|
33
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
367
|
|
|
|
|
|
|
if ( $header && @$header > 0 && $rownames && !ref $rownames && @$header >= @$m ); |
|
368
|
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
# adjust num rows if rownames are longer than the |
|
370
|
1
|
50
|
33
|
|
|
14
|
$num_rows = scalar @$rownames if ( ref $rownames eq 'ARRAY' && @$rownames > $num_rows ); |
|
371
|
|
|
|
|
|
|
|
|
372
|
1
|
50
|
|
|
|
5
|
if ($square) { |
|
373
|
0
|
0
|
|
|
|
0
|
$num_rows = $num_cols if ( $num_cols > $num_rows ); |
|
374
|
0
|
0
|
|
|
|
0
|
$num_cols = $num_rows if ( $num_rows > $num_cols ); |
|
375
|
|
|
|
|
|
|
#add one for the id in the first row |
|
376
|
0
|
0
|
|
|
|
0
|
$num_cols++ if ($rownames); |
|
377
|
|
|
|
|
|
|
} |
|
378
|
|
|
|
|
|
|
|
|
379
|
1
|
50
|
0
|
|
|
3
|
confess "error with rownames: not an array" |
|
|
|
|
33
|
|
|
|
|
|
380
|
|
|
|
|
|
|
if ( $rownames && !( ref $rownames eq 'ARRAY' && @$rownames >= @$m ) ); |
|
381
|
1
|
50
|
|
|
|
7
|
confess "no matrix" unless ( defined $m ); |
|
382
|
|
|
|
|
|
|
|
|
383
|
1
|
|
|
|
|
6
|
my ( $fh, $fh_was_open ) = open_on_demand( $dest, '>' ); |
|
384
|
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
#print header if we have header |
|
386
|
1
|
50
|
|
|
|
4
|
if ($header) { |
|
387
|
1
|
|
|
|
|
3
|
my @qhead = @{ _quote( $header, $quote_is_on, $na_value ) }; |
|
|
1
|
|
|
|
|
4
|
|
|
388
|
1
|
50
|
33
|
|
|
11
|
unshift @qhead, '' if ( $rownames && !$c->{r_style_header} ); |
|
389
|
1
|
|
|
|
|
55
|
say $fh join $sep, @qhead; |
|
390
|
|
|
|
|
|
|
} |
|
391
|
|
|
|
|
|
|
|
|
392
|
1
|
50
|
|
|
|
6
|
if ($col_data) { |
|
393
|
0
|
|
|
|
|
0
|
for my $row (@$col_data) { |
|
394
|
0
|
|
|
|
|
0
|
my @qcd = @{ _quote( $row, $quote_is_on, $na_value ) }; |
|
|
0
|
|
|
|
|
0
|
|
|
395
|
0
|
0
|
0
|
|
|
0
|
unshift @qcd, '' if ( $rownames && !$c->{r_style_header} ); |
|
396
|
0
|
|
|
|
|
0
|
say $fh join $sep, @qcd; |
|
397
|
|
|
|
|
|
|
} |
|
398
|
|
|
|
|
|
|
} |
|
399
|
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
#iterate through rows |
|
401
|
1
|
|
|
|
|
5
|
for ( my $i = 0; $i < $num_rows; $i++ ) { |
|
402
|
0
|
|
|
|
|
0
|
my @r; |
|
403
|
|
|
|
|
|
|
#add rowname as first column if desired |
|
404
|
0
|
0
|
0
|
|
|
0
|
push @r, ( $rownames->[$i] // "no_name" ) if ($rownames); |
|
405
|
|
|
|
|
|
|
#add the values |
|
406
|
0
|
0
|
0
|
|
|
0
|
push @r, @{ $m->[$i] // [] } if ( $i < @$m ); |
|
|
0
|
|
|
|
|
0
|
|
|
407
|
|
|
|
|
|
|
#fill the square if desired |
|
408
|
0
|
0
|
0
|
|
|
0
|
if ( $square || $fill_rows ) { |
|
409
|
0
|
|
|
|
|
0
|
my $missing = $num_cols - @r; |
|
410
|
0
|
|
|
|
|
0
|
push @r, (undef) x $missing; |
|
411
|
|
|
|
|
|
|
} |
|
412
|
|
|
|
|
|
|
#print row |
|
413
|
0
|
|
|
|
|
0
|
say $fh join $sep, @{ _quote( \@r, $quote_is_on, $na_value ) }; |
|
|
0
|
|
|
|
|
0
|
|
|
414
|
|
|
|
|
|
|
} |
|
415
|
1
|
50
|
|
|
|
3
|
$fh->close unless ($fh_was_open); |
|
416
|
1
|
|
|
|
|
28
|
return; |
|
417
|
|
|
|
|
|
|
} |
|
418
|
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
sub _quote { |
|
420
|
1
|
|
|
1
|
|
3
|
my ( $f, $q, $na ) = @_; |
|
421
|
1
|
50
|
33
|
|
|
5
|
$na = "$na" if ( $q && defined $na ); |
|
422
|
|
|
|
|
|
|
my @fields = map { |
|
423
|
1
|
50
|
33
|
|
|
4
|
if ( !defined ) { |
|
|
2
|
50
|
|
|
|
18
|
|
|
424
|
0
|
|
|
|
|
0
|
$na; |
|
425
|
|
|
|
|
|
|
} elsif ( !$q || /^\d+$/ ) { |
|
426
|
2
|
|
|
|
|
8
|
$_; |
|
427
|
|
|
|
|
|
|
} else { |
|
428
|
0
|
|
|
|
|
0
|
( my $str = $_ ) =~ s/"/""/g; |
|
429
|
0
|
|
|
|
|
0
|
qq{"$str"}; |
|
430
|
|
|
|
|
|
|
} |
|
431
|
|
|
|
|
|
|
} @$f; |
|
432
|
1
|
|
|
|
|
5
|
return \@fields; |
|
433
|
|
|
|
|
|
|
} |
|
434
|
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
sub xlsx_spew { |
|
436
|
0
|
|
|
0
|
0
|
|
my ( $dest, $m, $c ) = @_; |
|
437
|
|
|
|
|
|
|
#eval "use Excel::Writer::XLSX"; |
|
438
|
|
|
|
|
|
|
#die "could not load Excel::Writer::XLSX $@" if ($@); |
|
439
|
0
|
0
|
|
|
|
|
eval "use Excel::Writer::XLSX; 1" or confess "could not load Excel::Writer::XLSX"; |
|
440
|
|
|
|
|
|
|
|
|
441
|
0
|
|
0
|
|
|
|
my $header = $c->{header} // $c->{ids}; |
|
442
|
0
|
|
0
|
|
|
|
my $rownames = $c->{row_names} // $c->{rownames}; |
|
443
|
0
|
|
0
|
|
|
|
my $sep = $c->{sep} // "\t"; |
|
444
|
0
|
|
0
|
|
|
|
my $na_value = $c->{missing} // $c->{na_value} // 'NA'; |
|
|
|
|
0
|
|
|
|
|
|
445
|
|
|
|
|
|
|
|
|
446
|
0
|
0
|
0
|
|
|
|
$rownames = $header |
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
447
|
|
|
|
|
|
|
if ( $header && @$header > 0 && $rownames && !ref $rownames && @$header == @$m ); |
|
448
|
|
|
|
|
|
|
|
|
449
|
0
|
|
|
|
|
|
my @table; |
|
450
|
|
|
|
|
|
|
#print header if we have header |
|
451
|
0
|
0
|
|
|
|
|
if ($header) { |
|
452
|
0
|
|
|
|
|
|
my @qhead = @$header; |
|
453
|
0
|
0
|
|
|
|
|
unshift @qhead, '' if ($rownames); |
|
454
|
0
|
|
|
|
|
|
push @table, \@qhead; |
|
455
|
|
|
|
|
|
|
} |
|
456
|
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
#iterate through rows |
|
458
|
0
|
|
|
|
|
|
for ( my $i = 0; $i < @$m; $i++ ) { |
|
459
|
0
|
|
|
|
|
|
my @r; |
|
460
|
|
|
|
|
|
|
#add rowname as first column if desired |
|
461
|
0
|
0
|
|
|
|
|
push @r, $rownames->[$i] if ($rownames); |
|
462
|
|
|
|
|
|
|
#add the values |
|
463
|
0
|
|
|
|
|
|
push @r, @{ $m->[$i] }; |
|
|
0
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
#fill the square if desired |
|
465
|
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
#print row |
|
467
|
0
|
|
0
|
|
|
|
push @table, [ map { $_ // $na_value } @r ]; |
|
|
0
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
} |
|
469
|
|
|
|
|
|
|
|
|
470
|
0
|
|
|
|
|
|
my ( $fh, $fh_was_open ) = open_on_demand( $dest, '>' ); |
|
471
|
|
|
|
|
|
|
|
|
472
|
0
|
|
|
|
|
|
my $workbook = Excel::Writer::XLSX->new($fh); |
|
473
|
0
|
|
|
|
|
|
my $worksheet = $workbook->add_worksheet(); |
|
474
|
0
|
0
|
|
|
|
|
$worksheet->keep_leading_zeros() if ( $c->{keep_leading_zeros} ); |
|
475
|
0
|
|
|
|
|
|
$worksheet->write_col( 'A1', \@table ); |
|
476
|
0
|
|
|
|
|
|
$workbook->close; |
|
477
|
0
|
0
|
|
|
|
|
$fh->close unless ($fh_was_open); |
|
478
|
0
|
|
|
|
|
|
return; |
|
479
|
|
|
|
|
|
|
} |
|
480
|
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
sub xlsx_slurp { |
|
482
|
0
|
|
|
0
|
0
|
|
my ( $src, $cc ) = @_; |
|
483
|
|
|
|
|
|
|
#my @m; |
|
484
|
|
|
|
|
|
|
#my ( $fh, $fh_was_open ) = open_on_demand( $src, '<' ); |
|
485
|
|
|
|
|
|
|
|
|
486
|
0
|
0
|
|
|
|
|
eval "use Spreadsheet::ParseXLSX; 1" or confess "could not load Spreadsheet::ParseXLSX"; |
|
487
|
|
|
|
|
|
|
|
|
488
|
0
|
0
|
|
|
|
|
$src = expand_home($src) if ( !ref($src) ); |
|
489
|
|
|
|
|
|
|
|
|
490
|
0
|
|
|
|
|
|
my $parser = Spreadsheet::ParseXLSX->new; |
|
491
|
0
|
|
|
|
|
|
my $workbook = $parser->parse($src); |
|
492
|
0
|
0
|
|
|
|
|
if ( !defined $workbook ) { |
|
493
|
0
|
|
|
|
|
|
confess $parser->error(), ".\n"; |
|
494
|
|
|
|
|
|
|
} |
|
495
|
|
|
|
|
|
|
|
|
496
|
0
|
|
|
|
|
|
my @ws; |
|
497
|
0
|
|
|
|
|
|
for my $worksheet ( $workbook->worksheets() ) { |
|
498
|
0
|
|
|
|
|
|
my @w; |
|
499
|
0
|
|
|
|
|
|
my ( $row_min, $row_max ) = $worksheet->row_range(); |
|
500
|
0
|
|
|
|
|
|
my ( $col_min, $col_max ) = $worksheet->col_range(); |
|
501
|
0
|
|
|
|
|
|
for ( my $i = $row_min; $i <= $row_max; $i++ ) { |
|
502
|
0
|
|
|
|
|
|
my @r; |
|
503
|
0
|
|
|
|
|
|
for ( my $j = $col_min; $j <= $col_max; $j++ ) { |
|
504
|
0
|
|
|
|
|
|
my $e = $worksheet->get_cell( $i, $j ); |
|
505
|
0
|
0
|
|
|
|
|
push @r, ( defined($e) ? $e->unformatted : undef ); |
|
506
|
|
|
|
|
|
|
} |
|
507
|
0
|
|
|
|
|
|
push @w, \@r; |
|
508
|
|
|
|
|
|
|
} |
|
509
|
0
|
|
|
|
|
|
push @ws, \@w; |
|
510
|
|
|
|
|
|
|
} |
|
511
|
0
|
|
|
|
|
|
return \@ws; |
|
512
|
|
|
|
|
|
|
} |
|
513
|
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
1; |
|
515
|
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
__END__ |
|
517
|
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
=head1 NAME |
|
519
|
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
Bio::Gonzales::Matrix::IO - Library for simple matrix IO |
|
521
|
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
524
|
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
use Bio::Gonzales::Matrix::IO qw(lspew mslurp lslurp mspew); |
|
526
|
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
528
|
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
Provides functions for common matrix/list IO. |
|
530
|
|
|
|
|
|
|
|
|
531
|
|
|
|
|
|
|
=head1 SUBROUTINES |
|
532
|
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
=over 4 |
|
534
|
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
=item B<< dict_slurp($filename, \%options) >> |
|
536
|
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
%options = ( |
|
538
|
|
|
|
|
|
|
sep => qr/\t/, |
|
539
|
|
|
|
|
|
|
header => 0, |
|
540
|
|
|
|
|
|
|
skip => -1, |
|
541
|
|
|
|
|
|
|
comment => qr/^\s*#/, |
|
542
|
|
|
|
|
|
|
key_idx => 0, |
|
543
|
|
|
|
|
|
|
val_idx => undef, |
|
544
|
|
|
|
|
|
|
uniq => 0, |
|
545
|
|
|
|
|
|
|
record_filter => undef, |
|
546
|
|
|
|
|
|
|
concat_keys => 1, |
|
547
|
|
|
|
|
|
|
strict => 0 |
|
548
|
|
|
|
|
|
|
); |
|
549
|
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
Setups: |
|
551
|
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
=over 4 |
|
553
|
|
|
|
|
|
|
|
|
554
|
|
|
|
|
|
|
=item uniq = 1 && no val_idx => read in key_idx as hash and set values to 1 |
|
555
|
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
=item uniq = 0 && no val_idx => read in key_idx as hash and set values to the count of keys |
|
557
|
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
=item uniq = 1 && val_idx => read into ( key => [ @values ], ...) |
|
559
|
|
|
|
|
|
|
|
|
560
|
|
|
|
|
|
|
=item uniq = 0 && val_idx => read into ( key => [ [ @values ], [ @more_values ] ], ...) |
|
561
|
|
|
|
|
|
|
|
|
562
|
|
|
|
|
|
|
=item concat_keys |
|
563
|
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
Concatenate the keys by C<< $; >>. If set to 0, key columns are taken in a |
|
565
|
|
|
|
|
|
|
serial fashion and are merged to one big column. |
|
566
|
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
=item uniq = 1 && strict = 1 => confess if two times the same key occurs in the data. |
|
568
|
|
|
|
|
|
|
|
|
569
|
|
|
|
|
|
|
=back |
|
570
|
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
If key_idx is an array, the keys columns are joined by C<$;> to build the hash key. |
|
572
|
|
|
|
|
|
|
|
|
573
|
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
=item B<< mspew($filename, \@matrix, \%options) >> |
|
576
|
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
=item B<< mspew($filehandle, \@matrix, \%options) >> |
|
578
|
|
|
|
|
|
|
|
|
579
|
|
|
|
|
|
|
Save the values in C<@matrix> to a C<$filename> or C<$filehandle>. C<@matrix> |
|
580
|
|
|
|
|
|
|
is an array of arrayrefs: |
|
581
|
|
|
|
|
|
|
|
|
582
|
|
|
|
|
|
|
@matrix = ( |
|
583
|
|
|
|
|
|
|
[ l11, l12, l13 ], |
|
584
|
|
|
|
|
|
|
[ l21, l22, l23 ], |
|
585
|
|
|
|
|
|
|
[ l31, l32, l33 ] |
|
586
|
|
|
|
|
|
|
); |
|
587
|
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
Options: |
|
589
|
|
|
|
|
|
|
|
|
590
|
|
|
|
|
|
|
=over 4 |
|
591
|
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
=item header / ids |
|
593
|
|
|
|
|
|
|
|
|
594
|
|
|
|
|
|
|
Supply a header. Same as |
|
595
|
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
mspew($file, [ \@header, @matrix ]) |
|
597
|
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
=item row_names |
|
599
|
|
|
|
|
|
|
|
|
600
|
|
|
|
|
|
|
Supply row names or if not an array but true, use the header as row names |
|
601
|
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
mspew( $file, $matrix, { row_names => 1 } ); #use header |
|
603
|
|
|
|
|
|
|
mspew( $file, $matrix, { row_names => [ 'row1', '...', 'rown' ] } ); #use supplied row names |
|
604
|
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
|
|
606
|
|
|
|
|
|
|
=item fill_missing_cols |
|
607
|
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
If a row has less columns than the longest row of the matrix, fill it up with empty strings. |
|
609
|
|
|
|
|
|
|
|
|
610
|
|
|
|
|
|
|
=item na_value |
|
611
|
|
|
|
|
|
|
|
|
612
|
|
|
|
|
|
|
Use this value in case undefined values are found. Default is 'NA'. |
|
613
|
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
=item sep |
|
615
|
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
Set a separator for the output file |
|
617
|
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
=item square (default 1) |
|
619
|
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
Add empty columns to fill up to a square. |
|
621
|
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
=back |
|
623
|
|
|
|
|
|
|
|
|
624
|
|
|
|
|
|
|
=item B<< $matrix_ref = mslurp($file, \%config) >> |
|
625
|
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
=item B<< ($matrix_ref, $header_ref, $row_names_ref) = mslurp($file, \%config) >> |
|
627
|
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
Reads in the contents of C<$file> and puts it in a array of arrayrefs. |
|
629
|
|
|
|
|
|
|
|
|
630
|
|
|
|
|
|
|
You can set the delimiter via the configuration by supplying C<< { sep => qr/\t/ } >> as config hash. |
|
631
|
|
|
|
|
|
|
|
|
632
|
|
|
|
|
|
|
Further options with defaults: |
|
633
|
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
%config = ( |
|
635
|
|
|
|
|
|
|
sep => qr/\t/, # set column separator |
|
636
|
|
|
|
|
|
|
header => 0, # parse header |
|
637
|
|
|
|
|
|
|
skip => 0, # skip the first N lines (without header) |
|
638
|
|
|
|
|
|
|
row_names => 0, # parse row names |
|
639
|
|
|
|
|
|
|
comment => qr/^\s*#/ # the comment character |
|
640
|
|
|
|
|
|
|
record_filter => undef # set a function to filter records |
|
641
|
|
|
|
|
|
|
); |
|
642
|
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
=item B<< lspew($fh_or_filename, $list, $config_options) >> |
|
644
|
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
spews out a list of values to a file. It can handle filenames and filehandles, |
|
646
|
|
|
|
|
|
|
but if you supply a handle, you have to close it on your own. The C<$list> can |
|
647
|
|
|
|
|
|
|
be a |
|
648
|
|
|
|
|
|
|
|
|
649
|
|
|
|
|
|
|
=over 4 |
|
650
|
|
|
|
|
|
|
|
|
651
|
|
|
|
|
|
|
=item hash ref of array refs |
|
652
|
|
|
|
|
|
|
|
|
653
|
|
|
|
|
|
|
results in |
|
654
|
|
|
|
|
|
|
keya avalue0 avalue1 |
|
655
|
|
|
|
|
|
|
keyb bvalue0 bvalue1 |
|
656
|
|
|
|
|
|
|
... |
|
657
|
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
=item hash ref |
|
659
|
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
results in |
|
661
|
|
|
|
|
|
|
keya valuea |
|
662
|
|
|
|
|
|
|
keyb valueb |
|
663
|
|
|
|
|
|
|
... |
|
664
|
|
|
|
|
|
|
|
|
665
|
|
|
|
|
|
|
=item array ref |
|
666
|
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
results in |
|
668
|
|
|
|
|
|
|
value0 |
|
669
|
|
|
|
|
|
|
value1 |
|
670
|
|
|
|
|
|
|
... |
|
671
|
|
|
|
|
|
|
|
|
672
|
|
|
|
|
|
|
=back |
|
673
|
|
|
|
|
|
|
|
|
674
|
|
|
|
|
|
|
C<$config_options> is a hash ref. It can take the options: |
|
675
|
|
|
|
|
|
|
|
|
676
|
|
|
|
|
|
|
$config_options = { |
|
677
|
|
|
|
|
|
|
delim => "\t", |
|
678
|
|
|
|
|
|
|
}; |
|
679
|
|
|
|
|
|
|
|
|
680
|
|
|
|
|
|
|
|
|
681
|
|
|
|
|
|
|
=back |
|
682
|
|
|
|
|
|
|
|
|
683
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
684
|
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
=head1 AUTHOR |
|
686
|
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
jw bargsten, C<< <joachim.bargsten at wur.nl> >> |
|
688
|
|
|
|
|
|
|
|
|
689
|
|
|
|
|
|
|
=cut |