line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Pheno::Ranker::IO; |
2
|
|
|
|
|
|
|
|
3
|
3
|
|
|
3
|
|
24
|
use strict; |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
96
|
|
4
|
3
|
|
|
3
|
|
16
|
use warnings; |
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
95
|
|
5
|
3
|
|
|
3
|
|
18
|
use autodie; |
|
3
|
|
|
|
|
18
|
|
|
3
|
|
|
|
|
36
|
|
6
|
3
|
|
|
3
|
|
16934
|
use feature qw(say); |
|
3
|
|
|
|
|
15
|
|
|
3
|
|
|
|
|
307
|
|
7
|
3
|
|
|
3
|
|
4633
|
use Path::Tiny; |
|
3
|
|
|
|
|
40150
|
|
|
3
|
|
|
|
|
176
|
|
8
|
3
|
|
|
3
|
|
31
|
use File::Basename; |
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
249
|
|
9
|
3
|
|
|
3
|
|
19
|
use File::Spec::Functions qw(catdir catfile); |
|
3
|
|
|
|
|
7
|
|
|
3
|
|
|
|
|
162
|
|
10
|
3
|
|
|
3
|
|
17
|
use List::Util qw(any); |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
158
|
|
11
|
3
|
|
|
3
|
|
1399
|
use YAML::XS qw(LoadFile DumpFile); |
|
3
|
|
|
|
|
9075
|
|
|
3
|
|
|
|
|
212
|
|
12
|
3
|
|
|
3
|
|
3581
|
use JSON::XS; |
|
3
|
|
|
|
|
16159
|
|
|
3
|
|
|
|
|
174
|
|
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
#use Sort::Naturally qw(nsort); |
15
|
3
|
|
|
3
|
|
20
|
use Exporter 'import'; |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
140
|
|
16
|
|
|
|
|
|
|
our @EXPORT = |
17
|
|
|
|
|
|
|
qw(serialize_hashes write_alignment io_yaml_or_json read_json read_yaml write_json write_array2txt array2object validate_json write_poi coverage_stats append_and_rename_primary_key); |
18
|
3
|
|
|
3
|
|
15
|
use constant DEVEL_MODE => 0; |
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
4435
|
|
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
######################### |
21
|
|
|
|
|
|
|
######################### |
22
|
|
|
|
|
|
|
# SUBROUTINES FOR I/O # |
23
|
|
|
|
|
|
|
######################### |
24
|
|
|
|
|
|
|
######################### |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
sub serialize_hashes { |
27
|
|
|
|
|
|
|
|
28
|
0
|
|
|
0
|
0
|
0
|
my $arg = shift; |
29
|
0
|
|
|
|
|
0
|
my $data = $arg->{data}; |
30
|
0
|
|
|
|
|
0
|
my $export_basename = $arg->{export_basename}; |
31
|
|
|
|
|
|
|
write_json( |
32
|
|
|
|
|
|
|
{ data => $data->{$_}, filepath => qq/$export_basename.$_.json/ } ) |
33
|
0
|
|
|
|
|
0
|
for keys %{$data}; |
|
0
|
|
|
|
|
0
|
|
34
|
0
|
|
|
|
|
0
|
return 1; |
35
|
|
|
|
|
|
|
} |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
sub write_alignment { |
38
|
|
|
|
|
|
|
|
39
|
1
|
|
|
1
|
0
|
2
|
my $arg = shift; |
40
|
1
|
|
|
|
|
3
|
my $basename = $arg->{align}; |
41
|
1
|
|
|
|
|
3
|
my $ascii = $arg->{ascii}; |
42
|
1
|
|
|
|
|
1
|
my $dataframe = $arg->{dataframe}; |
43
|
1
|
|
|
|
|
3
|
my $csv = $arg->{csv}; |
44
|
1
|
|
|
|
|
6
|
my %hash = ( |
45
|
|
|
|
|
|
|
'.txt' => $ascii, |
46
|
|
|
|
|
|
|
'.csv' => $dataframe, |
47
|
|
|
|
|
|
|
'.target.csv' => $csv |
48
|
|
|
|
|
|
|
); |
49
|
|
|
|
|
|
|
|
50
|
1
|
|
|
|
|
4
|
for my $key ( keys %hash ) { |
51
|
3
|
|
|
|
|
8
|
my $output = $basename . $key; |
52
|
3
|
|
|
|
|
13
|
write_array2txt( { filepath => $output, data => $hash{$key} } ); |
53
|
|
|
|
|
|
|
} |
54
|
1
|
|
|
|
|
8
|
return 1; |
55
|
|
|
|
|
|
|
} |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
sub io_yaml_or_json { |
58
|
|
|
|
|
|
|
|
59
|
7
|
|
|
7
|
0
|
19
|
my $arg = shift; |
60
|
7
|
|
|
|
|
17
|
my $file = $arg->{filepath}; |
61
|
7
|
|
|
|
|
15
|
my $mode = $arg->{mode}; |
62
|
7
|
50
|
|
|
|
25
|
my $data = $mode eq 'write' ? $arg->{data} : undef; |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
# Checking only for qw(.yaml .yml .json) |
65
|
7
|
|
|
|
|
25
|
my @exts = qw(.yaml .yml .json); |
66
|
7
|
|
|
|
|
37
|
my $msg = qq(Can't recognize <$file> extension. Extensions allowed are: ) |
67
|
|
|
|
|
|
|
. join ',', @exts; |
68
|
7
|
|
|
|
|
490
|
my ( undef, undef, $ext ) = fileparse( $file, @exts ); |
69
|
7
|
50
|
|
21
|
|
66
|
die $msg unless any { $_ eq $ext } @exts; |
|
21
|
|
|
|
|
61
|
|
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
# To simplify return values, we create a hash |
72
|
7
|
|
|
|
|
37
|
$ext =~ tr/a.//d; # Unify $ext (delete 'a' and '.') |
73
|
7
|
|
|
|
|
74
|
my $return = { |
74
|
|
|
|
|
|
|
read => { json => \&read_json, yml => \&read_yaml }, |
75
|
|
|
|
|
|
|
write => { json => \&write_json, yml => \&write_yaml } |
76
|
|
|
|
|
|
|
}; |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
# We return according to the mode (read or write) and format |
79
|
|
|
|
|
|
|
return $mode eq 'read' |
80
|
|
|
|
|
|
|
? $return->{$mode}{$ext}->($file) |
81
|
7
|
50
|
|
|
|
48
|
: $return->{$mode}{$ext}->( { filepath => $file, data => $data } ); |
82
|
|
|
|
|
|
|
} |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
sub read_json { |
85
|
|
|
|
|
|
|
|
86
|
7
|
|
|
7
|
0
|
17
|
my $file = shift; |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
# NB: hp.json is non-UTF8 |
89
|
|
|
|
|
|
|
# malformed UTF-8 character in JSON string, at character offset 680 (before "\x{fffd}r"\n },...") |
90
|
7
|
50
|
|
|
|
56
|
my $str = |
91
|
|
|
|
|
|
|
$file =~ /hp\.json/ ? path($file)->slurp : path($file)->slurp_utf8; |
92
|
7
|
|
|
|
|
36283
|
return decode_json($str); # Decode to Perl data structure |
93
|
|
|
|
|
|
|
} |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
sub read_yaml { |
96
|
|
|
|
|
|
|
|
97
|
12
|
|
|
12
|
0
|
54
|
return LoadFile(shift); # Decode to Perl data structure |
98
|
|
|
|
|
|
|
} |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
sub write_json { |
101
|
|
|
|
|
|
|
|
102
|
0
|
|
|
0
|
0
|
0
|
my $arg = shift; |
103
|
0
|
|
|
|
|
0
|
my $file = $arg->{filepath}; |
104
|
0
|
|
|
|
|
0
|
my $json_data = $arg->{data}; |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
# Note that canonical DOES not match the order of nsort from Sort:.Naturally |
107
|
0
|
|
|
|
|
0
|
my $json = JSON::XS->new->utf8->canonical->pretty->encode($json_data); |
108
|
0
|
|
|
|
|
0
|
path($file)->spew_utf8($json); |
109
|
0
|
|
|
|
|
0
|
return 1; |
110
|
|
|
|
|
|
|
} |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
sub write_yaml { |
113
|
|
|
|
|
|
|
|
114
|
0
|
|
|
0
|
0
|
0
|
my $arg = shift; |
115
|
0
|
|
|
|
|
0
|
my $file = $arg->{filepath}; |
116
|
0
|
|
|
|
|
0
|
my $json_data = $arg->{data}; |
117
|
0
|
|
|
|
|
0
|
local $YAML::XS::Boolean = 'JSON::PP'; |
118
|
0
|
|
|
|
|
0
|
DumpFile( $file, $json_data ); |
119
|
0
|
|
|
|
|
0
|
return 1; |
120
|
|
|
|
|
|
|
} |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
sub write_array2txt { |
123
|
|
|
|
|
|
|
|
124
|
4
|
|
|
4
|
0
|
7
|
my $arg = shift; |
125
|
4
|
|
|
|
|
6
|
my $file = $arg->{filepath}; |
126
|
4
|
|
|
|
|
8
|
my $data = $arg->{data}; |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
# Watch out for RAM usage!!! |
129
|
4
|
|
|
|
|
16
|
path($file)->spew( join( "\n", @$data ) . "\n" ); |
130
|
4
|
|
|
|
|
19910
|
return 1; |
131
|
|
|
|
|
|
|
} |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
sub write_poi { |
134
|
|
|
|
|
|
|
|
135
|
0
|
|
|
0
|
0
|
0
|
my $arg = shift; |
136
|
0
|
|
|
|
|
0
|
my $ref_data = $arg->{ref_data}; |
137
|
0
|
|
|
|
|
0
|
my $poi = $arg->{poi}; |
138
|
0
|
|
|
|
|
0
|
my $poi_out_dir = $arg->{poi_out_dir}; |
139
|
0
|
|
|
|
|
0
|
my $primary_key = $arg->{primary_key}; |
140
|
0
|
|
|
|
|
0
|
my $verbose = $arg->{verbose}; |
141
|
0
|
|
|
|
|
0
|
for my $name (@$poi) { |
142
|
0
|
|
|
|
|
0
|
my ($match) = grep { $name eq $_->{$primary_key} } @$ref_data; |
|
0
|
|
|
|
|
0
|
|
143
|
0
|
0
|
|
|
|
0
|
if ($match) { |
144
|
0
|
|
|
|
|
0
|
my $out = catfile( $poi_out_dir, "$name.json" ); |
145
|
0
|
0
|
|
|
|
0
|
say "Writting <$out>" if $verbose; |
146
|
0
|
|
|
|
|
0
|
write_json( { filepath => $out, data => $match } ); |
147
|
|
|
|
|
|
|
} |
148
|
|
|
|
|
|
|
else { |
149
|
0
|
|
|
|
|
0
|
warn |
150
|
|
|
|
|
|
|
"No individual found for <$name>. Are you sure you used the right prefix?\n"; |
151
|
|
|
|
|
|
|
} |
152
|
|
|
|
|
|
|
} |
153
|
0
|
|
|
|
|
0
|
return 1; |
154
|
|
|
|
|
|
|
} |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
sub array2object { |
157
|
|
|
|
|
|
|
|
158
|
1
|
|
|
1
|
0
|
5
|
my $data = shift; |
159
|
1
|
50
|
|
|
|
7
|
if ( ref $data eq ref [] ) { |
160
|
0
|
|
|
|
|
0
|
my $n = @$data; |
161
|
0
|
0
|
|
|
|
0
|
if ( $n == 1 ) { |
162
|
0
|
|
|
|
|
0
|
$data = $data->[0]; |
163
|
|
|
|
|
|
|
} |
164
|
|
|
|
|
|
|
else { |
165
|
0
|
|
|
|
|
0
|
die |
166
|
|
|
|
|
|
|
"Sorry, your file has $n patients but only 1 patient is allowed with <-t>\n"; |
167
|
|
|
|
|
|
|
} |
168
|
|
|
|
|
|
|
} |
169
|
1
|
|
|
|
|
3
|
return $data; |
170
|
|
|
|
|
|
|
} |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
sub validate_json { |
173
|
|
|
|
|
|
|
|
174
|
8
|
|
|
8
|
0
|
17
|
my $file = shift; |
175
|
8
|
100
|
66
|
|
|
249
|
my $data = ( $file && -f $file ) ? read_yaml($file) : undef; |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
# Premature return with undef if the file does not exist |
178
|
8
|
100
|
|
|
|
3337
|
return undef unless defined $data; #perlcritic severity 5 |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
# schema for the weights file |
181
|
4
|
|
|
|
|
47
|
my $schema = { |
182
|
|
|
|
|
|
|
'$schema' => 'http://json-schema.org/draft-07/schema#', |
183
|
|
|
|
|
|
|
'type' => 'object', |
184
|
|
|
|
|
|
|
'patternProperties' => { |
185
|
|
|
|
|
|
|
'^\w+([.:\w]*\w+)?$' => { |
186
|
|
|
|
|
|
|
'type' => 'integer', |
187
|
|
|
|
|
|
|
}, |
188
|
|
|
|
|
|
|
}, |
189
|
|
|
|
|
|
|
'additionalProperties' => JSON::XS::false, |
190
|
|
|
|
|
|
|
}; |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
# Load at runtime |
193
|
4
|
|
|
|
|
2384
|
require JSON::Validator; |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
# Create object and load schema |
196
|
4
|
|
|
|
|
1483479
|
my $jv = JSON::Validator->new; |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
# Load schema in object |
199
|
4
|
|
|
|
|
98
|
$jv->schema($schema); |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
# Validate data |
202
|
4
|
|
|
|
|
32862
|
my @errors = $jv->validate($data); |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
# Show error if any |
205
|
4
|
100
|
50
|
|
|
2264
|
say_errors( \@errors ) and die if @errors; |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
# return data if ok |
208
|
2
|
|
|
|
|
41
|
return $data; |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
} |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
sub say_errors { |
213
|
|
|
|
|
|
|
|
214
|
2
|
|
|
2
|
0
|
5
|
my $errors = shift; |
215
|
2
|
50
|
|
|
|
4
|
if ( @{$errors} ) { |
|
2
|
|
|
|
|
6
|
|
216
|
2
|
|
|
|
|
4
|
say join "\n", @{$errors}; |
|
2
|
|
|
|
|
7
|
|
217
|
|
|
|
|
|
|
} |
218
|
2
|
|
|
|
|
266
|
return 1; |
219
|
|
|
|
|
|
|
} |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
sub coverage_stats { |
222
|
|
|
|
|
|
|
|
223
|
3
|
|
|
3
|
|
26
|
use Data::Dumper; |
|
3
|
|
|
|
|
14
|
|
|
3
|
|
|
|
|
1275
|
|
224
|
6
|
|
|
6
|
0
|
12
|
my $data = shift; |
225
|
6
|
|
|
|
|
13
|
my $coverage = {}; |
226
|
6
|
|
|
|
|
17
|
for my $item (@$data) { |
227
|
199
|
|
|
|
|
465
|
for my $key ( keys %$item ) { |
228
|
1115
|
|
|
|
|
1380
|
$coverage->{$key}++; |
229
|
|
|
|
|
|
|
} |
230
|
|
|
|
|
|
|
} |
231
|
6
|
|
|
|
|
40
|
return { cohort_size => scalar @$data, coverage_terms => $coverage }; |
232
|
|
|
|
|
|
|
} |
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
sub append_and_rename_primary_key { |
235
|
|
|
|
|
|
|
|
236
|
6
|
|
|
6
|
0
|
19
|
my $arg = shift; |
237
|
6
|
|
|
|
|
18
|
my $ref_data = $arg->{ref_data}; |
238
|
6
|
|
|
|
|
14
|
my $append_prefixes = $arg->{append_prefixes}; |
239
|
6
|
|
|
|
|
15
|
my $primary_key = $arg->{primary_key}; |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
# Premature return if @$ref_data == 1 (only 1 cohort) |
242
|
|
|
|
|
|
|
# *** IMPORTANT *** |
243
|
|
|
|
|
|
|
# $ref_data->[0] can be ARRAY or HASH |
244
|
|
|
|
|
|
|
# We force HASH to be ARRAY |
245
|
6
|
50
|
|
|
|
59
|
return ref $ref_data->[0] eq ref {} ? [ $ref_data->[0] ] : $ref_data->[0] |
|
|
50
|
|
|
|
|
|
246
|
|
|
|
|
|
|
if @$ref_data == 1; |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
# NB: for is a bit faster than map |
249
|
0
|
|
|
|
|
|
my $count = 1; |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
# We have to load into a new array data |
252
|
0
|
|
|
|
|
|
my $data; |
253
|
0
|
|
|
|
|
|
for my $item (@$ref_data) { |
254
|
|
|
|
|
|
|
|
255
|
0
|
0
|
|
|
|
|
my $prefix = |
256
|
|
|
|
|
|
|
$append_prefixes->[ $count - 1 ] |
257
|
|
|
|
|
|
|
? $append_prefixes->[ $count - 1 ] . '_' |
258
|
|
|
|
|
|
|
: 'C' . $count . '_'; |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
# ARRAY |
261
|
0
|
0
|
|
|
|
|
if ( ref $item eq ref [] ) { |
262
|
0
|
|
|
|
|
|
for my $individual (@$item) { |
263
|
|
|
|
|
|
|
$individual->{$primary_key} = |
264
|
0
|
|
|
|
|
|
$prefix . $individual->{$primary_key}; |
265
|
0
|
|
|
|
|
|
push @$data, $individual; |
266
|
|
|
|
|
|
|
} |
267
|
|
|
|
|
|
|
} |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
# Object |
270
|
|
|
|
|
|
|
else { |
271
|
0
|
|
|
|
|
|
$item->{$primary_key} = $prefix . $item->{$primary_key}; |
272
|
0
|
|
|
|
|
|
push @$data, $item; |
273
|
|
|
|
|
|
|
} |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
# Add $count |
276
|
0
|
|
|
|
|
|
$count++; |
277
|
|
|
|
|
|
|
} |
278
|
|
|
|
|
|
|
|
279
|
0
|
|
|
|
|
|
return $data; |
280
|
|
|
|
|
|
|
} |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
1; |