File Coverage

lib/Pheno/Ranker/IO.pm

Criterion	Covered	Total	%
statement	100	149	67.1
branch	14	32	43.7
condition	3	5	60.0
subroutine	24	28	85.7
pod	0	14	0.0
total	141	228	61.8

line	stmt	bran	cond	sub	pod	time	code
1							package Pheno::Ranker::IO;
2
3	3			3		24	use strict;
	3					6
	3					96
4	3			3		16	use warnings;
	3					5
	3					95
5	3			3		18	use autodie;
	3					18
	3					36
6	3			3		16934	use feature qw(say);
	3					15
	3					307
7	3			3		4633	use Path::Tiny;
	3					40150
	3					176
8	3			3		31	use File::Basename;
	3					5
	3					249
9	3			3		19	use File::Spec::Functions qw(catdir catfile);
	3					7
	3					162
10	3			3		17	use List::Util qw(any);
	3					6
	3					158
11	3			3		1399	use YAML::XS qw(LoadFile DumpFile);
	3					9075
	3					212
12	3			3		3581	use JSON::XS;
	3					16159
	3					174
13
14							#use Sort::Naturally qw(nsort);
15	3			3		20	use Exporter 'import';
	3					6
	3					140
16							our @EXPORT =
17							qw(serialize_hashes write_alignment io_yaml_or_json read_json read_yaml write_json write_array2txt array2object validate_json write_poi coverage_stats append_and_rename_primary_key);
18	3			3		15	use constant DEVEL_MODE => 0;
	3					5
	3					4435
19
20							#########################
21							#########################
22							# SUBROUTINES FOR I/O #
23							#########################
24							#########################
25
26							sub serialize_hashes {
27
28	0			0	0	0	my $arg = shift;
29	0					0	my $data = $arg->{data};
30	0					0	my $export_basename = $arg->{export_basename};
31							write_json(
32							{ data => $data->{$_}, filepath => qq/$export_basename.$_.json/ } )
33	0					0	for keys %{$data};
	0					0
34	0					0	return 1;
35							}
36
37							sub write_alignment {
38
39	1			1	0	2	my $arg = shift;
40	1					3	my $basename = $arg->{align};
41	1					3	my $ascii = $arg->{ascii};
42	1					1	my $dataframe = $arg->{dataframe};
43	1					3	my $csv = $arg->{csv};
44	1					6	my %hash = (
45							'.txt' => $ascii,
46							'.csv' => $dataframe,
47							'.target.csv' => $csv
48							);
49
50	1					4	for my $key ( keys %hash ) {
51	3					8	my $output = $basename . $key;
52	3					13	write_array2txt( { filepath => $output, data => $hash{$key} } );
53							}
54	1					8	return 1;
55							}
56
57							sub io_yaml_or_json {
58
59	7			7	0	19	my $arg = shift;
60	7					17	my $file = $arg->{filepath};
61	7					15	my $mode = $arg->{mode};
62	7	50				25	my $data = $mode eq 'write' ? $arg->{data} : undef;
63
64							# Checking only for qw(.yaml .yml .json)
65	7					25	my @exts = qw(.yaml .yml .json);
66	7					37	my $msg = qq(Can't recognize <$file> extension. Extensions allowed are: )
67							. join ',', @exts;
68	7					490	my ( undef, undef, $ext ) = fileparse( $file, @exts );
69	7	50		21		66	die $msg unless any { $_ eq $ext } @exts;
	21					61
70
71							# To simplify return values, we create a hash
72	7					37	$ext =~ tr/a.//d; # Unify $ext (delete 'a' and '.')
73	7					74	my $return = {
74							read => { json => \&read_json, yml => \&read_yaml },
75							write => { json => \&write_json, yml => \&write_yaml }
76							};
77
78							# We return according to the mode (read or write) and format
79							return $mode eq 'read'
80							? $return->{$mode}{$ext}->($file)
81	7	50				48	: $return->{$mode}{$ext}->( { filepath => $file, data => $data } );
82							}
83
84							sub read_json {
85
86	7			7	0	17	my $file = shift;
87
88							# NB: hp.json is non-UTF8
89							# malformed UTF-8 character in JSON string, at character offset 680 (before "\x{fffd}r"\n },...")
90	7	50				56	my $str =
91							$file =~ /hp\.json/ ? path($file)->slurp : path($file)->slurp_utf8;
92	7					36283	return decode_json($str); # Decode to Perl data structure
93							}
94
95							sub read_yaml {
96
97	12			12	0	54	return LoadFile(shift); # Decode to Perl data structure
98							}
99
100							sub write_json {
101
102	0			0	0	0	my $arg = shift;
103	0					0	my $file = $arg->{filepath};
104	0					0	my $json_data = $arg->{data};
105
106							# Note that canonical DOES not match the order of nsort from Sort:.Naturally
107	0					0	my $json = JSON::XS->new->utf8->canonical->pretty->encode($json_data);
108	0					0	path($file)->spew_utf8($json);
109	0					0	return 1;
110							}
111
112							sub write_yaml {
113
114	0			0	0	0	my $arg = shift;
115	0					0	my $file = $arg->{filepath};
116	0					0	my $json_data = $arg->{data};
117	0					0	local $YAML::XS::Boolean = 'JSON::PP';
118	0					0	DumpFile( $file, $json_data );
119	0					0	return 1;
120							}
121
122							sub write_array2txt {
123
124	4			4	0	7	my $arg = shift;
125	4					6	my $file = $arg->{filepath};
126	4					8	my $data = $arg->{data};
127
128							# Watch out for RAM usage!!!
129	4					16	path($file)->spew( join( "\n", @$data ) . "\n" );
130	4					19910	return 1;
131							}
132
133							sub write_poi {
134
135	0			0	0	0	my $arg = shift;
136	0					0	my $ref_data = $arg->{ref_data};
137	0					0	my $poi = $arg->{poi};
138	0					0	my $poi_out_dir = $arg->{poi_out_dir};
139	0					0	my $primary_key = $arg->{primary_key};
140	0					0	my $verbose = $arg->{verbose};
141	0					0	for my $name (@$poi) {
142	0					0	my ($match) = grep { $name eq $_->{$primary_key} } @$ref_data;
	0					0
143	0	0				0	if ($match) {
144	0					0	my $out = catfile( $poi_out_dir, "$name.json" );
145	0	0				0	say "Writting <$out>" if $verbose;
146	0					0	write_json( { filepath => $out, data => $match } );
147							}
148							else {
149	0					0	warn
150							"No individual found for <$name>. Are you sure you used the right prefix?\n";
151							}
152							}
153	0					0	return 1;
154							}
155
156							sub array2object {
157
158	1			1	0	5	my $data = shift;
159	1	50				7	if ( ref $data eq ref [] ) {
160	0					0	my $n = @$data;
161	0	0				0	if ( $n == 1 ) {
162	0					0	$data = $data->[0];
163							}
164							else {
165	0					0	die
166							"Sorry, your file has $n patients but only 1 patient is allowed with <-t>\n";
167							}
168							}
169	1					3	return $data;
170							}
171
172							sub validate_json {
173
174	8			8	0	17	my $file = shift;
175	8	100	66			249	my $data = ( $file && -f $file ) ? read_yaml($file) : undef;
176
177							# Premature return with undef if the file does not exist
178	8	100				3337	return undef unless defined $data; #perlcritic severity 5
179
180							# schema for the weights file
181	4					47	my $schema = {
182							'$schema' => 'http://json-schema.org/draft-07/schema#',
183							'type' => 'object',
184							'patternProperties' => {
185							'^\w+([.:\w]*\w+)?$' => {
186							'type' => 'integer',
187							},
188							},
189							'additionalProperties' => JSON::XS::false,
190							};
191
192							# Load at runtime
193	4					2384	require JSON::Validator;
194
195							# Create object and load schema
196	4					1483479	my $jv = JSON::Validator->new;
197
198							# Load schema in object
199	4					98	$jv->schema($schema);
200
201							# Validate data
202	4					32862	my @errors = $jv->validate($data);
203
204							# Show error if any
205	4	100	50			2264	say_errors( \@errors ) and die if @errors;
206
207							# return data if ok
208	2					41	return $data;
209
210							}
211
212							sub say_errors {
213
214	2			2	0	5	my $errors = shift;
215	2	50				4	if ( @{$errors} ) {
	2					6
216	2					4	say join "\n", @{$errors};
	2					7
217							}
218	2					266	return 1;
219							}
220
221							sub coverage_stats {
222
223	3			3		26	use Data::Dumper;
	3					14
	3					1275
224	6			6	0	12	my $data = shift;
225	6					13	my $coverage = {};
226	6					17	for my $item (@$data) {
227	199					465	for my $key ( keys %$item ) {
228	1115					1380	$coverage->{$key}++;
229							}
230							}
231	6					40	return { cohort_size => scalar @$data, coverage_terms => $coverage };
232							}
233
234							sub append_and_rename_primary_key {
235
236	6			6	0	19	my $arg = shift;
237	6					18	my $ref_data = $arg->{ref_data};
238	6					14	my $append_prefixes = $arg->{append_prefixes};
239	6					15	my $primary_key = $arg->{primary_key};
240
241							# Premature return if @$ref_data == 1 (only 1 cohort)
242							# * IMPORTANT *
243							# $ref_data->[0] can be ARRAY or HASH
244							# We force HASH to be ARRAY
245	6	50				59	return ref $ref_data->[0] eq ref {} ? [ $ref_data->[0] ] : $ref_data->[0]
		50
246							if @$ref_data == 1;
247
248							# NB: for is a bit faster than map
249	0						my $count = 1;
250
251							# We have to load into a new array data
252	0						my $data;
253	0						for my $item (@$ref_data) {
254
255	0	0					my $prefix =
256							$append_prefixes->[ $count - 1 ]
257							? $append_prefixes->[ $count - 1 ] . '_'
258							: 'C' . $count . '_';
259
260							# ARRAY
261	0	0					if ( ref $item eq ref [] ) {
262	0						for my $individual (@$item) {
263							$individual->{$primary_key} =
264	0						$prefix . $individual->{$primary_key};
265	0						push @$data, $individual;
266							}
267							}
268
269							# Object
270							else {
271	0						$item->{$primary_key} = $prefix . $item->{$primary_key};
272	0						push @$data, $item;
273							}
274
275							# Add $count
276	0						$count++;
277							}
278
279	0						return $data;
280							}
281
282							1;