File Coverage

lib/Convert/Pheno.pm

Criterion	Covered	Total	%
statement	216	245	88.1
branch	43	68	63.2
condition	12	20	60.0
subroutine	39	42	92.8
pod	0	13	0.0
total	310	388	79.9

line	stmt	bran	cond	sub	pod	time	code
1							package Convert::Pheno;
2
3	6			6		648144	use strict;
	6					65
	6					174
4	6			6		30	use warnings;
	6					11
	6					175
5	6			6		7215	use autodie;
	6					102343
	6					31
6	6			6		42376	use feature qw(say);
	6					22
	6					788
7	6			6		2053	use File::Spec::Functions qw(catdir catfile);
	6					3707
	6					445
8	6			6		809	use Data::Dumper;
	6					7612
	6					323
9	6			6		5833	use Path::Tiny;
	6					82835
	6					363
10	6			6		48	use File::Basename;
	6					14
	6					676
11	6			6		3572	use File::ShareDir::ProjectDistDir;
	6					251516
	6					50
12	6			6		3692	use List::Util qw(any uniq);
	6					12
	6					782
13	6			6		50	use Carp qw(confess);
	6					23
	6					278
14	6			6		2979	use XML::Fast;
	6					95880
	6					349
15	6			6		3682	use Moo;
	6					45525
	6					31
16	6			6		15512	use Types::Standard qw(Str Int Num Enum ArrayRef Undef);
	6					706661
	6					70
17	6			6		20033	use File::ShareDir::ProjectDistDir qw(dist_dir);
	6					16
	6					96
18
19							#use Devel::Size qw(size total_size);
20	6			6		6597	use Convert::Pheno::CSV;
	6					26
	6					601
21	6			6		52	use Convert::Pheno::IO;
	6					14
	6					366
22	6			6		43	use Convert::Pheno::SQLite;
	6					15
	6					663
23	6			6		47	use Convert::Pheno::Mapping;
	6					27
	6					636
24	6			6		44	use Convert::Pheno::OMOP;
	6					18
	6					698
25	6			6		3139	use Convert::Pheno::PXF;
	6					22
	6					359
26	6			6		2687	use Convert::Pheno::BFF;
	6					14
	6					403
27	6			6		2482	use Convert::Pheno::CDISC;
	6					23
	6					336
28	6			6		45	use Convert::Pheno::REDCap;
	6					13
	6					230
29
30	6			6		41	use Exporter 'import';
	6					13
	6					257
31							our @EXPORT =
32							qw($VERSION io_yaml_or_json omop2bff_stream_processing share_dir); # Symbols imported by default
33
34							#our @EXPORT_OK = qw(foo bar); # Symbols imported by request
35
36	6			6		28	use constant DEVEL_MODE => 0;
	6					13
	6					20883
37
38							# Global variables:
39							our $VERSION = '0.12_3';
40							our $share_dir = dist_dir('Convert-Pheno');
41
42							############################################
43							# Start declaring attributes for the class #
44							############################################
45
46							# Complex defaults here
47							has search => (
48
49							default => 'exact',
50							is => 'ro',
51							coerce => sub { $_[0] // 'exact' },
52							isa => Enum [qw(exact mixed)]
53							);
54
55							has text_similarity_method => (
56
57							#default => 'cosine',
58							is => 'ro',
59							coerce => sub { $_[0] // 'cosine' },
60							isa => Enum [qw(cosine dice)]
61							);
62
63							has min_text_similarity_score => (
64
65							#default => 0.8,
66							is => 'ro',
67							coerce => sub { $_[0] // 0.8 },
68							isa => sub {
69							die "Only values between 0 .. 1 supported!"
70							unless ( $_[0] >= 0.0 && $_[0] <= 1.0 );
71							}
72							);
73
74							has username => (
75
76							#default => ( $ENV{LOGNAME} \|\| $ENV{USER} \|\| getpwuid($<) ) , # getpwuid not implemented in Windows
77							default => $ENV{'LOGNAME'} \|\| $ENV{'USER'} \|\| $ENV{'USERNAME'} \|\| 'dummy-user',
78							is => 'ro',
79							coerce => sub {
80							$_[0] // ( $ENV{'LOGNAME'} \|\| $ENV{'USER'} \|\| $ENV{'USERNAME'} \|\| 'dummy-user' );
81							},
82							isa => Str
83							);
84
85							has max_lines_sql => (
86							default => 500, # Limit to speed up runtime
87							is => 'ro',
88							coerce => sub { $_[0] // 500 },
89							isa => Int
90							);
91
92							has omop_tables => (
93
94							# Table <CONCEPT> is always required
95							default => sub { [@omop_essential_tables] },
96							coerce => sub {
97							@{ $_[0] }
98							? $_[0] =
99							[ map { uc($_) } ( uniq( @{ $_[0] }, 'CONCEPT', 'PERSON' ) ) ]
100							: \@omop_essential_tables;
101							},
102							is => 'rw',
103							isa => ArrayRef
104							);
105
106							has exposures_file => (
107
108							default =>
109							catfile( $share_dir, 'db', '/concepts_candidates_2_exposure.csv' ),
110							coerce => sub {
111							$_[0]
112							// catfile( $share_dir, 'db', 'concepts_candidates_2_exposure.csv' );
113							},
114							is => 'ro',
115							isa => Str
116							);
117
118							# Miscellaneous attributes here
119							has [qw /test print_hidden_labels self_validate_schema path_to_ohdsi_db/] =>
120							( default => undef, is => 'ro' );
121
122							has [qw /stream ohdsi_db/] => ( default => 0, is => 'ro' );
123
124							has [qw /in_files/] => ( default => sub { [] }, is => 'ro' );
125
126							has [
127							qw /out_file out_dir in_textfile in_file sep sql2csv redcap_dictionary mapping_file schema_file debug log verbose/
128							] => ( is => 'ro' );
129
130							has [qw /data method/] => ( is => 'rw' );
131
132							##########################################
133							# End declaring attributes for the class #
134							##########################################
135
136							# NB: In general, we'll only display terms that exist and have content
137
138							#############
139							#############
140							# PXF2BFF #
141							#############
142							#############
143
144							sub pxf2bff {
145
146							# <array_dispatcher> will deal with JSON arrays
147	2			2	0	163	return array_dispatcher(shift);
148							}
149
150							#############
151							#############
152							# BFF2PXF #
153							#############
154							#############
155
156							sub bff2pxf {
157
158							# <array_dispatcher> will deal with JSON arrays
159	2			2	0	100	return array_dispatcher(shift);
160							}
161
162							################
163							################
164							# REDCAP2BFF #
165							################
166							################
167
168							sub redcap2bff {
169
170	12			12	0	972	my $self = shift;
171
172							# Read and load data from REDCap export
173	12					123	my $data = read_csv( { in => $self->{in_file}, sep => undef } );
174							my ( $data_redcap_dict, $data_mapping_file ) =
175							read_redcap_dict_and_mapping_file(
176							{
177							redcap_dictionary => $self->{redcap_dictionary},
178							mapping_file => $self->{mapping_file},
179							self_validate_schema => $self->{self_validate_schema},
180							schema_file => $self->{schema_file}
181							}
182	11					161	);
183
184							# Load data in $self
185	4					25	$self->{data} = $data; # Dynamically adding attributes (setter)
186	4					14	$self->{data_redcap_dict} = $data_redcap_dict; # Dynamically adding attributes (setter)
187	4					10	$self->{data_mapping_file} = $data_mapping_file; # Dynamically adding attributes (setter)
188
189							# array_dispatcher will deal with JSON arrays
190	4					23	return array_dispatcher($self);
191							}
192
193							################
194							################
195							# REDCAP2PXF #
196							################
197							################
198
199							sub redcap2pxf {
200
201	1			1	0	49	my $self = shift;
202
203							# First iteration: redcap2bff
204	1					4	$self->{method} = 'redcap2bff'; # setter - we have to change the value of attr {method}
205	1					6	my $bff = redcap2bff($self); # array
206
207							# Preparing for second iteration: bff2pxf
208	1					10	$self->{method} = 'bff2pxf'; # setter
209	1					7168	$self->{data} = $bff; # setter
210	1					9	$self->{in_textfile} = 0; # setter
211
212							# Run second iteration
213	1					5	return array_dispatcher($self);
214							}
215
216							##############
217							##############
218							# OMOP2BFF #
219							##############
220							##############
221
222							sub omop2bff {
223
224	3			3	0	106	my $self = shift;
225
226							#############
227							# IMPORTANT #
228							#############
229
230							# SMALL TO MEDIUM FILES < 1M rows
231							#
232							# In many cases, because people are downsizing their DBs for data sharing,
233							# PostgreSQL dumps or CSVs will be < 1M rows.
234							# Provided we have enough memory (4-16GB), we'll be able to load data in RAM,
235							# and consolidate individual values (MEASURES, DRUGS, etc.)
236
237							# HUMONGOUS FILES > 1M rows
238							# NB: Interesting read on the topic
239							# https://www.perlmonks.org/?node_id=1033692
240							# Since we're relying heavily on hashes we need to resort to another strategy(es) to load the data
241							#
242							# * Option A *: Parallel processing - No change in our code
243							# Without changing the code, we ask the user to create mini-instances (or split CSVs into chunks) and use
244							# some sort of parallel processing (e.g., GNU parallel, snakemake, HPC, etc.)
245							# CONS: Concurrent jobs may fail due to SQLite being opened by multiple threads
246							#
247							# * Option B *: Keeping data consolidated at the individual-object level (as we do with small to medium files)
248							# --no-stream
249							# To do this, we have two options:
250							# a) Externalize (save to file) THE WHOLE HASH w/ DBM::Deep (but it's very slow)
251							# b) First dump CSV (me or users) and then use *nix to sort by person_id (or load into SQLite and sort there).
252							# Then, since rows for each individual are adjacent, we can load individual data together. Still,
253							# we'll be reading one table (e.g. MEASUREMENTS) at a time, thus, this is not really helping much to consolidate...
254							#
255							# * Option C *: Parsing files line by line (one row of CSV/SQL per JSON object) <=========== IMPLEMENTED ==========
256							# --stream
257							# BFF / PXF JSONs are just intermediate files. It's nice that they contain data grouped by individual
258							# (for visual inspection and display), but at the end of the day they'll end up in MongoDB.
259							# If all entries contain the primary key 'person_id' then it's up to the Beacon v2 API to deal with them.
260							# It's a similar issue to the one we had with genomicVariations in the B2RI, where a given variant belongs to many individuals.
261							# Here, multiple JSON documents/objects (MEASUREMENTS, DRUGS, etc.) will belong to the same individual.
262							# Now, since we allow for CSV and SQL as an input, we need to keep the number of steps to a minimum.
263							#
264							# - Problems that may arise:
265							# 1 - <CONCEPT> table is mandatory, but it can be so huge that it takes all RAM memory.
266							# For instance, <CONCEPT.csv> with 5_808_095 lines = 735 MB
267							# <CONCEPT_light.csv> with 5_808_094 lines but only 4 columns = 501 MB
268							# Anything more than 2M lines kills a 8GB Ram machine.
269							# Solutions:
270							# a) Not loading the table at all and resort to --ohdsi-db
271							# b) Creating a temporary SQLite instance for <CONCEPT>
272							# 2 - How to read line-by-line from an SQL dump
273							# If the PostgreSQL dump weighs, say, 20GB, do we create CSV tables from it (another ~20GB)?
274							# Solutions:
275							# a) Yep, we read @stream_ram_memory_tables and export the needed tables to CSV and go from there.
276							# b) Nope, we read PostgreSQL file twice, one time to load @stream_ram_memory_tables
277							# and the second time to load the remaining TABLES. <=========== IMPLEMENTED ==========
278							# 3 - In --stream mode, do we still allow for --sql2csv? NOPE !!!! <=========== IMPLEMENTED ==========
279							# We would need to go from functional mode (csv) to filehandles and it will take tons of space.
280							# Then, --stream and --sql2csv are mutually exclusive.
281							#
282
283							# Load variables
284	3					12	my $data;
285							my $filepath;
286	3					0	my @filepaths;
287							$self->{method_ori} =
288	3	100				17	exists $self->{method_ori} ? $self->{method_ori} : 'omop2bff'; # setter
289	3					6	$self->{prev_omop_tables} = [ @{ $self->{omop_tables} } ]; # setter - 1D clone
	3					18
290
291							# Check if data comes from variable or from file
292							# Variable
293	3	50				14	if ( exists $self->{data} ) {
294	0					0	$self->{omop_cli} = 0; # setter
295	0					0	$data = $self->{data};
296							}
297
298							# File(s)
299							else {
300
301							# Read and load data from OMOP-CDM export
302	3					9	$self->{omop_cli} = 1; # setter
303
304							# First we need to know if we have PostgreSQL dump or a bunch of csv
305							# File extensions to check
306	3					11	my @exts = map { $_, $_ . '.gz' } qw(.csv .tsv .sql);
	9					31
307
308							# Proceed
309							# The idea here is that we'll load ONLY ESSENTIAL TABLES
310							# regardless of whether they are concepts or truly records.
311							# Dictionaries (e.g. <CONCEPT>) will be parsed later from $data
312
313	3					8	for my $file ( @{ $self->{in_files} } ) {
	3					13
314	3					301	my ( $table_name, undef, $ext ) = fileparse( $file, @exts );
315	3	50				41	if ( $ext =~ m/\.sql/i ) {
316
317							#######################
318							# Loading OMOP tables #
319							#######################
320
321							# --no-stream
322	3	100				13	if ( !$self->{stream} ) {
323
324							# We read all tables in memory
325	2					22	$data = read_sqldump( { in => $file, self => $self } );
326
327							# Exporting to CSV if --sql2csv
328	2	50				41	sqldump2csv( $data, $self->{out_dir} ) if $self->{sql2csv};
329							}
330
331							# --stream
332							else {
333
334							# We'll ONLY load @stream_ram_memory_tables
335							# in RAM and the other tables as $fh
336	1					8	$self->{omop_tables} = [@stream_ram_memory_tables]; # setter
337	1					9	$data = read_sqldump( { in => $file, self => $self } );
338							}
339
340							# We keep the filepath for later
341	3					114	$filepath = $file;
342
343							# Exit loop
344	3					22	last;
345							}
346							else {
347
348							# We'll load all OMOP tables that the user is providing as -iomop
349							# as long as they have a match in @omop_essential_tables
350							# NB: --omop-tables has no effect
351							warn "<$table_name> is not a valid table in OMOP-CDM\n" and next
352
353							#unless (any { $_ eq $table_name } @{ $omop_main_table->{$omop_version} };
354	0	0	0	0		0	unless any { $_ eq $table_name } @omop_essential_tables; # global
	0					0
355
356							# --no-stream
357	0	0				0	if ( !$self->{stream} ) {
358
359							# We read all tables in memory
360							$data->{$table_name} =
361	0					0	read_csv( { in => $file, sep => $self->{sep} } );
362							}
363
364							# --stream
365							else {
366							# We'll ONLY load @stream_ram_memory_tables
367							# in RAM and the other tables as $fh
368	0	0		0		0	if ( any { $_ eq $table_name } @stream_ram_memory_tables ) {
	0					0
369							$data->{$table_name} =
370	0					0	read_csv( { in => $file, sep => $self->{sep} } );
371							}
372							else {
373	0					0	push @filepaths, $file;
374							}
375							}
376							}
377							}
378							}
379
380							#print Dumper_concise($data) and die;
381							#print Dumper_concise($self) and die;
382
383							# Primarily with CSVs, it can happen that user does not provide <CONCEPT.csv>
384							confess 'We could not find table <CONCEPT> from your input files'
385	3	50				22	unless exists $data->{CONCEPT};
386
387							# We create a dictionary for $data->{CONCEPT}
388	3					21	$self->{data_ohdsi_dic} = transpose_ohdsi_dictionary( $data->{CONCEPT} ); # Dynamically adding attributes (setter)
389
390							# We load the allowed concept_id for exposures as hashref (for --no-stream and --stream)
391	3					21	$self->{exposures} = load_exposures( $self->{exposures_file} ); # Dynamically adding attributes (setter)
392
393							# We transpose $self->{data}{VISIT_OCCURRENCE} if present
394	3	50				26	if ( exists $data->{VISIT_OCCURRENCE} ) {
395							$self->{visit_occurrence} =
396	3					21	transpose_visit_occurrence( $data->{VISIT_OCCURRENCE} ); # Dynamically adding attributes (setter)
397	3					75	delete $data->{VISIT_OCCURRENCE};
398							}
399
400							# Now we need to perform a transformation of the data where 'person_id' is one row of data
401							# NB: Transformation is due ONLY IN $omop_main_table FIELDS, the rest of the tables are not used
402							# The transformation is performed in --no-stream mode
403							$self->{data} =
404	3	100				24	$self->{stream} ? $data : transpose_omop_data_structure($data); # Dynamically adding attributes (setter)
405
406							# Giving some memory back to the system
407	3					858	$data = undef;
408
409							# --stream
410	3	100				17	if ( $self->{stream} ) {
411	1					10	omop_stream_dispatcher(
412							{ self => $self, filepath => $filepath, filepaths => \@filepaths }
413							);
414							}
415
416							# --no-stream
417							else {
418							# array_dispatcher will deal with JSON arrays
419	2					32	return array_dispatcher($self);
420							}
421							}
422
423							##############
424							##############
425							# OMOP2PXF #
426							##############
427							##############
428
429							sub omop2pxf {
430
431	1			1	0	59	my $self = shift;
432
433							# We have two possibilities:
434							#
435							# 1 - Module (Variables)
436							# 2 - CLI (I/O files)
437
438							# Variable
439	1	50				5	if ( exists $self->{data} ) {
440
441							# First iteration: omop2bff
442	0					0	$self->{omop_cli} = 0;
443	0					0	$self->{method} = 'omop2bff'; # setter - we have to change the value of attr {method}
444	0					0	my $bff = omop2bff($self); # array
445
446							# Preparing for second iteration: bff2pxf
447							# NB: This 2nd round may take a while if #individuals > 1000!!!
448	0					0	$self->{method} = 'bff2pxf'; # setter
449	0					0	$self->{data} = $bff; # setter
450	0					0	$self->{in_textfile} = 0; # setter
451
452							# Run second iteration
453	0					0	return array_dispatcher($self);
454
455							# CLI
456							}
457							else {
458							# $self->{method} will be always 'omop2bff'
459							# $self->{method_ori} will tell us the original one
460	1					49	$self->{method_ori} = 'omop2pxf'; # setter
461	1					10	$self->{method} = 'omop2bff'; # setter
462	1					42	$self->{omop_cli} = 1; # setter
463
464							# Run 1st and 2nd iteration
465	1					11	return omop2bff($self);
466							}
467							}
468
469							###############
470							###############
471							# CDISC2BFF #
472							###############
473							###############
474
475							sub cdisc2bff {
476
477	2			2	0	37	my $self = shift;
478	2					10	my $str = path( $self->{in_file} )->slurp_utf8;
479	2					7702	my $hash = xml2hash $str, attr => '-', text => '~';
480	2					68595	my $data = cdisc2redcap($hash);
481
482							my ( $data_redcap_dict, $data_mapping_file ) =
483							read_redcap_dict_and_mapping_file(
484							{
485							redcap_dictionary => $self->{redcap_dictionary},
486							mapping_file => $self->{mapping_file},
487							self_validate_schema => $self->{self_validate_schema},
488							schema_file => $self->{schema_file}
489							}
490	2					34	);
491
492							# Load data in $self
493	2					10	$self->{data} = $data; # Dynamically adding attributes (setter)
494	2					9	$self->{data_redcap_dict} = $data_redcap_dict; # Dynamically adding attributes (setter)
495	2					5	$self->{data_mapping_file} = $data_mapping_file; # Dynamically adding attributes (setter)
496
497							# array_dispatcher will deal with JSON arrays
498	2					12	return array_dispatcher($self);
499							}
500
501							###############
502							###############
503							# CDISC2PXF #
504							###############
505							###############
506
507							sub cdisc2pxf {
508
509	1			1	0	68	my $self = shift;
510
511							# First iteration: cdisc2bff
512	1					4	$self->{method} = 'cdisc2bff'; # setter - we have to change the value of attr {method}
513	1					8	my $bff = cdisc2bff($self); # array
514
515							# Preparing for second iteration: bff2pxf
516	1					7	$self->{method} = 'bff2pxf'; # setter
517	1					4726	$self->{data} = $bff; # setter
518	1					34	$self->{in_textfile} = 0; # setter
519
520							# Run second iteration
521	1					12	return array_dispatcher($self);
522							}
523
524							######################
525							######################
526							# MISCELLANEA SUBS #
527							######################
528							######################
529
530							sub array_dispatcher {
531
532	14			14	0	39	my $self = shift;
533
534							# Load the input data as Perl data structure
535							my $in_data =
536							( $self->{in_textfile} && $self->{method} !~ m/^redcap2\|^omop2\|^cdisc2/ )
537							? io_yaml_or_json( { filepath => $self->{in_file}, mode => 'read' } )
538	14	100	100			235	: $self->{data};
539
540							# Define the methods to call (naming 'func' to avoid confusion with $self->{method})
541	13					194	my %func = (
542							pxf2bff => \&do_pxf2bff,
543							redcap2bff => \&do_redcap2bff,
544							cdisc2bff => \&do_cdisc2bff,
545							omop2bff => \&do_omop2bff,
546							bff2pxf => \&do_bff2pxf
547							);
548
549							# Open connection to SQLite databases ONCE
550	13	100				110	open_connections_SQLite($self) if $self->{method} ne 'bff2pxf';
551
552							# Open filehandle if omop2bff
553	13					35	my $fh_out;
554	13	50	66			78	if ( $self->{method} eq 'omop2bff' && $self->{omop_cli} ) {
555	2					12	$fh_out = open_filehandle( $self->{out_file}, 'a' );
556	2					31	say $fh_out "[";
557							}
558
559							# Proceed depending if we have an ARRAY or not
560							# NB: Caution with RAM (we store all in memory except for omop2bff)
561	13					29	my $out_data;
562	13	100				82	if ( ref $in_data eq ref [] ) {
563
564							# Print if we have ARRAY
565	12	50				54	say "$self->{method}: ARRAY" if $self->{debug};
566
567							# Initialize needed variables
568	12					24	my $count = 0;
569	12					23	my $total = 0;
570	12					23	my $elements = scalar @{$in_data};
	12					25
571
572							# Start looping
573							# In $self->{data} we have all participants data, but,
574							# WE DELIBERATELY SEPARATE ARRAY ELEMENTS FROM $self->{data}
575
576	12					29	for ( @{$in_data} ) {
	12					35
577	2228					2717	$count++;
578
579							# Print info
580	2228	50				4423	say "[$count] ARRAY ELEMENT from $elements" if $self->{debug};
581
582							# NB: If we get "null" participants the validator will complain
583							# about not having "id" or any other required property
584	2228					6156	my $method_result = $func{ $self->{method} }->( $self, $_ ); # Method
585
586							# Only proceeding if we got value from method
587	2228	100				4319	if ($method_result) {
588	1288					1823	$total++;
589	1288	50				2556	say " * [$count] ARRAY ELEMENT is defined" if $self->{debug};
590
591							# For omop2bff and omop2pxf we serialize by individual
592	1288	100	66			3902	if ( exists $self->{omop_cli} && $self->{omop_cli} ) {
593	1000					1794	my $out = omop_dispatcher( $self, $method_result );
594	1000					54633	print $fh_out $$out;
595							print $fh_out ",\n"
596							unless ( $total == $elements
597	1000	100	66			18518	\|\| $total == $self->{max_lines_sql} );
598							}
599
600							# For the other we have array_ref $out_data and serialize at once
601							else {
602	288					338	push @{$out_data}, $method_result;
	288					777
603
604							#say total_size($out_data);
605							}
606							}
607							}
608
609							say "==============\nIndividuals total: $total\n"
610	12	50	33			103	if ( $self->{verbose} && $self->{method} eq 'omop2bff' );
611							}
612
613							# NOT ARRAY
614							else {
615	1	50				3	say "$self->{method}: NOT ARRAY" if $self->{debug};
616	1					10	$out_data = $func{ $self->{method} }->( $self, $in_data ); # Method
617							}
618
619							# Close connections ONCE
620	13	100				136	close_connections_SQLite($self) unless $self->{method} eq 'bff2pxf';
621
622							# Close filehandle if omop2bff (w/ premature return)
623	13	50	66			92	if ( exists $self->{omop_cli} && $self->{omop_cli} ) {
624	2					12	say $fh_out "\n]";
625	2					17	close $fh_out;
626	2					1682	return 1;
627							}
628
629							# Return data
630	11					14151	return $out_data;
631							}
632
633							sub omop_dispatcher {
634
635	1000			1000	0	1575	my ( $self, $method_result ) = @_;
636
637							# For omop2bff and omop2pxf we serialize by individual
638	1000					1272	my $out;
639
640							# omop2bff encode directly
641	1000	100				2020	if ( $self->{method_ori} ne 'omop2pxf' ) {
642	500					36061	$out = JSON::XS->new->utf8->canonical->pretty->encode($method_result);
643							}
644
645							# omop2pxf convert to PXF
646							else {
647	500					1313	my $pxf = do_bff2pxf( $self, $method_result );
648	500					11626	$out = JSON::XS->new->utf8->canonical->pretty->encode($pxf);
649							}
650	1000					4335	chomp $out;
651	1000					2051	return \$out;
652							}
653
654							sub omop_stream_dispatcher {
655
656	1			1	0	4	my $arg = shift;
657	1					4	my $self = $arg->{self};
658	1					3	my $filepath = $arg->{filepath};
659	1					3	my $filepaths = $arg->{filepaths};
660	1					4	my $omop_tables = $self->{prev_omop_tables};
661
662							# Open connection to SQLite databases ONCE
663	1	50				12	open_connections_SQLite($self) if $self->{method} ne 'bff2pxf';
664
665							# First we do transformations from AoH to HoH to speed up the calculation
666	1					2	my $person = { map { $_->{person_id} => $_ } @{ $self->{data}{PERSON} } };
	2694					6374
	1					19
667
668							# Give back memory to RAM
669	1					256	delete $self->{data}{PERSON};
670
671							# CSVs
672	1	50				7	if (@$filepaths) {
673	0					0	for (@$filepaths) {
674	0	0				0	say "Processing file ... <$_>" if $self->{verbose};
675							read_csv_stream(
676							{
677							in => $_,
678							sep => $self->{sep},
679	0					0	self => $self,
680							person => $person
681							}
682							);
683							}
684							}
685
686							# PostgreSQL dump
687							else {
688
689							# Now iterate
690	1					1	for my $table ( @{$omop_tables} ) {
	1					6
691
692							# We already loaded @stream_ram_memory_tables;
693	3	100		6		258	next if any { $_ eq $table } @stream_ram_memory_tables;
	6					28
694	1	50				7	say "Processing table ... <$table>" if $self->{verbose};
695	1					7	$self->{omop_tables} = [$table];
696	1					12	read_sqldump_stream(
697							{ in => $filepath, self => $self, person => $person } );
698							}
699							}
700
701							# Close connections ONCE
702	1	50				18	close_connections_SQLite($self) unless $self->{method} eq 'bff2pxf';
703	1					8250	return 1;
704							}
705
706							sub omop2bff_stream_processing {
707
708	67707			67707	0	102810	my ( $self, $data ) = @_;
709
710							# We have this subroutine here because the class was initiated in Pheno.pm
711	67707					160830	return do_omop2bff( $self, $data ); # Method
712							}
713
714							sub Dumper_concise {
715							{
716	0			0	0		local $Data::Dumper::Terse = 1;
	0
717	0						local $Data::Dumper::Indent = 1;
718	0						local $Data::Dumper::Useqq = 1;
719	0						local $Data::Dumper::Deparse = 1;
720	0						local $Data::Dumper::Quotekeys = 1;
721	0						local $Data::Dumper::Sortkeys = 1;
722	0						local $Data::Dumper::Pair = ' : ';
723	0						print Dumper shift;
724							}
725							}
726
727							1;
728
729							=head1 NAME
730
731							Convert::Pheno - A module to interconvert common data models for phenotypic data
732
733							=head1 SYNOPSIS
734
735							use Convert::Pheno;
736
737							# Define data
738							my $my_pxf_json_data = {
739							"phenopacket" => {
740							"id" => "P0007500",
741							"subject" => {
742							"id" => "P0007500",
743							"dateOfBirth" => "unknown-01-01T00:00:00Z",
744							"sex" => "FEMALE"
745							}
746							}
747							};
748
749							# Create object
750							my $convert = Convert::Pheno->new(
751							{
752							data => $my_pxf_json_data,
753							method => 'pxf2bff'
754							}
755							);
756
757							# Apply a method
758							my $data = $convert->pxf2bff;
759
760							=head1 DESCRIPTION
761
762							For a better description, please read the following documentation:
763
764							=over
765
766							=item General:
767
768							L<https://cnag-biomedical-informatics.github.io/convert-pheno>
769
770							=item Command-Line Interface:
771
772							L<https://github.com/CNAG-Biomedical-Informatics/convert-pheno#readme>
773
774							=back
775
776							=head1 CITATION
777
778							The author requests that any published work that utilizes C<Convert-Pheno> include a citation to the following reference:
779
780							Rueda, M. et al. "Convert-Pheno: A software toolkit for the interconversion of standard data models for phenotypic data", (2023), I<Journal of Biomedical Informatics>.
781
782							=head1 AUTHOR
783
784							Written by Manuel Rueda, PhD. Info about CNAG can be found at L<https://www.cnag.eu>.
785
786							=head1 METHODS
787
788							See L<https://cnag-biomedical-informatics.github.io/convert-pheno/use-as-a-module>.
789
790							=head1 COPYRIGHT
791
792							This PERL file is copyrighted. See the LICENSE file included in this distribution.
793
794							=cut