File Coverage

lib/Convert/Pheno.pm
Criterion Covered Total %
statement 216 245 88.1
branch 43 68 63.2
condition 12 20 60.0
subroutine 39 42 92.8
pod 0 13 0.0
total 310 388 79.9


line stmt bran cond sub pod time code
1             package Convert::Pheno;
2              
3 6     6   648144 use strict;
  6         65  
  6         174  
4 6     6   30 use warnings;
  6         11  
  6         175  
5 6     6   7215 use autodie;
  6         102343  
  6         31  
6 6     6   42376 use feature qw(say);
  6         22  
  6         788  
7 6     6   2053 use File::Spec::Functions qw(catdir catfile);
  6         3707  
  6         445  
8 6     6   809 use Data::Dumper;
  6         7612  
  6         323  
9 6     6   5833 use Path::Tiny;
  6         82835  
  6         363  
10 6     6   48 use File::Basename;
  6         14  
  6         676  
11 6     6   3572 use File::ShareDir::ProjectDistDir;
  6         251516  
  6         50  
12 6     6   3692 use List::Util qw(any uniq);
  6         12  
  6         782  
13 6     6   50 use Carp qw(confess);
  6         23  
  6         278  
14 6     6   2979 use XML::Fast;
  6         95880  
  6         349  
15 6     6   3682 use Moo;
  6         45525  
  6         31  
16 6     6   15512 use Types::Standard qw(Str Int Num Enum ArrayRef Undef);
  6         706661  
  6         70  
17 6     6   20033 use File::ShareDir::ProjectDistDir qw(dist_dir);
  6         16  
  6         96  
18              
19             #use Devel::Size qw(size total_size);
20 6     6   6597 use Convert::Pheno::CSV;
  6         26  
  6         601  
21 6     6   52 use Convert::Pheno::IO;
  6         14  
  6         366  
22 6     6   43 use Convert::Pheno::SQLite;
  6         15  
  6         663  
23 6     6   47 use Convert::Pheno::Mapping;
  6         27  
  6         636  
24 6     6   44 use Convert::Pheno::OMOP;
  6         18  
  6         698  
25 6     6   3139 use Convert::Pheno::PXF;
  6         22  
  6         359  
26 6     6   2687 use Convert::Pheno::BFF;
  6         14  
  6         403  
27 6     6   2482 use Convert::Pheno::CDISC;
  6         23  
  6         336  
28 6     6   45 use Convert::Pheno::REDCap;
  6         13  
  6         230  
29              
30 6     6   41 use Exporter 'import';
  6         13  
  6         257  
31             our @EXPORT =
32             qw($VERSION io_yaml_or_json omop2bff_stream_processing share_dir); # Symbols imported by default
33              
34             #our @EXPORT_OK = qw(foo bar); # Symbols imported by request
35              
36 6     6   28 use constant DEVEL_MODE => 0;
  6         13  
  6         20883  
37              
38             # Global variables:
39             our $VERSION = '0.12_3';
40             our $share_dir = dist_dir('Convert-Pheno');
41              
42             ############################################
43             # Start declaring attributes for the class #
44             ############################################
45              
46             # Complex defaults here
47             has search => (
48              
49             default => 'exact',
50             is => 'ro',
51             coerce => sub { $_[0] // 'exact' },
52             isa => Enum [qw(exact mixed)]
53             );
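             # Illustrative sketch (not part of the module): the coerce callback
             # above maps an explicit undef back to the default, so both calls
             # below end up with search => 'exact':
             #   Convert::Pheno->new( { method => 'pxf2bff', data => {} } );
             #   Convert::Pheno->new( { method => 'pxf2bff', data => {}, search => undef } );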
54              
55             has text_similarity_method => (
56              
57             #default => 'cosine',
58             is => 'ro',
59             coerce => sub { $_[0] // 'cosine' },
60             isa => Enum [qw(cosine dice)]
61             );
62              
63             has min_text_similarity_score => (
64              
65             #default => 0.8,
66             is => 'ro',
67             coerce => sub { $_[0] // 0.8 },
68             isa => sub {
69             die "Only values between 0 and 1 are supported!"
70             unless ( $_[0] >= 0.0 && $_[0] <= 1.0 );
71             }
72             );
73              
74             has username => (
75              
76             #default => ( $ENV{LOGNAME} || $ENV{USER} || getpwuid($<) ) , # getpwuid not implemented on Windows
77             default => $ENV{'LOGNAME'} || $ENV{'USER'} || $ENV{'USERNAME'} || 'dummy-user',
78             is => 'ro',
79             coerce => sub {
80             $_[0] // ( $ENV{'LOGNAME'} || $ENV{'USER'} || $ENV{'USERNAME'} || 'dummy-user' );
81             },
82             isa => Str
83             );
84              
85             has max_lines_sql => (
86             default => 500, # Limit to speed up runtime
87             is => 'ro',
88             coerce => sub { $_[0] // 500 },
89             isa => Int
90             );
91              
92             has omop_tables => (
93              
94             # Table <CONCEPT> is always required
95             default => sub { [@omop_essential_tables] },
96             coerce => sub {
97             @{ $_[0] }
98             ? $_[0] =
99             [ map { uc($_) } ( uniq( @{ $_[0] }, 'CONCEPT', 'PERSON' ) ) ]
100             : \@omop_essential_tables;
101             },
102             is => 'rw',
103             isa => ArrayRef
104             );
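             # Illustrative sketch (behavior inferred from the coerce above):
             # user-supplied names are uppercased and <CONCEPT> and <PERSON> are
             # always included, e.g.
             #   omop_tables => ['measurement']
             # becomes ['MEASUREMENT', 'CONCEPT', 'PERSON'].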
105              
106             has exposures_file => (
107              
108             default =>
109             catfile( $share_dir, 'db', 'concepts_candidates_2_exposure.csv' ),
110             coerce => sub {
111             $_[0]
112             // catfile( $share_dir, 'db', 'concepts_candidates_2_exposure.csv' );
113             },
114             is => 'ro',
115             isa => Str
116             );
117              
118             # Miscellaneous attributes here
119             has [qw /test print_hidden_labels self_validate_schema path_to_ohdsi_db/] =>
120             ( default => undef, is => 'ro' );
121              
122             has [qw /stream ohdsi_db/] => ( default => 0, is => 'ro' );
123              
124             has [qw /in_files/] => ( default => sub { [] }, is => 'ro' );
125              
126             has [
127             qw /out_file out_dir in_textfile in_file sep sql2csv redcap_dictionary mapping_file schema_file debug log verbose/
128             ] => ( is => 'ro' );
129              
130             has [qw /data method/] => ( is => 'rw' );
131              
132             ##########################################
133             # End declaring attributes for the class #
134             ##########################################
135              
136             # NB: In general, we'll only display terms that exist and have content
137              
138             #############
139             #############
140             # PXF2BFF #
141             #############
142             #############
143              
144             sub pxf2bff {
145              
146             # <array_dispatcher> will deal with JSON arrays
147 2     2 0 163 return array_dispatcher(shift);
148             }
149              
150             #############
151             #############
152             # BFF2PXF #
153             #############
154             #############
155              
156             sub bff2pxf {
157              
158             # <array_dispatcher> will deal with JSON arrays
159 2     2 0 100 return array_dispatcher(shift);
160             }
161              
162             ################
163             ################
164             # REDCAP2BFF #
165             ################
166             ################
167              
168             sub redcap2bff {
169              
170 12     12 0 972 my $self = shift;
171              
172             # Read and load data from REDCap export
173 12         123 my $data = read_csv( { in => $self->{in_file}, sep => undef } );
174             my ( $data_redcap_dict, $data_mapping_file ) =
175             read_redcap_dict_and_mapping_file(
176             {
177             redcap_dictionary => $self->{redcap_dictionary},
178             mapping_file => $self->{mapping_file},
179             self_validate_schema => $self->{self_validate_schema},
180             schema_file => $self->{schema_file}
181             }
182 11         161 );
183              
184             # Load data in $self
185 4         25 $self->{data} = $data; # Dynamically adding attributes (setter)
186 4         14 $self->{data_redcap_dict} = $data_redcap_dict; # Dynamically adding attributes (setter)
187 4         10 $self->{data_mapping_file} = $data_mapping_file; # Dynamically adding attributes (setter)
188              
189             # array_dispatcher will deal with JSON arrays
190 4         23 return array_dispatcher($self);
191             }
192              
193             ################
194             ################
195             # REDCAP2PXF #
196             ################
197             ################
198              
199             sub redcap2pxf {
200              
201 1     1 0 49 my $self = shift;
202              
203             # First iteration: redcap2bff
204 1         4 $self->{method} = 'redcap2bff'; # setter - we have to change the value of attr {method}
205 1         6 my $bff = redcap2bff($self); # array
206              
207             # Preparing for second iteration: bff2pxf
208 1         10 $self->{method} = 'bff2pxf'; # setter
209 1         7168 $self->{data} = $bff; # setter
210 1         9 $self->{in_textfile} = 0; # setter
211              
212             # Run second iteration
213 1         5 return array_dispatcher($self);
214             }
215              
216             ##############
217             ##############
218             # OMOP2BFF #
219             ##############
220             ##############
221              
222             sub omop2bff {
223              
224 3     3 0 106 my $self = shift;
225              
226             #############
227             # IMPORTANT #
228             #############
229              
230             # SMALL TO MEDIUM FILES < 1M rows
231             #
232             # In many cases, because people are downsizing their DBs for data sharing,
233             # PostgreSQL dumps or CSVs will be < 1M rows.
234             # Provided we have enough memory (4-16GB), we'll be able to load the data in RAM,
235             # and consolidate individual values (MEASURES, DRUGS, etc.)
236              
237             # HUMONGOUS FILES > 1M rows
238             # NB: Interesting read on the topic
239             # https://www.perlmonks.org/?node_id=1033692
240             # Since we're relying heavily on hashes, we need to resort to other strategies to load the data
241             #
242             # * Option A *: Parallel processing - No change in our code
243             # Without changing the code, we ask the user to create mini-instances (or split CSVs into chunks) and use
244             # some sort of parallel processing (e.g., GNU parallel, snakemake, HPC, etc.)
245             # CONS: Concurrent jobs may fail due to SQLite being opened by multiple threads
246             #
247             # * Option B *: Keeping data consolidated at the individual-object level (as we do with small to medium files)
248             # --no-stream
249             # To do this, we have two options:
250             # a) Externalize (save to file) THE WHOLE HASH w/ DBM::Deep (but it's very slow)
251             # b) First dump to CSV (done by us or by the users) and then use *nix to sort by person_id (or load SQLite and sort there).
252             # Then, since rows for each individual are adjacent, we can load individual data together. Still,
253             # we'll be reading one table (e.g., MEASUREMENTS) at a time, so this does not really help much with consolidation...
254             #
255             # * Option C *: Parsing files line by line (one row of CSV/SQL per JSON object) <=========== IMPLEMENTED ==========
256             # --stream
257             # BFF / PXF JSONs are just intermediate files. It's nice that they contain data grouped by individual
258             # (for visual inspection and display), but at the end of the day they'll end up in MongoDB.
259             # If all entries contain the primary key 'person_id' then it's up to the Beacon v2 API to deal with them.
260             # It's a similar issue to the one we had with genomicVariations in the B2RI, where a given variant belongs to many individuals.
261             # Here, multiple JSON documents/objects (MEASUREMENTS, DRUGS, etc.) will belong to the same individual.
262             # Now, since we allow both CSV and SQL as input, we need to keep the number of steps to a minimum.
263             #
264             # - Problems that may arise:
265             # 1 - <CONCEPT> table is mandatory, but it can be so huge that it takes up all the RAM.
266             # For instance, <CONCEPT.csv> with 5_808_095 lines = 735 MB
267             # <CONCEPT_light.csv> with 5_808_094 lines but only 4 columns = 501 MB
268             # Anything more than 2M lines kills an 8GB RAM machine.
269             # Solutions:
270             # a) Not loading the table at all and resorting to --ohdsi-db
271             # b) Creating a temporary SQLite instance for <CONCEPT>
272             # 2 - How to read line-by-line from an SQL dump
273             # If the PostgreSQL dump weighs, say, 20GB, do we create CSV tables from it (another ~20GB)?
274             # Solutions:
275             # a) Yep, we read @stream_ram_memory_tables and export the needed tables to CSV and go from there.
276             # b) Nope, we read the PostgreSQL file twice, once to load @stream_ram_memory_tables
277             # and the second time to load the remaining TABLES. <=========== IMPLEMENTED ==========
278             # 3 - In --stream mode, do we still allow for --sql2csv? NOPE !!!! <=========== IMPLEMENTED ==========
279             # We would need to go from functional mode (CSV) to filehandles, and it would take tons of space.
280             # Thus, --stream and --sql2csv are mutually exclusive.
281             #
282              
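             # Hedged sketch of Option A above (the executable name is
             # illustrative; the flags are those referenced in these comments).
             # Split a table into chunks and run one process per chunk with GNU
             # parallel, keeping the SQLite caveat from CONS in mind:
             #   split -l 100000 MEASUREMENT.csv chunk_
             #   ls chunk_* | parallel "convert-pheno -iomop {} --stream ..."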
283             # Load variables
284 3         12 my $data;
285             my $filepath;
286 3         0 my @filepaths;
287             $self->{method_ori} =
288 3 100       17 exists $self->{method_ori} ? $self->{method_ori} : 'omop2bff'; # setter
289 3         6 $self->{prev_omop_tables} = [ @{ $self->{omop_tables} } ]; # setter - 1D clone
  3         18  
290              
291             # Check if data comes from variable or from file
292             # Variable
293 3 50       14 if ( exists $self->{data} ) {
294 0         0 $self->{omop_cli} = 0; # setter
295 0         0 $data = $self->{data};
296             }
297              
298             # File(s)
299             else {
300              
301             # Read and load data from OMOP-CDM export
302 3         9 $self->{omop_cli} = 1; # setter
303              
304             # First we need to know if we have a PostgreSQL dump or a bunch of CSVs
305             # File extensions to check
306 3         11 my @exts = map { $_, $_ . '.gz' } qw(.csv .tsv .sql);
  9         31  
307              
308             # Proceed
309             # The idea here is that we'll load ONLY ESSENTIAL TABLES
310             # regardless of whether they are concepts or actual records.
311             # Dictionaries (e.g. <CONCEPT>) will be parsed later from $data
312              
313 3         8 for my $file ( @{ $self->{in_files} } ) {
  3         13  
314 3         301 my ( $table_name, undef, $ext ) = fileparse( $file, @exts );
315 3 50       41 if ( $ext =~ m/\.sql/i ) {
316              
317             #######################
318             # Loading OMOP tables #
319             #######################
320              
321             # --no-stream
322 3 100       13 if ( !$self->{stream} ) {
323              
324             # We read all tables in memory
325 2         22 $data = read_sqldump( { in => $file, self => $self } );
326              
327             # Exporting to CSV if --sql2csv
328 2 50       41 sqldump2csv( $data, $self->{out_dir} ) if $self->{sql2csv};
329             }
330              
331             # --stream
332             else {
333              
334             # We'll ONLY load @stream_ram_memory_tables
335             # in RAM and the other tables as $fh
336 1         8 $self->{omop_tables} = [@stream_ram_memory_tables]; # setter
337 1         9 $data = read_sqldump( { in => $file, self => $self } );
338             }
339              
340             # We keep the filepath for later
341 3         114 $filepath = $file;
342              
343             # Exit loop
344 3         22 last;
345             }
346             else {
347              
348             # We'll load all OMOP tables that the user provides via -iomop
349             # as long as they have a match in @omop_essential_tables
350             # NB: --omop-tables has no effect
351             warn "<$table_name> is not a valid table in OMOP-CDM\n" and next
352              
353             #unless (any { $_ eq $table_name } @{ $omop_main_table->{$omop_version} };
354 0 0 0 0   0 unless any { $_ eq $table_name } @omop_essential_tables; # global
  0         0  
355              
356             # --no-stream
357 0 0       0 if ( !$self->{stream} ) {
358              
359             # We read all tables in memory
360             $data->{$table_name} =
361 0         0 read_csv( { in => $file, sep => $self->{sep} } );
362             }
363              
364             # --stream
365             else {
366             # We'll ONLY load @stream_ram_memory_tables
367             # in RAM and the other tables as $fh
368 0 0   0   0 if ( any { $_ eq $table_name } @stream_ram_memory_tables ) {
  0         0  
369             $data->{$table_name} =
370 0         0 read_csv( { in => $file, sep => $self->{sep} } );
371             }
372             else {
373 0         0 push @filepaths, $file;
374             }
375             }
376             }
377             }
378             }
379              
380             #print Dumper_concise($data) and die;
381             #print Dumper_concise($self) and die;
382              
383             # Primarily with CSVs, it can happen that the user does not provide <CONCEPT.csv>
384             confess 'We could not find table <CONCEPT> from your input files'
385 3 50       22 unless exists $data->{CONCEPT};
386              
387             # We create a dictionary for $data->{CONCEPT}
388 3         21 $self->{data_ohdsi_dic} = transpose_ohdsi_dictionary( $data->{CONCEPT} ); # Dynamically adding attributes (setter)
389              
390             # We load the allowed concept_id for exposures as hashref (for --no-stream and --stream)
391 3         21 $self->{exposures} = load_exposures( $self->{exposures_file} ); # Dynamically adding attributes (setter)
392              
393             # We transpose $data->{VISIT_OCCURRENCE} if present
394 3 50       26 if ( exists $data->{VISIT_OCCURRENCE} ) {
395             $self->{visit_occurrence} =
396 3         21 transpose_visit_occurrence( $data->{VISIT_OCCURRENCE} ); # Dynamically adding attributes (setter)
397 3         75 delete $data->{VISIT_OCCURRENCE};
398             }
399              
400             # Now we need to perform a transformation of the data where 'person_id' is one row of data
401             # NB: Transformation is done ONLY ON $omop_main_table FIELDS; the rest of the tables are not used
402             # The transformation is performed in --no-stream mode
403             $self->{data} =
404 3 100       24 $self->{stream} ? $data : transpose_omop_data_structure($data); # Dynamically adding attributes (setter)
405              
406             # Giving some memory back to the system
407 3         858 $data = undef;
408              
409             # --stream
410 3 100       17 if ( $self->{stream} ) {
411 1         10 omop_stream_dispatcher(
412             { self => $self, filepath => $filepath, filepaths => \@filepaths }
413             );
414             }
415              
416             # --no-stream
417             else {
418             # array_dispatcher will deal with JSON arrays
419 2         32 return array_dispatcher($self);
420             }
421             }
422              
423             ##############
424             ##############
425             # OMOP2PXF #
426             ##############
427             ##############
428              
429             sub omop2pxf {
430              
431 1     1 0 59 my $self = shift;
432              
433             # We have two possibilities:
434             #
435             # 1 - Module (Variables)
436             # 2 - CLI (I/O files)
437              
438             # Variable
439 1 50       5 if ( exists $self->{data} ) {
440              
441             # First iteration: omop2bff
442 0         0 $self->{omop_cli} = 0;
443 0         0 $self->{method} = 'omop2bff'; # setter - we have to change the value of attr {method}
444 0         0 my $bff = omop2bff($self); # array
445              
446             # Preparing for second iteration: bff2pxf
447             # NB: This 2nd round may take a while if #individuals > 1000!!!
448 0         0 $self->{method} = 'bff2pxf'; # setter
449 0         0 $self->{data} = $bff; # setter
450 0         0 $self->{in_textfile} = 0; # setter
451              
452             # Run second iteration
453 0         0 return array_dispatcher($self);
454              
455             # CLI
456             }
457             else {
458             # $self->{method} will always be 'omop2bff'
459             # $self->{method_ori} will tell us the original one
460 1         49 $self->{method_ori} = 'omop2pxf'; # setter
461 1         10 $self->{method} = 'omop2bff'; # setter
462 1         42 $self->{omop_cli} = 1; # setter
463              
464             # Run 1st and 2nd iteration
465 1         11 return omop2bff($self);
466             }
467             }
468              
469             ###############
470             ###############
471             # CDISC2BFF #
472             ###############
473             ###############
474              
475             sub cdisc2bff {
476              
477 2     2 0 37 my $self = shift;
478 2         10 my $str = path( $self->{in_file} )->slurp_utf8;
479 2         7702 my $hash = xml2hash $str, attr => '-', text => '~';
480 2         68595 my $data = cdisc2redcap($hash);
481              
482             my ( $data_redcap_dict, $data_mapping_file ) =
483             read_redcap_dict_and_mapping_file(
484             {
485             redcap_dictionary => $self->{redcap_dictionary},
486             mapping_file => $self->{mapping_file},
487             self_validate_schema => $self->{self_validate_schema},
488             schema_file => $self->{schema_file}
489             }
490 2         34 );
491              
492             # Load data in $self
493 2         10 $self->{data} = $data; # Dynamically adding attributes (setter)
494 2         9 $self->{data_redcap_dict} = $data_redcap_dict; # Dynamically adding attributes (setter)
495 2         5 $self->{data_mapping_file} = $data_mapping_file; # Dynamically adding attributes (setter)
496              
497             # array_dispatcher will deal with JSON arrays
498 2         12 return array_dispatcher($self);
499             }
500              
501             ###############
502             ###############
503             # CDISC2PXF #
504             ###############
505             ###############
506              
507             sub cdisc2pxf {
508              
509 1     1 0 68 my $self = shift;
510              
511             # First iteration: cdisc2bff
512 1         4 $self->{method} = 'cdisc2bff'; # setter - we have to change the value of attr {method}
513 1         8 my $bff = cdisc2bff($self); # array
514              
515             # Preparing for second iteration: bff2pxf
516 1         7 $self->{method} = 'bff2pxf'; # setter
517 1         4726 $self->{data} = $bff; # setter
518 1         34 $self->{in_textfile} = 0; # setter
519              
520             # Run second iteration
521 1         12 return array_dispatcher($self);
522             }
523              
524             ######################
525             ######################
526             # MISCELLANEOUS SUBS #
527             ######################
528             ######################
529              
530             sub array_dispatcher {
531              
532 14     14 0 39 my $self = shift;
533              
534             # Load the input data as a Perl data structure
535             my $in_data =
536             ( $self->{in_textfile} && $self->{method} !~ m/^redcap2|^omop2|^cdisc2/ )
537             ? io_yaml_or_json( { filepath => $self->{in_file}, mode => 'read' } )
538 14 100 100     235 : $self->{data};
539              
540             # Define the methods to call (naming 'func' to avoid confusion with $self->{method})
541 13         194 my %func = (
542             pxf2bff => \&do_pxf2bff,
543             redcap2bff => \&do_redcap2bff,
544             cdisc2bff => \&do_cdisc2bff,
545             omop2bff => \&do_omop2bff,
546             bff2pxf => \&do_bff2pxf
547             );
548              
549             # Open connection to SQLite databases ONCE
550 13 100       110 open_connections_SQLite($self) if $self->{method} ne 'bff2pxf';
551              
552             # Open filehandle if omop2bff
553 13         35 my $fh_out;
554 13 50 66     78 if ( $self->{method} eq 'omop2bff' && $self->{omop_cli} ) {
555 2         12 $fh_out = open_filehandle( $self->{out_file}, 'a' );
556 2         31 say $fh_out "[";
557             }
558              
559             # Proceed depending on whether we have an ARRAY or not
560             # NB: Caution with RAM (we store all in memory except for omop2bff)
561 13         29 my $out_data;
562 13 100       82 if ( ref $in_data eq ref [] ) {
563              
564             # Print if we have ARRAY
565 12 50       54 say "$self->{method}: ARRAY" if $self->{debug};
566              
567             # Initialize needed variables
568 12         24 my $count = 0;
569 12         23 my $total = 0;
570 12         23 my $elements = scalar @{$in_data};
  12         25  
571              
572             # Start looping
573             # In $self->{data} we have all participants' data, but
574             # WE DELIBERATELY SEPARATE ARRAY ELEMENTS FROM $self->{data}
575              
576 12         29 for ( @{$in_data} ) {
  12         35  
577 2228         2717 $count++;
578              
579             # Print info
580 2228 50       4423 say "[$count] ARRAY ELEMENT from $elements" if $self->{debug};
581              
582             # NB: If we get "null" participants the validator will complain
583             # about not having "id" or any other required property
584 2228         6156 my $method_result = $func{ $self->{method} }->( $self, $_ ); # Method
585              
586             # Only proceeding if we got value from method
587 2228 100       4319 if ($method_result) {
588 1288         1823 $total++;
589 1288 50       2556 say " * [$count] ARRAY ELEMENT is defined" if $self->{debug};
590              
591             # For omop2bff and omop2pxf we serialize by individual
592 1288 100 66     3902 if ( exists $self->{omop_cli} && $self->{omop_cli} ) {
593 1000         1794 my $out = omop_dispatcher( $self, $method_result );
594 1000         54633 print $fh_out $$out;
595             print $fh_out ",\n"
596             unless ( $total == $elements
597 1000 100 66     18518 || $total == $self->{max_lines_sql} );
598             }
599              
600             # For the others we have arrayref $out_data and serialize all at once
601             else {
602 288         338 push @{$out_data}, $method_result;
  288         777  
603              
604             #say total_size($out_data);
605             }
606             }
607             }
608              
609             say "==============\nIndividuals total: $total\n"
610 12 50 33     103 if ( $self->{verbose} && $self->{method} eq 'omop2bff' );
611             }
612              
613             # NOT ARRAY
614             else {
615 1 50       3 say "$self->{method}: NOT ARRAY" if $self->{debug};
616 1         10 $out_data = $func{ $self->{method} }->( $self, $in_data ); # Method
617             }
618              
619             # Close connections ONCE
620 13 100       136 close_connections_SQLite($self) unless $self->{method} eq 'bff2pxf';
621              
622             # Close filehandle if omop2bff (w/ early return)
623 13 50 66     92 if ( exists $self->{omop_cli} && $self->{omop_cli} ) {
624 2         12 say $fh_out "\n]";
625 2         17 close $fh_out;
626 2         1682 return 1;
627             }
628              
629             # Return data
630 11         14151 return $out_data;
631             }
632              
633             sub omop_dispatcher {
634              
635 1000     1000 0 1575 my ( $self, $method_result ) = @_;
636              
637             # For omop2bff and omop2pxf we serialize by individual
638 1000         1272 my $out;
639              
640             # omop2bff encodes directly
641 1000 100       2020 if ( $self->{method_ori} ne 'omop2pxf' ) {
642 500         36061 $out = JSON::XS->new->utf8->canonical->pretty->encode($method_result);
643             }
644              
645             # omop2pxf converts to PXF
646             else {
647 500         1313 my $pxf = do_bff2pxf( $self, $method_result );
648 500         11626 $out = JSON::XS->new->utf8->canonical->pretty->encode($pxf);
649             }
650 1000         4335 chomp $out;
651 1000         2051 return \$out;
652             }
653              
654             sub omop_stream_dispatcher {
655              
656 1     1 0 4 my $arg = shift;
657 1         4 my $self = $arg->{self};
658 1         3 my $filepath = $arg->{filepath};
659 1         3 my $filepaths = $arg->{filepaths};
660 1         4 my $omop_tables = $self->{prev_omop_tables};
661              
662             # Open connection to SQLite databases ONCE
663 1 50       12 open_connections_SQLite($self) if $self->{method} ne 'bff2pxf';
664              
665             # First we do transformations from AoH to HoH to speed up the calculation
666 1         2 my $person = { map { $_->{person_id} => $_ } @{ $self->{data}{PERSON} } };
  2694         6374  
  1         19  
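             # Shape sketch (illustrative) of the AoH -> HoH transform above:
             #   [ { person_id => 1, ... }, { person_id => 2, ... } ]
             # becomes
             #   { 1 => { person_id => 1, ... }, 2 => { person_id => 2, ... } }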
667              
668             # Give memory back to the system
669 1         256 delete $self->{data}{PERSON};
670              
671             # CSVs
672 1 50       7 if (@$filepaths) {
673 0         0 for (@$filepaths) {
674 0 0       0 say "Processing file ... <$_>" if $self->{verbose};
675             read_csv_stream(
676             {
677             in => $_,
678             sep => $self->{sep},
679 0         0 self => $self,
680             person => $person
681             }
682             );
683             }
684             }
685              
686             # PostgreSQL dump
687             else {
688              
689             # Now iterate
690 1         1 for my $table ( @{$omop_tables} ) {
  1         6  
691              
692             # We already loaded @stream_ram_memory_tables;
693 3 100   6   258 next if any { $_ eq $table } @stream_ram_memory_tables;
  6         28  
694 1 50       7 say "Processing table ... <$table>" if $self->{verbose};
695 1         7 $self->{omop_tables} = [$table];
696 1         12 read_sqldump_stream(
697             { in => $filepath, self => $self, person => $person } );
698             }
699             }
700              
701             # Close connections ONCE
702 1 50       18 close_connections_SQLite($self) unless $self->{method} eq 'bff2pxf';
703 1         8250 return 1;
704             }
705              
706             sub omop2bff_stream_processing {
707              
708 67707     67707 0 102810 my ( $self, $data ) = @_;
709              
710             # We have this subroutine here because the class was instantiated in Pheno.pm
711 67707         160830 return do_omop2bff( $self, $data ); # Method
712             }
713              
714             sub Dumper_concise {
715             {
716 0     0 0   local $Data::Dumper::Terse = 1;
  0            
717 0           local $Data::Dumper::Indent = 1;
718 0           local $Data::Dumper::Useqq = 1;
719 0           local $Data::Dumper::Deparse = 1;
720 0           local $Data::Dumper::Quotekeys = 1;
721 0           local $Data::Dumper::Sortkeys = 1;
722 0           local $Data::Dumper::Pair = ' : ';
723 0           print Dumper shift;
724             }
725             }
726              
727             1;
728              
729             =head1 NAME
730              
731             Convert::Pheno - A module to interconvert common data models for phenotypic data
732            
733             =head1 SYNOPSIS
734              
735             use Convert::Pheno;
736              
737             # Define data
738             my $my_pxf_json_data = {
739             "phenopacket" => {
740             "id" => "P0007500",
741             "subject" => {
742             "id" => "P0007500",
743             "dateOfBirth" => "unknown-01-01T00:00:00Z",
744             "sex" => "FEMALE"
745             }
746             }
747             };
748              
749             # Create object
750             my $convert = Convert::Pheno->new(
751             {
752             data => $my_pxf_json_data,
753             method => 'pxf2bff'
754             }
755             );
756              
757             # Apply a method
758             my $data = $convert->pxf2bff;
759              
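             A minimal sketch of the reverse conversion (hypothetical usage,
             reusing the constructor pattern above):

             # Convert the resulting BFF back to PXF
             my $convert_back = Convert::Pheno->new(
             {
             data => $data,
             method => 'bff2pxf'
             }
             );
             my $pxf = $convert_back->bff2pxf;
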
760             =head1 DESCRIPTION
761              
762             For a detailed description, please read the following documentation:
763              
764             =over
765              
766             =item General:
767              
768             L<https://cnag-biomedical-informatics.github.io/convert-pheno>
769              
770             =item Command-Line Interface:
771              
772             L<https://github.com/CNAG-Biomedical-Informatics/convert-pheno#readme>
773              
774             =back
775              
776             =head1 CITATION
777              
778             The author requests that any published work that utilizes C<Convert-Pheno> include a citation of the following reference:
779              
780             Rueda, M. et al. "Convert-Pheno: A software toolkit for the interconversion of standard data models for phenotypic data", (2023), I<Journal of Biomedical Informatics>.
781              
782             =head1 AUTHOR
783              
784             Written by Manuel Rueda, PhD. Info about CNAG can be found at L<https://www.cnag.eu>.
785              
786             =head1 METHODS
787              
788             See L<https://cnag-biomedical-informatics.github.io/convert-pheno/use-as-a-module>.
789              
790             =head1 COPYRIGHT
791              
792             This Perl file is copyrighted. See the LICENSE file included in this distribution.
793              
794             =cut