File Coverage

lib/Pheno/Ranker/Graph.pm
Criterion Covered Total %
statement 93 94 98.9
branch 10 18 55.5
condition n/a
subroutine 11 11 100.0
pod 0 3 0.0
total 114 126 90.4


line stmt bran cond sub pod time code
1             package Pheno::Ranker::Graph;
2              
3 5     5   24 use strict;
  5         8  
  5         167  
4 5     5   16 use warnings;
  5         6  
  5         203  
5 5     5   59 use autodie;
  5         22  
  5         30  
6 5     5   17789 use feature qw(say);
  5         75  
  5         618  
7 5     5   24 use JSON::XS;
  5         5  
  5         280  
8 5     5   19 use Pheno::Ranker::IO;
  5         5  
  5         399  
9 5     5   20 use Exporter 'import';
  5         6  
  5         237  
10             our @EXPORT = qw(matrix2graph cytoscape2graph);
11 5     5   28 use constant DEVEL_MODE => 0;
  5         5  
  5         5460  
12              
13             ############################
14             ############################
15             # SUBROUTINES FOR GRAPHS #
16             ############################
17             ############################
18              
19             sub matrix2graph {
20              
21             # *** IMPORTANT***
22             # Hard-coded in purpose to avoid using Graph unless necessary
23              
24 1     1 0 3 my $arg = shift;
25 1         2 my $input = $arg->{matrix};
26 1         1 my $output = $arg->{json};
27 1         2 my $verbose = $arg->{verbose};
28 1         2 my $graph_stats = $arg->{graph_stats};
29              
30             # Open the matrix file to read
31 1         5 open( my $matrix_fh, '<:encoding(UTF-8)', $input );
32              
33             # Read the first line to get node IDs (headers)
34 1         1449 my $header_line = <$matrix_fh>;
35 1         11 chomp $header_line;
36 1         13 my @headers = split /\t/, $header_line;
37 1         2 shift @headers; # Remove the initial empty element from the headers list
38              
39             # Initialize the nodes and edges arrays
40 1         3 my ( @nodes, @edges );
41 1         3 my $threshold = 0.0;
42              
43             # Initialize an index to keep track of the current row
44 1         2 my $current_index = 0;
45              
46             # Read each subsequent line
47 1         5 while ( my $line = <$matrix_fh> ) {
48 36         60 chomp $line;
49 36         259 my @values = split /\t/, $line;
50 36         231 my $node_id = shift @values; # The first column is the node ID
51              
52             # Ensure each node is represented in the node array
53 36         69 push @nodes, { data => { id => $node_id } };
54              
55             # Process each value in the row corresponding to an edge, but only in the upper triangle
56             # and explicitly skipping diagonal elements
57             # Undirected graph - Cytoscape.js settings:
58             # "style": [
59             # {
60             # "selector": "edge",
61             # "style": {
62             # "target-arrow-shape": "none"
63             # }
64             # }
65             # ]
66              
67 36         48 for ( my $i = $current_index + 1 ; $i < scalar @headers ; $i++ ) {
68 630 50       760 if ( $values[$i] >= $threshold ) {
69 630         1368 push @edges,
70             {
71             data => {
72             source => $node_id,
73             target => $headers[$i],
74             weight => $values[$i]
75             }
76             };
77             }
78             }
79              
80             # Increment the current row index
81 36         145 $current_index++;
82             }
83              
84             # Close the matrix file handle
85 1         6 close $matrix_fh;
86              
87             # Assemble the complete graph structure
88 1         719 my %graph = (
89             elements => {
90             nodes => \@nodes,
91             edges => \@edges,
92             }
93             );
94              
95             # Open a file to write JSON output
96 1 50       4 say "Writting <$output> file " if $verbose;
97 1         9 write_json( { filepath => $output, data => \%graph } );
98              
99 1 50       12 return defined $graph_stats ? \%graph : undef;
100             }
101              
102             sub cytoscape2graph {
103              
104             # This is a very time consuming function, that's why we only load data in Graph
105             # if the user asks for it
106 1     1 0 884 require Graph;
107              
108             # Decode JSON to a Perl data structure
109 1         33339 my $arg = shift;
110 1         4 my $json_data = $arg->{graph};
111 1         3 my $output = $arg->{output};
112 1         2 my $metric = $arg->{metric};
113 1         3 my $verbose = $arg->{verbose};
114 1 50       4 my $jaccard = $metric eq 'jaccard' ? 1 : 0;
115              
116 1         1 my @nodes = @{ $json_data->{elements}{nodes} };
  1         14  
117 1         2 my @edges = @{ $json_data->{elements}{edges} };
  1         168  
118              
119             # Create a new Graph object
120 1         10 my $graph = Graph->new( undirected => 1 );
121              
122             # Add nodes and edges to the graph
123 1         3512 foreach my $node (@nodes) {
124 36         987 $graph->add_vertex( $node->{data}{id} );
125             }
126              
127 1         22 foreach my $edge (@edges) {
128             $graph->add_weighted_edge(
129             $edge->{data}{source},
130             $edge->{data}{target},
131            
132             # Convert to distances if Jaccard
133             $jaccard ? 1 - $edge->{data}{weight} : $edge->{data}{weight}
134 630 50       145497 );
135             }
136              
137             # Now $graph contains the Graph object populated with the Cytoscape data
138 1         203 graph_stats( $graph, $output, $metric, $verbose );
139 1         84 return 1;
140             }
141              
142             sub graph_stats {
143              
144 1     1 0 5 my ( $g, $output, $metric, $verbose ) = @_;
145              
146             # Open the output file
147 1 50       3 say "Writting <$output> file " if $verbose;
148 1         9 open( my $fh, '>:encoding(UTF-8)', $output );
149              
150             # Basic stats
151 1         340 print $fh "Metric: ", ucfirst($metric), "\n";
152 1         6 print $fh "Number of vertices: ", scalar $g->vertices, "\n";
153 1         60 print $fh "Number of edges: ", scalar $g->edges, "\n";
154              
155             # Checking connectivity and components
156 1         636 print $fh "Is connected: ", $g->is_connected, "\n";
157 1         23129 print $fh "Connected Components: ", scalar $g->connected_components, "\n";
158              
159             # Diameter and average path length, check if the graph is connected first
160 1 50       42 if ( $g->is_connected ) {
161 1         29 print $fh "Graph Diameter: ", ( join "->", $g->diameter ), "\n";
162 1         472123 print $fh "Average Path Length: ", sprintf ("%7.3f",$g->average_path_length), "\n";
163             }
164              
165             # Display degrees of all vertices
166 1         41143 foreach my $v ( $g->vertices ) {
167 36         38566 print $fh "Degree of vertex $v: ", $g->degree($v), "\n";
168             }
169              
170             # Minimum Spanning Tree
171 1         1082 my $mst = $g->MST_Kruskal; # Assuming Kruskal's is available and appropriate
172 1         115764 print $fh "MST has ", scalar $mst->edges, " edges\n";
173              
174             # Calculate and write all pairs shortest paths and their lengths to the file
175 1         64 foreach my $u ( $g->vertices ) {
176 36         135 foreach my $v ( $g->vertices ) {
177 1296 100       3429 if ( $u ne $v ) {
178 1260         2582 my @path = $g->SP_Dijkstra( $u, $v ); # Get shortest path using Dijkstra's algorithm
179 1260 50       3529601 if (@path) {
180 1260         2683 my $distance = $g->path_length( $u, $v ); # Recompute
181 1260         62100 print $fh "Shortest path from $u to $v is ",
182             ( join "->", @path ), " [", scalar @path,
183             "] with length $distance\n";
184             }
185             else {
186 0         0 print $fh "No path from $u to $v\n";
187             }
188             }
189             }
190             }
191              
192             # Close the output file
193 1         10 close $fh;
194 1         1158 return 1;
195             }
196              
197             1;