File Coverage

lib/Pheno/Ranker/Graph.pm
Criterion Covered Total %
statement 93 94 98.9
branch 10 18 55.5
condition n/a
subroutine 11 11 100.0
pod 0 3 0.0
total 114 126 90.4


line stmt bran cond sub pod time code
1             package Pheno::Ranker::Graph;
2              
3 5     5   39 use strict;
  5         11  
  5         235  
4 5     5   30 use warnings;
  5         11  
  5         568  
5 5     5   33 use autodie;
  5         11  
  5         69  
6 5     5   36078 use feature qw(say);
  5         14  
  5         974  
7 5     5   40 use JSON::XS;
  5         11  
  5         542  
8 5     5   52 use Pheno::Ranker::IO;
  5         11  
  5         768  
9 5     5   40 use Exporter 'import';
  5         8  
  5         406  
10             our @EXPORT = qw(matrix2graph cytoscape2graph);
11 5     5   35 use constant DEVEL_MODE => 0;
  5         10  
  5         9257  
12              
13             ############################
14             ############################
15             # SUBROUTINES FOR GRAPHS #
16             ############################
17             ############################
18              
19             sub matrix2graph {
20              
21             # *** IMPORTANT***
22             # Hard-coded in purpose to avoid using Graph unless necessary
23              
24 1     1 0 3 my $arg = shift;
25 1         92 my $input = $arg->{matrix};
26 1         7 my $output = $arg->{json};
27 1         2 my $verbose = $arg->{verbose};
28 1         3 my $graph_stats = $arg->{graph_stats};
29              
30             # Open the matrix file to read
31 1         58 open( my $matrix_fh, '<:encoding(UTF-8)', $input );
32              
33             # Read the first line to get node IDs (headers)
34 1         2962 my $header_line = <$matrix_fh>;
35 1         18 chomp $header_line;
36 1         23 my @headers = split /\t/, $header_line;
37 1         3 shift @headers; # Remove the initial empty element from the headers list
38              
39             # Initialize the nodes and edges arrays
40 1         5 my ( @nodes, @edges );
41 1         4 my $threshold = 0.0;
42              
43             # Initialize an index to keep track of the current row
44 1         4 my $current_index = 0;
45              
46             # Read each subsequent line
47 1         9 while ( my $line = <$matrix_fh> ) {
48 36         164 chomp $line;
49 36         514 my @values = split /\t/, $line;
50 36         69 my $node_id = shift @values; # The first column is the node ID
51              
52             # Ensure each node is represented in the node array
53 36         313 push @nodes, { data => { id => $node_id } };
54              
55             # Process each value in the row corresponding to an edge, but only in the upper triangle
56             # and explicitly skipping diagonal elements
57             # Undirected graph - Cytoscape.js settings:
58             # "style": [
59             # {
60             # "selector": "edge",
61             # "style": {
62             # "target-arrow-shape": "none"
63             # }
64             # }
65             # ]
66              
67 36         106 for ( my $i = $current_index + 1 ; $i < scalar @headers ; $i++ ) {
68 630 50       1439 if ( $values[$i] >= $threshold ) {
69 630         2741 push @edges,
70             {
71             data => {
72             source => $node_id,
73             target => $headers[$i],
74             weight => $values[$i]
75             }
76             };
77             }
78             }
79              
80             # Increment the current row index
81 36         311 $current_index++;
82             }
83              
84             # Close the matrix file handle
85 1         9 close $matrix_fh;
86              
87             # Assemble the complete graph structure
88 1         1234 my %graph = (
89             elements => {
90             nodes => \@nodes,
91             edges => \@edges,
92             }
93             );
94              
95             # Open a file to write JSON output
96 1 50       7 say "Writting <$output> file " if $verbose;
97 1         12 write_json( { filepath => $output, data => \%graph } );
98              
99 1 50       24 return defined $graph_stats ? \%graph : undef;
100             }
101              
102             sub cytoscape2graph {
103              
104             # This is a very time consuming function, that's why we only load data in Graph
105             # if the user asks for it
106 1     1 0 1261 require Graph;
107              
108             # Decode JSON to a Perl data structure
109 1         54836 my $arg = shift;
110 1         5 my $json_data = $arg->{graph};
111 1         4 my $output = $arg->{output};
112 1         3 my $metric = $arg->{metric};
113 1         3 my $verbose = $arg->{verbose};
114 1 50       5 my $jaccard = $metric eq 'jaccard' ? 1 : 0;
115              
116 1         2 my @nodes = @{ $json_data->{elements}{nodes} };
  1         20  
117 1         3 my @edges = @{ $json_data->{elements}{edges} };
  1         102  
118              
119             # Create a new Graph object
120 1         12 my $graph = Graph->new( undirected => 1 );
121              
122             # Add nodes and edges to the graph
123 1         4212 foreach my $node (@nodes) {
124 36         919 $graph->add_vertex( $node->{data}{id} );
125             }
126              
127 1         46 foreach my $edge (@edges) {
128             $graph->add_weighted_edge(
129             $edge->{data}{source},
130             $edge->{data}{target},
131              
132             # Convert to distances if Jaccard
133             $jaccard ? 1 - $edge->{data}{weight} : $edge->{data}{weight}
134 630 50       243312 );
135             }
136              
137             # Now $graph contains the Graph object populated with the Cytoscape data
138 1         490 graph_stats( $graph, $output, $metric, $verbose );
139 1         51 return 1;
140             }
141              
142             sub graph_stats {
143 1     1 0 4 my ( $g, $output, $metric, $verbose ) = @_;
144              
145             # Open the output file
146 1 50       5 say "Writting <$output> file " if $verbose;
147 1         8 open( my $fh, '>:encoding(UTF-8)', $output );
148              
149             # Basic stats
150 1         366 print $fh "Metric: ", ucfirst($metric), "\n";
151 1         6 print $fh "Number of vertices: ", scalar $g->vertices, "\n";
152 1         64 print $fh "Number of edges: ", scalar $g->edges, "\n";
153              
154             # Checking connectivity and components
155 1         1128 print $fh "Is connected: ", $g->is_connected, "\n";
156 1         39472 print $fh "Connected Components: ", scalar $g->connected_components, "\n";
157              
158             # Diameter and average path length, check if the graph is connected first
159 1 50       69 if ( $g->is_connected ) {
160 1         50 print $fh "Graph Diameter: ", ( join "->", $g->diameter ), "\n";
161 1         841092 print $fh "Average Path Length: ",
162             sprintf( "%7.3f", $g->average_path_length ), "\n";
163             }
164              
165             # Display degrees of all vertices
166 1         79663 foreach my $v ( $g->vertices ) {
167 36         79553 print $fh "Degree of vertex $v: ", $g->degree($v), "\n";
168             }
169              
170             # Minimum Spanning Tree
171 1         2313 my $mst = $g->MST_Kruskal; # Assuming Kruskal's is available and appropriate
172 1         242160 print $fh "MST has ", scalar $mst->edges, " edges\n";
173              
174             # Calculate and write all pairs shortest paths and their lengths to the file
175 1         105 foreach my $u ( $g->vertices ) {
176 36         215 foreach my $v ( $g->vertices ) {
177 1296 100       6035 if ( $u ne $v ) {
178 1260         4308 my @path = $g->SP_Dijkstra( $u, $v )
179             ; # Get shortest path using Dijkstra's algorithm
180 1260 50       7249854 if (@path) {
181 1260         4465 my $distance = $g->path_length( $u, $v ); # Recompute
182 1260         114960 print $fh "Shortest path from $u to $v is ",
183             ( join "->", @path ), " [", scalar @path,
184             "] with length $distance\n";
185             }
186             else {
187 0         0 print $fh "No path from $u to $v\n";
188             }
189             }
190             }
191             }
192              
193             # Close the output file
194 1         10 close $fh;
195 1         1303 return 1;
196             }
197              
198             1;