| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
## package declaration, version number and Exporter set-up
package obogaf::parser;

require 5.006;

our $VERSION= '1.373';
$VERSION= eval $VERSION; ## evaluate the version string so that $VERSION holds the number 1.373

require Exporter;
our @ISA= ('Exporter');

## the public subs are grouped under the ':all' tag and
## registered in @EXPORT_OK, i.e. they are importable on request
our %EXPORT_TAGS= (
    'all' => [
        '&build_edges',
        '&build_subonto',
        '&make_stat',
        '&get_parents_or_children_list',
        '&obo_filter',
        '&gene2biofun',
        '&map_OBOterm_between_release',
    ],
);

our @EXPORT_OK= @{ $EXPORT_TAGS{'all'} };
|
20
|
|
|
|
|
|
|
|
|
21
|
7
|
|
|
7
|
|
611698
|
use strict; |
|
|
7
|
|
|
|
|
72
|
|
|
|
7
|
|
|
|
|
211
|
|
|
22
|
7
|
|
|
7
|
|
38
|
use warnings; |
|
|
7
|
|
|
|
|
12
|
|
|
|
7
|
|
|
|
|
164
|
|
|
23
|
7
|
|
|
7
|
|
5825
|
use Graph; |
|
|
7
|
|
|
|
|
821520
|
|
|
|
7
|
|
|
|
|
360
|
|
|
24
|
7
|
|
|
7
|
|
4432
|
use List::MoreUtils qw(uniq); |
|
|
7
|
|
|
|
|
92691
|
|
|
|
7
|
|
|
|
|
44
|
|
|
25
|
7
|
|
|
7
|
|
11583
|
use IO::File; |
|
|
7
|
|
|
|
|
64000
|
|
|
|
7
|
|
|
|
|
826
|
|
|
26
|
7
|
|
|
7
|
|
3431
|
use PerlIO::gzip; |
|
|
7
|
|
|
|
|
4131
|
|
|
|
7
|
|
|
|
|
23163
|
|
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
## build_edges: list the is_a and part_of edges stored in an obo file.
##
## input : $obofile - path of the ontology file; its name must end in ".obo".
## output: reference to a string holding one tab-separated row per edge:
##           [namespace] parent-id child-id parent-name child-name relation
##         relation is "is-a" or "part-of". The namespace column is written
##         only once a "namespace:" tag has been read. The referenced string
##         is undef when no edge is found.
## dies  : when the obo file cannot be opened.
sub build_edges{
    my ($obofile)= @_;
    my ($namespace, $idname, $destination, $res);
    ## a file that is not named *.obo is never parsed: say so explicitly
    ## instead of reading from a filehandle that was never opened
    unless($obofile=~/\.obo$/i){
        warn "$obofile is not an .obo file: no edge was built.\n";
        return \$res;
    }
    open my $fh, "<", $obofile or die "cannot open $obofile. $!.\n";
    while(my $line= <$fh>){
        chomp $line;
        next if $line=~/^\s*$/;
        if($line=~/^namespace:\s+(\D+)/){
            $namespace= $1;
        }elsif($line=~/^name:\s+(.+)/){
            $idname= $1;
        }elsif($line=~/^id:\s+(\D+\d+)/){
            $destination= $1;
        }else{
            my ($source, $relation);
            if($line=~/^is_a:\s+(\D+\d+)/){
                ($source, $relation)= ($1, "is-a");
            }elsif($line=~/^relationship: part_of\s+(\D+\d+)/){
                ($source, $relation)= ($1, "part-of");
            }
            next unless defined $source;
            ## the parent name is the text following "!" on the same line
            my ($sourcename)= ($line=~/!\s+(.+)/);
            my @row= ($source, $destination, $sourcename, $idname, $relation);
            unshift(@row, $namespace) if defined $namespace;
            ## a missing value (e.g. no "! name" comment) is written as an empty field
            $res .= join("\t", map { defined $_ ? $_ : "" } @row)."\n";
        }
    }
    close $fh;
    return \$res;
}
|
62
|
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
## build_subonto: extract from an edges file the rows of one namespace.
##
## input : $edgesfile - tab-separated file whose first column is the namespace;
##         $namespace - namespace (domain) to extract.
## output: reference to a string with the selected rows, first column removed.
## dies  : when the file cannot be opened or when $namespace never occurs
##         in the first column.
sub build_subonto{
    my ($edgesfile, $namespace)= @_;
    my ($res, $found);
    open my $fh, "<", $edgesfile or die "cannot open $edgesfile. $!.\n";
    while(my $line= <$fh>){
        ## skip blank lines and lines starting with '!', ',' or '#'
        next if $line=~/^[!,#]|^\s*$/;
        ## the line is not chomped: the last field keeps its newline
        my ($domain, @rest)= split(/\t/, $line);
        next unless $domain eq $namespace;
        $found= 1;
        $res .= join("\t", @rest);
    }
    close $fh;
    unless($found){die "$edgesfile does not include $namespace or $namespace is not in the first column of $edgesfile.\n";}
    return \$res;
}
|
77
|
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
## make_stat: compute degree statistics of the graph stored in an edges file.
##
## input : $edgesfile   - tab-separated edges file;
##         $parentIndex - index (from zero) of the column holding the parent node;
##         $childIndex  - index (from zero) of the column holding the child node.
## output: a string with one row per node (node, degree, in-degree, out-degree,
##         sorted by decreasing degree and then by node name) followed by a
##         summary: number of nodes and edges, max/min/median/average degree
##         and density.
## dies  : when the file cannot be opened or holds no edge.
sub make_stat{
    my ($edgesfile, $parentIndex, $childIndex)= @_;
    my (%indeg, %outdeg, %deg, $resdeg, $stat);
    ## create graph
    my $g= Graph->new(directed => 1);
    open my $fh, "<", $edgesfile or die "cannot open $edgesfile. $!.\n";
    while(my $line= <$fh>){
        chomp $line;
        next if $line=~/^\s*$/; ## a blank line is not an edge
        my @vals= split(/\t/, $line);
        $g->add_edge($vals[$parentIndex], $vals[$childIndex]);
    }
    close $fh;
    ## compute indegree/outdegree/degree
    my @V= $g->vertices;
    ## without nodes every ratio below would be a division by zero
    unless(@V){ die "$edgesfile does not contain any edge.\n"; }
    foreach my $node (@V){
        $indeg{$node}= $g->in_degree($node);
        $outdeg{$node}= $g->out_degree($node);
        $deg{$node}= $indeg{$node} + $outdeg{$node};
    }
    foreach my $node (sort{$deg{$b}<=>$deg{$a} or ($a cmp $b)} keys %deg){
        $resdeg .= "$node\t$deg{$node}\t$indeg{$node}\t$outdeg{$node}\n";
    }
    ## compute: median/max/min degree
    my @sortdeg= sort{$a<=>$b} values(%deg);
    my $len= scalar(@sortdeg);
    my $mid= int($len/2);
    my $medeg= ($len % 2) ? $sortdeg[$mid] : ( $sortdeg[$mid-1] + $sortdeg[$mid] ) / 2;
    $medeg= sprintf("%.4f", $medeg);
    my $mindeg= $sortdeg[0];
    my $maxdeg= $sortdeg[-1];
    ## compute number of nodes and edges
    my $ed= $g->edges; ## scalar assignment, as in the original code, to get the number of edges
    my $nd= scalar(@V);
    ## compute average degree and density
    my $avgdeg= sprintf("%.4f", $ed/$nd);
    ## a single node (self-loop only) has no possible node pair: density is reported as 0
    my $den= ($nd > 1) ? $ed / ( $nd * ($nd -1) ) : 0;
    $den= sprintf("%.4e", $den);
    ## return stat
    $stat .= "nodes: $nd\nedges: $ed\nmax degree: $maxdeg\nmin degree: $mindeg\n";
    $stat .= "median degree: $medeg\naverage degree: $avgdeg\ndensity: $den\n";
    my $res= "#oboterm degree indegree outdegree\n".$resdeg."\n"."~summary stat~\n".$stat;
    return $res;
}
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
## get_parents_or_children_list: for each node of an edges file build the
## list of its parents or of its children.
##
## input : $edgesfile   - tab-separated edges file;
##         $parentIndex - index (from zero) of the column holding the parent node;
##         $childIndex  - index (from zero) of the column holding the child node;
##         $chdORpar    - "parents" or "children".
## output: reference to a hash; key: node, value: its distinct parents (or
##         children) joined by '|' and sorted by the first run of digits
##         found in each identifier.
## dies  : when $chdORpar is not valid or the file cannot be opened.
sub get_parents_or_children_list{
    my ($edgesfile, $parentIndex, $childIndex, $chdORpar)= @_;
    my (%nodelist, %outlist);
    if($chdORpar ne "parents" && $chdORpar ne "children"){ die "$chdORpar can be 'parents' or 'children'.\n";}
    ## parents are listed by child and children are listed by parent
    my ($keyIndex, $valIndex)= ($chdORpar eq "parents")
        ? ($childIndex, $parentIndex)
        : ($parentIndex, $childIndex);
    open my $fh, "<", $edgesfile or die "cannot open $edgesfile. $!.\n";
    while(my $line= <$fh>){
        chomp $line;
        next if $line=~/^\s*$/; ## a blank line would create an empty key
        my @vals= split(/\t/, $line);
        push(@{$nodelist{$vals[$keyIndex]}}, $vals[$valIndex]);
    }
    close $fh;
    foreach my $term (keys %nodelist){
        $outlist{$term}= join('|', sort{($a=~/(\d+)/)[0] <=> ($b=~/(\d+)/)[0]} uniq @{$nodelist{$term}});
    }
    return \%outlist;
}
|
144
|
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
## obo_filter: extract from an obo file the stanzas of a list of terms.
##
## input : $obofile   - path of the ontology file;
##         $termsfile - file listing the terms of interest, one per line
##                      (blank lines and repeated terms are ignored).
## output: reference to a string made of the obo header (copied only when the
##         file is named *.obo) followed by the selected stanzas, each one
##         introduced by "[Term]".
## side effect: a progress row "<counter> <term> done" is printed on STDOUT
##         for each term.
## dies  : when a file cannot be opened or when none of the terms is found.
sub obo_filter{
    my ($obofile, $termsfile)= @_;
    my (@oboterms, %listed, $res);
    my $header= "";
    my $counter= 0;
    ## obo terms of interest (first occurrence only, input order kept)
    open my $tfh, "<", $termsfile or die "cannot open $termsfile. $!.\n";
    while(my $term= <$tfh>){
        chomp $term;
        next if $term=~/^\s*$/;
        push(@oboterms, $term) unless $listed{$term}++;
    }
    close $tfh;
    ## store obofile header: every line up to the first line holding "[Term]"
    if($obofile=~/\.obo$/i){
        open my $hfh, "<", $obofile or die "cannot open $obofile. $!.\n";
        while(my $line= <$hfh>){
            my $endheader= ($line=~/\[Term\]/);
            $header .= $line unless $line=~/^\[Term\]/;
            last if $endheader;
        }
        close $hfh;
    }else{
        warn "$obofile is not an .obo file: its header is not copied.\n";
    }
    ## extract from obo file the terms of interest
    foreach my $oboterm (@oboterms){
        $counter++;
        ## the term is matched literally and as a whole identifier, so that
        ## e.g. GO:0000001 does not select GO:00000011 as well
        my $start= qr/^id:\s+\Q$oboterm\E(?!\S)/;
        my $inside= 0;
        open my $ofh, "<", $obofile or die "cannot open $obofile. $!.\n";
        while(my $line= <$ofh>){
            unless($inside){
                next unless $line=~$start;
                $inside= 1;
            }
            $res .= $line;
            ## a stanza ends with an empty line (or with the end of the file)
            $inside= 0 if $line=~/^$/;
        }
        close $ofh;
        print($counter, "\t", $oboterm, "\tdone\n");
    }
    unless(defined $res){ die "none obo terms listed in $termsfile file was found in $obofile file. $!.\n" }
    ## print obofile filtered
    my $obo= $header.$res."\n";
    ## note: the match is anchored at the beginning of a line so that neither
    ## "alt_id:" nor an "id:" quoted inside another tag gets a "[Term]" line
    $obo=~ s/^(id:\s+\D+\d+)/[Term]\n$1/mg;
    return \$obo;
}
|
183
|
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
## gene2biofun: pair each gene of an annotation file with its ontology terms.
##
## input : $annfile    - tab-separated annotation file, plain or gzipped
##                       (a name ending in ".gz" is read through the gzip layer);
##         $geneIndex  - index (from zero) of the column holding the gene;
##         $classIndex - index (from zero) of the column holding the ontology term.
## output: a list of two references:
##         - a hash; key: gene, value: its distinct terms joined by '|' and
##           sorted by the first run of digits found in each term;
##         - a string reporting the number of distinct genes and terms.
## dies  : when the annotation file cannot be opened.
sub gene2biofun{
    my ($annfile, $geneIndex, $classIndex)= @_;
    my (%gene2biofun, %biofun, %paired, %classes, $stat);
    my $layer= ($annfile=~/\.gz$/i) ? "<:gzip" : "<";
    open my $fh, $layer, $annfile or die "cannot open $annfile. $!.\n";
    while(my $line= <$fh>){
        ## skip blank lines and lines starting with '!', ',' or '#'
        next if $line=~/^[!,#]|^\s*$/;
        chomp $line;
        my @vals= split(/\t/, $line);
        my ($gene, $class)= @vals[$geneIndex, $classIndex];
        $classes{$class}= 1;
        ## a gene/term pair is stored once, in order of first appearance:
        ## repeated annotation rows do not pile up in memory
        push(@{$biofun{$gene}}, $class) unless $paired{$gene}{$class}++;
    }
    close $fh;
    foreach my $gene (keys %biofun){
        $gene2biofun{$gene}= join('|', sort{($a=~/(\d+)/)[0] <=> ($b=~/(\d+)/)[0]} @{$biofun{$gene}});
    }
    my $sample= scalar(keys %biofun);
    my $oboterm= scalar(keys %classes);
    $stat .= "genes: $sample\nontology terms: $oboterm\n";
    return (\%gene2biofun, \$stat);
}
|
208
|
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
## map_OBOterm_between_release: replace in an annotation file the ontology
## terms that the obo file lists as "alt_id" with their current "id".
##
## input : $obofile    - path of the ontology file (parsed only when named *.obo);
##         $annfile    - tab-separated annotation file, plain or gzipped
##                       (a name ending in ".gz" is read through the gzip layer);
##         $classIndex - index (from zero) of the column holding the ontology term.
## output: a list of two references:
##         - the annotation file as a string: first the blank lines and the
##           lines starting with '!', ',' or '#', then the annotation rows
##           with the updated terms;
##         - the mapping statistics, preceded by the list of the alt-id/id
##           pairs when at least one alt-id occurs in the annotation file.
## dies  : when a file cannot be opened.
sub map_OBOterm_between_release{
    my ($obofile, $annfile, $classIndex)= @_;
    my (%altid, %oldclass, $header, $id, $fln, $pair, $stat, $pstat);
    my ($seen, $unseen)= (0)x2;
    ## step 0: pairing altid_2_id (key: alt_id)
    if($obofile=~/\.obo$/i){
        open my $ofh, "<", $obofile or die "cannot open $obofile. $!.\n";
        while(my $line= <$ofh>){
            chomp $line;
            next if $line=~/^\s*$/;
            if($line=~/^id:\s+(\D+\d+)/){ $id= $1; }
            if($line=~/^alt_id:\s+(\D+\d+)/){ $altid{$1}= $id; }
        }
        close $ofh;
    }else{
        warn "$obofile is not an .obo file: no term is mapped.\n";
    }
    my $alt= keys(%altid);
    ## step 1: read the annotation file once, storing the ontology terms it
    ## holds and substituting each ALT-ID with the updated ID
    my $layer= ($annfile=~/\.gz$/i) ? "<:gzip" : "<";
    open my $afh, $layer, $annfile or die "cannot open $annfile. $!.\n";
    while(my $line= <$afh>){
        chomp $line;
        if($line=~/^[!,#]|^\s*$/){ $header .= "$line\n"; next; }
        my @vals= split(/\t/, $line);
        my $oboterm= $vals[$classIndex];
        $oboterm= "" unless defined $oboterm;
        $oldclass{$oboterm}= 1;
        if($altid{$oboterm}){
            ## the old term is matched literally and never as the prefix of a
            ## longer identifier
            $line=~ s/\Q$oboterm\E(?!\d)/$altid{$oboterm}/g;
        }
        $fln .= "$line\n";
    }
    close $afh;
    my $classes= keys(%oldclass);
    if(defined $header){ $fln= $header.(defined $fln ? $fln : ""); }
    ## step 2: list the alt-ids found in the annotation file with their new id
    foreach my $k (sort{$a cmp $b} keys(%altid)){
        if($oldclass{$k}){
            $seen++;
            $pair .= "$k\t$altid{$k}\n";
        }else{
            $unseen++;
        }
    }
    ## print mapping stat
    $stat .= "Tot. ontology terms:\t$classes\nTot. altID:\t$alt\nTot. altID seen:\t$seen\nTot. altID unseen:\t$unseen\n";
    if(defined $pair){
        $pstat .= "#alt-id id\n$pair\n$stat";
        return (\$fln, \$pstat);
    }
    return (\$fln, \$stat);
}
|
274
|
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
1; |
|
276
|
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
__END__ |