line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package obogaf::parser;

## minimum interpreter version required by this module
require 5.006;

our $VERSION= '1.373';
$VERSION= eval $VERSION;

require Exporter;
our @ISA= ('Exporter');

## nothing is exported by default; every public routine can be requested
## by name or all together through the ":all" tag
our %EXPORT_TAGS= (
    all => [
        '&build_edges',
        '&build_subonto',
        '&make_stat',
        '&get_parents_or_children_list',
        '&obo_filter',
        '&gene2biofun',
        '&map_OBOterm_between_release',
    ],
);
our @EXPORT_OK= @{ $EXPORT_TAGS{all} };

use strict;
use warnings;
use Graph;
use List::MoreUtils qw(uniq);
use IO::File;
use PerlIO::gzip;
|
7
|
|
|
|
|
4131
|
|
|
7
|
|
|
|
|
23163
|
|
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
## build_edges: list the is_a and part_of edges declared in an OBO file.
##
## Input:
##   $obofile - path of the ontology file; its name must match /.obo$/i.
## Output:
##   reference to a string holding one tab-separated line per edge, in file order:
##     parent-id, child-id, parent-name, child-name, relation ("is-a" or "part-of").
##   Once a "namespace:" tag has been read its value is written as an extra
##   first column. The referenced string is undef when no edge is found.
## Dies when the file name is not an .obo name or the file cannot be opened.
sub build_edges{
    my ($obofile)= @_;
    my ($namespace, $idname, $destination, $res);

    ## any other name used to leave the handle unopened and was then read anyway
    unless($obofile=~/.obo$/i){ die "cannot open $obofile. not an .obo file.\n"; }
    open my $obo_fh, "<", $obofile or die "cannot open $obofile. $!.\n";

    ## append one edge line; pieces not seen yet are written as empty columns
    my $add_edge= sub {
        my ($parent, $parentname, $relation)= @_;
        my @cols= ($parent, $destination, $parentname, $idname, $relation);
        unshift(@cols, $namespace) if defined $namespace;
        $res .= join("\t", map { defined $_ ? $_ : "" } @cols)."\n";
    };

    while(my $line= <$obo_fh>){
        chomp $line;
        next if $line=~/^\s*$/;
        if($line=~/^namespace:\s+(\D+)/){
            $namespace= $1;
        }elsif($line=~/^name:\s+(.+)/){
            $idname= $1;
        }elsif($line=~/^id:\s+(\D+\d+)/){
            $destination= $1;
        }elsif($line=~/^is_a:\s+(\D+\d+)/){
            my $source= $1;
            ## the parent name is the text following "!" on the same line
            my ($isname)= ($line=~/!\s+(.+)/);
            $add_edge->($source, $isname, "is-a");
        }elsif($line=~/^relationship: part_of\s+(\D+\d+)/){
            my $pof= $1;
            my ($pofname)= ($line=~/!\s+(.+)/);
            $add_edge->($pof, $pofname, "part-of");
        }
    }
    close $obo_fh;
    return \$res;
}
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
## build_subonto: keep the edges of a single namespace (sub-ontology).
##
## Input:
##   $edgesfile - tab-separated edges file whose first column is the namespace;
##   $namespace - value of the first column to select.
## Output:
##   reference to a string with the selected lines, first column removed.
##   Lines starting with "!", "," or "#" and blank lines are ignored.
## Dies when the file cannot be opened or no line carries $namespace in its
## first column.
sub build_subonto{
    my ($edgesfile, $namespace)= @_;
    my ($res, $found);
    open my $edges_fh, "<", $edgesfile or die "cannot open $edgesfile. $!.\n";
    while(my $line= <$edges_fh>){
        next if $line=~/^[!,#]|^\s*$/;
        ## strip the newline and add it back below, so that a last line without
        ## a trailing newline is still written as a complete record
        chomp $line;
        ## limit -1 keeps trailing empty columns
        my ($domain, @rest)= split(/\t/, $line, -1);
        next unless @rest;
        next unless $domain eq $namespace;
        $found= 1;
        $res .= join("\t", @rest)."\n";
    }
    close $edges_fh;
    unless($found){die "$edgesfile does not include $namespace or $namespace is not in the first column of $edgesfile.\n";}
    return \$res;
}
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
## make_stat: degree statistics of the directed graph described by an edges file.
##
## Input:
##   $edgesfile   - tab-separated edges file;
##   $parentIndex - index (from zero) of the column holding the edge source;
##   $childIndex  - index (from zero) of the column holding the edge target.
## Output:
##   a string: one "node degree indegree outdegree" line per node (highest
##   degree first, ties by name), followed by a summary with the number of
##   nodes and edges, max/min/median/average degree and density.
## Dies when the file cannot be opened or contains no edge.
sub make_stat{
    my ($edgesfile, $parentIndex, $childIndex)= @_;
    my (%indeg, %outdeg, %deg, $resdeg, $stat);

    ## create graph
    my $g= Graph->new(directed => 1);
    open my $edges_fh, "<", $edgesfile or die "cannot open $edgesfile. $!.\n";
    while(my $line= <$edges_fh>){
        chomp $line;
        next if $line=~/^\s*$/;
        my @vals= split(/\t/, $line);
        ## rows lacking one of the two columns cannot describe an edge
        next unless defined($vals[$parentIndex]) && defined($vals[$childIndex]);
        $g->add_edge($vals[$parentIndex], $vals[$childIndex]);
    }
    close $edges_fh;

    ## compute indegree/outdegree/degree
    my @V= $g->vertices;
    ## without this guard the averages below would divide by zero
    unless(@V){ die "$edgesfile does not contain any edge.\n"; }
    foreach my $node (@V){
        $indeg{$node}= $g->in_degree($node);
        $outdeg{$node}= $g->out_degree($node);
        $deg{$node}= $indeg{$node} + $outdeg{$node};
    }
    foreach my $node (sort{$deg{$b}<=>$deg{$a} or ($a cmp $b)} keys %deg){
        $resdeg .= "$node\t$deg{$node}\t$indeg{$node}\t$outdeg{$node}\n";
    }

    ## compute: median/max/min degree
    my @sortdeg= sort{$a<=>$b} values(%deg);
    my $len= scalar(@sortdeg);
    my $mid= int($len/2);
    my $medeg= ($len % 2) ? $sortdeg[$mid] : ($sortdeg[$mid-1] + $sortdeg[$mid]) / 2;
    $medeg= sprintf("%.4f", $medeg);
    my $mindeg= $sortdeg[0];
    my $maxdeg= $sortdeg[-1];

    ## compute number of nodes and edges (scalar context yields the count)
    my $ed= $g->edges;
    my $nd= scalar(@V);

    ## compute average degree and density
    my $avgdeg= sprintf("%.4f", $ed/$nd);
    ## a single vertex (self-loop only) has no possible pair of distinct nodes:
    ## report density 0 instead of dividing by zero
    my $den= ($nd > 1) ? $ed / ( $nd * ($nd - 1) ) : 0;
    $den= sprintf("%.4e", $den);

    ## return stat
    $stat .= "nodes: $nd\nedges: $ed\nmax degree: $maxdeg\nmin degree: $mindeg\n";
    $stat .= "median degree: $medeg\naverage degree: $avgdeg\ndensity: $den\n";
    return "#oboterm degree indegree outdegree\n".$resdeg."\n"."~summary stat~\n".$stat;
}
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
## get_parents_or_children_list: for each term, list its parents or its children.
##
## Input:
##   $edgesfile   - tab-separated edges file;
##   $parentIndex - index (from zero) of the column holding the parent term;
##   $childIndex  - index (from zero) of the column holding the child term;
##   $chdORpar    - "parents" or "children".
## Output:
##   reference to a hash: term => its distinct parents (or children) joined
##   by "|", ordered by the first run of digits found in each identifier.
## Dies when $chdORpar is neither value or the file cannot be opened.
sub get_parents_or_children_list{
    my ($edgesfile, $parentIndex, $childIndex, $chdORpar)= @_;
    if($chdORpar ne "parents" && $chdORpar ne "children"){ die "$chdORpar can be 'parents' or 'children'.\n";}

    ## the hash is keyed by the child when parents are requested, and vice versa
    my ($keyIndex, $valIndex)= ($chdORpar eq "parents")
        ? ($childIndex, $parentIndex)
        : ($parentIndex, $childIndex);

    my (%nodelist, %seen, %outlist);
    open my $edges_fh, "<", $edgesfile or die "cannot open $edgesfile. $!.\n";
    while(my $line= <$edges_fh>){
        chomp $line;
        next if $line=~/^\s*$/;
        my @vals= split(/\t/, $line);
        my ($term, $relative)= @vals[$keyIndex, $valIndex];
        ## rows lacking one of the two columns would create an empty-string key
        next unless defined($term) && defined($relative);
        ## drop duplicates on insert, keeping first-seen order
        push(@{$nodelist{$term}}, $relative) unless $seen{$term}{$relative}++;
    }
    close $edges_fh;

    foreach my $term (keys %nodelist){
        $outlist{$term}= join('|', sort{($a=~/(\d+)/)[0] <=> ($b=~/(\d+)/)[0]} @{$nodelist{$term}});
    }
    return \%outlist;
}
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
## obo_filter: extract from an OBO file the stanzas of a list of terms.
##
## Input:
##   $obofile   - path of the ontology file; its name must match /.obo$/i;
##   $termsfile - file with one term identifier per line (blank lines and
##                repeated identifiers are ignored).
## Output:
##   reference to a string made of the file header (everything before the
##   first "[Term]" line) followed by the stanzas of the requested terms, in
##   the order of $termsfile, each one introduced by a "[Term]" line.
## Side effect:
##   prints "<counter>\t<term>\tdone" on the default output handle for every
##   requested term, as earlier versions did.
## Dies when a file cannot be opened, the ontology name is not an .obo name,
## or none of the requested terms is found.
sub obo_filter{
    my ($obofile, $termsfile)= @_;

    ## obo terms of interest (unique, in order of first appearance)
    my (@oboterms, %wanted);
    open my $terms_fh, "<", $termsfile or die "cannot open $termsfile. $!.\n";
    while(my $term= <$terms_fh>){
        chomp $term;
        next if $term=~/^\s*$/;
        push(@oboterms, $term) unless $wanted{$term}++;
    }
    close $terms_fh;

    unless($obofile=~/.obo$/i){ die "cannot open $obofile. not an .obo file.\n"; }
    open my $obo_fh, "<", $obofile or die "cannot open $obofile. $!.\n";

    ## single pass over the obo file: store the header and the wanted stanzas.
    ## Identifiers are compared for equality: interpolating the term into a
    ## pattern matched any identifier it was a prefix of and treated regex
    ## metacharacters as such.
    my $header= "";
    my $in_header= 1;
    my (%stanza, @capturing);
    while(my $line= <$obo_fh>){
        if($in_header){
            if($line=~/\[Term\]/){
                ## the header ends here; the "[Term]" tag itself is not part of it
                $in_header= 0;
                $header .= $line unless $line=~/^\[Term\]/;
            }else{
                $header .= $line;
            }
        }
        if($line=~/^id:\s+(\S+)/){
            my $id= $1;
            if($wanted{$id} && !grep { $_ eq $id } @capturing){ push(@capturing, $id); }
        }
        foreach my $id (@capturing){ $stanza{$id} .= $line; }
        ## a stanza runs up to and including the next empty line (or end of file)
        @capturing= () if $line=~/^$/;
    }
    close $obo_fh;

    ## assemble the stanzas in the order of the terms file
    my ($counter, $res);
    foreach my $oboterm (@oboterms){
        $counter++;
        $res .= $stanza{$oboterm} if defined $stanza{$oboterm};
        print($counter, "\t", $oboterm, "\tdone\n");
    }
    unless(defined $res){ die "none obo terms listed in $termsfile file was found in $obofile file.\n" }

    ## print obofile filtered
    my $obo= $header.$res."\n";
    $obo=~ s/\b(id:\s+(\D+\d+))/\[Term\]\n$1/g; ## note: word boundary (\b) is necessary to match only "id:" and not "alt_id:" in the string $obo
    return \$obo;
}
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
## gene2biofun: pair each gene with the ontology terms it is annotated with.
##
## Input:
##   $annfile    - tab-separated annotation file; a name matching /.gz$/i is
##                 read through the ":gzip" layer;
##   $geneIndex  - index (from zero) of the column holding the gene;
##   $classIndex - index (from zero) of the column holding the ontology term.
## Output (list of two references):
##   - hash: gene => its distinct terms joined by "|", ordered by the first
##     run of digits found in each identifier;
##   - string: "genes: N\nontology terms: M\n" with the distinct counts.
## Lines starting with "!", "," or "#" and blank lines are ignored.
## Dies when the file cannot be opened.
sub gene2biofun{
    my ($annfile, $geneIndex, $classIndex)= @_;
    my (%gene2biofun, %biofun, %paired, %termseen);

    my $ann_fh;
    if($annfile=~/.gz$/i){
        open $ann_fh, "<:gzip", $annfile or die "cannot open $annfile. $!.\n";
    }else{
        open $ann_fh, "<", $annfile or die "cannot open $annfile. $!.\n";
    }
    while(my $line= <$ann_fh>){
        next if $line=~/^[!,#]|^\s*$/;
        chomp $line;
        my @vals= split(/\t/, $line);
        my ($gene, $class)= @vals[$geneIndex, $classIndex];
        ## rows lacking one of the two columns carry no annotation
        next unless defined($gene) && defined($class);
        ## hashes keep only the distinct values, so the whole file is never
        ## held in memory just to be counted
        $termseen{$class}= 1;
        push(@{$biofun{$gene}}, $class) unless $paired{$gene}{$class}++;
    }
    close $ann_fh;

    foreach my $gene (keys %biofun){
        $gene2biofun{$gene}= join('|', sort{($a=~/(\d+)/)[0] <=> ($b=~/(\d+)/)[0]} @{$biofun{$gene}});
    }
    my $sample= scalar(keys %biofun);
    my $oboterm= scalar(keys %termseen);
    my $stat= "genes: $sample\nontology terms: $oboterm\n";
    return (\%gene2biofun, \$stat);
}
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
## map_OBOterm_between_release: replace, in an annotation file, the alternative
## identifiers (alt_id) of ontology terms with their primary identifier.
##
## Input:
##   $obofile    - path of the ontology file; its name must match /.obo$/i;
##   $annfile    - tab-separated annotation file; a name matching /.gz$/i is
##                 read through the ":gzip" layer;
##   $classIndex - index (from zero) of the column holding the ontology term.
## Output (list of two references):
##   - string: the annotation file content, comment/blank lines first, then
##     the data lines with every occurrence of a mapped alt_id replaced;
##   - string: mapping statistics, preceded by the "alt-id id" pairs actually
##     used when there is at least one.
## Dies when a file cannot be opened or the ontology name is not an .obo name.
sub map_OBOterm_between_release{
    my ($obofile, $annfile, $classIndex)= @_;
    my (%altid, %oldclass, $header, $id, $fln, $pair);

    ## step 0: pairing altid_2_id (key: alt_id)
    unless($obofile=~/.obo$/i){ die "cannot open $obofile. not an .obo file.\n"; }
    open my $obo_fh, "<", $obofile or die "cannot open $obofile. $!.\n";
    while(my $line= <$obo_fh>){
        chomp $line;
        next if $line=~/^\s*$/;
        if($line=~/^id:\s+(\D+\d+)/){ $id= $1; }
        if($line=~/^alt_id:\s+(\D+\d+)/){ $altid{$1}= $id; }
    }
    close $obo_fh;

    ## step 1: one pass over the annotation file collects the comment lines,
    ## records the terms in use and rewrites the data lines (the file used to
    ## be read, and decompressed, twice)
    my $ann_fh;
    if($annfile=~/.gz$/i){
        open $ann_fh, "<:gzip", $annfile or die "cannot open $annfile. $!.\n";
    }else{
        open $ann_fh, "<", $annfile or die "cannot open $annfile. $!.\n";
    }
    while(my $line= <$ann_fh>){
        chomp $line;
        if($line=~/^[!,#]|^\s*$/){
            $header .= "$line\n";
            next;
        }
        my @vals= split(/\t/, $line);
        my $oboterm= defined($vals[$classIndex]) ? $vals[$classIndex] : "";
        $oldclass{$oboterm}= 1;
        if($altid{$oboterm}){
            ## \Q..\E: the identifier is plain text, not a pattern
            $line=~ s/\Q$oboterm\E/$altid{$oboterm}/g;
        }
        $fln .= "$line\n";
    }
    close $ann_fh;
    if(defined $header){ $fln= $header.(defined($fln) ? $fln : ""); }

    ## step 2: which alt_id were found among the annotated terms
    my ($seen, $unseen)= (0, 0);
    foreach my $k (sort{$a cmp $b} keys(%altid)){
        if(exists $oldclass{$k}){
            $seen++;
            $pair .= $k."\t".(defined($altid{$k}) ? $altid{$k} : "")."\n";
        }else{
            $unseen++;
        }
    }

    ## mapping stat
    my $classes= scalar(keys %oldclass);
    my $alt= scalar(keys %altid);
    my $stat= "Tot. ontology terms:\t$classes\nTot. altID:\t$alt\nTot. altID seen:\t$seen\nTot. altID unseen:\t$unseen\n";
    if(defined $pair){
        my $pstat= "#alt-id id\n$pair\n$stat";
        return (\$fln, \$pstat);
    }
    return (\$fln, \$stat);
}
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
1; |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
__END__ |