line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
############################################################################### |
2
|
|
|
|
|
|
|
# # |
3
|
|
|
|
|
|
|
# Copyright © 2012-2013 -- IRB/INSERM # |
4
|
|
|
|
|
|
|
# (Institut de Recherche en Biothérapie / # |
5
|
|
|
|
|
|
|
# Institut National de la Santé et de la # |
6
|
|
|
|
|
|
|
# Recherche Médicale) # |
7
|
|
|
|
|
|
|
# # |
8
|
|
|
|
|
|
|
# Auteurs/Authors: Jerôme AUDOUX # |
9
|
|
|
|
|
|
|
# Nicolas PHILIPPE # |
10
|
|
|
|
|
|
|
# # |
11
|
|
|
|
|
|
|
# ------------------------------------------------------------------------- # |
12
|
|
|
|
|
|
|
# # |
13
|
|
|
|
|
|
|
# Ce fichier fait partie de la suite CracTools qui contient plusieurs pipeline# |
14
|
|
|
|
|
|
|
# intégrés permettant de traiter les évênements biologiques présents dans du # |
15
|
|
|
|
|
|
|
# RNA-Seq. Les CracTools travaillent à partir d'un fichier SAM de CRAC et d'un# |
16
|
|
|
|
|
|
|
# fichier d'annotation au format GFF3. # |
17
|
|
|
|
|
|
|
# # |
18
|
|
|
|
|
|
|
# Ce logiciel est régi par la licence CeCILL soumise au droit français et # |
19
|
|
|
|
|
|
|
# respectant les principes de diffusion des logiciels libres. Vous pouvez # |
20
|
|
|
|
|
|
|
# utiliser, modifier et/ou redistribuer ce programme sous les conditions de # |
21
|
|
|
|
|
|
|
# la licence CeCILL telle que diffusée par le CEA, le CNRS et l'INRIA sur # |
22
|
|
|
|
|
|
|
# le site "http://www.cecill.info". # |
23
|
|
|
|
|
|
|
# # |
24
|
|
|
|
|
|
|
# En contrepartie de l'accessibilité au code source et des droits de copie, # |
25
|
|
|
|
|
|
|
# de modification et de redistribution accordés par cette licence, il n'est # |
26
|
|
|
|
|
|
|
# offert aux utilisateurs qu'une garantie limitée. Pour les mêmes raisons, # |
27
|
|
|
|
|
|
|
# seule une responsabilité restreinte pèse sur l'auteur du programme, le # |
28
|
|
|
|
|
|
|
# titulaire des droits patrimoniaux et les concédants successifs. # |
29
|
|
|
|
|
|
|
# # |
30
|
|
|
|
|
|
|
# À cet égard l'attention de l'utilisateur est attirée sur les risques # |
31
|
|
|
|
|
|
|
# associés au chargement, à l'utilisation, à la modification et/ou au # |
32
|
|
|
|
|
|
|
# développement et à la reproduction du logiciel par l'utilisateur étant # |
33
|
|
|
|
|
|
|
# donné sa spécificité de logiciel libre, qui peut le rendre complexe à # |
34
|
|
|
|
|
|
|
# manipuler et qui le réserve donc à des développeurs et des professionnels # |
35
|
|
|
|
|
|
|
# avertis possédant des connaissances informatiques approfondies. Les # |
36
|
|
|
|
|
|
|
# utilisateurs sont donc invités à charger et tester l'adéquation du # |
37
|
|
|
|
|
|
|
# logiciel à leurs besoins dans des conditions permettant d'assurer la # |
38
|
|
|
|
|
|
|
# sécurité de leurs systêmes et ou de leurs données et, plus généralement, # |
39
|
|
|
|
|
|
|
# à l'utiliser et l'exploiter dans les mêmes conditions de sécurité. # |
40
|
|
|
|
|
|
|
# # |
41
|
|
|
|
|
|
|
# Le fait que vous puissiez accéder à cet en-tête signifie que vous avez # |
42
|
|
|
|
|
|
|
# pris connaissance de la licence CeCILL, et que vous en avez accepté les # |
43
|
|
|
|
|
|
|
# termes. # |
44
|
|
|
|
|
|
|
# # |
45
|
|
|
|
|
|
|
# ------------------------------------------------------------------------- # |
46
|
|
|
|
|
|
|
# # |
47
|
|
|
|
|
|
|
# This file is part of the CracTools which provide several integrated # |
48
|
|
|
|
|
|
|
# pipeline to analyze biological events present in RNA-Seq data. CracTools # |
49
|
|
|
|
|
|
|
# work on a SAM file generated by CRAC and an annotation file in GFF3 format.# |
50
|
|
|
|
|
|
|
# # |
51
|
|
|
|
|
|
|
# This software is governed by the CeCILL license under French law and # |
52
|
|
|
|
|
|
|
# abiding by the rules of distribution of free software. You can use, # |
53
|
|
|
|
|
|
|
# modify and/ or redistribute the software under the terms of the CeCILL # |
54
|
|
|
|
|
|
|
# license as circulated by CEA, CNRS and INRIA at the following URL # |
55
|
|
|
|
|
|
|
# "http://www.cecill.info". # |
56
|
|
|
|
|
|
|
# # |
57
|
|
|
|
|
|
|
# As a counterpart to the access to the source code and rights to copy, # |
58
|
|
|
|
|
|
|
# modify and redistribute granted by the license, users are provided only # |
59
|
|
|
|
|
|
|
# with a limited warranty and the software's author, the holder of the # |
60
|
|
|
|
|
|
|
# economic rights, and the successive licensors have only limited # |
61
|
|
|
|
|
|
|
# liability. # |
62
|
|
|
|
|
|
|
# # |
63
|
|
|
|
|
|
|
# In this respect, the user's attention is drawn to the risks associated # |
64
|
|
|
|
|
|
|
# with loading, using, modifying and/or developing or reproducing the # |
65
|
|
|
|
|
|
|
# software by the user in light of its specific status of free software, # |
66
|
|
|
|
|
|
|
# that may mean that it is complicated to manipulate, and that also # |
67
|
|
|
|
|
|
|
# therefore means that it is reserved for developers and experienced # |
68
|
|
|
|
|
|
|
# professionals having in-depth computer knowledge. Users are therefore # |
69
|
|
|
|
|
|
|
# encouraged to load and test the software's suitability as regards their # |
70
|
|
|
|
|
|
|
# requirements in conditions enabling the security of their systems and/or # |
71
|
|
|
|
|
|
|
# data to be ensured and, more generally, to use and operate it in the same # |
72
|
|
|
|
|
|
|
# conditions as regards security. # |
73
|
|
|
|
|
|
|
# # |
74
|
|
|
|
|
|
|
# The fact that you are presently reading this means that you have had # |
75
|
|
|
|
|
|
|
# knowledge of the CeCILL license and that you accept its terms. # |
76
|
|
|
|
|
|
|
# # |
77
|
|
|
|
|
|
|
############################################################################### |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=head1 NAME |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
CracTools::Annotator - Generic annotation based on CracTools::GFF::Query |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
=cut |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
package CracTools::Annotator; |
86
|
|
|
|
|
|
|
|
87
|
1
|
|
|
1
|
|
65050
|
use strict; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
38
|
|
88
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
28
|
|
89
|
|
|
|
|
|
|
|
90
|
1
|
|
|
1
|
|
6
|
use Carp; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
98
|
|
91
|
1
|
|
|
1
|
|
1136
|
use Data::Dumper; |
|
1
|
|
|
|
|
15331
|
|
|
1
|
|
|
|
|
89
|
|
92
|
1
|
|
|
1
|
|
742
|
use CracTools::GFF::Annotation; |
|
1
|
|
|
|
|
5
|
|
|
1
|
|
|
|
|
43
|
|
93
|
1
|
|
|
1
|
|
841
|
use CracTools::GFF::Query; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
41
|
|
94
|
1
|
|
|
1
|
|
715
|
use CracTools::Const; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
1173
|
|
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
=head1 METHODS |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
=head2 new |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
Arg [1] : String - $gff_file |
101
|
|
|
|
|
|
|
GFF file to perform annotation |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
Example : my $annotator = CracTools::Annotator->new($gff_file); |
104
|
|
|
|
|
|
|
Description : Create a new CracTools::Annotator object. |
105
|
|
|
|
|
|
|
The GFF file passed in argument is loaded into a |
106
|
|
|
|
|
|
|
CracTools::GFF::Query object used to look up annotations. |
107
|
|
|
|
|
|
|
ReturnType : CracTools::Annotator |
108
|
|
|
|
|
|
|
Exceptions : croaks if no GFF file is given |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=cut |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
sub new { |
113
|
1
|
|
|
1
|
1
|
2364
|
my $class = shift; |
114
|
1
|
|
|
|
|
3
|
my $gff_file = shift; |
115
|
|
|
|
|
|
|
|
116
|
1
|
50
|
|
|
|
8
|
if(!defined $gff_file) { |
117
|
0
|
|
|
|
|
0
|
croak "Missing GFF file argument in CracTools::Annotator constructor"; |
118
|
|
|
|
|
|
|
} |
119
|
|
|
|
|
|
|
|
120
|
1
|
|
|
|
|
5
|
my $self = bless { |
121
|
|
|
|
|
|
|
gff_file => $gff_file, |
122
|
|
|
|
|
|
|
}, $class; |
123
|
|
|
|
|
|
|
|
124
|
1
|
|
|
|
|
6
|
$self->_init(); |
125
|
|
|
|
|
|
|
|
126
|
1
|
|
|
|
|
3
|
return $self; |
127
|
|
|
|
|
|
|
} |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
=head2 foundGene |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
Arg [1] : String - chr |
132
|
|
|
|
|
|
|
Arg [2] : String - pos_start |
133
|
|
|
|
|
|
|
Arg [3] : String - pos_end |
134
|
|
|
|
|
|
|
Arg [4] : String - strand |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
Description : Return true if there is an exon of a gene in this interval |
137
|
|
|
|
|
|
|
ReturnType : Boolean |
138
|
|
|
|
|
|
|
Exceptions : none |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
=cut |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
sub foundGene { |
143
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
144
|
0
|
|
|
|
|
0
|
my ($chr,$pos_start,$pos_end,$strand) = @_; |
145
|
0
|
|
|
|
|
0
|
my @candidates = $self->getAnnotationCandidates($chr,$pos_start,$pos_end,$strand); |
146
|
0
|
|
|
|
|
0
|
return @candidates > 0; |
147
|
|
|
|
|
|
|
} |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
=head2 foundSameGene |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
Arg [1] : String - chr |
152
|
|
|
|
|
|
|
Arg [2] : String - pos_start1 |
153
|
|
|
|
|
|
|
Arg [3] : String - pos_end1 |
154
|
|
|
|
|
|
|
Arg [4] : String - pos_start2 |
155
|
|
|
|
|
|
|
Arg [5] : String - pos_end2 |
156
|
|
|
|
|
|
|
Arg [6] : String - strand |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
Description : Return true if the same gene is found in the two intervals. |
159
|
|
|
|
|
|
|
ReturnType : Boolean |
160
|
|
|
|
|
|
|
Exceptions : none |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
=cut |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
sub foundSameGene { |
165
|
1
|
|
|
1
|
1
|
2
|
my $self = shift; |
166
|
1
|
|
|
|
|
3
|
my ($chr,$pos_start1,$pos_end1,$pos_start2,$pos_end2,$strand) = @_; |
167
|
1
|
|
|
|
|
5
|
my @candidates1 = $self->getAnnotationCandidates($chr,$pos_start1,$pos_end1,$strand); |
168
|
1
|
|
|
|
|
5
|
my @candidates2 = $self->getAnnotationCandidates($chr,$pos_start2,$pos_end2,$strand); |
169
|
1
|
|
|
|
|
3
|
my $found_same_gene = 0; |
170
|
1
|
|
|
|
|
2
|
my @genes1; |
171
|
|
|
|
|
|
|
my @genes2; |
172
|
1
|
|
|
|
|
3
|
foreach my $candi1 (@candidates1) { |
173
|
2
|
50
|
|
|
|
8
|
if(defined $candi1->{gene}) { |
174
|
2
|
|
|
|
|
15
|
push @genes1,$candi1->{gene}->attribute('ID'); |
175
|
|
|
|
|
|
|
} |
176
|
|
|
|
|
|
|
} |
177
|
1
|
|
|
|
|
3
|
foreach my $candi2 (@candidates2) { |
178
|
2
|
50
|
|
|
|
7
|
if(defined $candi2->{gene}) { |
179
|
2
|
|
|
|
|
7
|
push @genes2,$candi2->{gene}->attribute('ID'); |
180
|
|
|
|
|
|
|
} |
181
|
|
|
|
|
|
|
} |
182
|
1
|
|
|
|
|
3
|
foreach my $gene_id (@genes1) { |
183
|
1
|
|
|
|
|
2
|
foreach (@genes2) { |
184
|
1
|
50
|
|
|
|
4
|
if($gene_id eq $_) { |
185
|
1
|
|
|
|
|
2
|
$found_same_gene = 1; |
186
|
1
|
|
|
|
|
3
|
last; |
187
|
|
|
|
|
|
|
} |
188
|
|
|
|
|
|
|
} |
189
|
1
|
50
|
|
|
|
4
|
last if $found_same_gene == 1; |
190
|
|
|
|
|
|
|
} |
191
|
1
|
|
|
|
|
28
|
return $found_same_gene; |
192
|
|
|
|
|
|
|
} |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
=head2 getBestAnnotationCandidate |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
Arg [1] : String - chr |
197
|
|
|
|
|
|
|
Arg [2] : String - pos_start |
198
|
|
|
|
|
|
|
Arg [3] : String - pos_end |
199
|
|
|
|
|
|
|
Arg [4] : String - strand |
200
|
|
|
|
|
|
|
Arg [5] : (Optional) Subroutine - see C<getCandidatePriorityDefault> for more details |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
Description : Return best annotation candidate according to the priorities given |
203
|
|
|
|
|
|
|
by the subroutine in argument. |
204
|
|
|
|
|
|
|
ReturnType : Hash( feature_name => CracTools::GFF::Annotation, ...), Int(priority), String(type) |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
=cut |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
sub getBestAnnotationCandidate { |
209
|
1
|
|
|
1
|
1
|
7
|
my $self = shift; |
210
|
1
|
|
|
|
|
3
|
my ($chr,$pos_start,$pos_end,$strand,$prioritySub) = @_; |
211
|
|
|
|
|
|
|
|
212
|
1
|
50
|
|
|
|
5
|
$prioritySub = \&getCandidatePriorityDefault unless defined $prioritySub; |
213
|
|
|
|
|
|
|
|
214
|
1
|
|
|
|
|
5
|
my @candidates = $self->getAnnotationCandidates($chr,$pos_start,$pos_end,$strand); |
215
|
1
|
|
|
|
|
3
|
my ($best_priority,$best_candidate,$best_type); |
216
|
1
|
|
|
|
|
4
|
foreach my $candi (@candidates) { |
217
|
2
|
|
|
|
|
7
|
my ($priority,$type) = $prioritySub->($pos_start,$pos_end,$candi); |
218
|
2
|
100
|
|
|
|
9
|
if($priority != -1) { |
219
|
1
|
50
|
33
|
|
|
22
|
if(!defined $best_priority || $priority < $best_priority) { |
220
|
1
|
|
|
|
|
3
|
$best_priority = $priority; |
221
|
1
|
|
|
|
|
2
|
$best_candidate = $candi; |
222
|
1
|
|
|
|
|
5
|
$best_type = $type; |
223
|
|
|
|
|
|
|
} |
224
|
|
|
|
|
|
|
} |
225
|
|
|
|
|
|
|
} |
226
|
1
|
|
|
|
|
20
|
return $best_candidate,$best_priority,$best_type; |
227
|
|
|
|
|
|
|
} |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
=head2 getAnnotationCandidates |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
Arg [1] : String - chr |
232
|
|
|
|
|
|
|
Arg [2] : String - pos_start |
233
|
|
|
|
|
|
|
Arg [3] : String - pos_end |
234
|
|
|
|
|
|
|
Arg [4] : String - strand |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
Description : Return an array with all annotation candidates overlapping the |
237
|
|
|
|
|
|
|
chromosomal region. |
238
|
|
|
|
|
|
|
ReturnType : Array of Hash( feature_name => CracTools::GFF::Annotation, ...) |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
=cut |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
sub getAnnotationCandidates { |
243
|
4
|
|
|
4
|
1
|
13
|
my $self = shift; |
244
|
4
|
|
|
|
|
7
|
my ($chr,$pos_start,$pos_end,$strand) = @_; |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
# get GFF annotations that overlap the region to annotate |
247
|
4
|
|
|
|
|
24
|
my $annotations = $self->{gff_query}->fetchByRegion($chr,$pos_start,$pos_end,$strand); |
248
|
|
|
|
|
|
|
|
249
|
4
|
|
|
|
|
10
|
my %annot_hash = (); |
250
|
4
|
|
|
|
|
10
|
my @candidates = (); |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
# Construct annotation hash with annot ID as key |
253
|
4
|
|
|
|
|
6
|
foreach my $annot_line (@{$annotations}) { |
|
4
|
|
|
|
|
11
|
|
254
|
20
|
|
|
|
|
190
|
my $annot = CracTools::GFF::Annotation->new($annot_line,'gff3'); |
255
|
20
|
|
|
|
|
68
|
$annot_hash{$annot->attribute('ID')} = $annot; |
256
|
|
|
|
|
|
|
} |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
# Find root in annotation tree |
259
|
4
|
|
|
|
|
17
|
foreach my $annot_id (keys %annot_hash) { |
260
|
20
|
|
|
|
|
157
|
my @parents = $annot_hash{$annot_id}->parents; |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
# we have found a root, let's construct the candidate |
263
|
20
|
100
|
|
|
|
57
|
if(scalar @parents == 0) { |
264
|
8
|
|
|
|
|
21
|
push @candidates, _constructCandidate($annot_id,my $new_candidate,\%annot_hash); |
265
|
|
|
|
|
|
|
} |
266
|
|
|
|
|
|
|
} |
267
|
|
|
|
|
|
|
|
268
|
4
|
|
|
|
|
164
|
return @candidates; |
269
|
|
|
|
|
|
|
} |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
=head2 getCandidatePriorityDefault |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
Arg [1] : String - pos_start |
274
|
|
|
|
|
|
|
Arg [2] : String - pos_end |
275
|
|
|
|
|
|
|
Arg [3] : hash - candidate |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
Description : Default method used to give a priority to a candidate. |
278
|
|
|
|
|
|
|
You can create your own priority method to fit your specific need |
279
|
|
|
|
|
|
|
for selecting the best annotation. |
280
|
|
|
|
|
|
|
The best priority is 0. A priority of -1 means that this candidate |
281
|
|
|
|
|
|
|
should be avoided. |
282
|
|
|
|
|
|
|
ReturnType : Array ($priority,$type) where $priority is an integer and $type a string |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
=cut |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
sub getCandidatePriorityDefault { |
287
|
2
|
|
|
2
|
1
|
4
|
my ($pos_start,$pos_end,$candidate) = @_; |
288
|
2
|
|
|
|
|
5
|
my ($priority,$type) = (-1,''); |
289
|
2
|
|
|
|
|
7
|
my ($mRNA,$exon) = ($candidate->{mRNA},$candidate->{exon}); |
290
|
2
|
100
|
|
|
|
8
|
if(defined $mRNA) { |
291
|
1
|
50
|
|
|
|
5
|
if($mRNA->attribute('type') =~ /protein_coding/i) { |
292
|
1
|
50
|
|
|
|
6
|
if(defined $exon) { |
293
|
1
|
50
|
33
|
|
|
6
|
if($exon->start > $pos_start || $exon->end < $pos_end) { |
294
|
1
|
|
|
|
|
2
|
$priority = 1; |
295
|
1
|
50
|
|
|
|
22
|
if(defined $candidate->{three}) { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
296
|
0
|
|
|
|
|
0
|
$type = '3PRIM_UTR'; |
297
|
|
|
|
|
|
|
} elsif(defined $candidate->{five}) { |
298
|
0
|
|
|
|
|
0
|
$type = '5PRIM_UTR'; |
299
|
|
|
|
|
|
|
} elsif(defined $candidate->{cds}) { |
300
|
1
|
|
|
|
|
3
|
$type = 'CDS'; |
301
|
|
|
|
|
|
|
} else { |
302
|
0
|
|
|
|
|
0
|
$type = 'EXON'; |
303
|
|
|
|
|
|
|
} |
304
|
|
|
|
|
|
|
} else { |
305
|
0
|
|
|
|
|
0
|
$priority = 2; |
306
|
0
|
|
|
|
|
0
|
$type = 'INXON'; |
307
|
|
|
|
|
|
|
} |
308
|
|
|
|
|
|
|
} |
309
|
|
|
|
|
|
|
} else { |
310
|
0
|
0
|
|
|
|
0
|
if(defined $exon) { |
311
|
0
|
0
|
0
|
|
|
0
|
if($exon->start > $pos_start || $exon->end < $pos_end) { |
312
|
0
|
|
|
|
|
0
|
$priority = 3; |
313
|
0
|
|
|
|
|
0
|
$type = 'NON_CODING'; |
314
|
|
|
|
|
|
|
} |
315
|
|
|
|
|
|
|
} |
316
|
|
|
|
|
|
|
} |
317
|
|
|
|
|
|
|
} |
318
|
2
|
|
|
|
|
7
|
return ($priority,$type); |
319
|
|
|
|
|
|
|
} |
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
=head1 PRIVATE METHODS |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
=head2 _init |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
Description : init method, load GFF annotation into a |
326
|
|
|
|
|
|
|
CracTools::GFF::Query object. |
327
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
=cut |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
sub _init { |
331
|
1
|
|
|
1
|
|
3
|
my $self = shift; |
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
# Create a GFF query object to look up annotations |
334
|
1
|
|
|
|
|
17
|
my $gff_query = CracTools::GFF::Query->new($self->{gff_file}); |
335
|
1
|
|
|
|
|
4
|
$self->{gff_query} = $gff_query; |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
} |
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
=head2 _constructCandidate |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
Arg [1] : String - annot_id |
342
|
|
|
|
|
|
|
Arg [2] : Hash ref - candidate |
343
|
|
|
|
|
|
|
Since this method is recursive, this is the object that |
344
|
|
|
|
|
|
|
we are constructing |
345
|
|
|
|
|
|
|
Arg [3] : Hash ref - annot_hash |
346
|
|
|
|
|
|
|
annot_hash is a hash reference where keys are annotation IDs |
347
|
|
|
|
|
|
|
and values are CracTools::GFF::Annotation objects. |
348
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
Description : _constructCandidate is a recursive method that builds a |
350
|
|
|
|
|
|
|
candidate hash. |
351
|
|
|
|
|
|
|
ReturnType : Candidate Hash ref where keys are GFF features and |
352
|
|
|
|
|
|
|
values are CracTools::GFF::Annotation objects : |
353
|
|
|
|
|
|
|
{ feature => CracTools::GFF::Annotation, ...} |
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
=cut |
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
sub _constructCandidate { |
358
|
20
|
|
|
20
|
|
35
|
my ($annot_id,$candidate,$annot_hash) = @_; |
359
|
20
|
|
|
|
|
72
|
$candidate->{$annot_hash->{$annot_id}->feature} = $annot_hash->{$annot_id}; |
360
|
20
|
|
|
|
|
31
|
foreach my $annot (values %{$annot_hash}) { |
|
20
|
|
|
|
|
50
|
|
361
|
100
|
|
|
|
|
386
|
my @parents = $annot->parents; |
362
|
100
|
|
|
|
|
210
|
foreach my $parent (@parents) { |
363
|
60
|
100
|
|
|
|
225
|
if($parent eq $annot_id) { |
364
|
12
|
|
|
|
|
30
|
_constructCandidate($annot->attribute('ID'),$candidate,$annot_hash); |
365
|
|
|
|
|
|
|
} |
366
|
|
|
|
|
|
|
} |
367
|
|
|
|
|
|
|
} |
368
|
20
|
|
|
|
|
1447
|
return $candidate; |
369
|
|
|
|
|
|
|
} |
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
1; |