| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
############################################################################### |
|
2
|
|
|
|
|
|
|
# # |
|
3
|
|
|
|
|
|
|
# Copyright © 2012-2013 -- IRB/INSERM # |
|
4
|
|
|
|
|
|
|
# (Institut de Recherche en Biothérapie / # |
|
5
|
|
|
|
|
|
|
# Institut National de la Santé et de la # |
|
6
|
|
|
|
|
|
|
# Recherche Médicale) # |
|
7
|
|
|
|
|
|
|
# # |
|
8
|
|
|
|
|
|
|
# Auteurs/Authors: Jerôme AUDOUX # |
|
9
|
|
|
|
|
|
|
# Nicolas PHILIPPE # |
|
10
|
|
|
|
|
|
|
# # |
|
11
|
|
|
|
|
|
|
# ------------------------------------------------------------------------- # |
|
12
|
|
|
|
|
|
|
# # |
|
13
|
|
|
|
|
|
|
# Ce fichier fait partie de la suite CracTools qui contient plusieurs pipeline# |
|
14
|
|
|
|
|
|
|
# intégrés permettant de traiter les évênements biologiques présents dans du # |
|
15
|
|
|
|
|
|
|
# RNA-Seq. Les CracTools travaillent à partir d'un fichier SAM de CRAC et d'un# |
|
16
|
|
|
|
|
|
|
# fichier d'annotation au format GFF3. # |
|
17
|
|
|
|
|
|
|
# # |
|
18
|
|
|
|
|
|
|
# Ce logiciel est régi par la licence CeCILL soumise au droit français et # |
|
19
|
|
|
|
|
|
|
# respectant les principes de diffusion des logiciels libres. Vous pouvez # |
|
20
|
|
|
|
|
|
|
# utiliser, modifier et/ou redistribuer ce programme sous les conditions de # |
|
21
|
|
|
|
|
|
|
# la licence CeCILL telle que diffusée par le CEA, le CNRS et l'INRIA sur # |
|
22
|
|
|
|
|
|
|
# le site "http://www.cecill.info". # |
|
23
|
|
|
|
|
|
|
# # |
|
24
|
|
|
|
|
|
|
# En contrepartie de l'accessibilité au code source et des droits de copie, # |
|
25
|
|
|
|
|
|
|
# de modification et de redistribution accordés par cette licence, il n'est # |
|
26
|
|
|
|
|
|
|
# offert aux utilisateurs qu'une garantie limitée. Pour les mêmes raisons, # |
|
27
|
|
|
|
|
|
|
# seule une responsabilité restreinte pèse sur l'auteur du programme, le # |
|
28
|
|
|
|
|
|
|
# titulaire des droits patrimoniaux et les concédants successifs. # |
|
29
|
|
|
|
|
|
|
# # |
|
30
|
|
|
|
|
|
|
# À cet égard l'attention de l'utilisateur est attirée sur les risques # |
|
31
|
|
|
|
|
|
|
# associés au chargement, à l'utilisation, à la modification et/ou au # |
|
32
|
|
|
|
|
|
|
# développement et à la reproduction du logiciel par l'utilisateur étant # |
|
33
|
|
|
|
|
|
|
# donné sa spécificité de logiciel libre, qui peut le rendre complexe à # |
|
34
|
|
|
|
|
|
|
# manipuler et qui le réserve donc à des développeurs et des professionnels # |
|
35
|
|
|
|
|
|
|
# avertis possédant des connaissances informatiques approfondies. Les # |
|
36
|
|
|
|
|
|
|
# utilisateurs sont donc invités à charger et tester l'adéquation du # |
|
37
|
|
|
|
|
|
|
# logiciel à leurs besoins dans des conditions permettant d'assurer la # |
|
38
|
|
|
|
|
|
|
# sécurité de leurs systêmes et ou de leurs données et, plus généralement, # |
|
39
|
|
|
|
|
|
|
# à l'utiliser et l'exploiter dans les mêmes conditions de sécurité. # |
|
40
|
|
|
|
|
|
|
# # |
|
41
|
|
|
|
|
|
|
# Le fait que vous puissiez accéder à cet en-tête signifie que vous avez # |
|
42
|
|
|
|
|
|
|
# pris connaissance de la licence CeCILL, et que vous en avez accepté les # |
|
43
|
|
|
|
|
|
|
# termes. # |
|
44
|
|
|
|
|
|
|
# # |
|
45
|
|
|
|
|
|
|
# ------------------------------------------------------------------------- # |
|
46
|
|
|
|
|
|
|
# # |
|
47
|
|
|
|
|
|
|
# This file is part of the CracTools which provide several integrated # |
|
48
|
|
|
|
|
|
|
# pipeline to analyze biological events present in RNA-Seq data. CracTools # |
|
49
|
|
|
|
|
|
|
# work on a SAM file generated by CRAC and an annotation file in GFF3 format.# |
|
50
|
|
|
|
|
|
|
# # |
|
51
|
|
|
|
|
|
|
# This software is governed by the CeCILL license under French law and # |
|
52
|
|
|
|
|
|
|
# abiding by the rules of distribution of free software. You can use, # |
|
53
|
|
|
|
|
|
|
# modify and/ or redistribute the software under the terms of the CeCILL # |
|
54
|
|
|
|
|
|
|
# license as circulated by CEA, CNRS and INRIA at the following URL # |
|
55
|
|
|
|
|
|
|
# "http://www.cecill.info". # |
|
56
|
|
|
|
|
|
|
# # |
|
57
|
|
|
|
|
|
|
# As a counterpart to the access to the source code and rights to copy, # |
|
58
|
|
|
|
|
|
|
# modify and redistribute granted by the license, users are provided only # |
|
59
|
|
|
|
|
|
|
# with a limited warranty and the software's author, the holder of the # |
|
60
|
|
|
|
|
|
|
# economic rights, and the successive licensors have only limited # |
|
61
|
|
|
|
|
|
|
# liability. # |
|
62
|
|
|
|
|
|
|
# # |
|
63
|
|
|
|
|
|
|
# In this respect, the user's attention is drawn to the risks associated # |
|
64
|
|
|
|
|
|
|
# with loading, using, modifying and/or developing or reproducing the # |
|
65
|
|
|
|
|
|
|
# software by the user in light of its specific status of free software, # |
|
66
|
|
|
|
|
|
|
# that may mean that it is complicated to manipulate, and that also # |
|
67
|
|
|
|
|
|
|
# therefore means that it is reserved for developers and experienced # |
|
68
|
|
|
|
|
|
|
# professionals having in-depth computer knowledge. Users are therefore # |
|
69
|
|
|
|
|
|
|
# encouraged to load and test the software's suitability as regards their # |
|
70
|
|
|
|
|
|
|
# requirements in conditions enabling the security of their systems and/or # |
|
71
|
|
|
|
|
|
|
# data to be ensured and, more generally, to use and operate it in the same # |
|
72
|
|
|
|
|
|
|
# conditions as regards security. # |
|
73
|
|
|
|
|
|
|
# # |
|
74
|
|
|
|
|
|
|
# The fact that you are presently reading this means that you have had # |
|
75
|
|
|
|
|
|
|
# knowledge of the CeCILL license and that you accept its terms. # |
|
76
|
|
|
|
|
|
|
# # |
|
77
|
|
|
|
|
|
|
############################################################################### |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=head1 NAME |
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
CracTools::Annotator - Generic annotation based on CracTools::GFF::Query |
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
=cut |
|
84
|
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
package CracTools::Annotator; |
|
86
|
|
|
|
|
|
|
|
|
87
|
1
|
|
|
1
|
|
65050
|
use strict; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
38
|
|
|
88
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
28
|
|
|
89
|
|
|
|
|
|
|
|
|
90
|
1
|
|
|
1
|
|
6
|
use Carp; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
98
|
|
|
91
|
1
|
|
|
1
|
|
1136
|
use Data::Dumper; |
|
|
1
|
|
|
|
|
15331
|
|
|
|
1
|
|
|
|
|
89
|
|
|
92
|
1
|
|
|
1
|
|
742
|
use CracTools::GFF::Annotation; |
|
|
1
|
|
|
|
|
5
|
|
|
|
1
|
|
|
|
|
43
|
|
|
93
|
1
|
|
|
1
|
|
841
|
use CracTools::GFF::Query; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
41
|
|
|
94
|
1
|
|
|
1
|
|
715
|
use CracTools::Const; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
1173
|
|
|
95
|
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
=head1 METHODS |
|
97
|
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
=head2 new |
|
99
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
Arg [1] : String - $gff_file |
|
101
|
|
|
|
|
|
|
GFF file to perform annotation |
|
102
|
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
Example : my $annotator = CracTools::Annotator->new($gff_file); |
|
104
|
|
|
|
|
|
|
Description : Create a new CracTools::Annotator object |
|
105
|
|
|
|
|
|
|
The GFF file given in argument is handed to a CracTools::GFF::Query object to be parsed |
|
106
|
|
|
|
|
|
|
and loaded. |
|
107
|
|
|
|
|
|
|
ReturnType : CracTools::Annotator |
|
108
|
|
|
|
|
|
|
Exceptions : croaks if the GFF file argument is not defined |
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=cut |
|
111
|
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
# Constructor: builds an annotator bound to a single GFF file.
#
# Arg [1]    : String - $gff_file, the GFF file used to perform annotation
# ReturnType : hash reference blessed into the invoking class
# Exceptions : croaks when $gff_file is not defined
sub new {
  my ($class, $gff_file) = @_;

  croak "Missing GFF file argument in CracTools::Annotator constructor"
    unless defined $gff_file;

  my $self = bless { gff_file => $gff_file }, $class;

  # _init attaches the GFF query object to the freshly blessed instance.
  $self->_init();

  return $self;
}
|
128
|
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
=head2 foundGene |
|
130
|
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
Arg [1] : String - chr |
|
132
|
|
|
|
|
|
|
Arg [2] : String - pos_start |
|
133
|
|
|
|
|
|
|
Arg [3] : String - pos_end |
|
134
|
|
|
|
|
|
|
Arg [4] : String - strand |
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
Description : Return true if at least one annotation candidate is found in this interval |
|
137
|
|
|
|
|
|
|
ReturnType : Boolean |
|
138
|
|
|
|
|
|
|
Exceptions : none |
|
139
|
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
=cut |
|
141
|
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
# Tells whether a region yields at least one annotation candidate.
#
# Arg [1] : String - chr
# Arg [2] : String - pos_start
# Arg [3] : String - pos_end
# Arg [4] : String - strand
# ReturnType : Boolean - true when getAnnotationCandidates returns a
#              non-empty list for the region
sub foundGene {
  my ($self, $chr, $pos_start, $pos_end, $strand) = @_;

  my @hits = $self->getAnnotationCandidates($chr, $pos_start, $pos_end, $strand);

  return scalar(@hits) > 0;
}
|
148
|
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
=head2 foundSameGene |
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
Arg [1] : String - chr |
|
152
|
|
|
|
|
|
|
Arg [2] : String - pos_start1 |
|
153
|
|
|
|
|
|
|
Arg [3] : String - pos_end1 |
|
154
|
|
|
|
|
|
|
Arg [4] : String - pos_start2 |
|
155
|
|
|
|
|
|
|
Arg [5] : String - pos_end2 |
|
156
|
|
|
|
|
|
|
Arg [6] : String - strand |
|
157
|
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
Description : Return true if the same gene is found in the two intervals. |
|
159
|
|
|
|
|
|
|
ReturnType : Boolean |
|
160
|
|
|
|
|
|
|
Exceptions : none |
|
161
|
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
=cut |
|
163
|
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
# Tells whether two intervals of the same chromosome and strand share a gene.
#
# Arg [1] : String - chr
# Arg [2] : String - pos_start1
# Arg [3] : String - pos_end1
# Arg [4] : String - pos_start2
# Arg [5] : String - pos_end2
# Arg [6] : String - strand
# ReturnType : 1 when at least one gene ID appears in the candidates of both
#              intervals, 0 otherwise
sub foundSameGene {
  my $self = shift;
  my ($chr,$pos_start1,$pos_end1,$pos_start2,$pos_end2,$strand) = @_;

  my @candidates1 = $self->getAnnotationCandidates($chr,$pos_start1,$pos_end1,$strand);
  my @candidates2 = $self->getAnnotationCandidates($chr,$pos_start2,$pos_end2,$strand);

  # Index the gene IDs seen in the first interval so that the second
  # interval can be checked with one hash lookup per candidate.
  my %seen_in_first;
  foreach my $candidate (@candidates1) {
    next unless defined $candidate->{gene};
    my $gene_id = $candidate->{gene}->attribute('ID');
    # A gene without an ID cannot be matched reliably: skip it instead of
    # letting two undefined IDs compare equal.
    next unless defined $gene_id;
    $seen_in_first{$gene_id} = 1;
  }

  foreach my $candidate (@candidates2) {
    next unless defined $candidate->{gene};
    my $gene_id = $candidate->{gene}->attribute('ID');
    next unless defined $gene_id;
    return 1 if exists $seen_in_first{$gene_id};
  }

  return 0;
}
|
193
|
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
=head2 getBestAnnotationCandidate |
|
195
|
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
Arg [1] : String - chr |
|
197
|
|
|
|
|
|
|
Arg [2] : String - pos_start |
|
198
|
|
|
|
|
|
|
Arg [3] : String - pos_end |
|
199
|
|
|
|
|
|
|
Arg [4] : String - strand |
|
200
|
|
|
|
|
|
|
Arg [5] : (Optional) Subroutine - see C<getCandidatePriorityDefault> for more details |
|
201
|
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
Description : Return best annotation candidate according to the priorities given |
|
203
|
|
|
|
|
|
|
by the subroutine in argument. |
|
204
|
|
|
|
|
|
|
ReturnType : Hash( feature_name => CracTools::GFF::Annotation, ...), Int(priority), String(type) |
|
205
|
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
=cut |
|
207
|
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
# Picks, among the annotation candidates of a region, the one with the
# lowest priority value.
#
# Arg [1] : String - chr
# Arg [2] : String - pos_start
# Arg [3] : String - pos_end
# Arg [4] : String - strand
# Arg [5] : (Optional) code reference called as
#           $sub->($pos_start,$pos_end,$candidate) and expected to return
#           ($priority,$type); defaults to getCandidatePriorityDefault
# ReturnType : ($best_candidate,$best_priority,$best_type); all three are
#              undef when no candidate was retained
sub getBestAnnotationCandidate {
  my ($self, $chr, $pos_start, $pos_end, $strand, $prioritySub) = @_;

  if (!defined $prioritySub) {
    $prioritySub = \&getCandidatePriorityDefault;
  }

  my ($best_candidate, $best_priority, $best_type);

  for my $candidate ($self->getAnnotationCandidates($chr, $pos_start, $pos_end, $strand)) {
    my ($priority, $type) = $prioritySub->($pos_start, $pos_end, $candidate);

    # A priority of -1 marks a candidate that must be ignored.
    next if $priority == -1;

    # Keep the current best unless this candidate is strictly better, so
    # the first candidate wins in case of a tie.
    next if defined $best_priority && !($priority < $best_priority);

    ($best_candidate, $best_priority, $best_type) = ($candidate, $priority, $type);
  }

  return $best_candidate,$best_priority,$best_type;
}
|
228
|
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
=head2 getAnnotationCandidates |
|
230
|
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
Arg [1] : String - chr |
|
232
|
|
|
|
|
|
|
Arg [2] : String - pos_start |
|
233
|
|
|
|
|
|
|
Arg [3] : String - pos_end |
|
234
|
|
|
|
|
|
|
Arg [4] : String - strand |
|
235
|
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
Description : Return an array with all annotation candidates overlapping the |
|
237
|
|
|
|
|
|
|
chromosomic region. |
|
238
|
|
|
|
|
|
|
ReturnType : Array of Hash( feature_name => CracTools::GFF::Annotation, ...) |
|
239
|
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
=cut |
|
241
|
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
# Builds one candidate per root annotation overlapping a region.
#
# Arg [1] : String - chr
# Arg [2] : String - pos_start
# Arg [3] : String - pos_end
# Arg [4] : String - strand
# ReturnType : list of candidate hash references
#              ( feature_name => CracTools::GFF::Annotation, ... ), one per
#              annotation that declares no parent, in sorted ID order
sub getAnnotationCandidates {
  my $self = shift;
  my ($chr,$pos_start,$pos_end,$strand) = @_;

  # Get the GFF annotations that overlap the region to annotate
  my $annotations = $self->{gff_query}->fetchByRegion($chr,$pos_start,$pos_end,$strand);

  # Index the parsed annotations by their ID attribute
  my %annot_hash;
  foreach my $annot_line (@{$annotations}) {
    my $annot = CracTools::GFF::Annotation->new($annot_line,'gff3');
    $annot_hash{$annot->attribute('ID')} = $annot;
  }

  # Every annotation without a parent is the root of an annotation tree and
  # yields one candidate. IDs are visited in sorted order because the order
  # of "keys" changes from one run to the next, which would make the order
  # of the returned candidates unpredictable.
  my @candidates;
  foreach my $annot_id (sort keys %annot_hash) {
    my @parents = $annot_hash{$annot_id}->parents;
    next if @parents;

    # We have found a root: build its candidate from an empty hash.
    push @candidates, _constructCandidate($annot_id,{},\%annot_hash);
  }

  return @candidates;
}
|
270
|
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
=head2 getCandidatePriorityDefault |
|
272
|
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
Arg [1] : String - pos_start |
|
274
|
|
|
|
|
|
|
Arg [2] : String - pos_end |
|
275
|
|
|
|
|
|
|
Arg [3] : hash - candidate |
|
276
|
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
Description : Default method used to give a priority to a candidate. |
|
278
|
|
|
|
|
|
|
You can create your own priority method to fit your specific need |
|
279
|
|
|
|
|
|
|
for selecting the best annotation. |
|
280
|
|
|
|
|
|
|
The best priority is 0. A priority of -1 means that this candidate |
|
281
|
|
|
|
|
|
|
should be avoided. |
|
282
|
|
|
|
|
|
|
ReturnType : Array ($priority,$type) where $priority is an integer and $type a string |
|
283
|
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
=cut |
|
285
|
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
# Default ranking of an annotation candidate for a region.
#
# Arg [1] : String - pos_start
# Arg [2] : String - pos_end
# Arg [3] : hash reference - candidate; the keys read here are mRNA, exon,
#           three, five and cds
# ReturnType : ($priority,$type)
#   ( 1, '3PRIM_UTR' | '5PRIM_UTR' | 'CDS' | 'EXON' )
#                       protein coding mRNA, region contained in the exon
#   ( 2, 'INXON' )      protein coding mRNA, region extending beyond the exon
#   ( 3, 'NON_CODING' ) other mRNA, region contained in the exon
#   (-1, '' )           anything else: the candidate should be avoided
sub getCandidatePriorityDefault {
  my ($pos_start,$pos_end,$candidate) = @_;
  my ($priority,$type) = (-1,'');
  my ($mRNA,$exon) = ($candidate->{mRNA},$candidate->{exon});

  # Without both an mRNA and an exon there is nothing to rank.
  return ($priority,$type) unless defined $mRNA && defined $exon;

  # A missing "type" attribute is treated as non coding.
  my $mrna_type = $mRNA->attribute('type');
  my $is_coding = defined $mrna_type && $mrna_type =~ /protein_coding/i;

  # The region is exonic only when the exon covers it entirely; the test
  # used to be inverted, labelling regions that stick out of the exon as
  # exonic and fully contained ones as INXON.
  my $inside_exon = $exon->start <= $pos_start && $exon->end >= $pos_end;

  if($is_coding) {
    if($inside_exon) {
      $priority = 1;
      if(defined $candidate->{three}) {
        $type = '3PRIM_UTR';
      } elsif(defined $candidate->{five}) {
        $type = '5PRIM_UTR';
      } elsif(defined $candidate->{cds}) {
        $type = 'CDS';
      } else {
        $type = 'EXON';
      }
    } else {
      $priority = 2;
      $type = 'INXON';
    }
  } elsif($inside_exon) {
    $priority = 3;
    $type = 'NON_CODING';
  }

  return ($priority,$type);
}
|
320
|
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
=head1 PRIVATE METHODS |
|
322
|
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
=head2 _init |
|
324
|
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
Description : init method, load GFF annotation into a |
|
326
|
|
|
|
|
|
|
CracTools::GFF::Query object. |
|
327
|
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
=cut |
|
329
|
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
# Private initializer: builds the CracTools::GFF::Query object from the GFF
# file recorded in the instance and stores it under the gff_query key.
sub _init {
  my ($self) = @_;

  # The query object is what getAnnotationCandidates later interrogates.
  $self->{gff_query} = CracTools::GFF::Query->new($self->{gff_file});
}
|
338
|
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
=head2 _constructCandidate |
|
340
|
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
Arg [1] : String - annot_id |
|
342
|
|
|
|
|
|
|
Arg [2] : Hash ref - candidate |
|
343
|
|
|
|
|
|
|
Since this method is recursive, this is the object that |
|
344
|
|
|
|
|
|
|
we are constructing |
|
345
|
|
|
|
|
|
|
Arg [3] : Hash ref - annot_hash |
|
346
|
|
|
|
|
|
|
annot_hash is a hash reference where keys are annotation IDs |
|
347
|
|
|
|
|
|
|
and values are CracTools::GFF::Annotation objects. |
|
348
|
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
Description : _constructCandidate is a recursive method that builds a |
|
350
|
|
|
|
|
|
|
candidate hash. |
|
351
|
|
|
|
|
|
|
ReturnType : Candidate Hash ref where keys are GFF features and |
|
352
|
|
|
|
|
|
|
values are CracTools::GFF::Annotation objects : |
|
353
|
|
|
|
|
|
|
{ feature => CracTools::GFF::Annotation, ...} |
|
354
|
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
=cut |
|
356
|
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
# Recursively collects an annotation and all its descendants into a
# candidate hash keyed by GFF feature name.
#
# Arg [1] : String - annot_id, ID of the annotation to add
# Arg [2] : Hash ref - candidate being built (a new hash is created when it
#           is undefined)
# Arg [3] : Hash ref - annot_hash, annotation IDs => annotation objects
# ReturnType : the candidate hash reference
#              { feature => CracTools::GFF::Annotation, ... }
#
# When several descendants share the same feature name (two exons of one
# mRNA for instance) only one can be stored under that key. Annotations are
# visited in sorted ID order so that the retained one is the same on every
# run rather than depending on hash ordering.
# NOTE: nothing here detects Parent cycles; such input would recurse
# without end.
sub _constructCandidate {
  my ($annot_id,$candidate,$annot_hash) = @_;

  $candidate = {} unless defined $candidate;
  $candidate->{$annot_hash->{$annot_id}->feature} = $annot_hash->{$annot_id};

  foreach my $child_id (sort keys %{$annot_hash}) {
    foreach my $parent ($annot_hash->{$child_id}->parents) {
      # $child_id is a direct child of $annot_id: descend into it
      if($parent eq $annot_id) {
        _constructCandidate($child_id,$candidate,$annot_hash);
      }
    }
  }

  return $candidate;
}
|
370
|
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
1; |