line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::Gonzales::Feat; |
2
|
4
|
|
|
4
|
|
119201
|
use strict; |
|
4
|
|
|
|
|
18
|
|
|
4
|
|
|
|
|
124
|
|
3
|
4
|
|
|
4
|
|
20
|
use warnings; |
|
4
|
|
|
|
|
10
|
|
|
4
|
|
|
|
|
101
|
|
4
|
4
|
|
|
4
|
|
22
|
use Carp; |
|
4
|
|
|
|
|
9
|
|
|
4
|
|
|
|
|
235
|
|
5
|
|
|
|
|
|
|
|
6
|
4
|
|
|
4
|
|
512
|
use Mouse; |
|
4
|
|
|
|
|
29656
|
|
|
4
|
|
|
|
|
23
|
|
7
|
4
|
|
|
4
|
|
2350
|
use List::MoreUtils qw/zip/; |
|
4
|
|
|
|
|
14662
|
|
|
4
|
|
|
|
|
32
|
|
8
|
4
|
|
|
4
|
|
3051
|
use Data::Dumper; |
|
4
|
|
|
|
|
9
|
|
|
4
|
|
|
|
|
210
|
|
9
|
4
|
|
|
4
|
|
2261
|
use Storable qw(dclone); |
|
4
|
|
|
|
|
10026
|
|
|
4
|
|
|
|
|
266
|
|
10
|
4
|
|
|
4
|
|
30
|
use Scalar::Util qw/refaddr/; |
|
4
|
|
|
|
|
7
|
|
|
4
|
|
|
|
|
229
|
|
11
|
4
|
|
|
4
|
|
2317
|
use Bio::Gonzales::Seq::Util qw/strand_convert/; |
|
4
|
|
|
|
|
12
|
|
|
4
|
|
|
|
|
4040
|
|
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
our $QUIET_MODE; |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
our $ATTR_ESCAPE_RE = qr/([\x00-\x1F\x7F%&\=;,])/; |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
our %FIXED_ATTRIBUTE_NAMES = ( |
18
|
|
|
|
|
|
|
ID => 1, |
19
|
|
|
|
|
|
|
Parent => 2, |
20
|
|
|
|
|
|
|
Target => 3, |
21
|
|
|
|
|
|
|
Name => 4, |
22
|
|
|
|
|
|
|
Alias => 5, |
23
|
|
|
|
|
|
|
Gap => 6, |
24
|
|
|
|
|
|
|
Derives_from => 7, |
25
|
|
|
|
|
|
|
Dbxref => 9, |
26
|
|
|
|
|
|
|
Ontology_term => 10, |
27
|
|
|
|
|
|
|
Is_circular => 11, |
28
|
|
|
|
|
|
|
Note => 99, |
29
|
|
|
|
|
|
|
); |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
our $VERSION = '0.083'; # VERSION |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
extends 'Bio::Gonzales::MiniFeat'; |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
has [qw/seq_id start end strand/] => ( is => 'rw', required => 1 ); |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
has [qw/phase score /] => ( is => 'rw' ); |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
sub as_hash { |
40
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
41
|
|
|
|
|
|
|
return { |
42
|
0
|
|
|
|
|
0
|
seq_id => $self->seq_id, |
43
|
|
|
|
|
|
|
source => $self->source, |
44
|
|
|
|
|
|
|
type => $self->type, |
45
|
|
|
|
|
|
|
attributes => $self->attributes, |
46
|
|
|
|
|
|
|
start => $self->start, |
47
|
|
|
|
|
|
|
end => $self->end, |
48
|
|
|
|
|
|
|
strand => $self->strand, |
49
|
|
|
|
|
|
|
phase => $self->phase, |
50
|
|
|
|
|
|
|
score => $self->score, |
51
|
|
|
|
|
|
|
}; |
52
|
|
|
|
|
|
|
} |
53
|
|
|
|
|
|
|
|
54
|
26
|
|
|
26
|
1
|
125
|
sub scf_id { return shift->seq_id(@_); } |
55
|
|
|
|
|
|
|
|
56
|
0
|
|
|
0
|
1
|
0
|
sub length { return $_[0]->end - $_[0]->start + 1 } |
57
|
|
|
|
|
|
|
|
58
|
0
|
|
|
0
|
1
|
0
|
sub begin { return shift->start(@_) } |
59
|
|
|
|
|
|
|
|
60
|
0
|
|
|
0
|
1
|
0
|
sub Convert_strand { strand_convert(@_) } |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
sub switch_coords { |
63
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
64
|
|
|
|
|
|
|
|
65
|
0
|
|
|
|
|
0
|
my ( $b, $e ) = ( $self->start, $self->end ); |
66
|
0
|
|
|
|
|
0
|
$self->start($e); |
67
|
0
|
|
|
|
|
0
|
$self->end($b); |
68
|
|
|
|
|
|
|
|
69
|
0
|
|
|
|
|
0
|
return $self; |
70
|
|
|
|
|
|
|
} |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
sub sort_subfeats { |
73
|
57
|
|
|
57
|
0
|
90
|
my ($self) = @_; |
74
|
|
|
|
|
|
|
|
75
|
57
|
50
|
|
|
|
86
|
my @sf = sort { ( $a->start <=> $b->start ) || ( $b->end <=> $a->end ) } @{ $self->subfeats }; |
|
45
|
|
|
|
|
151
|
|
|
57
|
|
|
|
|
125
|
|
76
|
57
|
|
|
|
|
164
|
$self->subfeats( \@sf ); |
77
|
|
|
|
|
|
|
} |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
sub to_gff3 { |
80
|
142
|
|
|
142
|
0
|
238
|
my ( $self, $escape_whitespace_everywhere ) = @_; |
81
|
|
|
|
|
|
|
|
82
|
142
|
|
|
|
|
353
|
my $strand = strand_convert( $self->strand ); |
83
|
|
|
|
|
|
|
|
84
|
142
|
|
|
|
|
314
|
my $attributes = $self->attributes; |
85
|
|
|
|
|
|
|
#sort the attributes |
86
|
142
|
|
100
|
|
|
485
|
my @attr_names = sort { ( $FIXED_ATTRIBUTE_NAMES{$a} || 98 ) <=> ( $FIXED_ATTRIBUTE_NAMES{$b} || 98 ) } |
|
392
|
|
100
|
|
|
1428
|
|
87
|
|
|
|
|
|
|
keys %$attributes; |
88
|
|
|
|
|
|
|
|
89
|
142
|
|
|
|
|
271
|
my @groups; |
90
|
142
|
|
|
|
|
218
|
for my $a (@attr_names) { |
91
|
424
|
|
|
|
|
565
|
my @escaped_v; |
92
|
424
|
|
|
|
|
496
|
for my $v ( @{ $attributes->{$a} } ) { |
|
424
|
|
|
|
|
709
|
|
93
|
426
|
50
|
|
|
|
718
|
unless ( defined($v) ) { |
94
|
0
|
|
|
|
|
0
|
carp "The attribute " . $a . " of feature " . $self->id . " has uninitialized values"; |
95
|
0
|
|
|
|
|
0
|
$v = ''; |
96
|
|
|
|
|
|
|
} |
97
|
|
|
|
|
|
|
|
98
|
426
|
|
|
|
|
1306
|
$v =~ s/$ATTR_ESCAPE_RE/sprintf("%%%02X",ord($1))/ge; |
|
0
|
|
|
|
|
0
|
|
99
|
426
|
50
|
33
|
|
|
808
|
$v =~ s/ /%20/g if ( $escape_whitespace_everywhere && $a ne 'Target' ); |
100
|
426
|
|
|
|
|
834
|
push @escaped_v, $v; |
101
|
|
|
|
|
|
|
} |
102
|
|
|
|
|
|
|
|
103
|
424
|
50
|
|
|
|
778
|
if ( $a eq 'Target' ) { |
104
|
0
|
|
|
|
|
0
|
for my $v (@escaped_v) { |
105
|
0
|
0
|
|
|
|
0
|
if ( $v =~ /^"?(.*?)\s+(\d+\s+\d+(?:\s+[-.+])?)\s*"?$/ ) { |
106
|
0
|
|
|
|
|
0
|
my ( $tid, $rest ) = ( $1, $2 ); |
107
|
0
|
|
|
|
|
0
|
$tid =~ s/ /%20/g; |
108
|
0
|
|
|
|
|
0
|
$v = join " ", $tid, $rest; |
109
|
|
|
|
|
|
|
} |
110
|
|
|
|
|
|
|
} |
111
|
|
|
|
|
|
|
} |
112
|
|
|
|
|
|
|
|
113
|
424
|
|
|
|
|
909
|
$a =~ s/$ATTR_ESCAPE_RE/sprintf("%%%02X",ord($1))/ge; |
|
0
|
|
|
|
|
0
|
|
114
|
|
|
|
|
|
|
|
115
|
424
|
|
|
|
|
1265
|
push @groups, $a . '=' . join( ',', @escaped_v ); |
116
|
|
|
|
|
|
|
} |
117
|
|
|
|
|
|
|
|
118
|
142
|
|
100
|
|
|
1528
|
return join( "\t", |
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
119
|
|
|
|
|
|
|
$self->seq_id, $self->source // '.', $self->type, |
120
|
|
|
|
|
|
|
$self->start, $self->end, $self->score // '.', |
121
|
|
|
|
|
|
|
$strand, $self->phase // '.', join( ';', @groups ) ) |
122
|
|
|
|
|
|
|
. "\n"; |
123
|
|
|
|
|
|
|
} |
124
|
|
|
|
|
|
|
__PACKAGE__->meta->make_immutable; |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
1; |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
__END__ |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=head1 NAME |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
Bio::Gonzales::Feat - a sequence feature |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=head1 SYNOPSIS |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
Bio::Gonzales::Feat->new( |
137
|
|
|
|
|
|
|
seq_id => 'chr01', |
138
|
|
|
|
|
|
|
source => 'glimmerhmm', |
139
|
|
|
|
|
|
|
type => 'exon', |
140
|
|
|
|
|
|
|
start => 324, |
141
|
|
|
|
|
|
|
end => 6342, |
142
|
|
|
|
|
|
|
strand => -1, |
143
|
|
|
|
|
|
|
attributes => { ID => [ 'exon01' ], Parent => [ 'gene01', 'gene02' ] }, |
144
|
|
|
|
|
|
|
); |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
=head1 DESCRIPTION |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
Represents a sequence feature. The field C<attributes> is not required to |
149
|
|
|
|
|
|
|
create an object of class Bio::Gonzales::Feat. This class is based on the |
150
|
|
|
|
|
|
|
L<Sequence Ontology GFF3 specification|http://www.sequenceontology.org/gff3.shtml> |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
=head1 METHODS |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
=over 4 |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
=item B<< \%attributes = $f->attr >> |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
=item B<< \%attributes = $f->attributes >> |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
=item B<< $sequence_id = $f->seq_id >> |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
=item B<< $souce = $f->source >> |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
=item B<< $f->source($new_source) >> |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
Gets and sets the source attribute of the feature. |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
=item B<< $type = $f->type >> |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
=item B<< $f->type($new_type) >> |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
Gets and sets the type attribute of the feature. |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
=item B<< $start_coord = $f->start >> |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
=item B<< $start_coord = $f->begin >> |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
=item B<< $f->start($start_1_based_coord) >> |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
=item B<< $f->begin($start_1_baed_coord) >> |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
Get or set the start coord of the feature. |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
=item B<< $f->end >> |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
The same syntax as C<$f->start>, only for the end coordianate. |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
=item B<< $f->strand($strand) >> |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
=item B<< $strand = $f->strand >> |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
Set or get the strand. The strand can be -1 (minus strand), 0 (strand unknown) or 1 (plus strand). |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
=item B<< $phase = $f->phase >> |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
=item B<< $f->phase($phase) >> |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
Gets or sets the phase. |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
=item B<< $score = $f->score >> |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
=item B<< $f->score($score) >> |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
Gets or sets the score. |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
=item B<< $f->attributes >> |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
=item B<< $f->attr >> |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
Get or set the attributes of the feature. Structure: |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
{ |
213
|
|
|
|
|
|
|
ID => [ 'id01' ], |
214
|
|
|
|
|
|
|
Parent => [ 'parent1', 'parent2', ... ] |
215
|
|
|
|
|
|
|
... |
216
|
|
|
|
|
|
|
} |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
=item B<< $f->subfeats >> |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
Gives access to a general container for subfeature objects. Makes grouping |
221
|
|
|
|
|
|
|
easier, e.g. for BED output format. An example would be an 'mRNA'-object that |
222
|
|
|
|
|
|
|
has several exons as subfeatures. |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
=item B<< $f->parentfeats >> |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
The same as C<$f->subfeats>, only with parent relation. This function is |
227
|
|
|
|
|
|
|
completely unrelated to the C<$f->parent_id> function. C<$f->parent_id> only |
228
|
|
|
|
|
|
|
accesses the attributes, not the parentfeature container. |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
=item B<< $f->scf_id >> |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
This is a synonym for C<$f->seq_id>. |
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
=item B<< $first_value = $f->attr_first($attribute_key) >> |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
=item B<< $first_value = $f->first_attr($attribute_key) >> |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
The functions C<attr_first> and C<first_attr> retrieve the value of the first |
239
|
|
|
|
|
|
|
element of the given attribute. An example would be |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
my $id = $f->attr_first("ID"); |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
# in case of multiple parents only the first entry/parent will be returned. |
244
|
|
|
|
|
|
|
my $parent = $f->attr_first("Parent"); |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
=item B<< $id = $f->id >> |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
Retrieve the value of the "ID" attribute. If a feature has multiple ids, a |
250
|
|
|
|
|
|
|
warning will be printed. Effectively a shortcut for C<$f->attributes->{ID}[0]>. |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
=item B<< @ids = $f->ids >> |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
=item B<< \@ids = $f->ids >> |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
A shortcut for C<$f->attributes->{ID}>. Returns a list of IDs in list context, |
257
|
|
|
|
|
|
|
a reference to the ID list in scalar context. |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
=item B<< @parent_ids = $f->parent_ids >> |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
=item B<< \@parent_ids = $f->parent_ids >> |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
A shortcut for C<$f->attributes->{Parent}>. Returns a list of parent IDs in list context, |
264
|
|
|
|
|
|
|
a reference to the parent ID list in scalar context. |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
=item B<< $parent_id = $f->parent_id >> |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
A shortcut for C<$f->attributes->{Parent}[0]>. Gives a warning if multiple |
269
|
|
|
|
|
|
|
parent ids are present. |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
=item B<< $f->add_attr(%attributes) >> |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
To add an attribute, call C<add_attr> with either a hash of the form |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
%attributes = ( |
276
|
|
|
|
|
|
|
ID => "mrna_01", |
277
|
|
|
|
|
|
|
Parent => "gene_01" |
278
|
|
|
|
|
|
|
); |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
or |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
%attributes = ( |
283
|
|
|
|
|
|
|
ID => "exon_01", |
284
|
|
|
|
|
|
|
Parent => [ "gene_01", "gene_02" ], |
285
|
|
|
|
|
|
|
); |
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
=item B<< \@deleted_attributes = $f->del_attr(@attribute_names) >> |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
=item B<< $deleted_attribute = $f->del_attr($attribute_name) >> |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
Deletes all attributes in C<@attribute_names>. |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
=item B<< Bio::Gonzales::Feat->Convert_strand($strand) >> |
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
Convert between numeric and character strand respresentation. |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
=item B<< $cloned_f = $f->clone >> |
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
Clone the feature, deeply (incl. subfeatures and parentfeatures). |
301
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
=item B<< $length = $f->length >> |
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
The length (end -start +1) |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
=back |
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
=head1 SEE ALSO |
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
=head1 AUTHOR |
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
jw bargsten, C<< <joachim.bargsten at wur.nl> >> |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
=cut |