line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# -*-CPerl-*- |
2
|
|
|
|
|
|
|
# Last changed Time-stamp: <2014-10-24 10:31:25 mtw> |
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
package Bio::ViennaNGS::AnnoC; |
5
|
|
|
|
|
|
|
|
6
|
1
|
|
|
1
|
|
15409
|
use 5.12.0; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
32
|
|
7
|
1
|
|
|
1
|
|
408
|
use version; our $VERSION = qv('0.08'); |
|
1
|
|
|
|
|
1342
|
|
|
1
|
|
|
|
|
5
|
|
8
|
1
|
|
|
1
|
|
253
|
use Bio::ViennaNGS qw(sortbed); |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
use Bio::Tools::GFF; |
10
|
|
|
|
|
|
|
use Path::Class; |
11
|
|
|
|
|
|
|
use Carp; |
12
|
|
|
|
|
|
|
use Moose; |
13
|
|
|
|
|
|
|
use namespace::autoclean; |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
# Accession of the annotated sequence: a read-write string attribute.
# parse_gff() sets it from the display ID of the GFF header segment;
# has_accession() tells whether a value has been assigned.
has accession => (
    isa       => 'Str',
    is        => 'rw',
    predicate => 'has_accession',
);
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
# Feature store: a hash reference keyed by feature unique ID. parse_gff()
# fills each entry with start, end, strand, length, seqid, score, gbkey,
# name and uid. The reference itself is read-only, the hash it points to
# is modified in place. Every object starts with its own empty hash.
has features => (
    isa       => 'HashRef',
    is        => 'ro',
    default   => sub { return {}; },
    predicate => 'has_features',
);
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
# Number of entries in the 'features' hash. Read-only integer that is
# built lazily, i.e. on first read, by _get_nr_of_features().
has nr_features => (
    isa     => 'Int',
    is      => 'ro',
    lazy    => 1,
    builder => '_get_nr_of_features',
);
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# Per-gbkey feature statistics: a read-only hash reference built lazily,
# i.e. on first read, by _set_featstat(). Because the attribute is lazy,
# has_featstat() only becomes true once 'featstat' has been read (or was
# passed to the constructor).
has featstat => (
    isa       => 'HashRef',
    is        => 'ro',
    lazy      => 1,
    builder   => '_set_featstat',
    predicate => 'has_featstat',
);
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
# Method modifier that runs ahead of every 'featstat' access and invokes
# the feature counter. The counter's return value is discarded here.
# NOTE(review): the only visible effect is the counter's own sanity check
# on 'features' -- confirm whether this hook is still needed.
before featstat => sub {
    my ($self) = @_;
    $self->_get_nr_of_features();
};
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
# Builder for the lazy 'featstat' attribute.
#
# Walks all entries of the 'features' hash and tallies them by their
# 'gbkey' value. Returns a hash reference holding
#   total  - number of entries visited
#   origin - name of this function followed by the module version
#   count  - value of the 'nr_features' attribute
#   <gbkey> - one counter per distinct gbkey value
# The bookkeeping keys and the gbkey counters share one hash.
# Confesses if the 'features' attribute is not set.
sub _set_featstat {
    my ($self) = @_;
    my $this_function = (caller(0))[3];
    my %stat;

    unless ($self->has_features) {
        confess "ERROR [$this_function] \$self->features not available";
    }

    $stat{total}  = 0;
    $stat{origin} = "$this_function $VERSION";
    $stat{count}  = $self->nr_features;

    my $features = $self->features;
    for my $id (keys %{$features}) {
        my $key = $features->{$id}->{gbkey};
        $stat{total} += 1;
        # start a fresh counter the first time a gbkey shows up
        $stat{$key} = 0 unless (exists $stat{$key});
        $stat{$key} += 1;
    }

    return \%stat;
}
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
# Builder for the lazy 'nr_features' attribute.
#
# Returns the number of entries currently held in the 'features' hash,
# as a single integer regardless of the calling context.
# Confesses (dies with a stack trace) if the 'features' attribute is
# not set.
sub _get_nr_of_features {
    my $self = shift;
    my $this_function = (caller(0))[3];

    confess "ERROR [$this_function] \$self->features not available"
        unless ($self->has_features);

    # Force scalar context: a bare `keys` hands back the list of feature
    # IDs (not their count) whenever the caller uses list context.
    return scalar keys %{ $self->features };
}
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
# Parse a GFF3 file and populate the 'accession' and 'features' attributes.
#
# Arguments:
#   $in_file - path of the GFF3 file, handed to Bio::Tools::GFF
#
# The display ID of the header segment becomes the accession; if no
# header segment can be read, only a warning is issued. Features whose
# primary tag is 'exon' are skipped. Every other feature must carry an
# 'ID' attribute, otherwise the method croaks. Features are stored under
# their first 'Parent' value if they have one, else under their own ID.
# The first feature seen for a given key stores coordinates, strand,
# length, seqid, score, gbkey, name and uid; any later feature with the
# same key only overwrites the stored gbkey.
sub parse_gff {
    my ($self, $in_file) = @_;
    my $this_function = (caller(0))[3];

    my $gffio = Bio::Tools::GFF->new(
        -file        => $in_file,
        -gff_version => 3,
    );
    $gffio->ignore_sequence(1);

    if (my $header = $gffio->next_segment()) {
        $self->accession( $header->display_id() );
    }
    else {
        carp "ERROR [$this_function] Cannot parse GFF header\n";
    }

    my $features = $self->features;

    while (my $f = $gffio->next_feature()) {
        next if ($f->primary_tag() eq "exon");

        # 1) determine gbkey of the current feature (first value wins)
        my ($gbkey) = $f->get_tag_values("gbkey");

        # 2) every feature needs an ID. Method calls do not interpolate
        #    inside double quotes, so the position is concatenated.
        unless ($f->has_tag('ID')) {
            croak "ERROR [$this_function] Feature '$gbkey' at pos. "
                . $f->start . " does not have 'ID' attribute\n";
        }
        my ($id) = $f->get_tag_values('ID');    # ID=id101

        # 3) file the feature under its parent's ID if a parent exists
        my $uid = $id;
        if ($f->has_tag('Parent')) {
            ($uid) = $f->get_tag_values('Parent');
        }

        # 4) name the feature after its 'Name' attribute, else its ID
        #    (an ID is guaranteed to exist at this point, see step 2)
        my $feat_name = $id;
        if ($f->has_tag('Name')) {
            ($feat_name) = $f->get_tag_values('Name');
        }

        if (exists $features->{$uid}) {    # CDS / tRNA / rRNA / etc.
            # key already known: only the gbkey is replaced
            $features->{$uid}->{gbkey} = $gbkey;
        }
        else {                             # gene / ribosome_entry_site / etc.
            $features->{$uid}->{start}  = $f->start;
            $features->{$uid}->{end}    = $f->end;
            $features->{$uid}->{strand} = $f->strand;
            $features->{$uid}->{length} = $f->length;
            $features->{$uid}->{seqid}  = $f->seq_id;
            $features->{$uid}->{score}  = $f->score || 0;
            $features->{$uid}->{gbkey}  = $gbkey;
            $features->{$uid}->{name}   = $feat_name;
            $features->{$uid}->{uid}    = $uid;
        }
    }
    $gffio->close();
}
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
# Dump stored features as BED12 files, one file per genbank key.
#
# Arguments:
#   $gbkey - genbank key to dump; undef dumps every key found in featstat
#   $dest  - existing output directory
#   $bn    - basename used for the output file names
#   $log   - optional log file path; the file is opened for appending
#            and closed again, nothing is written to it in this method
#
# For every selected key <k> an unsorted file "<bn>.<k>.u.bed" is written
# to $dest and then handed to sortbed() together with the target name
# "<bn>.<k>.bed". Start coordinates are decremented by one for BED.
#
# Croaks if 'features' or 'featstat' are not available, if $dest is not a
# directory or if a file cannot be opened/closed; confesses if $gbkey is
# not a key of featstat. Note that 'featstat' is a lazy attribute, so
# has_featstat() is false until 'featstat' has been read once.
sub features2bed {
    my ($self, $gbkey, $dest, $bn, $log) = @_;
    my $this_function = (caller(0))[3];
    my @ft = ();
    my $log_fh;

    croak "ERROR [$this_function] \$self->features not available"
        unless ($self->has_features);
    croak "ERROR [$this_function] \$self->featstat not available"
        unless ($self->has_featstat);
    croak "ERROR [$this_function] $dest does not exist"
        unless (-d $dest);
    if (defined $log) {
        open($log_fh, ">>", $log)
            or croak "ERROR [$this_function] cannot open $log: $!";
    }

    if (defined $gbkey) {    # dump features of just one genbank key
        confess "ERROR [$this_function] genbank key \'$gbkey\' N/A in hash "
            unless (exists ${$self->featstat}{$gbkey});
        $ft[0] = $gbkey;
    }
    else {                   # dump features for all genbank keys
        foreach my $gbk (keys %{$self->featstat}) {
            # skip bookkeeping entries that are not genbank keys
            next if ($gbk eq 'total' || $gbk eq 'Src' || $gbk eq 'accession' ||
                     $gbk eq 'origin' || $gbk eq 'count');
            push @ft, $gbk;
        }
    }

    my $features = $self->features;

    foreach my $f (@ft) {
        my $bedname   = file($dest, "$bn.$f.bed");
        my $bedname_u = file($dest, "$bn.$f.u.bed");
        open(my $bed_fh, ">", "$bedname_u")
            or croak "ERROR [$this_function] cannot open $bedname_u: $!";

        # dump unsorted gene annotation from DS to BED12
        foreach my $uid (keys %{$features}) {
            my $feat = $features->{$uid};
            next unless ($feat->{gbkey} eq $f);

            my $chrom_start = $feat->{start} - 1;    # BED is 0-based
            my $chrom_end   = $feat->{end};
            # anything but -1 (including a missing strand) defaults to +
            my $strand      = ($feat->{strand} // 0) == -1 ? '-' : '+';

            print {$bed_fh} join("\t",
                $feat->{seqid},         # chrom
                $chrom_start,
                $chrom_end,
                $feat->{name},
                $feat->{score},
                $strand,
                $chrom_start,           # thickStart
                $chrom_end,             # thickEnd
                0,                      # reserved
                1,                      # blockCount
                $feat->{length} . ",",  # blockSizes
                "0,",                   # blockStarts
            ), "\n";
        }
        # buffered write errors only surface at close time
        close($bed_fh)
            or croak "ERROR [$this_function] cannot close $bedname_u: $!";

        sortbed($bedname_u, ".", $bedname, 1, undef);    # sort bed file
    }    # end foreach

    if (defined $log) { close($log_fh) };
}
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
# Write a plain-text summary of the feature statistics.
#
# Arguments:
#   $dest - existing output directory
#
# Creates "<accession>.summary.txt" in $dest containing the accession,
# the origin string from featstat, one "key<TAB>value" line per remaining
# featstat entry (sorted by key) and finally the total.
#
# Croaks if $dest is not a directory, if no accession has been set or if
# the summary file cannot be opened/closed.
sub feature_summary {
    my ($self, $dest) = @_;
    my $this_function = (caller(0))[3];

    croak "ERROR [$this_function] $dest does not exist\n"
        unless (-d $dest);
    croak "ERROR [$this_function] \$self->accession not available\n"
        unless ($self->has_accession);

    my $fn = file($dest, $self->accession . ".summary.txt");
    open(my $fh, ">", "$fn")
        or croak "ERROR [$this_function] cannot open $fn: $!";

    my $featstat = $self->featstat;

    print $fh "Accession\t " . $self->accession . "\n";
    print $fh "Origin \t $featstat->{origin}\n";
    foreach my $ft (sort keys %{$featstat}) {
        # Exact comparison on purpose: a pattern match would also drop
        # genbank keys that merely contain one of these words, such as
        # 'rep_origin'.
        next if ($ft eq 'total' || $ft eq 'accession' || $ft eq 'origin');
        print $fh "$ft\t$featstat->{$ft}\n";
    }
    print $fh "Total\t$featstat->{total}\n";

    # buffered write errors only surface at close time
    close($fh) or croak "ERROR [$this_function] cannot close $fn: $!";
}
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
__PACKAGE__->meta->make_immutable; |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
1; |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
__END__ |