| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# -*-CPerl-*- |
|
2
|
|
|
|
|
|
|
# Last changed Time-stamp: <2014-10-02 19:17:12 mtw> |
|
3
|
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
package Bio::ViennaNGS::AnnoC; |
|
5
|
|
|
|
|
|
|
|
|
6
|
1
|
|
|
1
|
|
17370
|
use Exporter; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
61
|
|
|
7
|
1
|
|
|
1
|
|
1831
|
use version; our $VERSION = qv('0.06'); |
|
|
1
|
|
|
|
|
2150
|
|
|
|
1
|
|
|
|
|
7
|
|
|
8
|
1
|
|
|
1
|
|
93
|
use strict; |
|
|
1
|
|
|
|
|
7
|
|
|
|
1
|
|
|
|
|
72
|
|
|
9
|
1
|
|
|
1
|
|
7
|
use strict; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
35
|
|
|
10
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
34
|
|
|
11
|
1
|
|
|
1
|
|
9381
|
use Data::Dumper; |
|
|
1
|
|
|
|
|
10993
|
|
|
|
1
|
|
|
|
|
69
|
|
|
12
|
1
|
|
|
1
|
|
730
|
use Bio::Tools::GFF; |
|
|
1
|
|
|
|
|
104325
|
|
|
|
1
|
|
|
|
|
32
|
|
|
13
|
1
|
|
|
1
|
|
491
|
use Bio::DB::Fasta; |
|
|
1
|
|
|
|
|
13836
|
|
|
|
1
|
|
|
|
|
63
|
|
|
14
|
1
|
|
|
1
|
|
651
|
use Path::Class; |
|
|
1
|
|
|
|
|
17897
|
|
|
|
1
|
|
|
|
|
56
|
|
|
15
|
1
|
|
|
1
|
|
7
|
use Carp; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
910
|
|
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
|
18
|
|
|
|
|
|
|
our @EXPORT_OK = qw(parse_gff feature_summary get_fasta_ids |
|
19
|
|
|
|
|
|
|
$feat $fstat $fastadb |
|
20
|
|
|
|
|
|
|
@fastaids |
|
21
|
|
|
|
|
|
|
%features %featsta); |
|
22
|
|
|
|
|
|
|
our @EXPORT = (); |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^# |
|
25
|
|
|
|
|
|
|
#^^^^^^^^^^ Variables ^^^^^^^^^^^# |
|
26
|
|
|
|
|
|
|
#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^# |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
our ($fastadb); |
|
29
|
|
|
|
|
|
|
our %features = (); |
|
30
|
|
|
|
|
|
|
our %featstat = (); |
|
31
|
|
|
|
|
|
|
our $feat = \%features; |
|
32
|
|
|
|
|
|
|
our $fstat = \%featstat; |
|
33
|
|
|
|
|
|
|
our @fastaids = (); |
|
34
|
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^# |
|
36
|
|
|
|
|
|
|
#^^^^^^^^^^^ Subroutines ^^^^^^^^^^# |
|
37
|
|
|
|
|
|
|
#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^# |
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
sub parse_gff { |
|
40
|
0
|
|
|
0
|
1
|
|
my $in_file = shift; |
|
41
|
0
|
|
|
|
|
|
my ($i,$gffio,$feature,$gbkey); |
|
42
|
0
|
|
|
|
|
|
my $this_function = (caller(0))[3]; |
|
43
|
|
|
|
|
|
|
|
|
44
|
0
|
|
|
|
|
|
$gffio = Bio::Tools::GFF->new(-file => $in_file, |
|
45
|
|
|
|
|
|
|
-gff_version => 3, |
|
46
|
|
|
|
|
|
|
); |
|
47
|
0
|
|
|
|
|
|
$gffio->ignore_sequence(1); |
|
48
|
0
|
0
|
|
|
|
|
if (my $header = $gffio->next_segment() ){ |
|
49
|
0
|
|
|
|
|
|
$featstat{accession}= $header->display_id(); |
|
50
|
|
|
|
|
|
|
} |
|
51
|
|
|
|
|
|
|
else{ |
|
52
|
0
|
|
|
|
|
|
carp "[$this_function]: Cannot parse GFF header\n"; |
|
53
|
|
|
|
|
|
|
} |
|
54
|
|
|
|
|
|
|
|
|
55
|
0
|
|
|
|
|
|
while($feature = $gffio->next_feature()) { |
|
56
|
0
|
|
|
|
|
|
my ($uid,$feat_name); |
|
57
|
0
|
|
|
|
|
|
my @name = my @id = my @gbkeys = (); |
|
58
|
|
|
|
|
|
|
|
|
59
|
0
|
0
|
|
|
|
|
next if ($feature->primary_tag() eq "exon"); |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
# 1) determine gbkey of the current feature |
|
62
|
0
|
|
|
|
|
|
@gbkeys = $feature->get_tag_values("gbkey"); |
|
63
|
0
|
|
|
|
|
|
$gbkey = $gbkeys[0]; |
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
# 2) get a unique ID for each feature |
|
66
|
0
|
0
|
|
|
|
|
if ($feature->has_tag('ID')){ |
|
67
|
0
|
|
|
|
|
|
@id = $feature->get_tag_values('ID'); |
|
68
|
0
|
|
|
|
|
|
$uid = $id[0]; # ID=id101 |
|
69
|
|
|
|
|
|
|
} |
|
70
|
|
|
|
|
|
|
else { |
|
71
|
0
|
|
|
|
|
|
croak "ERROR [$this_function] Feature '$gbkey' at pos.\ |
|
72
|
|
|
|
|
|
|
$feature->start does not have \'ID\' attribute\n"; |
|
73
|
|
|
|
|
|
|
} |
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
# 3) assign parent's unique ID in case a parent record exists |
|
76
|
0
|
0
|
|
|
|
|
if ($feature->has_tag('Parent')){ |
|
77
|
0
|
|
|
|
|
|
@id = $feature->get_tag_values('Parent'); |
|
78
|
0
|
|
|
|
|
|
$uid = $id[0]; # ID=id101 |
|
79
|
|
|
|
|
|
|
} |
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
# 4) find a name for the current feature, use 'Name' or 'ID' attribute |
|
82
|
0
|
0
|
|
|
|
|
if ($feature->has_tag('Name')){ |
|
|
|
0
|
|
|
|
|
|
|
83
|
0
|
|
|
|
|
|
@name = $feature->get_tag_values('Name'); |
|
84
|
0
|
|
|
|
|
|
$feat_name = $name[0]; |
|
85
|
|
|
|
|
|
|
} |
|
86
|
|
|
|
|
|
|
elsif ($feature->has_tag('ID')){ |
|
87
|
0
|
|
|
|
|
|
@id = $feature->get_tag_values('ID'); |
|
88
|
0
|
|
|
|
|
|
$feat_name = $id[0]; # ID=id101, use ID as feature name |
|
89
|
|
|
|
|
|
|
} |
|
90
|
|
|
|
|
|
|
else { |
|
91
|
0
|
|
|
|
|
|
croak "ERROR [$this_function] Cannot set name for feature \ |
|
92
|
|
|
|
|
|
|
$feature->gbkey at pos. $feature->start\n"; |
|
93
|
|
|
|
|
|
|
} |
|
94
|
|
|
|
|
|
|
|
|
95
|
0
|
0
|
|
|
|
|
unless (exists $features{$uid}) { # gene / ribosome_entry_site / etc. |
|
96
|
0
|
|
|
|
|
|
$features{$uid}->{start} = $feature->start; |
|
97
|
0
|
|
|
|
|
|
$features{$uid}->{end} = $feature->end; |
|
98
|
0
|
|
|
|
|
|
$features{$uid}->{strand} = $feature->strand; |
|
99
|
0
|
|
|
|
|
|
$features{$uid}->{length} = $feature->length; |
|
100
|
0
|
|
|
|
|
|
$features{$uid}->{seqid} = $feature->seq_id; |
|
101
|
0
|
|
0
|
|
|
|
$features{$uid}->{score} = $feature->score || 0; |
|
102
|
0
|
|
|
|
|
|
$features{$uid}->{gbkey} = $gbkey; |
|
103
|
0
|
|
|
|
|
|
$features{$uid}->{name} = $feat_name; |
|
104
|
0
|
|
|
|
|
|
$features{$uid}->{uid} = $uid; |
|
105
|
|
|
|
|
|
|
} |
|
106
|
|
|
|
|
|
|
else { # CDS / tRNA / rRNA / etc |
|
107
|
0
|
|
|
|
|
|
$features{$uid}->{gbkey} = $gbkey; # gbkey for tRNA/ rRNA/ CDS etc |
|
108
|
|
|
|
|
|
|
} |
|
109
|
|
|
|
|
|
|
} |
|
110
|
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
# finally generate some statistics on features present in this annotation |
|
112
|
0
|
|
|
|
|
|
$featstat{total} = 0; |
|
113
|
0
|
|
|
|
|
|
$featstat{origin} = "$this_function ".$VERSION; |
|
114
|
0
|
|
|
|
|
|
foreach my $ft (keys %features){ |
|
115
|
0
|
|
|
|
|
|
$featstat{total}++; |
|
116
|
0
|
|
|
|
|
|
my $key = $features{$ft}->{gbkey}; |
|
117
|
0
|
0
|
|
|
|
|
unless (exists $featstat{$key}){ |
|
118
|
0
|
|
|
|
|
|
$featstat{$key} = 0; |
|
119
|
|
|
|
|
|
|
} |
|
120
|
0
|
|
|
|
|
|
$featstat{$key} += 1; |
|
121
|
|
|
|
|
|
|
} |
|
122
|
0
|
|
|
|
|
|
$gffio->close(); |
|
123
|
|
|
|
|
|
|
# print Dumper (\%features); |
|
124
|
|
|
|
|
|
|
} |
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
# feature_summary($fsR,$dest) |
|
127
|
|
|
|
|
|
|
# Print summary of %featstat hash |
|
128
|
|
|
|
|
|
|
# |
|
129
|
|
|
|
|
|
|
# ARG1: reference to %featstat hash |
|
130
|
|
|
|
|
|
|
# ARG2: path for output file |
|
131
|
|
|
|
|
|
|
sub feature_summary { |
|
132
|
0
|
|
|
0
|
1
|
|
my ($fsR, $dest) = @_; |
|
133
|
0
|
|
|
|
|
|
my ($fn,$fh); |
|
134
|
0
|
|
|
|
|
|
my $this_function = (caller(0))[3]; |
|
135
|
|
|
|
|
|
|
|
|
136
|
0
|
0
|
|
|
|
|
croak "ERROR [$this_function] $dest does not exist\n" |
|
137
|
|
|
|
|
|
|
unless (-d $dest); |
|
138
|
|
|
|
|
|
|
|
|
139
|
0
|
|
|
|
|
|
$fn = dir($dest,$$fsR{accession}.".summary.txt"); |
|
140
|
0
|
0
|
|
|
|
|
open $fh, ">", $fn or croak $!; |
|
141
|
|
|
|
|
|
|
|
|
142
|
0
|
|
|
|
|
|
print $fh "Accession\t $$fsR{accession}\n"; |
|
143
|
0
|
|
|
|
|
|
print $fh "Origin \t $$fsR{origin}\n"; |
|
144
|
0
|
|
|
|
|
|
foreach my $ft (sort keys %$fsR){ |
|
145
|
0
|
0
|
0
|
|
|
|
next if ($ft =~ /total/ || $ft =~ /accession/ || $ft =~ /origin/); |
|
|
|
|
0
|
|
|
|
|
|
146
|
0
|
|
|
|
|
|
print $fh "$ft\t$$fsR{$ft}\n"; |
|
147
|
|
|
|
|
|
|
} |
|
148
|
0
|
|
|
|
|
|
print $fh "Total\t$$fsR{total}\n"; |
|
149
|
0
|
|
|
|
|
|
close $fh; |
|
150
|
|
|
|
|
|
|
} |
|
151
|
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
# get_fasta_ids($fa_in) |
|
153
|
|
|
|
|
|
|
# Returns an array with all fasta ids from a (multi)fasta file |
|
154
|
|
|
|
|
|
|
# |
|
155
|
|
|
|
|
|
|
# ARG1: fa_in |
|
156
|
|
|
|
|
|
|
sub get_fasta_ids { |
|
157
|
0
|
|
|
0
|
1
|
|
my $fa_in = shift; |
|
158
|
0
|
0
|
|
|
|
|
unless (defined $fa_in){ |
|
159
|
0
|
|
|
|
|
|
confess "Fasta input not available"; |
|
160
|
|
|
|
|
|
|
} |
|
161
|
0
|
0
|
|
|
|
|
$fastadb = Bio::DB::Fasta->new($fa_in) or die $!; |
|
162
|
0
|
|
|
|
|
|
@fastaids = $fastadb->ids; |
|
163
|
0
|
|
|
|
|
|
return @fastaids; |
|
164
|
|
|
|
|
|
|
} |
|
165
|
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
1; |
|
167
|
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
__END__ |