line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# -*-CPerl-*- |
2
|
|
|
|
|
|
|
# Last changed Time-stamp: <2014-10-02 19:17:12 mtw> |
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
package Bio::ViennaNGS::AnnoC; |
5
|
|
|
|
|
|
|
|
6
|
1
|
|
|
1
|
|
17370
|
use Exporter; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
61
|
|
7
|
1
|
|
|
1
|
|
1831
|
use version; our $VERSION = qv('0.06'); |
|
1
|
|
|
|
|
2150
|
|
|
1
|
|
|
|
|
7
|
|
8
|
1
|
|
|
1
|
|
93
|
use strict; |
|
1
|
|
|
|
|
7
|
|
|
1
|
|
|
|
|
72
|
|
9
|
1
|
|
|
1
|
|
7
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
35
|
|
10
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
34
|
|
11
|
1
|
|
|
1
|
|
9381
|
use Data::Dumper; |
|
1
|
|
|
|
|
10993
|
|
|
1
|
|
|
|
|
69
|
|
12
|
1
|
|
|
1
|
|
730
|
use Bio::Tools::GFF; |
|
1
|
|
|
|
|
104325
|
|
|
1
|
|
|
|
|
32
|
|
13
|
1
|
|
|
1
|
|
491
|
use Bio::DB::Fasta; |
|
1
|
|
|
|
|
13836
|
|
|
1
|
|
|
|
|
63
|
|
14
|
1
|
|
|
1
|
|
651
|
use Path::Class; |
|
1
|
|
|
|
|
17897
|
|
|
1
|
|
|
|
|
56
|
|
15
|
1
|
|
|
1
|
|
7
|
use Carp; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
910
|
|
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
18
|
|
|
|
|
|
|
our @EXPORT_OK = qw(parse_gff feature_summary get_fasta_ids |
19
|
|
|
|
|
|
|
$feat $fstat $fastadb |
20
|
|
|
|
|
|
|
@fastaids |
21
|
|
|
|
|
|
|
%features %featsta); |
22
|
|
|
|
|
|
|
our @EXPORT = (); |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^# |
25
|
|
|
|
|
|
|
#^^^^^^^^^^ Variables ^^^^^^^^^^^# |
26
|
|
|
|
|
|
|
#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^# |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
our ($fastadb); |
29
|
|
|
|
|
|
|
our %features = (); |
30
|
|
|
|
|
|
|
our %featstat = (); |
31
|
|
|
|
|
|
|
our $feat = \%features; |
32
|
|
|
|
|
|
|
our $fstat = \%featstat; |
33
|
|
|
|
|
|
|
our @fastaids = (); |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^# |
36
|
|
|
|
|
|
|
#^^^^^^^^^^^ Subroutines ^^^^^^^^^^# |
37
|
|
|
|
|
|
|
#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^# |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
sub parse_gff { |
40
|
0
|
|
|
0
|
1
|
|
my $in_file = shift; |
41
|
0
|
|
|
|
|
|
my ($i,$gffio,$feature,$gbkey); |
42
|
0
|
|
|
|
|
|
my $this_function = (caller(0))[3]; |
43
|
|
|
|
|
|
|
|
44
|
0
|
|
|
|
|
|
$gffio = Bio::Tools::GFF->new(-file => $in_file, |
45
|
|
|
|
|
|
|
-gff_version => 3, |
46
|
|
|
|
|
|
|
); |
47
|
0
|
|
|
|
|
|
$gffio->ignore_sequence(1); |
48
|
0
|
0
|
|
|
|
|
if (my $header = $gffio->next_segment() ){ |
49
|
0
|
|
|
|
|
|
$featstat{accession}= $header->display_id(); |
50
|
|
|
|
|
|
|
} |
51
|
|
|
|
|
|
|
else{ |
52
|
0
|
|
|
|
|
|
carp "[$this_function]: Cannot parse GFF header\n"; |
53
|
|
|
|
|
|
|
} |
54
|
|
|
|
|
|
|
|
55
|
0
|
|
|
|
|
|
while($feature = $gffio->next_feature()) { |
56
|
0
|
|
|
|
|
|
my ($uid,$feat_name); |
57
|
0
|
|
|
|
|
|
my @name = my @id = my @gbkeys = (); |
58
|
|
|
|
|
|
|
|
59
|
0
|
0
|
|
|
|
|
next if ($feature->primary_tag() eq "exon"); |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
# 1) determine gbkey of the current feature |
62
|
0
|
|
|
|
|
|
@gbkeys = $feature->get_tag_values("gbkey"); |
63
|
0
|
|
|
|
|
|
$gbkey = $gbkeys[0]; |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
# 2) get a unique ID for each feature |
66
|
0
|
0
|
|
|
|
|
if ($feature->has_tag('ID')){ |
67
|
0
|
|
|
|
|
|
@id = $feature->get_tag_values('ID'); |
68
|
0
|
|
|
|
|
|
$uid = $id[0]; # ID=id101 |
69
|
|
|
|
|
|
|
} |
70
|
|
|
|
|
|
|
else { |
71
|
0
|
|
|
|
|
|
croak "ERROR [$this_function] Feature '$gbkey' at pos.\ |
72
|
|
|
|
|
|
|
$feature->start does not have \'ID\' attribute\n"; |
73
|
|
|
|
|
|
|
} |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
# 3) assign parent's unique ID in case a parent record exists |
76
|
0
|
0
|
|
|
|
|
if ($feature->has_tag('Parent')){ |
77
|
0
|
|
|
|
|
|
@id = $feature->get_tag_values('Parent'); |
78
|
0
|
|
|
|
|
|
$uid = $id[0]; # ID=id101 |
79
|
|
|
|
|
|
|
} |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
# 4) find a name for the current feature, use 'Name' or 'ID' attribute |
82
|
0
|
0
|
|
|
|
|
if ($feature->has_tag('Name')){ |
|
|
0
|
|
|
|
|
|
83
|
0
|
|
|
|
|
|
@name = $feature->get_tag_values('Name'); |
84
|
0
|
|
|
|
|
|
$feat_name = $name[0]; |
85
|
|
|
|
|
|
|
} |
86
|
|
|
|
|
|
|
elsif ($feature->has_tag('ID')){ |
87
|
0
|
|
|
|
|
|
@id = $feature->get_tag_values('ID'); |
88
|
0
|
|
|
|
|
|
$feat_name = $id[0]; # ID=id101, use ID as feature name |
89
|
|
|
|
|
|
|
} |
90
|
|
|
|
|
|
|
else { |
91
|
0
|
|
|
|
|
|
croak "ERROR [$this_function] Cannot set name for feature \ |
92
|
|
|
|
|
|
|
$feature->gbkey at pos. $feature->start\n"; |
93
|
|
|
|
|
|
|
} |
94
|
|
|
|
|
|
|
|
95
|
0
|
0
|
|
|
|
|
unless (exists $features{$uid}) { # gene / ribosome_entry_site / etc. |
96
|
0
|
|
|
|
|
|
$features{$uid}->{start} = $feature->start; |
97
|
0
|
|
|
|
|
|
$features{$uid}->{end} = $feature->end; |
98
|
0
|
|
|
|
|
|
$features{$uid}->{strand} = $feature->strand; |
99
|
0
|
|
|
|
|
|
$features{$uid}->{length} = $feature->length; |
100
|
0
|
|
|
|
|
|
$features{$uid}->{seqid} = $feature->seq_id; |
101
|
0
|
|
0
|
|
|
|
$features{$uid}->{score} = $feature->score || 0; |
102
|
0
|
|
|
|
|
|
$features{$uid}->{gbkey} = $gbkey; |
103
|
0
|
|
|
|
|
|
$features{$uid}->{name} = $feat_name; |
104
|
0
|
|
|
|
|
|
$features{$uid}->{uid} = $uid; |
105
|
|
|
|
|
|
|
} |
106
|
|
|
|
|
|
|
else { # CDS / tRNA / rRNA / etc |
107
|
0
|
|
|
|
|
|
$features{$uid}->{gbkey} = $gbkey; # gbkey for tRNA/ rRNA/ CDS etc |
108
|
|
|
|
|
|
|
} |
109
|
|
|
|
|
|
|
} |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
# finally generate some statistics on features present in this annotation |
112
|
0
|
|
|
|
|
|
$featstat{total} = 0; |
113
|
0
|
|
|
|
|
|
$featstat{origin} = "$this_function ".$VERSION; |
114
|
0
|
|
|
|
|
|
foreach my $ft (keys %features){ |
115
|
0
|
|
|
|
|
|
$featstat{total}++; |
116
|
0
|
|
|
|
|
|
my $key = $features{$ft}->{gbkey}; |
117
|
0
|
0
|
|
|
|
|
unless (exists $featstat{$key}){ |
118
|
0
|
|
|
|
|
|
$featstat{$key} = 0; |
119
|
|
|
|
|
|
|
} |
120
|
0
|
|
|
|
|
|
$featstat{$key} += 1; |
121
|
|
|
|
|
|
|
} |
122
|
0
|
|
|
|
|
|
$gffio->close(); |
123
|
|
|
|
|
|
|
# print Dumper (\%features); |
124
|
|
|
|
|
|
|
} |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
# feature_summary($fsR,$dest) |
127
|
|
|
|
|
|
|
# Print summary of %featstat hash |
128
|
|
|
|
|
|
|
# |
129
|
|
|
|
|
|
|
# ARG1: reference to %featstat hash |
130
|
|
|
|
|
|
|
# ARG2: path for output file |
131
|
|
|
|
|
|
|
sub feature_summary { |
132
|
0
|
|
|
0
|
1
|
|
my ($fsR, $dest) = @_; |
133
|
0
|
|
|
|
|
|
my ($fn,$fh); |
134
|
0
|
|
|
|
|
|
my $this_function = (caller(0))[3]; |
135
|
|
|
|
|
|
|
|
136
|
0
|
0
|
|
|
|
|
croak "ERROR [$this_function] $dest does not exist\n" |
137
|
|
|
|
|
|
|
unless (-d $dest); |
138
|
|
|
|
|
|
|
|
139
|
0
|
|
|
|
|
|
$fn = dir($dest,$$fsR{accession}.".summary.txt"); |
140
|
0
|
0
|
|
|
|
|
open $fh, ">", $fn or croak $!; |
141
|
|
|
|
|
|
|
|
142
|
0
|
|
|
|
|
|
print $fh "Accession\t $$fsR{accession}\n"; |
143
|
0
|
|
|
|
|
|
print $fh "Origin \t $$fsR{origin}\n"; |
144
|
0
|
|
|
|
|
|
foreach my $ft (sort keys %$fsR){ |
145
|
0
|
0
|
0
|
|
|
|
next if ($ft =~ /total/ || $ft =~ /accession/ || $ft =~ /origin/); |
|
|
|
0
|
|
|
|
|
146
|
0
|
|
|
|
|
|
print $fh "$ft\t$$fsR{$ft}\n"; |
147
|
|
|
|
|
|
|
} |
148
|
0
|
|
|
|
|
|
print $fh "Total\t$$fsR{total}\n"; |
149
|
0
|
|
|
|
|
|
close $fh; |
150
|
|
|
|
|
|
|
} |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
# get_fasta_ids($fa_in) |
153
|
|
|
|
|
|
|
# Returns an array with all fasta ids from a (multi)fasta file |
154
|
|
|
|
|
|
|
# |
155
|
|
|
|
|
|
|
# ARG1: fa_in |
156
|
|
|
|
|
|
|
sub get_fasta_ids { |
157
|
0
|
|
|
0
|
1
|
|
my $fa_in = shift; |
158
|
0
|
0
|
|
|
|
|
unless (defined $fa_in){ |
159
|
0
|
|
|
|
|
|
confess "Fasta input not available"; |
160
|
|
|
|
|
|
|
} |
161
|
0
|
0
|
|
|
|
|
$fastadb = Bio::DB::Fasta->new($fa_in) or die $!; |
162
|
0
|
|
|
|
|
|
@fastaids = $fastadb->ids; |
163
|
0
|
|
|
|
|
|
return @fastaids; |
164
|
|
|
|
|
|
|
} |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
1; |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
__END__ |