line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::Gonzales::Search::IO::HMMER3; |
2
|
|
|
|
|
|
|
|
3
|
2
|
|
|
2
|
|
113929
|
use Mouse; |
|
2
|
|
|
|
|
58239
|
|
|
2
|
|
|
|
|
9
|
|
4
|
|
|
|
|
|
|
|
5
|
2
|
|
|
2
|
|
836
|
use warnings; |
|
2
|
|
|
|
|
6
|
|
|
2
|
|
|
|
|
54
|
|
6
|
2
|
|
|
2
|
|
13
|
use strict; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
36
|
|
7
|
2
|
|
|
2
|
|
9
|
use Carp; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
148
|
|
8
|
|
|
|
|
|
|
|
9
|
2
|
|
|
2
|
|
52
|
use 5.010; |
|
2
|
|
|
|
|
7
|
|
10
|
|
|
|
|
|
|
|
11
|
2
|
|
|
2
|
|
13
|
use Data::Dumper; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
157
|
|
12
|
2
|
|
|
2
|
|
14
|
use List::Util; |
|
2
|
|
|
|
|
13
|
|
|
2
|
|
|
|
|
149
|
|
13
|
2
|
|
|
2
|
|
1355
|
use List::MoreUtils qw/indexes firstidx/; |
|
2
|
|
|
|
|
30782
|
|
|
2
|
|
|
|
|
15
|
|
14
|
2
|
|
|
2
|
|
2907
|
use Bio::Gonzales::Util qw/hash_merge/; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
3156
|
|
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
with 'Bio::Gonzales::Util::Role::FileIO'; |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
our $VERSION = '0.083'; # VERSION |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
has '_current_query' => ( is => 'rw', default => 0 ); |
21
|
|
|
|
|
|
|
has '_current_hit' => ( is => 'rw', default => 0 ); |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
around BUILDARGS => sub { |
24
|
|
|
|
|
|
|
my $orig = shift; |
25
|
|
|
|
|
|
|
my $class = shift; |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
if ( @_ == 1 && Bio::Gonzales::Util::File::is_fh( $_[0] ) ) { |
28
|
|
|
|
|
|
|
return $class->$orig( fh => $_[0] ); |
29
|
|
|
|
|
|
|
} else { |
30
|
|
|
|
|
|
|
return $class->$orig(@_); |
31
|
|
|
|
|
|
|
} |
32
|
|
|
|
|
|
|
}; |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
sub _parse_result { |
35
|
5
|
|
|
5
|
|
13
|
my ($self) = @_; |
36
|
|
|
|
|
|
|
|
37
|
5
|
|
|
|
|
46
|
my $fhi = $self->_fhi; |
38
|
|
|
|
|
|
|
|
39
|
5
|
|
|
|
|
19
|
my @header = ( [] ); |
40
|
5
|
|
|
|
|
11
|
my @queries = ( [] ); |
41
|
|
|
|
|
|
|
|
42
|
5
|
|
|
|
|
11
|
my $current_query; |
43
|
5
|
|
|
|
|
14
|
while ( defined( my $l = $fhi->() ) ) { |
44
|
9769
|
|
|
|
|
17288
|
$l =~ s/\r\n/\n/; |
45
|
9769
|
|
|
|
|
13782
|
chomp $l; |
46
|
|
|
|
|
|
|
|
47
|
9769
|
100
|
|
|
|
24951
|
next if ( $l =~ /^\s*$/ ); |
48
|
|
|
|
|
|
|
#no queries read, yet, so |
49
|
8218
|
100
|
100
|
|
|
11476
|
if ( !@{ $queries[-1] } && $l =~ /^#/ ) { |
|
8218
|
|
|
|
|
18438
|
|
50
|
|
|
|
|
|
|
#parse header |
51
|
|
|
|
|
|
|
|
52
|
51
|
|
|
|
|
74
|
push @{ $header[-1] }, $l; |
|
51
|
|
|
|
|
163
|
|
53
|
|
|
|
|
|
|
} else { |
54
|
|
|
|
|
|
|
#parsing content |
55
|
|
|
|
|
|
|
|
56
|
8167
|
100
|
|
|
|
13321
|
if ( $l =~ m@^//$@ ) { |
57
|
14
|
|
|
|
|
42
|
push @queries, []; |
58
|
14
|
|
|
|
|
46
|
push @header, []; |
59
|
|
|
|
|
|
|
} else { |
60
|
8153
|
|
|
|
|
10542
|
push @{ $queries[-1] }, $l; |
|
8153
|
|
|
|
|
22850
|
|
61
|
|
|
|
|
|
|
} |
62
|
|
|
|
|
|
|
} |
63
|
|
|
|
|
|
|
} |
64
|
|
|
|
|
|
|
pop @header |
65
|
5
|
50
|
|
|
|
13
|
if ( @{ $header[-1] } == 0 ); |
|
5
|
|
|
|
|
20
|
|
66
|
|
|
|
|
|
|
pop @queries |
67
|
5
|
50
|
|
|
|
16
|
if ( @{ $queries[-1] } == 0 ); |
|
5
|
|
|
|
|
16
|
|
68
|
|
|
|
|
|
|
|
69
|
5
|
|
|
|
|
31
|
return ( \@header, \@queries ); |
70
|
|
|
|
|
|
|
} |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
sub parse { |
73
|
5
|
|
|
5
|
0
|
26
|
my ($self) = @_; |
74
|
|
|
|
|
|
|
|
75
|
5
|
|
|
|
|
18
|
my ( $h_raw, $q_raw ) = $self->_parse_result; |
76
|
|
|
|
|
|
|
|
77
|
5
|
|
|
|
|
27
|
my $q_parsed = $self->_parse_queries($q_raw); |
78
|
5
|
|
|
|
|
28
|
my $q = $self->_format_queries($q_parsed); |
79
|
|
|
|
|
|
|
#my $h = $self->_parse_header($h_raw); |
80
|
|
|
|
|
|
|
|
81
|
5
|
|
|
|
|
667
|
return ($q); |
82
|
|
|
|
|
|
|
} |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
sub _parse_queries { |
85
|
5
|
|
|
5
|
|
24
|
my ( $self, $raw_queries ) = @_; |
86
|
|
|
|
|
|
|
|
87
|
5
|
|
|
|
|
12
|
my @result; |
88
|
5
|
|
|
|
|
17
|
for my $q_raw (@$raw_queries) { |
89
|
|
|
|
|
|
|
# first split into query header and query body |
90
|
|
|
|
|
|
|
|
91
|
14
|
|
|
|
|
26
|
my %query; |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
#parse query id |
94
|
14
|
50
|
|
|
|
72
|
if ( $q_raw->[0] =~ /^Query:/ ) { |
95
|
14
|
|
|
|
|
48
|
$query{id} = shift @$q_raw; |
96
|
14
|
|
|
|
|
79
|
$query{id} =~ s/Query:\s+//; |
97
|
14
|
|
|
|
|
78
|
$query{id} =~ s/\s+\[[^\]]+\]$//; |
98
|
|
|
|
|
|
|
} else { |
99
|
0
|
|
|
|
|
0
|
confess "parsing error: could not parse query id"; |
100
|
|
|
|
|
|
|
} |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
#parse accession id |
103
|
14
|
100
|
|
|
|
53
|
if ( $q_raw->[0] =~ /^Accession:/ ) { |
104
|
7
|
|
|
|
|
21
|
$query{accession} = shift @$q_raw; |
105
|
7
|
|
|
|
|
25
|
$query{accession} =~ s/Accession:\s+//; |
106
|
|
|
|
|
|
|
} |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
#get rid of column descriptions |
109
|
14
|
|
|
36
|
|
199
|
my $scores_begin = firstidx {/^\s/} @$q_raw; |
|
36
|
|
|
|
|
104
|
|
110
|
14
|
|
|
|
|
70
|
splice( @$q_raw, 0, $scores_begin + 3 ); |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
# find the beginning of the domain annotation |
113
|
14
|
|
|
1458
|
|
83
|
my $domain_ann_begin = firstidx {/^\S/} @$q_raw; |
|
1458
|
|
|
|
|
2373
|
|
114
|
14
|
|
|
|
|
315
|
$query{scores} = [ splice( @$q_raw, 0, $domain_ann_begin ) ]; |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
#get rid of Domain blah blah line |
117
|
14
|
|
|
|
|
67
|
shift @$q_raw; |
118
|
14
|
|
|
6456
|
|
143
|
my $footer_begin = firstidx { $_ !~ /^(?:(?:>>)|(?:\s+))/ } @$q_raw; |
|
6456
|
|
|
|
|
11812
|
|
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
#cut out the query body and also get rid of the fancy column descriptions |
121
|
14
|
|
|
|
|
124
|
$query{domain_annotation} = [ grep { $_ !~ /^(\s+#|[-\s]+$)/ } splice( @$q_raw, 0, $footer_begin ) ]; |
|
6442
|
|
|
|
|
15154
|
|
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
#the rest is internal statistics |
124
|
14
|
|
|
|
|
310
|
$query{internal_stat} = [@$q_raw]; |
125
|
|
|
|
|
|
|
|
126
|
14
|
|
|
|
|
57
|
push @result, \%query; |
127
|
|
|
|
|
|
|
} |
128
|
|
|
|
|
|
|
|
129
|
5
|
|
|
|
|
16
|
return \@result; |
130
|
|
|
|
|
|
|
} |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
sub _parse_header { |
133
|
0
|
|
|
0
|
|
0
|
my ($self) = @_; |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
} |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
sub _format_queries { |
138
|
5
|
|
|
5
|
|
20
|
my ( $self, $parsed_queries, $has_alignments_included ) = @_; |
139
|
|
|
|
|
|
|
|
140
|
5
|
|
|
|
|
69
|
my %queries; |
141
|
5
|
|
|
|
|
15
|
for my $pq (@$parsed_queries) { |
142
|
14
|
|
|
|
|
26
|
my %hits; |
143
|
14
|
|
|
|
|
32
|
my %current_hit = (); |
144
|
14
|
|
|
|
|
27
|
my $skip_lines = 1; |
145
|
14
|
|
|
|
|
24
|
for my $p ( @{ $pq->{domain_annotation} } ) { |
|
14
|
|
|
|
|
47
|
|
146
|
|
|
|
|
|
|
#>> sph|PGSC0003DMP200011920 (sph) PGSC0003DMT200017313 Protein |
147
|
3558
|
100
|
100
|
|
|
12258
|
if ( $p =~ /^>>\s*(\S+)/ ) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
148
|
1442
|
100
|
|
|
|
2586
|
if (%current_hit) { |
149
|
1416
|
|
|
|
|
2244
|
$hits{ $current_hit{id} } = $current_hit{domain_annotations}; |
150
|
1416
|
|
|
|
|
2672
|
%current_hit = (); |
151
|
|
|
|
|
|
|
} |
152
|
1442
|
|
|
|
|
3505
|
$current_hit{id} = $1; |
153
|
1442
|
|
|
|
|
2014
|
$skip_lines = 0; |
154
|
|
|
|
|
|
|
} elsif ( !$skip_lines && $p =~ /^\s*\d+/ ) { |
155
|
1849
|
|
100
|
|
|
6695
|
$current_hit{domain_annotations} //= []; |
156
|
|
|
|
|
|
|
|
157
|
1849
|
|
|
|
|
12562
|
my %domain_annotation = ( |
158
|
|
|
|
|
|
|
score => substr( $p, 7, 6 ), |
159
|
|
|
|
|
|
|
bias => substr( $p, 14, 5 ), |
160
|
|
|
|
|
|
|
c_evalue => substr( $p, 20, 9 ), |
161
|
|
|
|
|
|
|
i_evalue => substr( $p, 30, 9 ), |
162
|
|
|
|
|
|
|
hmm_from => substr( $p, 40, 7 ), |
163
|
|
|
|
|
|
|
hmm_to => substr( $p, 48, 7 ), |
164
|
|
|
|
|
|
|
align_from => substr( $p, 59, 7 ), |
165
|
|
|
|
|
|
|
align_to => substr( $p, 67, 7 ), |
166
|
|
|
|
|
|
|
env_from => substr( $p, 78, 7 ), |
167
|
|
|
|
|
|
|
env_to => substr( $p, 86, 7 ), |
168
|
|
|
|
|
|
|
acc => substr( $p, 97, 4 ), |
169
|
|
|
|
|
|
|
); |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
#remove leading spaces |
172
|
1849
|
|
|
|
|
6917
|
map { $domain_annotation{$_} =~ s/^\s*// } keys %domain_annotation; |
|
20339
|
|
|
|
|
48162
|
|
173
|
|
|
|
|
|
|
|
174
|
1849
|
|
|
|
|
3398
|
push @{ $current_hit{domain_annotations} }, \%domain_annotation; |
|
1849
|
|
|
|
|
4614
|
|
175
|
|
|
|
|
|
|
} elsif ( $p =~ /^\s*Alignments for each domain:/ ) { |
176
|
|
|
|
|
|
|
# we found all domains, so skip till next ">>" |
177
|
14
|
|
|
|
|
32
|
%current_hit = (); |
178
|
14
|
|
|
|
|
22
|
$skip_lines = 1; |
179
|
14
|
|
|
|
|
23
|
next; |
180
|
|
|
|
|
|
|
} else { |
181
|
253
|
|
|
|
|
322
|
$skip_lines = 1; |
182
|
|
|
|
|
|
|
} |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
$hits{ $current_hit{id} } = $current_hit{domain_annotations} |
185
|
3544
|
100
|
|
|
|
8738
|
if (%current_hit); |
186
|
|
|
|
|
|
|
} |
187
|
|
|
|
|
|
|
#compose id of query id/name and accession id/name |
188
|
14
|
|
|
|
|
44
|
my $id = $pq->{id}; |
189
|
|
|
|
|
|
|
$id .= "#" . $pq->{accession} |
190
|
14
|
100
|
|
|
|
44
|
if ( exists( $pq->{accession} ) ); |
191
|
14
|
|
50
|
|
|
85
|
$queries{$id} //= {}; |
192
|
|
|
|
|
|
|
#id => { hit_id => [ { hit information }, ... ] } |
193
|
14
|
|
|
|
|
74
|
hash_merge( $queries{$id}, \%hits ); |
194
|
|
|
|
|
|
|
} |
195
|
5
|
|
|
|
|
18
|
return \%queries; |
196
|
|
|
|
|
|
|
} |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
1; |