| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
|
|
2
|
|
|
|
|
|
|
=head1 NAME |
|
3
|
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
Bio::Tools::PrositeScan - Parser for ps_scan result |
|
5
|
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
7
|
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
use Bio::Tools::PrositeScan; |
|
9
|
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
my $factory = Bio::Tools::PrositeScan->new( |
|
11
|
|
|
|
|
|
|
-file => 'out.PrositeScan', |
|
12
|
|
|
|
|
|
|
-format => 'fasta' |
|
13
|
|
|
|
|
|
|
); |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
while(my $match = $factory->next_prediction){ |
|
16
|
|
|
|
|
|
|
# $match is a Bio::SeqFeature::FeaturePair |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
# Sequence ID |
|
19
|
|
|
|
|
|
|
my $seq_id = $match->seq_id; |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
# PROSITE accession number |
|
22
|
|
|
|
|
|
|
my $psac = $match->hseq_id; |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
# Coordinates |
|
25
|
|
|
|
|
|
|
my @coords = ( $match->start, $match->end ); |
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
# Subsequence |
|
28
|
|
|
|
|
|
|
my $seq = $match->feature1->seq; |
|
29
|
|
|
|
|
|
|
} |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
This is a parser of the output of the ps_scan program. It takes either a file |
|
34
|
|
|
|
|
|
|
handle or a file name, and returns a L<Bio::SeqFeature::FeaturePair> object. |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
Note that the current implementation parses the entire file at once. |
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
=head1 AUTHOR |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
Juguang Xiao, juguang@tll.org.sg |
|
41
|
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
=over |
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
=item * L |
|
47
|
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
=item * L |
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
=back |
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
=cut |
|
53
|
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
# Let the code begin... |
|
55
|
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
package Bio::Tools::PrositeScan;
use strict;
use warnings;
use Bio::Seq;
use Bio::SeqFeature::Generic;
use Bio::SeqFeature::FeaturePair;

use base qw(Bio::Root::Root Bio::Root::IO);

# Names of the ps_scan output formats that new() accepts for its -format
# argument.  Only the FASTA format currently has a parser (see _parse).
# Declared with "our" rather than the obsolete "use vars" pragma; the
# variable is still the package global @Bio::Tools::PrositeScan::FORMATS.
our @FORMATS = qw(SCAN FASTA PSA MSA PFF MATCHLIST);
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=head2 new |
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
Title : new |
|
69
|
|
|
|
|
|
|
Usage : Bio::Tools::PrositeScan->new(-file => 'out.PrositeScan'); |
|
70
|
|
|
|
|
|
|
Bio::Tools::PrositeScan->new(-fh => \*FH); |
|
71
|
|
|
|
|
|
|
Returns : L<Bio::Tools::PrositeScan> |
|
72
|
|
|
|
|
|
|
Args : -format => string representing the format type for the |
|
73
|
|
|
|
|
|
|
ps_scan output, REQUIRED |
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
The C<-format> argument must currently be set to C<fasta> since this is the |
|
76
|
|
|
|
|
|
|
only parser implemented. This corresponds with using the ps_scan arguments |
|
77
|
|
|
|
|
|
|
C<-o fasta>. |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=cut |
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
# Constructor.  Passes @args to the parent constructor and to
# _initialize_io, then extracts the -format argument.
#
# Throws "format needed" when -format is missing or false, and
# "Invalid format, [...]" when it is not one of @FORMATS (compared
# case-insensitively).  Returns the new object.
sub new {
    my ($class, @args) = @_;
    my $self = $class->SUPER::new(@args);
    $self->_initialize_io(@args);

    my ($format) = $self->_rearrange([qw(FORMAT)], @args);
    $format || $self->throw("format needed");

    # Compare as plain strings.  Interpolating the caller's value into a
    # pattern would let regex metacharacters through ("fast." would match
    # FASTA) or die with a regex compilation error ("fasta(").
    my $wanted = lc $format;
    unless (grep { lc($_) eq $wanted } @FORMATS) {
        $self->throw("Invalid format, [$format]");
    }

    # The format is stored exactly as supplied.
    $self->format($format);
    return $self;
}
|
94
|
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
# Combined getter/setter for the output format name.
# With an argument (even an undefined one) the value is stored in
# $self->{_format}; in every case the stored value is returned.
sub format {
    my ($self, @new_value) = @_;
    if (@new_value) {
        $self->{_format} = $new_value[0];
    }
    return $self->{_format};
}
|
100
|
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
=head2 next_prediction |
|
102
|
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
Title : next_prediction |
|
104
|
|
|
|
|
|
|
Usage : |
|
105
|
|
|
|
|
|
|
while($result = $factory->next_prediction){ |
|
106
|
|
|
|
|
|
|
; |
|
107
|
|
|
|
|
|
|
} |
|
108
|
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
Returns : a Bio::SeqFeature::FeaturePair object where |
|
110
|
|
|
|
|
|
|
feature1 is the matched subsequence and |
|
111
|
|
|
|
|
|
|
feature2 is the PROSITE accession number. |
|
112
|
|
|
|
|
|
|
See L<Bio::SeqFeature::FeaturePair>. |
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=cut |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
# Returns the next queued match, removing it from $self->{_matches}.
# The first call triggers a parse of the input via _parse and then sets
# the parsed flag so that later calls only drain the queue.
# Returns undef once the queue is empty.
sub next_prediction {
    my $self = shift;

    if (!$self->_parsed) {
        $self->_parse;
        $self->_parsed(1);
    }

    return shift @{ $self->{_matches} };
}
|
124
|
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
# Alias for next_prediction: returns whatever next_prediction returns.
sub next_result {
    my ($self) = @_;
    return $self->next_prediction;
}
|
128
|
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
# Reads or sets the "input already parsed" flag in $self->{_parsed}.
# A true argument sets the flag to 1.  A false or missing argument leaves
# the flag untouched, so the flag cannot be cleared through this method.
# Always returns the current flag value.
sub _parsed {
    my ($self, @flag) = @_;
    $self->{_parsed} = 1 if @flag and $flag[0];
    return $self->{_parsed};
}
|
134
|
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
# Dispatches to the parser for the configured format.
# Only the fasta format has a parser; any other format throws
# "the [...] parser has not been written".
sub _parse {
    my $self = shift;
    my $format = $self->format;

    # new() validates the format name case-insensitively and stores it as
    # given, so the match here must ignore case too.  Otherwise a format
    # such as 'FASTA' passes validation and is then rejected here.
    if ($format =~ /^fasta$/i) {
        $self->_parse_fasta;
    }
    else {
        $self->throw("the [$format] parser has not been written");
    }
}
|
144
|
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
# Parses ps_scan FASTA-style output read through $self->_readline.
#
# Each record starts with a header line beginning with '>' that must
# contain "ID/START-END : ACCESSION"; the lines that follow, up to the next
# header, are concatenated as the matched sequence.  One
# Bio::SeqFeature::FeaturePair is built per record and appended to
# $self->{_matches}.
#
# Throws "ERR:\t[line]" when a header line does not have the expected
# layout.
sub _parse_fasta {
    my ($self) = @_;
    my @matches;
    my $fp;          # FeaturePair of the record currently being read
    my $seq = '';    # sequence text collected for that record

    # A lexical line variable is used so the caller's $_ is not clobbered.
    while (defined(my $line = $self->_readline)) {
        # Strip only a real line ending.  chop() would also eat the last
        # residue when the final line has no trailing newline.
        $line =~ s/\r?\n\z//;

        if ($line =~ /^\>([^>]+)/) {
            my $fasta_head = $1;
            if ($fasta_head =~ m{([^/]+)/(\d+)-(\d+)\s+:\s+(\S+)}) {
                my ($q_id, $q_start, $q_end, $h_id) = ($1, $2, $3, $4);

                # A new header closes the previous record, if there is one.
                if (defined $fp) {
                    $self->_attach_seq($seq, $fp);
                    push @matches, $fp;
                }

                $fp = Bio::SeqFeature::FeaturePair->new(
                    -feature1 => Bio::SeqFeature::Generic->new(
                        -seq_id => $q_id,
                        -start  => $q_start,
                        -end    => $q_end
                    ),
                    -feature2 => Bio::SeqFeature::Generic->new(
                        -seq_id => $h_id,
                        -start  => 0,
                        -end    => 0
                    )
                );
                $seq = '';
            }
            else {
                $self->throw("ERR:\t\[$line\]");
            }
        }
        else {
            # Sequence line: accumulate it for the current record.
            $seq .= $line;
        }
    }

    # Flush the last record, which has no following header to close it.
    if (defined $fp) {
        $self->_attach_seq($seq, $fp);
        push @matches, $fp;
    }

    push @{ $self->{_matches} }, @matches;
}
|
189
|
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
# Attaches the matched sequence text to feature1 of the given FeaturePair.
#
# The text is left-padded with (start - 1) 'X' characters before it is
# wrapped in a Bio::Seq, so that the matched text begins at position
# $fp->start of the attached sequence.  Does nothing when $fp is undefined.
sub _attach_seq {
    my ($self, $seq, $fp) = @_;
    if (defined $fp) {
        my $padding = 'X' x ($fp->start - 1);
        my $padded  = Bio::Seq->new(-seq => $padding . $seq);
        $fp->feature1->attach_seq($padded);
    }
}
|
200
|
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
1; |