line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
|
2
|
|
|
|
|
|
|
=head1 NAME |
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
Bio::Tools::PrositeScan - Parser for ps_scan result |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
=head1 SYNOPSIS |
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
use Bio::Tools::PrositeScan; |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
my $factory = Bio::Tools::PrositeScan->new( |
11
|
|
|
|
|
|
|
-file => 'out.PrositeScan', |
12
|
|
|
|
|
|
|
-format => 'fasta' |
13
|
|
|
|
|
|
|
); |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
while(my $match = $factory->next_prediction){ |
16
|
|
|
|
|
|
|
# $match is a Bio::SeqFeature::FeaturePair |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
# Sequence ID |
19
|
|
|
|
|
|
|
my $seq_id = $match->seq_id; |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
# PROSITE accession number |
22
|
|
|
|
|
|
|
my $psac = $match->hseq_id; |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
# Coordinates |
25
|
|
|
|
|
|
|
my @coords = ( $match->start, $match->end ); |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
# Subsequence |
28
|
|
|
|
|
|
|
my $seq = $match->feature1->seq; |
29
|
|
|
|
|
|
|
} |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=head1 DESCRIPTION |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
This is a parser of the output of the ps_scan program. It takes either a file |
34
|
|
|
|
|
|
|
handle or a file name, and returns a L<Bio::SeqFeature::FeaturePair> object. |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
Note that the current implementation parses the entire file at once. |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
=head1 AUTHOR |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
Juguang Xiao, juguang@tll.org.sg |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
=head1 SEE ALSO |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
=over |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
=item * L<ps_scan software|ftp://ftp.expasy.org/databases/prosite/ps_scan> |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
=item * L<PROSITE User Manual|http://prosite.expasy.org/prosuser.html> |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
=back |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
=cut |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
# Let the code begin... |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
package Bio::Tools::PrositeScan; |
57
|
1
|
|
|
1
|
|
440
|
use vars qw(@FORMATS); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
42
|
|
58
|
1
|
|
|
1
|
|
4
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
17
|
|
59
|
1
|
|
|
1
|
|
338
|
use Bio::Seq; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
28
|
|
60
|
1
|
|
|
1
|
|
292
|
use Bio::SeqFeature::Generic; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
27
|
|
61
|
1
|
|
|
1
|
|
258
|
use Bio::SeqFeature::FeaturePair; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
31
|
|
62
|
|
|
|
|
|
|
|
63
|
1
|
|
|
1
|
|
4
|
use base qw(Bio::Root::Root Bio::Root::IO); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
596
|
|
64
|
|
|
|
|
|
|
# Names of the ps_scan output formats that new() accepts for -format
# (compared case-insensitively). Of these, only the fasta format has a
# parser in _parse; the others are accepted by new() but rejected on parse.
@FORMATS = qw(SCAN FASTA PSA MSA PFF MATCHLIST); |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=head2 new |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
Title : new |
69
|
|
|
|
|
|
|
Usage : Bio::Tools::PrositeScan->new(-file => 'out.PrositeScan'); |
70
|
|
|
|
|
|
|
Bio::Tools::PrositeScan->new(-fh => \*FH); |
71
|
|
|
|
|
|
|
Returns : L<Bio::Tools::PrositeScan> |
72
|
|
|
|
|
|
|
Args : -format => string representing the format type for the |
73
|
|
|
|
|
|
|
ps_scan output, REQUIRED |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
The C<-format> argument must currently be set to C<fasta> since this is the |
76
|
|
|
|
|
|
|
only parser implemented. This corresponds with using the ps_scan arguments |
77
|
|
|
|
|
|
|
C<-o fasta>. |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=cut |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
# Constructor.
#
# Builds the object through the parent class, initialises the I/O layer
# from the same argument list, and validates the mandatory -format value
# against @FORMATS (case-insensitively). The format is stored exactly as
# the caller supplied it.
#
# Throws "format needed" when -format is missing or false, and
# "Invalid format, [...]" when it is not one of @FORMATS.
sub new {
    my ($class, @args) = @_;
    my $self = $class->SUPER::new(@args);
    $self->_initialize_io(@args);

    my ($format) = $self->_rearrange([qw(FORMAT)], @args);
    $format || $self->throw("format needed");

    # Compare as plain strings instead of interpolating the caller's value
    # into a pattern: regex metacharacters in -format (for example '.*' or
    # an unbalanced '(') must neither match a format name by accident nor
    # crash the regex compiler.
    my $wanted = lc $format;
    unless (grep { lc($_) eq $wanted } @FORMATS) {
        $self->throw("Invalid format, [$format]");
    }

    $self->format($format);
    return $self;
}
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
# Combined getter/setter for the ps_scan output format name.
#
# With an argument, stores it in $self->{_format} (even if it is undef)
# and returns it; without one, returns the currently stored value.
sub format {
    my ($self, @new_value) = @_;
    if (@new_value) {
        $self->{_format} = $new_value[0];
    }
    return $self->{_format};
}
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
=head2 next_prediction |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
Title : next_prediction |
104
|
|
|
|
|
|
|
Usage : |
105
|
|
|
|
|
|
|
while($result = $factory->next_prediction){ |
106
|
|
|
|
|
|
|
; |
107
|
|
|
|
|
|
|
} |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
Returns : a Bio::SeqFeature::FeaturePair object where |
110
|
|
|
|
|
|
|
feature1 is the matched subsequence and |
111
|
|
|
|
|
|
|
feature2 is the PROSITE accession number. |
112
|
|
|
|
|
|
|
See L<Bio::SeqFeature::FeaturePair>. |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=cut |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
# Returns the next parsed match, or undef once the queue is exhausted.
#
# The input is parsed in full on the first call (see _parse); the
# resulting matches are queued in $self->{_matches} and handed out one
# per call, front first.
sub next_prediction {
    my $self = shift;

    if (!$self->_parsed) {
        $self->_parse;
        $self->_parsed(1);
    }

    return shift @{ $self->{_matches} };
}
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
# Alias for next_prediction; any extra arguments are not forwarded.
sub next_result {
    my ($self) = @_;
    return $self->next_prediction;
}
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
# One-way "already parsed" flag.
#
# Called with a true argument, sets $self->{_parsed} to 1. A false or
# missing argument never clears the flag. Always returns the current
# flag value (undef until first set).
sub _parsed {
    my ($self, @flag) = @_;
    $self->{_parsed} = 1 if @flag and $flag[0];
    return $self->{_parsed};
}
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
# Dispatches to the parser matching the configured format.
#
# Only the fasta format is implemented; any other format throws
# "the [...] parser has not been written".
sub _parse {
    my $self = shift;
    my $format = $self->format;

    # new() validates -format case-insensitively and stores it verbatim,
    # so 'FASTA' or 'Fasta' can legitimately arrive here; match the same
    # way instead of rejecting a format the constructor just accepted.
    if ($format =~ /^fasta$/i) {
        $self->_parse_fasta;
    }
    else {
        $self->throw("the [$format] parser has not been written");
    }
}
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
# Parses ps_scan output in fasta format (ps_scan -o fasta).
#
# Each header line is expected to look like
#     >SEQ_ID/START-END : HIT_ID ...
# and starts a new match; the lines that follow, up to the next header,
# are concatenated as that match's sequence. For every match a
# Bio::SeqFeature::FeaturePair is built whose feature1 carries SEQ_ID,
# START and END and whose feature2 carries HIT_ID with start and end 0.
# The collected sequence is attached through _attach_seq and the finished
# pairs are appended to $self->{_matches}.
#
# Throws "ERR:\t[line]" when a header line does not match the expected
# layout.
sub _parse_fasta {
    my ($self) = @_;
    my @matches;
    my $fp;     # feature pair currently being filled, if any
    my $seq;    # sequence text collected for $fp

    # A lexical line variable keeps the caller's global $_ untouched.
    while (defined(my $line = $self->_readline)) {
        # Strip only a real line terminator. chop would also eat the last
        # residue (or header character) of a final line that has no
        # trailing newline.
        $line =~ s/[\r\n]+\z//;

        if ($line =~ /^\>([^>]+)/) {
            my $fasta_head = $1;
            if ($fasta_head =~ /([^\/]+)\/(\d+)\-(\d+)\s+\:\s+(\S+)/) {
                my ($q_id, $q_start, $q_end, $h_id) = ($1, $2, $3, $4);

                # A new header closes the previous match.
                if (defined $fp) {
                    $self->_attach_seq($seq, $fp);
                    push @matches, $fp;
                }

                $fp = Bio::SeqFeature::FeaturePair->new(
                    -feature1 => Bio::SeqFeature::Generic->new(
                        -seq_id => $q_id,
                        -start  => $q_start,
                        -end    => $q_end
                    ),
                    -feature2 => Bio::SeqFeature::Generic->new(
                        -seq_id => $h_id,
                        -start  => 0,
                        -end    => 0
                    )
                );
                $seq = '';
            }
            else {
                $self->throw("ERR:\t\[$line\]");
            }
        }
        else {
            # Sequence line: accumulate it for the current match.
            $seq .= $line;
        }
    }

    # Flush the last match, which has no following header to close it.
    if (defined $fp) {
        $self->_attach_seq($seq, $fp);
        push @matches, $fp;
    }

    push @{ $self->{_matches} }, @matches;
}
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
# Attaches the matched sequence text to feature1 of a feature pair.
#
# The text is prefixed with (start - 1) 'X' characters before being
# wrapped in a Bio::Seq -- presumably so that the feature's start/end
# coordinates index correctly into the attached sequence (confirm against
# Bio::SeqFeature::Generic). Does nothing when $fp is undefined.
sub _attach_seq {
    my ($self, $seq, $fp) = @_;
    if (defined $fp) {
        my $padded = 'X' x ($fp->start - 1);
        $padded .= $seq;
        my $seq_obj = Bio::Seq->new(-seq => $padded);
        $fp->feature1->attach_seq($seq_obj);
    }
}
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
1; |