line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#----------------------------------------------------------------------------- |
2
|
|
|
|
|
|
|
# PACKAGE : Bio::Tools::Sigcleave |
3
|
|
|
|
|
|
|
# AUTHOR : Chris Dagdigian, dag@sonsorol.org |
4
|
|
|
|
|
|
|
# CREATED : Jan 28 1999 |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Copyright (c) 1997-9 bioperl, Chris Dagdigian and others. All Rights Reserved. |
7
|
|
|
|
|
|
|
# This module is free software; you can redistribute it and/or |
8
|
|
|
|
|
|
|
# modify it under the same terms as Perl itself. |
9
|
|
|
|
|
|
|
# |
10
|
|
|
|
|
|
|
# _History_ |
11
|
|
|
|
|
|
|
# |
12
|
|
|
|
|
|
|
# Object framework ripped from Steve Chervits's SeqPattern.pm |
13
|
|
|
|
|
|
|
# |
14
|
|
|
|
|
|
|
# Core EGCG Sigcleave emulation from perl code developed by |
15
|
|
|
|
|
|
|
# Danh Nguyen & Kamalakar Gulukota which itself was based |
16
|
|
|
|
|
|
|
# loosely on Colgrove's signal.c program. |
17
|
|
|
|
|
|
|
# |
18
|
|
|
|
|
|
|
# The overall idea is to replicate the output of the sigcleave |
19
|
|
|
|
|
|
|
# program which was distributed with the EGCG extension to the GCG sequence |
20
|
|
|
|
|
|
|
# analysis package. There is also an accessor method for just getting at |
21
|
|
|
|
|
|
|
# the raw results. |
22
|
|
|
|
|
|
|
# |
23
|
|
|
|
|
|
|
#----------------------------------------------------------------------------- |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
=head1 NAME |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
Bio::Tools::Sigcleave - Bioperl object for sigcleave analysis |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
=head1 SYNOPSIS |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=head2 Object Creation |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
use Bio::Tools::Sigcleave (); |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# to keep the module backwar compatible, you can pass it a sequence string, but |
36
|
|
|
|
|
|
|
# there recommended say is to pass it a Seq object |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
# this works |
39
|
|
|
|
|
|
|
$seq = "MVLLLILSVLLLKEDVRGSAQSSERRVVAHMPGDIIIGALFSVHHQPTVDKVHERKCGAVREQYGI"; |
40
|
|
|
|
|
|
|
$sig = Bio::Tools::Sigcleave->new(-seq => $seq, |
41
|
|
|
|
|
|
|
-type => 'protein', |
42
|
|
|
|
|
|
|
-threshold=>'3.5', |
43
|
|
|
|
|
|
|
); |
44
|
|
|
|
|
|
|
# but you do: |
45
|
|
|
|
|
|
|
$seqobj = Bio::PrimarySeq->new(-seq => $seq); |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
$sig = Bio::Tools::Sigcleave->new(-seq => $seqobj, |
48
|
|
|
|
|
|
|
-threshold=>'3.5', |
49
|
|
|
|
|
|
|
); |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
# now you can detect procaryotic signal sequences as well as eucaryotic |
52
|
|
|
|
|
|
|
$sig->matrix('eucaryotic'); # or 'procaryotic' |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
=head2 Object Methods & Accessors |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
# you can use this method to fine tune the threshod before printing out the results |
58
|
|
|
|
|
|
|
$sig->result_count: |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
%raw_results = $sig->signals; |
61
|
|
|
|
|
|
|
$formatted_output = $sig->pretty_print; |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
=head1 DESCRIPTION |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
"Sigcleave" was a program distributed as part of the free EGCG add-on |
66
|
|
|
|
|
|
|
to earlier versions of the GCG Sequence Analysis package. A new |
67
|
|
|
|
|
|
|
implementation of the algorithm is now part of EMBOSS package. |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
From the EGCG documentation: |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
SigCleave uses the von Heijne method to locate signal sequences, and |
72
|
|
|
|
|
|
|
to identify the cleavage site. The method is 95% accurate in |
73
|
|
|
|
|
|
|
resolving signal sequences from non-signal sequences with a cutoff |
74
|
|
|
|
|
|
|
score of 3.5, and 75-80% accurate in identifying the cleavage |
75
|
|
|
|
|
|
|
site. The program reports all hits above a minimum value. |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
The EGCG Sigcleave program was written by Peter Rice (E-mail: |
78
|
|
|
|
|
|
|
pmr@sanger.ac.uk Post: Informatics Division, The Sanger Centre, |
79
|
|
|
|
|
|
|
Wellcome Trust Genome Campus, Hinxton, Cambs, CB10 1SA, UK). |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
Since EGCG is no longer distributed for the latest versions of GCG, |
82
|
|
|
|
|
|
|
this code was developed to emulate the output of the original program |
83
|
|
|
|
|
|
|
as much as possible for those who lost access to sigcleave when |
84
|
|
|
|
|
|
|
upgrading to newer versions of GCG. |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
There are 2 accessor methods for this object. "signals" will return a |
87
|
|
|
|
|
|
|
perl associative array containing the sigcleave scores keyed by amino |
88
|
|
|
|
|
|
|
acid position. "pretty_print" returns a formatted string similar to |
89
|
|
|
|
|
|
|
the output of the original sigcleave utility. |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
In both cases, the "threshold" setting controls the score reporting |
92
|
|
|
|
|
|
|
level. If no value for threshold is passed in by the user, the code |
93
|
|
|
|
|
|
|
defaults to a reporting value of 3.5. |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
In this implemntation the accessor will never return any |
96
|
|
|
|
|
|
|
score/position pair which does not meet the threshold limit. This is |
97
|
|
|
|
|
|
|
the slightly different from the behaviour of the 8.1 EGCG sigcleave |
98
|
|
|
|
|
|
|
program which will report the highest of the under-threshold results |
99
|
|
|
|
|
|
|
if nothing else is found. |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
Example of pretty_print output: |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
SIGCLEAVE of sigtest from: 1 to 146 |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
Report scores over 3.5 |
107
|
|
|
|
|
|
|
Maximum score 4.9 at residue 131 |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
Sequence: FVILAAMSIQGSA-NLQTQWKSTASLALET |
110
|
|
|
|
|
|
|
| (signal) | (mature peptide) |
111
|
|
|
|
|
|
|
118 131 |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
Other entries above 3.5 |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
Maximum score 3.7 at residue 112 |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
Sequence: CSRQLFGWLFCKV-HPGAIVFVILAAMSIQGSANLQTQWKSTASLALET |
118
|
|
|
|
|
|
|
| (signal) | (mature peptide) |
119
|
|
|
|
|
|
|
99 112 |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=head1 FEEDBACK |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
When updating and maintaining a module, it helps to know that people |
125
|
|
|
|
|
|
|
are actually using it. Let us know if you find a bug, think this code |
126
|
|
|
|
|
|
|
is useful or have any improvements/features to suggest. |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
=head2 Support |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
I |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
135
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
136
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
137
|
|
|
|
|
|
|
with code and data examples if at all possible. |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
=head2 Reporting Bugs |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
142
|
|
|
|
|
|
|
the bugs and their resolution. Bug reports can be submitted via the |
143
|
|
|
|
|
|
|
web: |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=head1 AUTHOR |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
Chris Dagdigian, dag-at-sonsorol.org & others |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
=head1 CONTRIBUTORS |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
Heikki Lehvaslaiho, heikki-at-bioperl-dot-org |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
=head1 VERSION |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
Bio::Tools::Sigcleave, $Id$ |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
=head1 COPYRIGHT |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
Copyright (c) 1999 Chris Dagdigian & others. All Rights Reserved. |
162
|
|
|
|
|
|
|
This module is free software; you can redistribute it and/or modify it |
163
|
|
|
|
|
|
|
under the same terms as Perl itself. |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
=head1 REFERENCES / SEE ALSO |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
von Heijne G. (1986) "A new method for predicting signal sequences |
168
|
|
|
|
|
|
|
cleavage sites." Nucleic Acids Res. 14, 4683-4690. |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
von Heijne G. (1987) in "Sequence Analysis in Molecular Biology: |
171
|
|
|
|
|
|
|
Treasure Trove or Trivial Pursuit" (Acad. Press, (1987), 113-117). |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
=head1 APPENDIX |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
The following documentation describes the various functions |
177
|
|
|
|
|
|
|
contained in this module. Some functions are for internal |
178
|
|
|
|
|
|
|
use and are not meant to be called by the user; they are |
179
|
|
|
|
|
|
|
preceded by an underscore ("_"). |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
=cut |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
# |
184
|
|
|
|
|
|
|
## |
185
|
|
|
|
|
|
|
### |
186
|
|
|
|
|
|
|
#### END of main POD documentation. |
187
|
|
|
|
|
|
|
### |
188
|
|
|
|
|
|
|
## |
189
|
|
|
|
|
|
|
# |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
package Bio::Tools::Sigcleave; |
192
|
|
|
|
|
|
|
|
193
|
1
|
|
|
1
|
|
728
|
use Bio::PrimarySeq; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
23
|
|
194
|
|
|
|
|
|
|
|
195
|
1
|
|
|
1
|
|
3
|
use base qw(Bio::Root::Root); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
55
|
|
196
|
1
|
|
|
1
|
|
4
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
16
|
|
197
|
1
|
|
|
1
|
|
3
|
use vars qw ($ID %WeightTable_euc %WeightTable_pro ); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
1223
|
|
198
|
|
|
|
|
|
|
$ID = 'Bio::Tools::Sigcleave'; |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
%WeightTable_euc = ( |
201
|
|
|
|
|
|
|
#Sample: 161 aligned sequences |
202
|
|
|
|
|
|
|
# R -13 -12 -11 -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 +1 +2 Expect |
203
|
|
|
|
|
|
|
'A' => [16, 13, 14, 15, 20, 18, 18, 17, 25, 15, 47, 6, 80, 18, 6, 14.5], |
204
|
|
|
|
|
|
|
'C' => [ 3, 6, 9, 7, 9, 14, 6, 8, 5, 6, 19, 3, 9, 8, 3, 4.5], |
205
|
|
|
|
|
|
|
'D' => [ 0, 0, 0, 0, 0, 0, 0, 0, 5, 3, 0, 5, 0, 10, 11, 8.9], |
206
|
|
|
|
|
|
|
'E' => [ 0, 0, 0, 1, 0, 0, 0, 0, 3, 7, 0, 7, 0, 13, 14, 10.0], |
207
|
|
|
|
|
|
|
'F' => [13, 9, 11, 11, 6, 7, 18, 13, 4, 5, 0, 13, 0, 6, 4, 5.6], |
208
|
|
|
|
|
|
|
'G' => [ 4, 4, 3, 6, 3, 13, 3, 2, 19, 34, 5, 7, 39, 10, 7, 12.1], |
209
|
|
|
|
|
|
|
'H' => [ 0, 0, 0, 0, 0, 1, 1, 0, 5, 0, 0, 6, 0, 4, 2, 3.4], |
210
|
|
|
|
|
|
|
'I' => [15, 15, 8, 6, 11, 5, 4, 8, 5, 1, 10, 5, 0, 8, 7, 7.4], |
211
|
|
|
|
|
|
|
'K' => [ 0, 0, 0, 1, 0, 0, 1, 0, 0, 4, 0, 2, 0, 11, 9, 11.3], |
212
|
|
|
|
|
|
|
'L' => [71, 68, 72, 79, 78, 45, 64, 49, 10, 23, 8, 20, 1, 8, 4, 12.1], |
213
|
|
|
|
|
|
|
'M' => [ 0, 3, 7, 4, 1, 6, 2, 2, 0, 0, 0, 1, 0, 1, 2, 2.7], |
214
|
|
|
|
|
|
|
'N' => [ 0, 1, 0, 1, 1, 0, 0, 0, 3, 3, 0, 10, 0, 4, 7, 7.1], |
215
|
|
|
|
|
|
|
'P' => [ 2, 0, 2, 0, 0, 4, 1, 8, 20, 14, 0, 1, 3, 0, 22, 7.4], |
216
|
|
|
|
|
|
|
'Q' => [ 0, 0, 0, 1, 0, 6, 1, 0, 10, 8, 0, 18, 3, 19, 10, 6.3], |
217
|
|
|
|
|
|
|
'R' => [ 2, 0, 0, 0, 0, 1, 0, 0, 7, 4, 0, 15, 0, 12, 9, 7.6], |
218
|
|
|
|
|
|
|
'S' => [ 9, 3, 8, 6, 13, 10, 15, 16, 26, 11, 23, 17, 20, 15, 10, 11.4], |
219
|
|
|
|
|
|
|
'T' => [ 2, 10, 5, 4, 5, 13, 7, 7, 12, 6, 17, 8, 6, 3, 10, 9.7], |
220
|
|
|
|
|
|
|
'V' => [20, 25, 15, 18, 13, 15, 11, 27, 0, 12, 32, 3, 0, 8, 17, 11.1], |
221
|
|
|
|
|
|
|
'W' => [ 4, 3, 3, 1, 1, 2, 6, 3, 1, 3, 0, 9, 0, 2, 0, 1.8], |
222
|
|
|
|
|
|
|
'Y' => [ 0, 1, 4, 0, 0, 1, 3, 1, 1, 2, 0, 5, 0, 1, 7, 5.6] |
223
|
|
|
|
|
|
|
); |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
%WeightTable_pro = ( |
226
|
|
|
|
|
|
|
#Sample: 36 aligned sequences |
227
|
|
|
|
|
|
|
# R -13 -12 -11 -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 +1 +2 Expect |
228
|
|
|
|
|
|
|
'A' => [0, 8, 8, 9, 6, 7, 5, 6, 7, 7, 24, 2, 31, 18, 4, 3.2], |
229
|
|
|
|
|
|
|
'C' => [1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1.0], |
230
|
|
|
|
|
|
|
'D' => [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 8, 2.0], |
231
|
|
|
|
|
|
|
'E' => [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 4, 8, 2.2], |
232
|
|
|
|
|
|
|
'F' => [2, 4, 3, 4, 1, 1, 8, 0, 4, 1, 0, 7, 0, 1, 0, 1.3], |
233
|
|
|
|
|
|
|
'G' => [4, 2, 2, 2, 3, 5, 2, 4, 2, 2, 0, 2, 2, 1, 0, 2.7], |
234
|
|
|
|
|
|
|
'H' => [0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 7, 0, 1, 0, 0.8], |
235
|
|
|
|
|
|
|
'I' => [3, 1, 5, 1, 5, 0, 1, 3, 0, 0, 0, 0, 0, 0, 2, 1.7], |
236
|
|
|
|
|
|
|
'K' => [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 3, 0, 2.5], |
237
|
|
|
|
|
|
|
'L' => [8, 11, 9, 8, 9, 13, 1, 0, 2, 2, 1, 2, 0, 0, 1, 2.7], |
238
|
|
|
|
|
|
|
'M' => [0, 2, 1, 1, 3, 2, 3, 0, 1, 2, 0, 4, 0, 0, 1, 0.6], |
239
|
|
|
|
|
|
|
'N' => [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 3, 0, 1, 4, 1.6], |
240
|
|
|
|
|
|
|
'P' => [0, 1, 1, 1, 1, 1, 2, 3, 5, 2, 0, 0, 0, 0, 5, 1.7], |
241
|
|
|
|
|
|
|
'Q' => [0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 3, 0, 0, 1, 1.4], |
242
|
|
|
|
|
|
|
'R' => [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1.7], |
243
|
|
|
|
|
|
|
'S' => [1, 0, 1, 4, 4, 1, 5, 15, 5, 8, 5, 2, 2, 0, 0, 2.6], |
244
|
|
|
|
|
|
|
'T' => [2, 0, 4, 2, 2, 2, 2, 2, 5, 1, 3, 0, 1, 1, 2, 2.2], |
245
|
|
|
|
|
|
|
'V' => [5, 7, 1, 3, 1, 4, 7, 0, 0, 4, 3, 0, 0, 2, 0, 2.5], |
246
|
|
|
|
|
|
|
'W' => [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0.4], |
247
|
|
|
|
|
|
|
'Y' => [0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 1, 0, 0, 0, 1.3] |
248
|
|
|
|
|
|
|
); |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
## |
252
|
|
|
|
|
|
|
## Now we calculate the _real_ values for the weight tables |
253
|
|
|
|
|
|
|
## |
254
|
|
|
|
|
|
|
## |
255
|
|
|
|
|
|
|
## yeah yeah yeah there is lots of math here that gets repeated |
256
|
|
|
|
|
|
|
## every single time a sigcleave object gets created. This is |
257
|
|
|
|
|
|
|
## a quick hack to make sure that we get the scores as accurate as |
258
|
|
|
|
|
|
|
## possible. Need all those significant digits.... |
259
|
|
|
|
|
|
|
## |
260
|
|
|
|
|
|
|
## suggestions for speedup aproaches welcome |
261
|
|
|
|
|
|
|
## |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
foreach my $i (keys %WeightTable_euc) { |
265
|
|
|
|
|
|
|
my $expected = $WeightTable_euc{$i}[15]; |
266
|
|
|
|
|
|
|
if ($expected > 0) { |
267
|
|
|
|
|
|
|
for (my $j=0; $j<16; $j++) { |
268
|
|
|
|
|
|
|
if ($WeightTable_euc{$i}[$j] == 0) { |
269
|
|
|
|
|
|
|
$WeightTable_euc{$i}[$j] = 1; |
270
|
|
|
|
|
|
|
if ($j == 10 || $j == 12) { |
271
|
|
|
|
|
|
|
$WeightTable_euc{$i}[$j] = 1.e-10; |
272
|
|
|
|
|
|
|
} |
273
|
|
|
|
|
|
|
} |
274
|
|
|
|
|
|
|
$WeightTable_euc{$i}[$j] = log($WeightTable_euc{$i}[$j]/$expected); |
275
|
|
|
|
|
|
|
} |
276
|
|
|
|
|
|
|
} |
277
|
|
|
|
|
|
|
} |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
foreach my $i (keys %WeightTable_pro) { |
281
|
|
|
|
|
|
|
my $expected = $WeightTable_pro{$i}[15]; |
282
|
|
|
|
|
|
|
if ($expected > 0) { |
283
|
|
|
|
|
|
|
for (my $j=0; $j<16; $j++) { |
284
|
|
|
|
|
|
|
if ($WeightTable_pro{$i}[$j] == 0) { |
285
|
|
|
|
|
|
|
$WeightTable_pro{$i}[$j] = 1; |
286
|
|
|
|
|
|
|
if ($j == 10 || $j == 12) { |
287
|
|
|
|
|
|
|
$WeightTable_pro{$i}[$j] = 1.e-10; |
288
|
|
|
|
|
|
|
} |
289
|
|
|
|
|
|
|
} |
290
|
|
|
|
|
|
|
$WeightTable_pro{$i}[$j] = log($WeightTable_pro{$i}[$j]/$expected); |
291
|
|
|
|
|
|
|
} |
292
|
|
|
|
|
|
|
} |
293
|
|
|
|
|
|
|
} |
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
##################################################################################### |
296
|
|
|
|
|
|
|
## CONSTRUCTOR ## |
297
|
|
|
|
|
|
|
##################################################################################### |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
sub new { |
301
|
2
|
|
|
2
|
1
|
5
|
my ($class, @args) = @_; |
302
|
|
|
|
|
|
|
|
303
|
2
|
|
|
|
|
7
|
my $self = $class->SUPER::new(@args); |
304
|
|
|
|
|
|
|
#my $self = Bio::Seq->new(@args); |
305
|
|
|
|
|
|
|
|
306
|
2
|
|
|
|
|
8
|
my ($seq, $threshold, $matrix) = $self->_rearrange([qw(SEQ THRESHOLD MATRIX)],@args); |
307
|
|
|
|
|
|
|
|
308
|
2
|
100
|
|
|
|
7
|
defined $threshold && $self->threshold($threshold); |
309
|
2
|
50
|
|
|
|
4
|
$matrix && $self->matrix($matrix); |
310
|
2
|
100
|
|
|
|
4
|
$seq && $self->seq($seq); |
311
|
|
|
|
|
|
|
|
312
|
2
|
|
|
|
|
5
|
return $self; |
313
|
|
|
|
|
|
|
} |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
=head1 threshold |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
Title : threshold |
320
|
|
|
|
|
|
|
Usage : $value = $self->threshold |
321
|
|
|
|
|
|
|
Purpose : Read/write method sigcleave score reporting threshold. |
322
|
|
|
|
|
|
|
Returns : float. |
323
|
|
|
|
|
|
|
Argument : new value, float |
324
|
|
|
|
|
|
|
Throws : on non-number argument |
325
|
|
|
|
|
|
|
Comments : defaults to 3.5 |
326
|
|
|
|
|
|
|
See Also : n/a |
327
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
=cut |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
#---------------- |
331
|
|
|
|
|
|
|
sub threshold { |
332
|
|
|
|
|
|
|
#---------------- |
333
|
8
|
|
|
8
|
0
|
9
|
my ($self, $value) = @_; |
334
|
8
|
100
|
|
|
|
16
|
if( defined $value) { |
335
|
2
|
50
|
|
|
|
12
|
$self->throw("I need a number, not [$value]") |
336
|
|
|
|
|
|
|
if $value !~ /^[+-]?[\d\.]+$/; |
337
|
2
|
|
|
|
|
3
|
$self->{'_threshold'} = $value; |
338
|
|
|
|
|
|
|
} |
339
|
8
|
|
100
|
|
|
22
|
return $self->{'_threshold'} || 3.5 ; |
340
|
|
|
|
|
|
|
} |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
=head1 matrix |
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
Title : matrix |
345
|
|
|
|
|
|
|
Usage : $value = $self->matrix('procaryotic') |
346
|
|
|
|
|
|
|
Purpose : Read/write method sigcleave matrix. |
347
|
|
|
|
|
|
|
Returns : float. |
348
|
|
|
|
|
|
|
Argument : new value: 'eucaryotic' or 'procaryotic' |
349
|
|
|
|
|
|
|
Throws : on non-number argument |
350
|
|
|
|
|
|
|
Comments : defaults to 3.5 |
351
|
|
|
|
|
|
|
See Also : n/a |
352
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
=cut |
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
#---------------- |
356
|
|
|
|
|
|
|
sub matrix { |
357
|
|
|
|
|
|
|
#---------------- |
358
|
7
|
|
|
7
|
0
|
9
|
my ($self, $value) = @_; |
359
|
7
|
100
|
|
|
|
14
|
if( defined $value) { |
360
|
2
|
50
|
66
|
|
|
8
|
$self->throw("I need 'eucaryotic' or 'procaryotic', not [$value]") |
361
|
|
|
|
|
|
|
unless $value eq 'eucaryotic' or $value eq 'procaryotic'; |
362
|
2
|
|
|
|
|
3
|
$self->{'_matrix'} = $value; |
363
|
|
|
|
|
|
|
} |
364
|
7
|
|
100
|
|
|
25
|
return $self->{'_matrix'} || 'eucaryotic' ; |
365
|
|
|
|
|
|
|
} |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
=head1 seq |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
Title : seq |
370
|
|
|
|
|
|
|
Usage : $value = $self->seq($seq_object) |
371
|
|
|
|
|
|
|
Purpose : set the Seq object to be used |
372
|
|
|
|
|
|
|
Returns : Seq object |
373
|
|
|
|
|
|
|
Argument : protein sequence or Seq object |
374
|
|
|
|
|
|
|
See Also : n/a |
375
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
=cut |
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
#---------------- |
379
|
|
|
|
|
|
|
sub seq { |
380
|
|
|
|
|
|
|
#---------------- |
381
|
14
|
|
|
14
|
0
|
12
|
my ($self, $value) = @_; |
382
|
14
|
100
|
|
|
|
24
|
if( defined $value) { |
383
|
2
|
100
|
|
|
|
15
|
if ($value->isa('Bio::PrimarySeqI')) { |
384
|
1
|
|
|
|
|
3
|
$self->{'_seq'} = $value; |
385
|
|
|
|
|
|
|
} else { |
386
|
1
|
|
|
|
|
5
|
$self->{'_seq'} = Bio::PrimarySeq->new(-seq => $value, |
387
|
|
|
|
|
|
|
-alphabet => 'protein'); |
388
|
|
|
|
|
|
|
} |
389
|
|
|
|
|
|
|
} |
390
|
14
|
|
|
|
|
31
|
return $self->{'_seq'}; |
391
|
|
|
|
|
|
|
} |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
=head1 _Analyze |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
Title : _Analyze |
396
|
|
|
|
|
|
|
Usage : N/A This is an internal method. Not meant to be called from outside |
397
|
|
|
|
|
|
|
: the package |
398
|
|
|
|
|
|
|
: |
399
|
|
|
|
|
|
|
Purpose : calculates sigcleave score and amino acid position for the |
400
|
|
|
|
|
|
|
: given protein sequence. The score reporting threshold can |
401
|
|
|
|
|
|
|
: be adjusted by passing in the "threshold" parameter during |
402
|
|
|
|
|
|
|
: object construction. If no threshold is passed in, the code |
403
|
|
|
|
|
|
|
: defaults to reporting any scores equal to or above 3.5 |
404
|
|
|
|
|
|
|
: |
405
|
|
|
|
|
|
|
Returns : nothing. results are added to the object |
406
|
|
|
|
|
|
|
Argument : none. |
407
|
|
|
|
|
|
|
Throws : nothing. |
408
|
|
|
|
|
|
|
Comments : nothing. |
409
|
|
|
|
|
|
|
See Also : n/a |
410
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
=cut |
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
#---------------- |
414
|
|
|
|
|
|
|
sub _Analyze { |
415
|
|
|
|
|
|
|
#---------------- |
416
|
4
|
|
|
4
|
|
6
|
my($self) = @_; |
417
|
|
|
|
|
|
|
|
418
|
4
|
|
|
|
|
2
|
my %signals; |
419
|
4
|
|
|
|
|
6
|
my @hitWeight = (); |
420
|
4
|
|
|
|
|
4
|
my @hitsort = (); |
421
|
4
|
|
|
|
|
5
|
my @hitpos = (); |
422
|
4
|
|
|
|
|
3
|
my $maxSite = ""; |
423
|
4
|
|
|
|
|
3
|
my $seqPos = ""; |
424
|
4
|
|
|
|
|
4
|
my $istart = ""; |
425
|
4
|
|
|
|
|
4
|
my $iend = ""; |
426
|
4
|
|
|
|
|
4
|
my $icol = ""; |
427
|
4
|
|
|
|
|
3
|
my $i = ""; |
428
|
4
|
|
|
|
|
3
|
my $weight = ""; |
429
|
4
|
|
|
|
|
4
|
my $k = 0; |
430
|
4
|
|
|
|
|
2
|
my $c = 0; |
431
|
4
|
|
|
|
|
5
|
my $seqBegin = 0; |
432
|
4
|
|
|
|
|
2
|
my $pVal = -13; |
433
|
4
|
|
|
|
|
3
|
my $nVal = 2; |
434
|
4
|
|
|
|
|
4
|
my $nHits = 0; |
435
|
4
|
|
|
|
|
6
|
my $seqEnd = $self->seq->length; |
436
|
4
|
|
|
|
|
8
|
my $pep = $self->seq->seq; |
437
|
4
|
|
|
|
|
6
|
my $minWeight = $self->threshold; |
438
|
4
|
|
|
|
|
6
|
my $matrix = $self->matrix; |
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
## The weight table is keyed by UPPERCASE letters so we uppercase |
441
|
|
|
|
|
|
|
## the pep string because we don't want to alter the actual object |
442
|
|
|
|
|
|
|
## sequence. |
443
|
|
|
|
|
|
|
|
444
|
4
|
|
|
|
|
6
|
$pep =~ tr/a-z/A-Z/; |
445
|
|
|
|
|
|
|
|
446
|
4
|
|
|
|
|
9
|
for ($seqPos = $seqBegin; $seqPos < $seqEnd; $seqPos++) { |
447
|
264
|
100
|
|
|
|
269
|
$istart = (0 > $seqPos + $pVal)? 0 : $seqPos + $pVal; |
448
|
264
|
100
|
|
|
|
268
|
$iend = ($seqPos + $nVal - 1 < $seqEnd)? $seqPos + $nVal - 1 : $seqEnd; |
449
|
264
|
|
|
|
|
189
|
$icol= $iend - $istart + 1; |
450
|
264
|
|
|
|
|
135
|
$weight = 0.00; |
451
|
264
|
|
|
|
|
310
|
for ($k=0; $k<$icol; $k++) { |
452
|
3596
|
|
|
|
|
2567
|
$c = substr($pep, $istart + $k, 1); |
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
## CD: The if(defined) stuff was put in here because Sigcleave.pm |
455
|
|
|
|
|
|
|
## CD: kept getting warnings about undefined vals during 'make test' ... |
456
|
3596
|
50
|
|
|
|
2911
|
if ($matrix eq 'eucaryotic') { |
457
|
3596
|
100
|
|
|
|
6450
|
$weight += $WeightTable_euc{$c}[$k] if defined $WeightTable_euc{$c}[$k]; |
458
|
|
|
|
|
|
|
} else { |
459
|
0
|
0
|
|
|
|
0
|
$weight += $WeightTable_pro{$c}[$k] if defined $WeightTable_pro{$c}[$k]; |
460
|
|
|
|
|
|
|
} |
461
|
|
|
|
|
|
|
} |
462
|
264
|
100
|
|
|
|
521
|
$signals{$seqPos+1} = sprintf ("%.1f", $weight) if $weight >= $minWeight; |
463
|
|
|
|
|
|
|
} |
464
|
4
|
|
|
|
|
33
|
$self->{"_signal_scores"} = { %signals }; |
465
|
|
|
|
|
|
|
} |
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
=head1 signals |
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
Title : signals |
471
|
|
|
|
|
|
|
Usage : %sigcleave_results = $sig->signals; |
472
|
|
|
|
|
|
|
: |
473
|
|
|
|
|
|
|
Purpose : Accessor method for sigcleave results |
474
|
|
|
|
|
|
|
: |
475
|
|
|
|
|
|
|
Returns : Associative array. The key value represents the amino acid position |
476
|
|
|
|
|
|
|
: and the value represents the score. Only scores that |
477
|
|
|
|
|
|
|
: are greater than or equal to the THRESHOLD value are reported. |
478
|
|
|
|
|
|
|
: |
479
|
|
|
|
|
|
|
Argument : none. |
480
|
|
|
|
|
|
|
Throws : none. |
481
|
|
|
|
|
|
|
Comments : none. |
482
|
|
|
|
|
|
|
See Also : THRESHOLD |
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
=cut |
485
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
#---------------- |
487
|
|
|
|
|
|
|
sub signals { |
488
|
|
|
|
|
|
|
#---------------- |
489
|
3
|
|
|
3
|
0
|
4
|
my $self = shift; |
490
|
3
|
|
|
|
|
3
|
my %results; |
491
|
|
|
|
|
|
|
my $position; |
492
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
# do the calculations |
494
|
3
|
|
|
|
|
5
|
$self->_Analyze; |
495
|
|
|
|
|
|
|
|
496
|
3
|
|
|
|
|
6
|
foreach $position ( sort keys %{ $self->{'_signal_scores'} } ) { |
|
3
|
|
|
|
|
17
|
|
497
|
15
|
|
|
|
|
16
|
$results{$position} = $self->{'_signal_scores'}{$position}; |
498
|
|
|
|
|
|
|
} |
499
|
3
|
|
|
|
|
24
|
return %results; |
500
|
|
|
|
|
|
|
} |
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
=head1 result_count |
504
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
Title : result_count |
506
|
|
|
|
|
|
|
Usage : $count = $sig->result_count; |
507
|
|
|
|
|
|
|
: |
508
|
|
|
|
|
|
|
Purpose : Accessor method for sigcleave results |
509
|
|
|
|
|
|
|
: |
510
|
|
|
|
|
|
|
Returns : Integer, number of results above the threshold |
511
|
|
|
|
|
|
|
: |
512
|
|
|
|
|
|
|
Argument : none. |
513
|
|
|
|
|
|
|
Throws : none. |
514
|
|
|
|
|
|
|
Comments : none. |
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
See Also : THRESHOLD |
517
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
=cut |
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
#---------------- |
521
|
|
|
|
|
|
|
sub result_count { |
522
|
|
|
|
|
|
|
#---------------- |
523
|
1
|
|
|
1
|
0
|
2
|
my $self = shift; |
524
|
1
|
|
|
|
|
3
|
$self->_Analyze; |
525
|
1
|
|
|
|
|
3
|
return keys %{ $self->{'_signal_scores'} }; |
|
1
|
|
|
|
|
6
|
|
526
|
|
|
|
|
|
|
} |
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
=head1 pretty_print |
530
|
|
|
|
|
|
|
|
531
|
|
|
|
|
|
|
Title : pretty_print |
532
|
|
|
|
|
|
|
Usage : $output = $sig->pretty_print; |
533
|
|
|
|
|
|
|
: print $sig->pretty_print; |
534
|
|
|
|
|
|
|
: |
535
|
|
|
|
|
|
|
Purpose : Emulates the output of the EGCG Sigcleave |
536
|
|
|
|
|
|
|
: utility. |
537
|
|
|
|
|
|
|
: |
538
|
|
|
|
|
|
|
Returns : A formatted string. |
539
|
|
|
|
|
|
|
Argument : none. |
540
|
|
|
|
|
|
|
Throws : none. |
541
|
|
|
|
|
|
|
Comments : none. |
542
|
|
|
|
|
|
|
See Also : n/a |
543
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
=cut |
545
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
#---------------- |
547
|
|
|
|
|
|
|
sub pretty_print { |
548
|
|
|
|
|
|
|
#---------------- |
549
|
1
|
|
|
1
|
0
|
2
|
my $self = shift; |
550
|
1
|
|
|
|
|
1
|
my $pos; |
551
|
|
|
|
|
|
|
my $output; |
552
|
1
|
|
|
|
|
2
|
my $cnt = 1; |
553
|
1
|
|
|
|
|
10
|
my %results = $self->signals; |
554
|
1
|
|
|
|
|
3
|
my @hits = keys %results; |
555
|
1
|
|
|
|
|
2
|
my $hitcount = $#hits; $hitcount++; |
|
1
|
|
|
|
|
1
|
|
556
|
1
|
|
|
|
|
1
|
my $thresh = $self->threshold; |
557
|
1
|
|
50
|
|
|
2
|
my $seqlen = $self->seq->length || 0; |
558
|
1
|
|
50
|
|
|
4
|
my $name = $self->seq->id || 'NONAME'; |
559
|
1
|
|
|
|
|
2
|
my $pep = $self->seq->seq; |
560
|
1
|
|
|
|
|
2
|
$pep =~ tr/a-z/A-Z/; |
561
|
|
|
|
|
|
|
|
562
|
1
|
|
|
|
|
3
|
$output = "SIGCLEAVE of $name from: 1 to $seqlen\n\n"; |
563
|
|
|
|
|
|
|
|
564
|
1
|
50
|
|
|
|
2
|
if ($hitcount > 0) { |
565
|
1
|
|
|
|
|
2
|
$output .= "Report scores over $thresh\n"; |
566
|
1
|
|
|
|
|
4
|
foreach $pos ((sort { $results{$b} cmp $results{$a} } keys %results)) { |
|
5
|
|
|
|
|
6
|
|
567
|
5
|
|
|
|
|
5
|
my $start = $pos - 15; |
568
|
5
|
50
|
|
|
|
9
|
$start = 1 if $start < 1; |
569
|
5
|
|
|
|
|
7
|
my $sig = substr($pep,$start -1,$pos-$start ); |
570
|
|
|
|
|
|
|
|
571
|
5
|
|
|
|
|
20
|
$output .= sprintf ("Maximum score %1.1f at residue %3d\n",$results{$pos},$pos); |
572
|
5
|
|
|
|
|
2
|
$output .= "\n"; |
573
|
5
|
|
|
|
|
6
|
$output .= " Sequence: "; |
574
|
5
|
|
|
|
|
4
|
$output .= $sig; |
575
|
5
|
|
|
|
|
4
|
$output .= "-" x (15- length($sig)); |
576
|
5
|
|
|
|
|
5
|
$output .= "-"; |
577
|
5
|
|
|
|
|
6
|
$output .= substr($pep,$pos-1,50); |
578
|
5
|
|
|
|
|
2
|
$output .= "\n"; |
579
|
5
|
|
|
|
|
5
|
$output .= " " x 12; |
580
|
5
|
|
|
|
|
5
|
$output .= "| \(signal\) | \(mature peptide\)\n"; |
581
|
5
|
|
|
|
|
6
|
$output .= sprintf(" %3d %3d\n\n",$start,$pos); |
582
|
|
|
|
|
|
|
|
583
|
5
|
100
|
66
|
|
|
16
|
if (($hitcount > 1) && ($cnt == 1)) { |
584
|
1
|
|
|
|
|
2
|
$output .= " Other entries above $thresh\n\n"; |
585
|
|
|
|
|
|
|
} |
586
|
5
|
|
|
|
|
6
|
$cnt++; |
587
|
|
|
|
|
|
|
} |
588
|
|
|
|
|
|
|
} |
589
|
1
|
|
|
|
|
10
|
$output; |
590
|
|
|
|
|
|
|
} |
591
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
|
593
|
|
|
|
|
|
|
1; |
594
|
|
|
|
|
|
|
__END__ |