File Coverage

Bio/Tools/pICalculator.pm
Criterion Covered Total %
statement 56 61 91.8
branch 11 20 55.0
condition 3 8 37.5
subroutine 13 13 100.0
pod 5 7 71.4
total 88 109 80.7


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Tools::pICalculator
3             #
4             # Copyright (c) 2002, Merck & Co. Inc. All Rights Reserved.
5             #
6             #
7             # You may distribute this module under the same terms as perl itself
8              
9             # POD documentation - main docs before the code
10              
11             =head1 NAME
12              
13             Bio::Tools::pICalculator - calculate the isoelectric point of a protein
14              
15             =head1 DESCRIPTION
16              
17             Calculates the isoelectric point of a protein, the pH at which there
18             is no overall charge on the protein. Calculates the charge on a protein
19             at a given pH. Can use built-in sets of pK values or custom pK sets.
20              
21             =head1 SYNOPSIS
22              
23             use Bio::Tools::pICalculator;
24             use Bio::SeqIO;
25              
26             my $in = Bio::SeqIO->new( -fh => \*STDIN ,
27             -format => 'Fasta' );
28              
29             my $calc = Bio::Tools::pICalculator->new(-places => 2,
30             -pKset => 'EMBOSS');
31              
32             while ( my $seq = $in->next_seq ) {
33             $calc->seq($seq);
34             my $iep = $calc->iep;
35             print sprintf( "%s\t%s\t%.2f\n",
36             $seq->id,
37             $iep,
38             $calc->charge_at_pH($iep) );
39              
40             for( my $i = 0; $i <= 14; $i += 0.5 ){
41             print sprintf( "pH = %.2f\tCharge = %.2f\n",
42             $i,
43             $calc->charge_at_pH($i) );
44             }
45             }
46              
47             =head1 SEE ALSO
48              
49             http://fields.scripps.edu/DTASelect/20010710-pI-Algorithm.pdf
50             http://emboss.sourceforge.net/apps/cvs/emboss/apps/iep.html
51             http://us.expasy.org/tools/pi_tool.html
52              
53             =head1 LIMITATIONS
54              
55             There are various sources for the pK values of the amino acids.
56             The set of pK values chosen will affect the pI reported.
57              
58             The charge state of each residue is assumed to be independent of
59             the others. Protein modifications (such as a phosphate group) that
60             have a charge are ignored.
61              
62             =head1 FEEDBACK
63              
64             =head2 Mailing Lists
65              
66             User feedback is an integral part of the evolution of this
67             and other Bioperl modules. Send your comments and suggestions
68             preferably to one of the Bioperl mailing lists.
69             Your participation is much appreciated.
70              
71             bioperl-l@bioperl.org - General discussion
72             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
73              
74             =head2 Bugs
75              
76             Report bugs to the Bioperl bug tracking system to help us keep track
77             the bugs and their resolution. Bug reports can be submitted via the
78             web:
79              
80             https://github.com/bioperl/bioperl-live/issues
81              
82             =head1 AUTHOR
83              
84             Mark Southern (mark_southern@merck.com). From an algorithm by David
85             Tabb found at http://fields.scripps.edu/DTASelect/20010710-pI-Algorithm.pdf.
86             Modification for Bioperl, additional documentation by Brian Osborne.
87              
88             =head1 COPYRIGHT
89              
90             Copyright (c) 2002, Merck & Co. Inc. All Rights Reserved. This module is
91             free software. It may be used, redistributed and/or modified under the terms
92             of the Perl Artistic License (see http://www.perl.com/perl/misc/Artistic.html)
93              
94             =head1 APPENDIX
95              
96             The rest of the documentation details each of the object methods.
97             Private methods are usually preceded by a _.
98              
99             =cut
100              
101             # Let the code begin...
102              
103             package Bio::Tools::pICalculator;
104 1     1   933 use strict;
  1         1  
  1         25  
105              
106              
107 1     1   2 use base qw(Bio::Root::Root);
  1         2  
  1         517  
108              
109             # pK values from the DTASelect program from Scripps
110             # http://fields.scripps.edu/DTASelect
111             my $DTASelect_pK = { N_term => 8.0,
112             K => 10.0, # Lys
113             R => 12.0, # Arg
114             H => 6.5, # His
115             D => 4.4, # Asp
116             E => 4.4, # Glu
117             C => 8.5, # Cys
118             Y => 10.0, # Tyr
119             C_term => 3.1
120             };
121              
122             # pK values from the iep program from EMBOSS
123             # http://emboss.sourceforge.net/apps/cvs/emboss/apps/iep.html
124             my $Emboss_pK = { N_term => 8.6,
125             K => 10.8, # Lys
126             R => 12.5, # Arg
127             H => 6.5, # His
128             D => 3.9, # Asp
129             E => 4.1, # Glu
130             C => 8.5, # Cys
131             Y => 10.1, # Tyr
132             C_term => 3.6
133             };
134              
135             =head2 desc
136              
137             Title : new
138             Usage : Bio::Tools::pICalculator->new
139             Function: Instantiates the Bio::Tools::pICalculator object
140             Example : $calc = Bio::Tools::pICalculator->new( -pKset => \%pKvalues,
141             # a Bio::Seq object
142             -seq => $seq,
143             -places => 2 );
144             or:
145              
146             $calc = Bio::Tools::pICalculator->new( -pKset => 'string',
147             # a Bio::Seq object
148             -seq => $seq,
149             -places => 1 );
150              
151             Constructs a new pICalculator. Arguments are a flattened hash.
152             Valid, optional keys are:
153              
154             pKset - A reference to a hash with key value pairs for the
155             pK values of the charged amino acids. Required keys
156             are:
157              
158             N_term C_term K R H D E C Y
159              
160             pKset - A string ( 'DTASelect' or 'EMBOSS' ) that will
161             specify an internal set of pK values to be used. The
162             default is 'EMBOSS'
163              
164             seq - A Bio::Seq sequence object to analyze
165              
166             places - The number of decimal places to use in the
167             isoelectric point calculation. The default is 2.
168              
169             Returns : The description
170             Args : The description or none
171              
172             =cut
173              
174             sub new {
175 1     1 1 4 my( $class, %opts ) = @_;
176 1         9 my $self = $class->SUPER::new(%opts);
177 1   33     6 $self = bless {}, ref $self || $self;
178 1 50       10 $self->seq( $opts{-seq} ) if exists $opts{-seq};
179 1   50     9 $self->pKset( $opts{-pKset} || 'EMBOSS' );
180 1 50       8 exists $opts{-places} ? $self->places( $opts{-places} ) :
181             $self->places(2);
182 1         5 return $self;
183             }
184              
185             =head2 seq
186              
187             Title : seq
188             Usage : $calc->seq($seqobj)
189             Function: Sets or returns the Bio::Seq used in the calculation
190             Example : $seqobj = Bio::Seq->new(-seq=>"gghhhmmm",-id=>"GHM");
191             $calc = Bio::Tools::pICalculator->new;
192             $calc->seq($seqobj);
193             Returns : Bio::Seq object
194             Args : Bio::Seq object or none
195              
196             =cut
197              
198             sub seq {
199 1     1 1 3 my( $this, $seq ) = @_;
200 1 50 33     12 unless( defined $seq && UNIVERSAL::isa($seq,'Bio::Seq') ){
201 0         0 $this->throw("$seq is not a valid Bio::Seq object");
202             }
203 1         4 $this->{-seq} = $seq;
204 1         3 $this->{-count} = count_charged_residues( $seq );
205 1         6 return $this->{-seq};
206             }
207              
208             =head2 pKset
209              
210             Title : pKset
211             Usage : $pkSet = $calc->pKSet(\%pKSet)
212             Function: Sets or returns the hash of pK values used in the calculation
213             Example : $calc->pKset('emboss')
214             Returns : reference to pKset hash
215             Args : The reference to a pKset hash, a string, or none. Examples:
216              
217             pKset - A reference to a hash with key value pairs for the
218             pK values of the charged amino acids. Required keys
219             are:
220              
221             N_term C_term K R H D E C Y
222              
223             pKset - A valid string ( 'DTASelect' or 'EMBOSS' ) that will
224             specify an internal set of pK values to be used. The
225             default is 'EMBOSS'
226              
227             =cut
228              
229             sub pKset {
230 1     1 1 3 my ( $this, $pKset ) = @_;
231 1 50       9 if( ref $pKset eq 'HASH' ){ # user defined pK values
    50          
    0          
232 0         0 $this->{-pKset} = $pKset;
233             }elsif( $pKset =~ /^emboss$/i ){ # from EMBOSS's iep program
234 1         4 $this->{-pKset} = $Emboss_pK;
235             }elsif( $pKset =~ /^dtaselect$/i ){ # from DTASelect (scripps)
236 0         0 $this->{-pKset} = $DTASelect_pK;
237             }else{ # default to EMBOSS
238 0         0 $this->{-pKset} = $Emboss_pK;
239             }
240 1         3 return $this->{-pKset};
241             }
242              
243             sub places {
244 1     1 0 3 my $this = shift;
245 1 50       5 $this->{-places} = shift if @_;
246 1         2 return $this->{-places};
247             }
248              
249             =head2 iep
250              
251             Title : iep
252             Usage : $calc->iep
253             Function: Returns the isoelectric point
254             Example : $calc = Bio::Tools::pICalculator->new(-places => 2);
255             $calc->seq($seqobj);
256             $iep = $calc->iep;
257             Returns : The isoelectric point of the sequence in the Bio::Seq object
258             Args : None
259              
260             =cut
261              
262             sub iep {
263 2     2 1 406 my $this = shift;
264             return _calculate_iep($this->{-pKset},
265             $this->{-places},
266             $this->{-seq},
267             $this->{-count}
268 2         14 );
269             }
270              
271             =head2 charge_at_pH
272              
273             Title : charge_at_pH
274             Usage : $charge = $calc->charge_at_pH($pH)
275             Function: Sets or gets the description of the sequence
276             Example : $calc = Bio::Tools::pICalculator->new(-places => 2);
277             $calc->seq($seqobj);
278             $charge = $calc->charge_at_ph("7");
279             Returns : The predicted charge at the given pH
280             Args : pH
281              
282             =cut
283              
284             sub charge_at_pH {
285 29     29 1 11533 my $this = shift;
286             return _calculate_charge_at_pH( shift, $this->{-pKset},
287 29         106 $this->{-count} );
288             }
289              
290             sub count_charged_residues {
291 1     1 0 2 my $seq = shift;
292 1         4 my $sequence = $seq->seq;
293 1         3 my $count;
294 1         3 for ( qw( K R H D E C Y ) ){ # charged AA's
295 7         109 $count->{$_}++ while $sequence =~ /$_/ig;
296             }
297 1         7 return $count;
298             }
299              
300             sub _calculate_iep {
301 2     2   3 my( $pK, $places, $seq, $count ) = @_;
302 2         4 my $pH = 7.0;
303 2         2 my $step = 3.5;
304 2         4 my $last_charge = 0.0;
305 2         5 my $format = "%.${places}f";
306              
307 2 50       8 unless( defined $count ){
308 0         0 $count = count_charged_residues($seq);
309             }
310 2         3 while(1){
311 26         40 my $charge = _calculate_charge_at_pH( $pH, $pK, $count );
312 26 100       239 last if sprintf($format,$charge) ==
313             sprintf($format,$last_charge);
314 24 100       48 $charge > 0 ? ( $pH += $step ) : ( $pH -= $step );
315 24         22 $step /= 2.0;
316 24         27 $last_charge = $charge;
317             }
318 2         20 return sprintf( $format, $pH );
319             }
320              
321             # it's the sum of all the partial charges for the
322             # termini and all of the charged aa's!
323             sub _calculate_charge_at_pH {
324 1     1   4 no warnings; # don't complain if a given key doesn't exist
  1         1  
  1         160  
325 55     55   77 my( $pH, $pK, $count ) = @_;
326             my $charge = _partial_charge( $pK->{N_term}, $pH )
327             + $count->{K} * _partial_charge( $pK->{K}, $pH )
328             + $count->{R} * _partial_charge( $pK->{R}, $pH )
329             + $count->{H} * _partial_charge( $pK->{H}, $pH )
330             - $count->{D} * _partial_charge( $pH, $pK->{D} )
331             - $count->{E} * _partial_charge( $pH, $pK->{E} )
332             - $count->{C} * _partial_charge( $pH, $pK->{C} )
333             - $count->{Y} * _partial_charge( $pH, $pK->{Y} )
334 55         96 - _partial_charge( $pH, $pK->{C_term} );
335 55         169 return $charge;
336             }
337              
338             # Concentration Ratio is 10**(pK - pH) for positive groups
339             # and 10**(pH - pK) for negative groups
340             sub _partial_charge {
341 495     495   670 my $cr = 10 ** ( $_[0] - $_[1] );
342 495         1065 return $cr / ( $cr + 1 );
343             }
344              
345             1;
346              
347             __END__