|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
  
 
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # BioPerl module for Bio::Align::PairwiseStatistics  | 
| 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Please direct questions and support issues to    | 
| 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Cared for by Jason Stajich   | 
| 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Copyright Jason Stajich  | 
| 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # You may distribute this module under the same terms as perl itself  | 
| 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # POD documentation - main docs before the code  | 
| 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 NAME  | 
| 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Bio::Align::PairwiseStatistics - Base statistic object for Pairwise Alignments  | 
| 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 SYNOPSIS  | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   use strict;  | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $stats = Bio::Align::PairwiseStatistics->new();  | 
| 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # get alignment object of two sequences somehow  | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $pwaln;  | 
| 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   print $stats->number_of_comparable_bases($pwaln);  | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $score = $stats->score_nuc($pwaln);  | 
| 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 DESCRIPTION  | 
| 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Calculate pairwise statistics.  | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 FEEDBACK  | 
| 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 Mailing Lists  | 
| 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 User feedback is an integral part of the evolution of this and other  | 
| 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Bioperl modules. Send your comments and suggestions preferably to  | 
| 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the Bioperl mailing list.  Your participation is much appreciated.  | 
| 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   bioperl-l@bioperl.org                  - General discussion  | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   http://bioperl.org/wiki/Mailing_lists  - About the mailing lists  | 
| 
43
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 Support   | 
| 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Please direct usage questions or support issues to the mailing list:  | 
| 
47
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 I  | 
| 
49
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
50
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 rather than to the module maintainer directly. Many experienced and   | 
| 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 reponsive experts will be able look at the problem and quickly   | 
| 
52
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 address it. Please include a thorough description of the problem   | 
| 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 with code and data examples if at all possible.  | 
| 
54
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 Reporting Bugs  | 
| 
56
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
57
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Report bugs to the Bioperl bug tracking system to help us keep track  | 
| 
58
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 of the bugs and their resolution. Bug reports can be submitted via the  | 
| 
59
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 web:  | 
| 
60
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   https://github.com/bioperl/bioperl-live/issues  | 
| 
62
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 AUTHOR - Jason Stajich  | 
| 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
65
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Email jason-at-bioperl-dot-org  | 
| 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
67
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 APPENDIX  | 
| 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
69
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The rest of the documentation details each of the object methods.  | 
| 
70
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Internal methods are usually preceded with a _  | 
| 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
75
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Let the code begin...  | 
| 
76
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package Bio::Align::PairwiseStatistics;  | 
| 
79
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
 
 | 
13
 | 
 use vars qw($GapChars);  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
124
 | 
    | 
| 
80
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
 
 | 
11
 | 
 use strict;  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
75
 | 
    | 
| 
81
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
82
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
83
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
 
 | 
49
 | 
 BEGIN { $GapChars = '(\.|\-)'; }  | 
| 
84
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
85
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
 
 | 
11
 | 
 use base qw(Bio::Root::Root Bio::Align::StatisticsI);  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
    | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1126
 | 
    | 
| 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
87
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 number_of_comparable_bases  | 
| 
88
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title   : number_of_comparable_bases  | 
| 
90
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Usage   : my $bases = $stat->number_of_comparable_bases($aln);  | 
| 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function: Returns the count of the number of bases that can be  | 
| 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            compared (L) in this alignment ( length - gaps)  | 
| 
93
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns : integer  | 
| 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Args    : L  | 
| 
95
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
96
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
97
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
98
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
99
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub number_of_comparable_bases{  | 
| 
100
 | 
8
 | 
 
 | 
 
 | 
  
8
  
 | 
  
1
  
 | 
11
 | 
    my ($self,$aln) = @_;  | 
| 
101
 | 
8
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
44
 | 
   if ( ! defined $aln || ! $aln->isa('Bio::Align::AlignI') ) {  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
102
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $self->throw("Must provide a Bio::Align::AlignI compliant object to ".  | 
| 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       "Bio::Align::PairwiseStatistics");  | 
| 
104
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
        return 0;  | 
| 
105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } elsif ( $aln->num_sequences != 2 ) {   | 
| 
106
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $self->throw("Only pairwise calculations supported. Found ".  | 
| 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       $aln->num_sequences." sequences in alignment\n");  | 
| 
108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    }  | 
| 
109
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
    my $L = $aln->length - $self->number_of_gaps($aln);  | 
| 
110
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
    return $L;  | 
| 
111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
112
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 number_of_differences  | 
| 
114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title   : number_of_differences  | 
| 
116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Usage   : my $nd = $stat->number_of_distances($aln);  | 
| 
117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function: Returns the number of differences between two sequences  | 
| 
118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns : integer  | 
| 
119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Args    : L  | 
| 
120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub number_of_differences{  | 
| 
125
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
  
1
  
 | 
3
 | 
    my ($self,$aln) = @_;  | 
| 
126
 | 
2
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
16
 | 
     if( ! defined $aln || ! $aln->isa('Bio::Align::AlignI') ) {  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
127
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $self->throw("Must provide a Bio::Align::AlignI compliant object to ".  | 
| 
128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       "Bio::Align::PairwiseStatistics");  | 
| 
129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } elsif ( $aln->num_sequences != 2 ) {   | 
| 
130
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $self->throw("Only pairwise calculations supported. Found ".  | 
| 
131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       $aln->num_sequences." sequences in alignment\n");  | 
| 
132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
133
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
    my (@seqs);  | 
| 
134
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
   foreach my $seq ( $aln->each_seq ) {  | 
| 
135
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
        push @seqs, [ split(//,$seq->seq())];  | 
| 
136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    }  | 
| 
137
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
    my $firstseq = shift @seqs;  | 
| 
138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   #my $secondseq = shift @seqs;  | 
| 
139
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    my $diffcount = 0;  | 
| 
140
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
    for (my $i = 0;$i<$aln->length; $i++ ) {  | 
| 
141
 | 
383
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1041
 | 
     next if ( $firstseq->[$i]  =~ /^$GapChars$/ );  | 
| 
142
 | 
356
 | 
 
 | 
 
 | 
 
 | 
 
 | 
580
 | 
        foreach my $seq ( @seqs ) {  | 
| 
143
 | 
356
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
672
 | 
       next if ( $seq->[$i]  =~ /^$GapChars$/ );  | 
| 
144
 | 
343
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
808
 | 
 	   if( $firstseq->[$i] ne $seq->[$i] ) {  | 
| 
145
 | 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
79
 | 
 	       $diffcount++;  | 
| 
146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	   }  | 
| 
147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        }  | 
| 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    }  | 
| 
149
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
32
 | 
    return $diffcount;  | 
| 
150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 number_of_gaps  | 
| 
153
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title   : number_of_gaps  | 
| 
155
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Usage   : my $nd = $stat->number_of_gaps($aln);  | 
| 
156
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function: Returns the number of gapped positions among sequences in alignment  | 
| 
157
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns : integer  | 
| 
158
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Args    : L  | 
| 
159
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
160
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
162
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
163
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub number_of_gaps{  | 
| 
164
 | 
10
 | 
 
 | 
 
 | 
  
10
  
 | 
  
1
  
 | 
11
 | 
    my ($self,$aln) = @_;  | 
| 
165
 | 
10
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
54
 | 
   if ( ! defined $aln || ! $aln->isa('Bio::Align::AlignI') ) {  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
166
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $self->throw("Must provide a Bio::Align::AlignI compliant object to ".  | 
| 
167
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       "Bio::Align::PairwiseStatistics");  | 
| 
168
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } elsif ( $aln->num_sequences != 2 ) {   | 
| 
169
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $self->throw("Only pairwise calculations supported. Found ".  | 
| 
170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       $aln->num_sequences." sequences in alignment\n");  | 
| 
171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
172
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
    my $gapline = $aln->gap_line;  | 
| 
173
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    # this will count the number of '-' characters  | 
| 
174
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
62
 | 
    return $gapline =~ tr/-/-/;  | 
| 
175
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
176
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 score_nuc  | 
| 
179
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title   : score_nuc  | 
| 
181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Usage   : my $score = $stat->score_nuc($aln);  | 
| 
182
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              or  | 
| 
183
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            my $score = $stat->score_nuc(  | 
| 
184
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              -aln =>$aln,  | 
| 
185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              -match    => 1,  | 
| 
186
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              -mismatch => -1,  | 
| 
187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              -gap_open => -1,  | 
| 
188
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              -gap_ext  => -1  | 
| 
189
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            );  | 
| 
190
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function: Calculate the score of an alignment of 2 nucleic acid sequences. The  | 
| 
191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            scoring parameters can be specified. Otherwise the blastn default  | 
| 
192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            parameters are used: match = 2, mismatch = -3, gap opening = -5, gap  | 
| 
193
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            extension = -2  | 
| 
194
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns : alignment score (number)  | 
| 
195
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Args    : L  | 
| 
196
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            match score [optional]  | 
| 
197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            mismatch score [optional]  | 
| 
198
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            gap opening score [optional]  | 
| 
199
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            gap extension score [optional]  | 
| 
200
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
201
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
203
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub score_nuc {  | 
| 
204
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
  
1
  
 | 
7
 | 
   my ($self, @args) = @_;  | 
| 
205
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
   my ( $aln, $match, $mismatch, $gap_open, $gap_ext) = $self->_rearrange( [qw(  | 
| 
206
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ALN MATCH MISMATCH GAP_OPEN GAP_EXT)], @args );  | 
| 
207
 | 
4
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
31
 | 
   if ( ! defined $aln || ! $aln->isa('Bio::Align::AlignI') ) {  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
208
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $self->throw("Must provide a Bio::Align::AlignI compliant object to ".  | 
| 
209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       "Bio::Align::PairwiseStatistics");  | 
| 
210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } elsif ( $aln->num_sequences != 2 ) {   | 
| 
211
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $self->throw("Only pairwise calculations supported. Found ".  | 
| 
212
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       $aln->num_sequences." sequences in alignment\n");  | 
| 
213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
214
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
   my $seq1 = $aln->get_seq_by_pos(1);  | 
| 
215
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
   my $seq2 = $aln->get_seq_by_pos(2);  | 
| 
216
 | 
4
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
9
 | 
   if (! ( ($seq1->alphabet eq 'dna' || $seq1->alphabet eq 'rna') &&  | 
| 
 
 | 
 
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
217
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ($seq2->alphabet eq 'dna' || $seq2->alphabet eq 'rna') )) {  | 
| 
218
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $self->throw("Can only score nucleic acid alignments");  | 
| 
219
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
220
 | 
4
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
11
 | 
   $match    ||=  2; # Blastn scoring defaults  | 
| 
221
 | 
4
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
7
 | 
   $mismatch ||= -3;  | 
| 
222
 | 
4
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
8
 | 
   $gap_open ||= -5;  | 
| 
223
 | 
4
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
7
 | 
   $gap_ext  ||= -2;  | 
| 
224
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
   my $score = 0;  | 
| 
225
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
   my $prevres1 = '-';  | 
| 
226
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
   my $prevres2 = '-';  | 
| 
227
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
   for (my $pos = 1 ; $pos <= $aln->length ; $pos++) {  | 
| 
228
 | 
766
 | 
 
 | 
 
 | 
 
 | 
 
 | 
906
 | 
     my $res1 = $seq1->subseq($pos, $pos);  | 
| 
229
 | 
766
 | 
 
 | 
 
 | 
 
 | 
 
 | 
891
 | 
     my $res2 = $seq2->subseq($pos, $pos);  | 
| 
230
 | 
766
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
1947
 | 
     if (!($res1 eq '-' || $res2 eq '-')) { # no gap  | 
| 
231
 | 
686
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
620
 | 
       if ($res1 eq $res2) { # same residue  | 
| 
232
 | 
606
 | 
 
 | 
 
 | 
 
 | 
 
 | 
481
 | 
         $score += $match;  | 
| 
233
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       } else { # other residue  | 
| 
234
 | 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
70
 | 
         $score += $mismatch;  | 
| 
235
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } else { # open or ext gap?  | 
| 
237
 | 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
56
 | 
       my $open = 0;  | 
| 
238
 | 
80
 | 
  
 50
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
174
 | 
       if (!($res1 eq '-' && $res2 eq '-')) { # exactly one gap  | 
| 
239
 | 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
54
 | 
         my $prevres = $prevres1;  | 
| 
240
 | 
80
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
131
 | 
         $prevres = $prevres2 if $res2 eq '-';  | 
| 
241
 | 
80
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
104
 | 
         $open = 1 unless $prevres eq '-';  | 
| 
242
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       } else { # 2 gaps  | 
| 
243
 | 
0
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
         $open = 1 unless $prevres1 eq '-' && $prevres2 eq '-';  | 
| 
244
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
245
 | 
80
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
78
 | 
       if ($open) {  | 
| 
246
 | 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
27
 | 
         $score += $gap_open; # gap opening  | 
| 
247
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       } else {  | 
| 
248
 | 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
42
 | 
         $score += $gap_ext; # gap extension  | 
| 
249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
251
 | 
766
 | 
 
 | 
 
 | 
 
 | 
 
 | 
486
 | 
     $prevres1 = $res1;  | 
| 
252
 | 
766
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1152
 | 
     $prevres2 = $res2;  | 
| 
253
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
254
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
   return $score;  | 
| 
255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1;  |