| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Bio::RNA::Barriers::RateMatrix; |
|
2
|
|
|
|
|
|
|
our $VERSION = '0.01'; |
|
3
|
|
|
|
|
|
|
|
|
4
|
11
|
|
|
11
|
|
310
|
use 5.012; |
|
|
11
|
|
|
|
|
46
|
|
|
5
|
11
|
|
|
11
|
|
73
|
use strict; |
|
|
11
|
|
|
|
|
25
|
|
|
|
11
|
|
|
|
|
291
|
|
|
6
|
11
|
|
|
11
|
|
59
|
use warnings; |
|
|
11
|
|
|
|
|
25
|
|
|
|
11
|
|
|
|
|
371
|
|
|
7
|
|
|
|
|
|
|
|
|
8
|
11
|
|
|
11
|
|
65
|
use Moose; |
|
|
11
|
|
|
|
|
25
|
|
|
|
11
|
|
|
|
|
97
|
|
|
9
|
11
|
|
|
11
|
|
80480
|
use MooseX::StrictConstructor; |
|
|
11
|
|
|
|
|
34
|
|
|
|
11
|
|
|
|
|
108
|
|
|
10
|
11
|
|
|
11
|
|
38874
|
use namespace::autoclean; |
|
|
11
|
|
|
|
|
33
|
|
|
|
11
|
|
|
|
|
130
|
|
|
11
|
11
|
|
|
11
|
|
1192
|
use Moose::Util::TypeConstraints qw(enum subtype as where message); |
|
|
11
|
|
|
|
|
36
|
|
|
|
11
|
|
|
|
|
125
|
|
|
12
|
|
|
|
|
|
|
|
|
13
|
11
|
|
|
11
|
|
10497
|
use autodie qw(:all); |
|
|
11
|
|
|
|
|
28
|
|
|
|
11
|
|
|
|
|
101
|
|
|
14
|
11
|
|
|
11
|
|
66691
|
use overload '""' => \&stringify; |
|
|
11
|
|
|
|
|
29
|
|
|
|
11
|
|
|
|
|
126
|
|
|
15
|
11
|
|
|
11
|
|
903
|
use Scalar::Util qw( reftype looks_like_number ); |
|
|
11
|
|
|
|
|
25
|
|
|
|
11
|
|
|
|
|
956
|
|
|
16
|
11
|
|
|
11
|
|
105
|
use List::Util qw( all uniqnum ); |
|
|
11
|
|
|
|
|
26
|
|
|
|
11
|
|
|
|
|
12852
|
|
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
enum __PACKAGE__ . 'RateMatrixType', [qw(TXT BIN)]; |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
# Natural number type directly from the Moose docs. |
|
21
|
|
|
|
|
|
|
subtype 'PosInt', |
|
22
|
|
|
|
|
|
|
as 'Int', |
|
23
|
|
|
|
|
|
|
where { $_ > 0 }, |
|
24
|
|
|
|
|
|
|
message { "The number you provided, $_, was not a positive number" }; |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
has 'file_name' => ( |
|
27
|
|
|
|
|
|
|
is => 'ro', |
|
28
|
|
|
|
|
|
|
isa => 'Str', |
|
29
|
|
|
|
|
|
|
predicate => 'has_file_name', |
|
30
|
|
|
|
|
|
|
); |
|
31
|
|
|
|
|
|
|
has 'file_type' => ( |
|
32
|
|
|
|
|
|
|
is => 'rw', |
|
33
|
|
|
|
|
|
|
isa => __PACKAGE__ . 'RateMatrixType', |
|
34
|
|
|
|
|
|
|
required => 1, |
|
35
|
|
|
|
|
|
|
); |
|
36
|
|
|
|
|
|
|
has '_file_handle' => ( |
|
37
|
|
|
|
|
|
|
is => 'ro', |
|
38
|
|
|
|
|
|
|
isa => 'FileHandle', |
|
39
|
|
|
|
|
|
|
init_arg => 'file_handle', |
|
40
|
|
|
|
|
|
|
lazy => 1, |
|
41
|
|
|
|
|
|
|
builder => '_build_file_handle', |
|
42
|
|
|
|
|
|
|
); |
|
43
|
|
|
|
|
|
|
# Splice rate matrix directly when reading the data. This can read big |
|
44
|
|
|
|
|
|
|
# matrices when only keeping a few entries. |
|
45
|
|
|
|
|
|
|
has 'splice_on_parsing' => ( |
|
46
|
|
|
|
|
|
|
is => 'ro', |
|
47
|
|
|
|
|
|
|
isa => 'ArrayRef[PosInt]', |
|
48
|
|
|
|
|
|
|
predicate => 'was_spliced_on_parsing', |
|
49
|
|
|
|
|
|
|
); |
|
50
|
|
|
|
|
|
|
has '_data' => (is => 'ro', lazy => 1, builder => '_build_data'); |
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
sub BUILD { |
|
53
|
11
|
|
|
11
|
0
|
106907
|
my $self = shift; |
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
# Enforce data is read from handle immediately despite laziness. |
|
56
|
11
|
|
|
|
|
43
|
$self->dim; |
|
57
|
|
|
|
|
|
|
} |
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
# Read the actual rate data from the input file and construct the |
|
60
|
|
|
|
|
|
|
# matrix from it. |
|
61
|
|
|
|
|
|
|
sub _build_data { |
|
62
|
11
|
|
|
11
|
|
22
|
my $self = shift; |
|
63
|
|
|
|
|
|
|
|
|
64
|
11
|
|
|
|
|
23
|
my $rate_matrix; |
|
65
|
11
|
100
|
|
|
|
343
|
if ($self->file_type eq 'TXT') { |
|
|
|
50
|
|
|
|
|
|
|
66
|
6
|
|
|
|
|
190
|
$rate_matrix = __PACKAGE__->read_text_rate_matrix( |
|
67
|
|
|
|
|
|
|
$self->_file_handle, |
|
68
|
|
|
|
|
|
|
$self->splice_on_parsing, |
|
69
|
|
|
|
|
|
|
); |
|
70
|
|
|
|
|
|
|
} |
|
71
|
|
|
|
|
|
|
elsif ($self->file_type eq 'BIN') { |
|
72
|
5
|
|
|
|
|
156
|
$rate_matrix = __PACKAGE__->read_bin_rate_matrix( |
|
73
|
|
|
|
|
|
|
$self->_file_handle, |
|
74
|
|
|
|
|
|
|
$self->splice_on_parsing, |
|
75
|
|
|
|
|
|
|
); |
|
76
|
|
|
|
|
|
|
} |
|
77
|
|
|
|
|
|
|
else { |
|
78
|
0
|
|
|
|
|
0
|
confess "Unknown file type, that's a bug..."; |
|
79
|
|
|
|
|
|
|
} |
|
80
|
|
|
|
|
|
|
|
|
81
|
9
|
|
|
|
|
347
|
return $rate_matrix; |
|
82
|
|
|
|
|
|
|
} |
|
83
|
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
# Class method. Reads a rate matrix in text format from the passed file |
|
85
|
|
|
|
|
|
|
# handle and constructs a matrix (2-dim array) from it. Returns a |
|
86
|
|
|
|
|
|
|
# reference to the constructed rate matrix. |
|
87
|
|
|
|
|
|
|
# Arguments: |
|
88
|
|
|
|
|
|
|
# input_matrix_fh: file handle to text file containing rate matrix |
|
89
|
|
|
|
|
|
|
# splice_to: ORDERED set of states which are to be kept. The other |
|
90
|
|
|
|
|
|
|
# states are pruned from the matrix on-the-fly while parsing. |
|
91
|
|
|
|
|
|
|
# This saves time and memory. |
|
92
|
|
|
|
|
|
|
sub read_text_rate_matrix { |
|
93
|
6
|
|
|
6
|
1
|
22
|
my ($class, $input_matrix_fh, $splice_to_ref) = @_; |
|
94
|
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
# During parsing, splice the selected rows / columns. Make 0-based. |
|
96
|
6
|
|
50
|
|
|
15
|
my @splice_to_rows = @{ $splice_to_ref // [] }; # 1-based, modified |
|
|
6
|
|
|
|
|
55
|
|
|
97
|
6
|
|
|
|
|
21
|
my @splice_to_cols = map {$_ - 1} @splice_to_rows; # 0-based indices |
|
|
0
|
|
|
|
|
0
|
|
|
98
|
|
|
|
|
|
|
|
|
99
|
6
|
|
|
|
|
14
|
my (@rate_matrix, $matrix_dim); |
|
100
|
6
|
|
|
|
|
233
|
ROW: while (defined (my $line = <$input_matrix_fh>)) { |
|
101
|
16
|
50
|
|
|
|
54
|
if (defined $splice_to_ref) { |
|
102
|
0
|
0
|
|
|
|
0
|
last unless @splice_to_rows; # we're done! |
|
103
|
0
|
0
|
|
|
|
0
|
next ROW if $. != $splice_to_rows[0]; # this row is not kept |
|
104
|
0
|
|
|
|
|
0
|
shift @splice_to_rows; # remove the leading index |
|
105
|
|
|
|
|
|
|
} |
|
106
|
|
|
|
|
|
|
|
|
107
|
16
|
|
|
|
|
75
|
my @row = split q{ }, $line; # awk-style splitting |
|
108
|
|
|
|
|
|
|
# Since the diagonal element may be more or less anything, we need |
|
109
|
|
|
|
|
|
|
# to check it separately (e.g. to not choke on BHGbuilder output). |
|
110
|
16
|
|
|
|
|
91
|
my @row_no_diag = @row[0..($.-2), ($.)..$#row]; # $. is 1-based |
|
111
|
|
|
|
|
|
|
confess 'Input file contains non-numeric or negative input on ', |
|
112
|
|
|
|
|
|
|
"line $.:\n$line" |
|
113
|
|
|
|
|
|
|
unless looks_like_number $row[$.-1] # diag elem can be <0 |
|
114
|
16
|
50
|
66
|
30
|
|
162
|
and all {looks_like_number $_ and $_ >= 0} @row_no_diag; |
|
|
30
|
100
|
|
|
|
203
|
|
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
# Check that element count is equal in all rows. |
|
117
|
15
|
|
66
|
|
|
79
|
$matrix_dim //= @row; # first-time init |
|
118
|
15
|
50
|
|
|
|
40
|
confess 'Lines of input file have varying number of elements' |
|
119
|
|
|
|
|
|
|
unless $matrix_dim == @row; |
|
120
|
|
|
|
|
|
|
|
|
121
|
15
|
50
|
|
|
|
40
|
@row = @row[@splice_to_cols] if defined $splice_to_ref; |
|
122
|
15
|
|
|
|
|
29
|
push @rate_matrix, \@row; |
|
123
|
15
|
50
|
|
|
|
142
|
confess 'Input file contains more lines than there are columns' |
|
124
|
|
|
|
|
|
|
if @rate_matrix > $matrix_dim; |
|
125
|
|
|
|
|
|
|
} |
|
126
|
5
|
50
|
33
|
|
|
38
|
confess 'End of file reached before finding all states requested by ', |
|
127
|
|
|
|
|
|
|
'splicing operation' |
|
128
|
|
|
|
|
|
|
if defined $splice_to_ref and @splice_to_rows > 0; |
|
129
|
|
|
|
|
|
|
confess 'Requested splicing of non-contained state' |
|
130
|
5
|
50
|
|
0
|
|
55
|
unless all {$_ < $matrix_dim} @splice_to_cols; |
|
|
0
|
|
|
|
|
0
|
|
|
131
|
5
|
50
|
|
|
|
27
|
confess 'Input file is empty' |
|
132
|
|
|
|
|
|
|
unless @rate_matrix; |
|
133
|
|
|
|
|
|
|
# Adjust dimension if splicing was applied. |
|
134
|
5
|
50
|
|
|
|
27
|
confess 'Input file contains less lines than there are columns' |
|
|
|
50
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
if @rate_matrix < (defined $splice_to_ref ? @splice_to_cols |
|
136
|
|
|
|
|
|
|
: $matrix_dim ); |
|
137
|
|
|
|
|
|
|
|
|
138
|
5
|
|
|
|
|
20
|
return \@rate_matrix; |
|
139
|
|
|
|
|
|
|
} |
|
140
|
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
sub _transpose_matrix { |
|
142
|
4
|
|
|
4
|
|
15
|
my ($class, $matrix_ref) = @_; |
|
143
|
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
# Determine dimnensions |
|
145
|
4
|
|
|
|
|
12
|
my $max_row = @$matrix_ref - 1; |
|
146
|
4
|
50
|
|
|
|
16
|
return unless $max_row >= 0; |
|
147
|
4
|
|
|
|
|
8
|
my $max_col = @{ $matrix_ref->[0] } - 1; # check elems of first row |
|
|
4
|
|
|
|
|
14
|
|
|
148
|
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
# Swap values |
|
150
|
4
|
|
|
|
|
18
|
for my $row (0..$max_row) { |
|
151
|
12
|
|
|
|
|
35
|
for my $col (($row+1)..$max_col) { |
|
152
|
12
|
|
|
|
|
31
|
my $temp = $matrix_ref->[$row][$col]; |
|
153
|
12
|
|
|
|
|
30
|
$matrix_ref->[$row][$col] = $matrix_ref->[$col][$row]; |
|
154
|
12
|
|
|
|
|
36
|
$matrix_ref->[$col][$row] = $temp; |
|
155
|
|
|
|
|
|
|
} |
|
156
|
|
|
|
|
|
|
} |
|
157
|
|
|
|
|
|
|
} |
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
# Class method. Reads a rate matrix in binary format from the passed file |
|
160
|
|
|
|
|
|
|
# handle and constructs a matrix (2-dim array) from it. Returns a |
|
161
|
|
|
|
|
|
|
# reference to the constructed rate matrix. |
|
162
|
|
|
|
|
|
|
sub read_bin_rate_matrix { |
|
163
|
5
|
|
|
5
|
1
|
20
|
my ($class, $input_matrix_fh, $splice_to_ref) = @_; |
|
164
|
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
# During parsing, splice the selected rows / columns. Make 0-based. |
|
166
|
5
|
|
50
|
|
|
10
|
my @splice_to_cols = @{ $splice_to_ref // [] }; # 1-based, modified |
|
|
5
|
|
|
|
|
36
|
|
|
167
|
5
|
|
|
|
|
18
|
my @splice_to_rows = map {$_ - 1} @splice_to_cols; # 0-based indices |
|
|
0
|
|
|
|
|
0
|
|
|
168
|
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
# Set read mode to binary |
|
170
|
5
|
|
|
|
|
27
|
binmode $input_matrix_fh; |
|
171
|
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
##### Read out matrix dimension |
|
173
|
11
|
|
|
11
|
|
111
|
my $size_of_int = do {use Config; $Config{intsize}}; |
|
|
11
|
|
|
|
|
36
|
|
|
|
11
|
|
|
|
|
1938
|
|
|
|
5
|
|
|
|
|
4475
|
|
|
|
5
|
|
|
|
|
82
|
|
|
174
|
5
|
|
|
|
|
32
|
my $read_count |
|
175
|
|
|
|
|
|
|
= read($input_matrix_fh, my $raw_matrix_dim, $size_of_int); |
|
176
|
5
|
50
|
|
|
|
4738
|
confess "Could not read dimension from file, ", |
|
177
|
|
|
|
|
|
|
"expected $size_of_int bytes, got $read_count" |
|
178
|
|
|
|
|
|
|
if $read_count != $size_of_int; |
|
179
|
|
|
|
|
|
|
|
|
180
|
5
|
|
|
|
|
43
|
my $matrix_dim = unpack 'i', $raw_matrix_dim; # unpack integer |
|
181
|
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
confess 'Requested splicing of non-contained state' |
|
183
|
5
|
50
|
|
0
|
|
53
|
unless all {$_ < $matrix_dim} @splice_to_rows; |
|
|
0
|
|
|
|
|
0
|
|
|
184
|
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
##### Read rate matrix |
|
186
|
5
|
|
|
|
|
22
|
my @rate_matrix; |
|
187
|
11
|
|
|
11
|
|
95
|
my $size_of_double = do {use Config; $Config{doublesize}}; |
|
|
11
|
|
|
|
|
33
|
|
|
|
11
|
|
|
|
|
21308
|
|
|
|
5
|
|
|
|
|
10
|
|
|
|
5
|
|
|
|
|
245
|
|
|
188
|
5
|
|
|
|
|
33
|
my $bytes_per_column = $size_of_double * $matrix_dim; |
|
189
|
5
|
|
|
|
|
28
|
COL: for my $i (1..$matrix_dim) { |
|
190
|
|
|
|
|
|
|
# Each column consists of n=matrix_dim doubles. |
|
191
|
13
|
|
|
|
|
58
|
$read_count |
|
192
|
|
|
|
|
|
|
= read($input_matrix_fh, my $raw_column, $bytes_per_column); |
|
193
|
13
|
100
|
|
|
|
955
|
confess "Could not read column $i of file, ", |
|
194
|
|
|
|
|
|
|
"expected $bytes_per_column bytes, got $read_count" |
|
195
|
|
|
|
|
|
|
if $read_count != $bytes_per_column; |
|
196
|
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
# Skip column if splicing and column not requested. |
|
198
|
12
|
50
|
|
|
|
36
|
if (defined $splice_to_ref) { |
|
199
|
0
|
0
|
|
|
|
0
|
last unless @splice_to_cols; # we're done! |
|
200
|
0
|
0
|
|
|
|
0
|
next COL if $i != $splice_to_cols[0]; # this col is not kept |
|
201
|
0
|
|
|
|
|
0
|
shift @splice_to_cols; # remove the leading index |
|
202
|
|
|
|
|
|
|
} |
|
203
|
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
# Decode raw doubles. |
|
205
|
12
|
|
|
|
|
76
|
my @matrix_column = unpack "d$matrix_dim", $raw_column; |
|
206
|
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
# Splice parsed column if requested. |
|
208
|
12
|
50
|
|
|
|
35
|
@matrix_column = @matrix_column[@splice_to_rows] |
|
209
|
|
|
|
|
|
|
if defined $splice_to_ref; |
|
210
|
|
|
|
|
|
|
|
|
211
|
12
|
|
|
|
|
45
|
push @rate_matrix, \@matrix_column; |
|
212
|
|
|
|
|
|
|
} |
|
213
|
4
|
50
|
33
|
|
|
23
|
confess 'End of file reached before finding all states requested by ', |
|
214
|
|
|
|
|
|
|
'splicing operation' |
|
215
|
|
|
|
|
|
|
if defined $splice_to_ref and @splice_to_cols > 0; |
|
216
|
4
|
50
|
33
|
|
|
66
|
confess 'Read data as suggested by dimension, but end of file ', |
|
217
|
|
|
|
|
|
|
'not reached' |
|
218
|
|
|
|
|
|
|
unless defined $splice_to_ref or eof $input_matrix_fh; |
|
219
|
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
# For whatever reasons, binary rates are stored column-wise instead of |
|
221
|
|
|
|
|
|
|
# row-wise. Transpose to fix that. |
|
222
|
4
|
|
|
|
|
32
|
__PACKAGE__->_transpose_matrix(\@rate_matrix); |
|
223
|
|
|
|
|
|
|
|
|
224
|
4
|
|
|
|
|
19
|
return \@rate_matrix; |
|
225
|
|
|
|
|
|
|
} |
|
226
|
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
sub _build_file_handle { |
|
228
|
6
|
|
|
6
|
|
18
|
my $self = shift; |
|
229
|
|
|
|
|
|
|
|
|
230
|
6
|
50
|
|
|
|
246
|
confess 'File required if no file handle is passed' |
|
231
|
|
|
|
|
|
|
unless $self->has_file_name; |
|
232
|
|
|
|
|
|
|
|
|
233
|
6
|
|
|
|
|
206
|
open my $handle, '<', $self->file_name; |
|
234
|
6
|
|
|
|
|
9197
|
return $handle; |
|
235
|
|
|
|
|
|
|
} |
|
236
|
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
# Get the dimension (= number of rows = number of columns) of the matrix. |
|
238
|
|
|
|
|
|
|
sub dim { |
|
239
|
54
|
|
|
54
|
1
|
1313
|
my $self = shift; |
|
240
|
|
|
|
|
|
|
|
|
241
|
54
|
|
|
|
|
80
|
my $dimension = @{ $self->_data }; |
|
|
54
|
|
|
|
|
1865
|
|
|
242
|
52
|
|
|
|
|
190
|
return $dimension; |
|
243
|
|
|
|
|
|
|
} |
|
244
|
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
# Get the rate from state i to state j. States are 1-based (first state = |
|
246
|
|
|
|
|
|
|
# state 1) just as in the results file. |
|
247
|
|
|
|
|
|
|
sub rate_from_to { |
|
248
|
9
|
|
|
9
|
1
|
4965
|
my ($self, $from_state, $to_state) = @_; |
|
249
|
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
# Check states are within bounds |
|
251
|
9
|
50
|
|
|
|
28
|
confess "from_state $from_state is out of bounds" |
|
252
|
|
|
|
|
|
|
unless $self->_state_is_in_bounds($from_state); |
|
253
|
9
|
50
|
|
|
|
25
|
confess "to_state $to_state is out of bounds" |
|
254
|
|
|
|
|
|
|
unless $self->_state_is_in_bounds($to_state); |
|
255
|
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
# Retrieve rate. |
|
257
|
9
|
|
|
|
|
260
|
my $rate = $self->_data->[$from_state-1][$to_state-1]; |
|
258
|
9
|
|
|
|
|
61
|
return $rate; |
|
259
|
|
|
|
|
|
|
} |
|
260
|
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
# Check whether given state is contained in the rate matrix. |
|
262
|
|
|
|
|
|
|
sub _state_is_in_bounds { |
|
263
|
26
|
|
|
26
|
|
52
|
my ($self, $state) = @_; |
|
264
|
|
|
|
|
|
|
|
|
265
|
26
|
|
33
|
|
|
77
|
my $is_in_bounds = ($state >= 1 && $state <= $self->dim); |
|
266
|
26
|
|
|
|
|
74
|
return $is_in_bounds; |
|
267
|
|
|
|
|
|
|
} |
|
268
|
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
# Returns a sorted list of all states connected to the (mfe) state 1. |
|
270
|
|
|
|
|
|
|
# Assumes a symmetric transition matrix (only checks path *from* state 1 |
|
271
|
|
|
|
|
|
|
# *to* the other states). Quadratic runtime. |
|
272
|
|
|
|
|
|
|
sub connected_states { |
|
273
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
|
274
|
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
# Starting at state 1, perform a traversal of the transition graph and |
|
276
|
|
|
|
|
|
|
# remember all nodes seen. |
|
277
|
0
|
|
|
|
|
0
|
my $dim = $self->dim; |
|
278
|
0
|
|
|
|
|
0
|
my @cue = (1); |
|
279
|
0
|
|
|
|
|
0
|
my %connected = (1 => 1); # state 1 is connected |
|
280
|
0
|
|
|
|
|
0
|
while (my $i = shift @cue) { |
|
281
|
0
|
|
|
|
|
0
|
foreach my $j (1..$dim) { |
|
282
|
0
|
0
|
0
|
|
|
0
|
next if $connected{$j} or $self->rate_from_to($i, $j) <= 0; |
|
283
|
0
|
|
|
|
|
0
|
$connected{$j} = 1; # j is connected to 1 via i |
|
284
|
0
|
|
|
|
|
0
|
push @cue, $j; |
|
285
|
|
|
|
|
|
|
} |
|
286
|
|
|
|
|
|
|
} |
|
287
|
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
# Sort in linear time. |
|
289
|
0
|
|
|
|
|
0
|
my @sorted_connected = grep {$connected{$_}} 1..$dim; |
|
|
0
|
|
|
|
|
0
|
|
|
290
|
0
|
|
|
|
|
0
|
return @sorted_connected; |
|
291
|
|
|
|
|
|
|
} |
|
292
|
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
# Only keep the states connected to the mfe (as determined by |
|
294
|
|
|
|
|
|
|
# connected_states()). Returns a list of all connected (and thus preserved) |
|
295
|
|
|
|
|
|
|
# minima. |
|
296
|
|
|
|
|
|
|
sub keep_connected { |
|
297
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
|
298
|
0
|
|
|
|
|
0
|
my @connected_indices = map {$_ - 1} $self->connected_states; |
|
|
0
|
|
|
|
|
0
|
|
|
299
|
0
|
0
|
|
|
|
0
|
return map {$_ + 1} @connected_indices # none removed. |
|
|
0
|
|
|
|
|
0
|
|
|
300
|
|
|
|
|
|
|
if $self->dim == @connected_indices; |
|
301
|
|
|
|
|
|
|
|
|
302
|
0
|
|
|
|
|
0
|
$self->_splice_indices(\@connected_indices); |
|
303
|
|
|
|
|
|
|
|
|
304
|
0
|
|
|
|
|
0
|
return map {$_ + 1} @connected_indices; # turn into states again |
|
|
0
|
|
|
|
|
0
|
|
|
305
|
|
|
|
|
|
|
} |
|
306
|
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
# Remove all but the passed states from this rate matrix. States are |
|
308
|
|
|
|
|
|
|
# 1-based (first state = state 1) just as in the results file. |
|
309
|
|
|
|
|
|
|
sub keep_states { |
|
310
|
6
|
|
|
6
|
1
|
25
|
my ($self, @states_to_keep) = @_; |
|
311
|
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
# We need a sorted, unique list. |
|
313
|
6
|
|
|
|
|
24
|
@states_to_keep = uniqnum sort {$a <=> $b} @states_to_keep; |
|
|
6
|
|
|
|
|
22
|
|
|
314
|
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
# Check whether states are within bounds. |
|
316
|
6
|
|
|
|
|
17
|
foreach my $state (@states_to_keep) { |
|
317
|
8
|
50
|
|
|
|
18
|
confess "State $state is out of bounds" |
|
318
|
|
|
|
|
|
|
unless $self->_state_is_in_bounds($state); |
|
319
|
|
|
|
|
|
|
} |
|
320
|
|
|
|
|
|
|
|
|
321
|
6
|
100
|
|
|
|
19
|
return if @states_to_keep == $self->dim; # keep all == no op |
|
322
|
|
|
|
|
|
|
|
|
323
|
4
|
|
|
|
|
11
|
$_-- foreach @states_to_keep; # states are now 0-based |
|
324
|
4
|
|
|
|
|
15
|
$self->_splice_indices(\@states_to_keep); |
|
325
|
|
|
|
|
|
|
|
|
326
|
4
|
|
|
|
|
9
|
return $self; |
|
327
|
|
|
|
|
|
|
} |
|
328
|
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
# Only keep the passed states and reorder them as in the passed list. In |
|
330
|
|
|
|
|
|
|
# particular, the same state can be passed multiple times and will then be |
|
331
|
|
|
|
|
|
|
# deep-copied. |
|
332
|
|
|
|
|
|
|
# Arguments: |
|
333
|
|
|
|
|
|
|
# states: Ordered list of states defining the resulting matrix. May |
|
334
|
|
|
|
|
|
|
# contain duplicates. |
|
335
|
|
|
|
|
|
|
sub splice { |
|
336
|
0
|
|
|
0
|
1
|
0
|
my ($self, @states) = @_; |
|
337
|
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
# Check whether states are within bounds. |
|
339
|
0
|
|
|
|
|
0
|
foreach my $state (@states) { |
|
340
|
0
|
0
|
|
|
|
0
|
confess "State $state is out of bounds" |
|
341
|
|
|
|
|
|
|
unless $self->_state_is_in_bounds($state); |
|
342
|
|
|
|
|
|
|
} |
|
343
|
|
|
|
|
|
|
|
|
344
|
0
|
|
|
|
|
0
|
$_-- foreach @states; # states are now 0-based |
|
345
|
0
|
|
|
|
|
0
|
$self->_splice_indices(\@states); |
|
346
|
|
|
|
|
|
|
|
|
347
|
0
|
|
|
|
|
0
|
return $self; |
|
348
|
|
|
|
|
|
|
} |
|
349
|
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
# Internal version which performs no boundary checks and assumes REFERENCE |
|
351
|
|
|
|
|
|
|
# to state list. |
|
352
|
|
|
|
|
|
|
sub _splice_indices { |
|
353
|
4
|
|
|
4
|
|
9
|
my ($self, $kept_indices_ref) = @_; |
|
354
|
|
|
|
|
|
|
|
|
355
|
4
|
|
|
|
|
116
|
my $matrix_ref = $self->_data; |
|
356
|
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
# If no entries are kept, make matrix empty. |
|
358
|
4
|
100
|
|
|
|
16
|
if (@$kept_indices_ref == 0) { |
|
359
|
2
|
|
|
|
|
5
|
@$matrix_ref = (); |
|
360
|
2
|
|
|
|
|
5
|
return; |
|
361
|
|
|
|
|
|
|
} |
|
362
|
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
# Splice the matrix. |
|
364
|
|
|
|
|
|
|
# WARNING: This makes a shallow copy of the rows if the same index is |
|
365
|
|
|
|
|
|
|
# passed more than once (e.g. from splice()). |
|
366
|
2
|
|
|
|
|
12
|
@$matrix_ref = @{$matrix_ref}[@$kept_indices_ref]; # rows |
|
|
2
|
|
|
|
|
11
|
|
|
367
|
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
# Deep-copy duplicated rows (if any). |
|
369
|
2
|
|
|
|
|
3
|
my %row_seen; |
|
370
|
2
|
|
|
|
|
7
|
foreach my $row (@$matrix_ref) { |
|
371
|
2
|
50
|
|
|
|
7
|
$row = [@$row] if $row_seen{$row}; # deep-copy array |
|
372
|
2
|
|
|
|
|
8
|
$row_seen{$row} = 1; |
|
373
|
|
|
|
|
|
|
} |
|
374
|
2
|
|
|
|
|
8
|
@$_ = @{$_}[@$kept_indices_ref] # columns |
|
375
|
2
|
|
|
|
|
7
|
foreach @$matrix_ref; |
|
376
|
|
|
|
|
|
|
|
|
377
|
2
|
|
|
|
|
6
|
return $self; |
|
378
|
|
|
|
|
|
|
} |
|
379
|
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
# Remove the passed states from this rate matrix. States are 1-based |
|
381
|
|
|
|
|
|
|
# (first state = state 1) just as in the results file. |
|
382
|
|
|
|
|
|
|
sub remove_states { |
|
383
|
0
|
|
|
0
|
1
|
0
|
my ($self, @states_to_remove) = @_; |
|
384
|
|
|
|
|
|
|
|
|
385
|
0
|
0
|
|
|
|
0
|
return unless @states_to_remove; # removing no states at all |
|
386
|
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
# Check states are within bounds. |
|
388
|
0
|
|
|
|
|
0
|
foreach my $state (@states_to_remove) { |
|
389
|
0
|
0
|
|
|
|
0
|
confess "State $state is out of bounds" |
|
390
|
|
|
|
|
|
|
unless $self->_state_is_in_bounds($state); |
|
391
|
|
|
|
|
|
|
} |
|
392
|
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
# Invert state list via look-up hash. |
|
394
|
0
|
|
|
|
|
0
|
my %states_to_remove = map {$_ => 1} @states_to_remove; |
|
|
0
|
|
|
|
|
0
|
|
|
395
|
|
|
|
|
|
|
my @states_to_keep |
|
396
|
0
|
|
|
|
|
0
|
= grep {not $states_to_remove{$_}} 1..$self->dim; |
|
|
0
|
|
|
|
|
0
|
|
|
397
|
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
# Let _keep_indices() do the work. |
|
399
|
0
|
|
|
|
|
0
|
$_-- foreach @states_to_keep; # states are now 0-based |
|
400
|
0
|
|
|
|
|
0
|
$self->_splice_indices(\@states_to_keep); |
|
401
|
|
|
|
|
|
|
|
|
402
|
0
|
|
|
|
|
0
|
return $self; |
|
403
|
|
|
|
|
|
|
} |
|
404
|
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
# Print this matrix as text, either to the passed handle, or to STDOUT. |
|
406
|
|
|
|
|
|
|
sub print_as_text { |
|
407
|
6
|
|
|
6
|
1
|
16
|
my ($self, $text_matrix_out_fh) = @_; |
|
408
|
6
|
|
50
|
|
|
20
|
$text_matrix_out_fh //= \*STDOUT; # write to STDOUT by default |
|
409
|
|
|
|
|
|
|
|
|
410
|
6
|
|
|
|
|
11
|
my $rate_format = '%10.4g '; # as in Barriers code |
|
411
|
|
|
|
|
|
|
|
|
412
|
6
|
|
|
|
|
12
|
foreach my $row (@{ $self->_data }) { |
|
|
6
|
|
|
|
|
190
|
|
|
413
|
18
|
|
|
|
|
40
|
printf {$text_matrix_out_fh} $rate_format, $_ foreach @$row; |
|
|
54
|
|
|
|
|
306
|
|
|
414
|
18
|
|
|
|
|
29
|
print {$text_matrix_out_fh} "\n"; |
|
|
18
|
|
|
|
|
45
|
|
|
415
|
|
|
|
|
|
|
} |
|
416
|
|
|
|
|
|
|
} |
|
417
|
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
# Print this matrix as binary data, either to the passed handle or to |
|
419
|
|
|
|
|
|
|
# STDOUT. Data format: matrix dimension as integer, then column by column |
|
420
|
|
|
|
|
|
|
# as double. |
|
421
|
|
|
|
|
|
|
sub print_as_bin { |
|
422
|
1
|
|
|
1
|
1
|
4
|
my ($self, $rate_matrix_out_fh ) = @_; |
|
423
|
|
|
|
|
|
|
|
|
424
|
1
|
|
|
|
|
35
|
my $rate_matrix_ref = $self->_data; |
|
425
|
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
# Set write mode to binary |
|
427
|
1
|
|
|
|
|
8
|
binmode $rate_matrix_out_fh; |
|
428
|
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
##### Print out matrix dimension |
|
430
|
1
|
|
|
|
|
52
|
my $matrix_dim = @$rate_matrix_ref; |
|
431
|
1
|
|
|
|
|
6
|
my $packed_dim = pack 'i', $matrix_dim; # machine representation, int |
|
432
|
1
|
|
|
|
|
3
|
print {$rate_matrix_out_fh} $packed_dim; |
|
|
1
|
|
|
|
|
3
|
|
|
433
|
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
##### Print columns of rate matrix |
|
435
|
|
|
|
|
|
|
# For whatever reasons, binary rates are stored column-wise instead of |
|
436
|
|
|
|
|
|
|
# row-wise (Treekin works with the transposed matrix and this way it's |
|
437
|
|
|
|
|
|
|
# easier to slurp the entire file. Treekin transposes the text rates |
|
438
|
|
|
|
|
|
|
# during reading). |
|
439
|
|
|
|
|
|
|
#_transpose_matrix $rate_matrix_ref; |
|
440
|
1
|
|
|
|
|
6
|
foreach my $col (0..($matrix_dim-1)) { |
|
441
|
3
|
|
|
|
|
9
|
foreach my $row (0..($matrix_dim-1)) { |
|
442
|
|
|
|
|
|
|
# Pack rate as double |
|
443
|
9
|
|
|
|
|
22
|
my $packed_rate = pack 'd', $rate_matrix_ref->[$row][$col]; |
|
444
|
9
|
|
|
|
|
10
|
print {$rate_matrix_out_fh} $packed_rate; |
|
|
9
|
|
|
|
|
21
|
|
|
445
|
|
|
|
|
|
|
} |
|
446
|
|
|
|
|
|
|
# my $column = map {$_->[$i]} @$rate_matrix_ref; |
|
447
|
|
|
|
|
|
|
# my $packed_column = pack "d$matrix_dim", @column; |
|
448
|
|
|
|
|
|
|
} |
|
449
|
|
|
|
|
|
|
} |
|
450
|
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
# Return string containing binary the representation of the matrix (cf. |
|
452
|
|
|
|
|
|
|
# print_as_bin). |
|
453
|
|
|
|
|
|
|
sub serialize { |
|
454
|
1
|
|
|
1
|
1
|
3
|
my $self = shift; |
|
455
|
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
# Use print function and capture matrix in a string. |
|
457
|
1
|
|
|
|
|
3
|
my $matrix_string; |
|
458
|
1
|
|
|
|
|
5
|
open my $matrix_string_fh, '>', \$matrix_string; |
|
459
|
1
|
|
|
|
|
122
|
$self->print_as_bin($matrix_string_fh); |
|
460
|
|
|
|
|
|
|
|
|
461
|
1
|
|
|
|
|
7
|
return $matrix_string; |
|
462
|
|
|
|
|
|
|
} |
|
463
|
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
# Returns a string containing the text representation of the matrix. The |
|
465
|
|
|
|
|
|
|
# overloaded double-quote operator calls this method. |
|
466
|
|
|
|
|
|
|
sub stringify { |
|
467
|
6
|
|
|
6
|
1
|
816
|
my $self = shift; |
|
468
|
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
# Use print function and capture matrix in a string. Empty matrices |
|
470
|
|
|
|
|
|
|
# give an empty string (not undef). |
|
471
|
6
|
|
|
|
|
12
|
my $matrix_string = q{}; |
|
472
|
6
|
|
|
|
|
24
|
open my $matrix_string_fh, '>', \$matrix_string; |
|
473
|
6
|
|
|
|
|
3452
|
$self->print_as_text($matrix_string_fh); |
|
474
|
|
|
|
|
|
|
|
|
475
|
6
|
|
|
|
|
48
|
return $matrix_string; |
|
476
|
|
|
|
|
|
|
} |
|
477
|
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
1; |
|
479
|
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
__END__ |
|
482
|
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
=pod |
|
484
|
|
|
|
|
|
|
|
|
485
|
|
|
|
|
|
|
=encoding UTF-8 |
|
486
|
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
=head1 NAME |
|
488
|
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
Bio::RNA::Barriers::RateMatrix - Store and manipulate a I<Barriers> |
|
490
|
|
|
|
|
|
|
transition rate matrix. |
|
491
|
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
493
|
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
use Bio::RNA::Barriers; |
|
495
|
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
# Functional interface using plain Perl lists to store the matrix. |
|
497
|
|
|
|
|
|
|
my $list_mat |
|
498
|
|
|
|
|
|
|
= Bio::RNA::Barriers::RateMatrix->read_text_rate_matrix($input_handle); |
|
499
|
|
|
|
|
|
|
$list_mat |
|
500
|
|
|
|
|
|
|
= Bio::RNA::Barriers::RateMatrix->read_bin_rate_matrix($input_handle); |
|
501
|
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
# Read a binary rate matrix directly from file. Binary matrices are more |
|
503
|
|
|
|
|
|
|
# precise and smaller than text matrices. |
|
504
|
|
|
|
|
|
|
my $rate_matrix = Bio::RNA::Barriers::RateMatrix->new( |
|
505
|
|
|
|
|
|
|
file_name => '/path/to/rates.bin', |
|
506
|
|
|
|
|
|
|
file_type => 'BIN', |
|
507
|
|
|
|
|
|
|
); |
|
508
|
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
# Read a text rate matrix from an opened handle. |
|
510
|
|
|
|
|
|
|
open my $rate_matrix_fh_txt, '<', '/path/to/rates.out'; |
|
511
|
|
|
|
|
|
|
my $rate_matrix = Bio::RNA::Barriers::RateMatrix->new( |
|
512
|
|
|
|
|
|
|
file_handle => $rate_matrix_fh_txt, |
|
513
|
|
|
|
|
|
|
file_type => 'TXT', |
|
514
|
|
|
|
|
|
|
); |
|
515
|
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
# Print matrix, dimension, and a single rate. |
|
517
|
|
|
|
|
|
|
print "$rate_matrix"; |
|
518
|
|
|
|
|
|
|
print 'Dimension of rate matrix is ', $rate_matrix->dim, "\n"; |
|
519
|
|
|
|
|
|
|
print 'Rate from state 1 to state 3 is ', |
|
520
|
|
|
|
|
|
|
$rate_matrix->rate_from_to(1, 3), |
|
521
|
|
|
|
|
|
|
"\n"; |
|
522
|
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
# Remove entries for a list of states {1, 3, 5} (1-based as in bar file). |
|
524
|
|
|
|
|
|
|
$rate_matrix->remove_states(1, 5, 5, 3); # de-dupes automatically |
|
525
|
|
|
|
|
|
|
# Note: former state 2 is now state 1 etc. |
|
526
|
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
# Keep only states {1, 2, 3}, remove all others. Can also de-dupe. |
|
528
|
|
|
|
|
|
|
$rate_matrix->keep_states(1..3); |
|
529
|
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
# Write binary matrix to file. |
|
531
|
|
|
|
|
|
|
open my $out_fh_bin, '>', '/path/to/output/rates.bin'; |
|
532
|
|
|
|
|
|
|
$rate_matrix->print_as_bin($out_fh_bin); |
|
533
|
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
535
|
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
Parse, modify and print/write rate matrix files written by I<Barriers>, both |
|
537
|
|
|
|
|
|
|
in text and binary format. |
|
538
|
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
=head1 METHODS |
|
540
|
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
=head3 Bio::RNA::Barriers::RateMatrix->new(arg_name => $arg, ...) |
|
542
|
|
|
|
|
|
|
|
|
543
|
|
|
|
|
|
|
Constructor. Reads a rate matrix from a file / handle and creates a new rate |
|
544
|
|
|
|
|
|
|
matrix object. |
|
545
|
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
=over 4 |
|
547
|
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
=item Arguments: |
|
549
|
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
=over 4 |
|
551
|
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
=item file_name | file_handle |
|
553
|
|
|
|
|
|
|
|
|
554
|
|
|
|
|
|
|
Source of the data to read. Pass either or both. |
|
555
|
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
=item file_type |
|
557
|
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
Specifies whether the input data is in binary or text format. Must be either |
|
559
|
|
|
|
|
|
|
C<'TXT'> or C<'BIN'>. |
|
560
|
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
=item splice_on_parsing (optional) |
|
562
|
|
|
|
|
|
|
|
|
563
|
|
|
|
|
|
|
Array ref of integers denoting states for which the transition rates should be |
|
564
|
|
|
|
|
|
|
parsed. All other states are skipped. This dramatically improves the |
|
565
|
|
|
|
|
|
|
performance and memory efficiency for large matrices if only a few states are |
|
566
|
|
|
|
|
|
|
relevant (e.g. only connected states). |
|
567
|
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
=back |
|
569
|
|
|
|
|
|
|
|
|
570
|
|
|
|
|
|
|
=back |
|
571
|
|
|
|
|
|
|
|
|
572
|
|
|
|
|
|
|
=head3 $mat->file_name() |
|
573
|
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
File from which the data was read. May be undef if it was read from a file |
|
575
|
|
|
|
|
|
|
handle. |
|
576
|
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
=head3 $mat->file_type() |
|
578
|
|
|
|
|
|
|
|
|
579
|
|
|
|
|
|
|
Specifies whether the input data is in binary or text format. Must be either |
|
580
|
|
|
|
|
|
|
C<'TXT'> or C<'BIN'>. |
|
581
|
|
|
|
|
|
|
|
|
582
|
|
|
|
|
|
|
=head3 Bio::RNA::RateMatrix->read_text_rate_matrix($input_matrix_filehandle) |
|
583
|
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
Class method. Reads a rate matrix in text format from the passed file |
|
585
|
|
|
|
|
|
|
handle and constructs a matrix (2-dim array) from it. Returns an array |
|
586
|
|
|
|
|
|
|
reference containing the parsed rate matrix. |
|
587
|
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
Use this function if you do not want to use the object-oriented interface. |
|
589
|
|
|
|
|
|
|
|
|
590
|
|
|
|
|
|
|
=head3 Bio::RNA::RateMatrix->read_bin_rate_matrix($input_matrix_filehandle) |
|
591
|
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
Class method. Reads a rate matrix in binary format from the passed file |
|
593
|
|
|
|
|
|
|
handle and constructs a matrix (2-dim array) from it. Returns an array |
|
594
|
|
|
|
|
|
|
reference containing the parsed rate matrix. |
|
595
|
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
Use this function if you do not want to use the object-oriented interface. |
|
597
|
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
=head3 $mat->dim() |
|
599
|
|
|
|
|
|
|
|
|
600
|
|
|
|
|
|
|
Get the dimension (= number of rows = number of columns) of the matrix. |
|
601
|
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
=head3 $mat->rate_from_to($i, $j) |
|
603
|
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
Get the rate from state i to state j. States are 1-based (first state = state |
|
605
|
|
|
|
|
|
|
1) just as in the results file. |
|
606
|
|
|
|
|
|
|
|
|
607
|
|
|
|
|
|
|
=head3 $mat->remove_states(@indices) |
|
608
|
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
Remove the passed states from this rate matrix. States are 1-based (first |
|
610
|
|
|
|
|
|
|
state = state 1) just as in the results file. |
|
611
|
|
|
|
|
|
|
|
|
612
|
|
|
|
|
|
|
=head3 $mat->connected_states() |
|
613
|
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
Returns a sorted list of all states connected to the (mfe) state 1. |
|
615
|
|
|
|
|
|
|
Assumes a symmetric transition matrix (only checks path B<from> state 1 |
|
616
|
|
|
|
|
|
|
B<to> the other states). Quadratic runtime. |
|
617
|
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
=head3 $mat->keep_connected() |
|
619
|
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
Only keep the states connected to the mfe (as determined by |
|
621
|
|
|
|
|
|
|
C<connected_states()>). |
|
622
|
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
=head3 $mat->keep_states(@indices) |
|
624
|
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
Remove all but the passed states from this rate matrix. States are 1-based |
|
626
|
|
|
|
|
|
|
(first state = state 1) just as in the results file. C<@indices> may be |
|
627
|
|
|
|
|
|
|
unordered and contain duplicates. |
|
628
|
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
=head3 $mat->splice(@indices) |
|
630
|
|
|
|
|
|
|
|
|
631
|
|
|
|
|
|
|
Only keep the passed states and reorder them to match the order of |
|
632
|
|
|
|
|
|
|
C<@indices>. In particular, the same state can be passed multiple times and |
|
633
|
|
|
|
|
|
|
will then be deep-copied. C<@indices> may be unordered and contain duplicates. |
|
634
|
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
=head3 $mat->print_as_text($out_handle) |
|
636
|
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
Print this matrix as text, either to the passed handle, or to STDOUT if |
|
638
|
|
|
|
|
|
|
C<$out_handle> is not provided. |
|
639
|
|
|
|
|
|
|
|
|
640
|
|
|
|
|
|
|
=head3 $mat->print_as_bin() |
|
641
|
|
|
|
|
|
|
|
|
642
|
|
|
|
|
|
|
Print this matrix as binary data, either to the passed handle, or to STDOUT if |
|
643
|
|
|
|
|
|
|
C<$out_handle> is not provided. |
|
644
|
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
Data format: matrix dimension as integer, then column by column as double. |
|
646
|
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
=head3 $mat->serialize() |
|
648
|
|
|
|
|
|
|
|
|
649
|
|
|
|
|
|
|
Return string containing binary representation of the matrix (cf. |
|
650
|
|
|
|
|
|
|
print_as_bin). |
|
651
|
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
=head3 $mat->stringify() |
|
653
|
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
Returns a string containing the text representation of the matrix. The |
|
655
|
|
|
|
|
|
|
overloaded double-quote operator calls this method. |
|
656
|
|
|
|
|
|
|
|
|
657
|
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
=head1 AUTHOR |
|
659
|
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
Felix Kuehnl, C<< <felix at bioinf.uni-leipzig.de> >> |
|
661
|
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
=head1 BUGS |
|
663
|
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
Please report any bugs or feature requests by raising an issue at |
|
665
|
|
|
|
|
|
|
L<https://github.com/xileF1337/Bio-RNA-Barriers/issues>. |
|
666
|
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
You can also do so by mailing to C<bug-bio-rna-barmap at rt.cpan.org>, |
|
668
|
|
|
|
|
|
|
or through the web interface at |
|
669
|
|
|
|
|
|
|
L<https://rt.cpan.org/NoAuth/ReportBug.html?Queue=Bio-RNA-BarMap>. I will be |
|
670
|
|
|
|
|
|
|
notified, and then you'll automatically be notified of progress on your bug as |
|
671
|
|
|
|
|
|
|
I make changes. |
|
672
|
|
|
|
|
|
|
|
|
673
|
|
|
|
|
|
|
|
|
674
|
|
|
|
|
|
|
=head1 SUPPORT |
|
675
|
|
|
|
|
|
|
|
|
676
|
|
|
|
|
|
|
You can find documentation for this module with the perldoc command. |
|
677
|
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
perldoc Bio::RNA::Barriers |
|
679
|
|
|
|
|
|
|
|
|
680
|
|
|
|
|
|
|
|
|
681
|
|
|
|
|
|
|
You can also look for information at the official Barriers website: |
|
682
|
|
|
|
|
|
|
|
|
683
|
|
|
|
|
|
|
L<https://www.tbi.univie.ac.at/RNA/Barriers/> |
|
684
|
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
|
|
686
|
|
|
|
|
|
|
=over 4 |
|
687
|
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
=item * Github: the official repository |
|
689
|
|
|
|
|
|
|
|
|
690
|
|
|
|
|
|
|
L<https://github.com/xileF1337/Bio-RNA-Barriers> |
|
691
|
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
=item * RT: CPAN's request tracker (report bugs here) |
|
693
|
|
|
|
|
|
|
|
|
694
|
|
|
|
|
|
|
L<https://rt.cpan.org/NoAuth/Bugs.html?Dist=Bio-RNA-Barriers> |
|
695
|
|
|
|
|
|
|
|
|
696
|
|
|
|
|
|
|
=item * AnnoCPAN: Annotated CPAN documentation |
|
697
|
|
|
|
|
|
|
|
|
698
|
|
|
|
|
|
|
L<http://annocpan.org/dist/Bio-RNA-Barriers> |
|
699
|
|
|
|
|
|
|
|
|
700
|
|
|
|
|
|
|
=item * CPAN Ratings |
|
701
|
|
|
|
|
|
|
|
|
702
|
|
|
|
|
|
|
L<https://cpanratings.perl.org/d/Bio-RNA-Barriers> |
|
703
|
|
|
|
|
|
|
|
|
704
|
|
|
|
|
|
|
=item * Search CPAN |
|
705
|
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
L<https://metacpan.org/release/Bio-RNA-Barriers> |
|
707
|
|
|
|
|
|
|
|
|
708
|
|
|
|
|
|
|
=back |
|
709
|
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
|
|
711
|
|
|
|
|
|
|
=head1 LICENSE AND COPYRIGHT |
|
712
|
|
|
|
|
|
|
|
|
713
|
|
|
|
|
|
|
Copyright 2019-2021 Felix Kuehnl. |
|
714
|
|
|
|
|
|
|
|
|
715
|
|
|
|
|
|
|
This program is free software: you can redistribute it and/or modify |
|
716
|
|
|
|
|
|
|
it under the terms of the GNU General Public License as published by |
|
717
|
|
|
|
|
|
|
the Free Software Foundation, either version 3 of the License, or |
|
718
|
|
|
|
|
|
|
(at your option) any later version. |
|
719
|
|
|
|
|
|
|
|
|
720
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful, |
|
721
|
|
|
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
722
|
|
|
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
723
|
|
|
|
|
|
|
GNU General Public License for more details. |
|
724
|
|
|
|
|
|
|
|
|
725
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License |
|
726
|
|
|
|
|
|
|
along with this program. If not, see L<http://www.gnu.org/licenses/>. |
|
727
|
|
|
|
|
|
|
|
|
728
|
|
|
|
|
|
|
=cut |
|
729
|
|
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
|
|
731
|
|
|
|
|
|
|
# End of Bio/RNA/Barriers/RateMatrix.pm |