line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# -*-CPerl-*- |
2
|
|
|
|
|
|
|
# Last changed Time-stamp: <2017-03-09 20:50:15 michl> |
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
# Bio::RNA::RNAaliSplit.pm: Handler for horizontally splitting alignments |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
package Bio::RNA::RNAaliSplit; |
7
|
|
|
|
|
|
|
|
8
|
1
|
|
|
1
|
|
101496
|
use version; our $VERSION = qv('0.04'); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
7
|
|
9
|
1
|
|
|
1
|
|
72
|
use Carp; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
47
|
|
10
|
1
|
|
|
1
|
|
487
|
use Data::Dumper; |
|
1
|
|
|
|
|
6671
|
|
|
1
|
|
|
|
|
56
|
|
11
|
1
|
|
|
1
|
|
473
|
use Moose; |
|
1
|
|
|
|
|
324777
|
|
|
1
|
|
|
|
|
6
|
|
12
|
1
|
|
|
1
|
|
4675
|
use Moose::Util::TypeConstraints; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
8
|
|
13
|
1
|
|
|
1
|
|
1708
|
use Path::Class::File; |
|
1
|
|
|
|
|
19445
|
|
|
1
|
|
|
|
|
24
|
|
14
|
1
|
|
|
1
|
|
7
|
use Path::Class::Dir; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
14
|
|
15
|
1
|
|
|
1
|
|
375
|
use Path::Class; |
|
1
|
|
|
|
|
257
|
|
|
1
|
|
|
|
|
40
|
|
16
|
1
|
|
|
1
|
|
6
|
use File::Basename; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
102
|
|
17
|
1
|
|
|
1
|
|
4
|
use IPC::Cmd qw(can_run run); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
40
|
|
18
|
1
|
|
|
1
|
|
453
|
use Bio::AlignIO; |
|
1
|
|
|
|
|
90748
|
|
|
1
|
|
|
|
|
27
|
|
19
|
1
|
|
|
1
|
|
572
|
use Storable 'dclone'; |
|
1
|
|
|
|
|
2443
|
|
|
1
|
|
|
|
|
60
|
|
20
|
1
|
|
|
1
|
|
7
|
use File::Path qw(make_path); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
833
|
|
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
subtype 'MyAln' => as class_type('Bio::AlignIO'); |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
coerce 'MyAln' |
25
|
|
|
|
|
|
|
=> from 'HashRef' |
26
|
|
|
|
|
|
|
=> via { Bio::AlignIO->new( %{ $_ } ) }; |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
has 'format' => ( |
29
|
|
|
|
|
|
|
is => 'ro', |
30
|
|
|
|
|
|
|
isa => 'Str', |
31
|
|
|
|
|
|
|
predicate => 'has_format', |
32
|
|
|
|
|
|
|
default => 'ClustalW', |
33
|
|
|
|
|
|
|
required => 1, |
34
|
|
|
|
|
|
|
); |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
has 'infilebasename' => ( |
37
|
|
|
|
|
|
|
is => 'rw', |
38
|
|
|
|
|
|
|
isa => 'Str', |
39
|
|
|
|
|
|
|
predicate => 'has_basename', |
40
|
|
|
|
|
|
|
init_arg => undef, |
41
|
|
|
|
|
|
|
); |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
has 'alignment' => ( |
44
|
|
|
|
|
|
|
is => 'rw', |
45
|
|
|
|
|
|
|
isa => 'MyAln', |
46
|
|
|
|
|
|
|
predicate => 'has_aln', |
47
|
|
|
|
|
|
|
coerce => 1, |
48
|
|
|
|
|
|
|
); |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
has 'next_aln' => ( |
51
|
|
|
|
|
|
|
is => 'rw', |
52
|
|
|
|
|
|
|
isa => 'Bio::SimpleAlign', |
53
|
|
|
|
|
|
|
predicate => 'has_next_aln', |
54
|
|
|
|
|
|
|
init_arg => undef, |
55
|
|
|
|
|
|
|
); |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
has 'dump' => ( |
58
|
|
|
|
|
|
|
is => 'rw', |
59
|
|
|
|
|
|
|
isa => 'Num', |
60
|
|
|
|
|
|
|
predicate => 'has_dump_flag', |
61
|
|
|
|
|
|
|
); |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
has 'hammingdistN' => ( |
64
|
|
|
|
|
|
|
is => 'rw', |
65
|
|
|
|
|
|
|
isa => 'Num', |
66
|
|
|
|
|
|
|
default => '-1', |
67
|
|
|
|
|
|
|
predicate => 'has_hammingN', |
68
|
|
|
|
|
|
|
init_arg => undef, |
69
|
|
|
|
|
|
|
); |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
has 'hammingdistX' => ( |
72
|
|
|
|
|
|
|
is => 'rw', |
73
|
|
|
|
|
|
|
isa => 'Num', |
74
|
|
|
|
|
|
|
default => '-1', |
75
|
|
|
|
|
|
|
predicate => 'has_hammingX', |
76
|
|
|
|
|
|
|
init_arg => undef, |
77
|
|
|
|
|
|
|
); |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
with 'Bio::RNA::RNAaliSplit::FileDir'; |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
sub BUILD { |
82
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
83
|
0
|
|
|
|
|
|
my $this_function = (caller(0))[3]; |
84
|
0
|
0
|
|
|
|
|
confess "ERROR [$this_function] \$self->ifile not available" |
85
|
|
|
|
|
|
|
unless ($self->has_ifile); |
86
|
0
|
|
|
|
|
|
$self->alignment({-file => $self->ifile, |
87
|
|
|
|
|
|
|
-format => $self->format, |
88
|
|
|
|
|
|
|
-displayname_flat => 1} ); # discard position in sequence IDs |
89
|
0
|
|
|
|
|
|
$self->next_aln($self->alignment->next_aln); |
90
|
0
|
|
|
|
|
|
$self->odir( [$self->ifile->dir,$self->odirn] ); |
91
|
0
|
|
|
|
|
|
my @created = make_path($self->odir, {error => \my $err}); |
92
|
0
|
0
|
|
|
|
|
if (@$err) { |
93
|
0
|
|
|
|
|
|
confess "ERROR [$this_function] could not create output directory $self->odir"; |
94
|
|
|
|
|
|
|
} |
95
|
0
|
|
|
|
|
|
$self->infilebasename(fileparse($self->ifile->basename, qr/\.[^.]*/)); |
96
|
|
|
|
|
|
|
|
97
|
0
|
0
|
|
|
|
|
if ($self->has_dump_flag){ |
98
|
|
|
|
|
|
|
# dump ifile as aln in ClustalW format to odir/input |
99
|
0
|
|
|
|
|
|
my $iodir = $self->odir->subdir('input'); |
100
|
0
|
|
|
|
|
|
mkdir($iodir); |
101
|
0
|
|
|
|
|
|
my $ialnfile = file($iodir,$self->infilebasename.".aln"); |
102
|
0
|
|
|
|
|
|
my $alnio = Bio::AlignIO->new(-file => ">$ialnfile", |
103
|
|
|
|
|
|
|
-format => "ClustalW", |
104
|
|
|
|
|
|
|
-flush => 0, |
105
|
|
|
|
|
|
|
-displayname_flat => 1 ); |
106
|
0
|
|
|
|
|
|
my $aln2 = $self->next_aln->select_noncont((1..$self->next_aln->num_sequences)); |
107
|
0
|
|
|
|
|
|
$alnio->write_aln($aln2); |
108
|
|
|
|
|
|
|
# end dump input aln file |
109
|
|
|
|
|
|
|
} |
110
|
|
|
|
|
|
|
|
111
|
0
|
0
|
|
|
|
|
if ($self->next_aln->num_sequences == 2){ $self->_hamming() } |
|
0
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
} |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
sub dump_subalignment { |
115
|
0
|
|
|
0
|
0
|
|
my ($self,$alipathsegment,$token,$what) = @_; |
116
|
0
|
|
|
|
|
|
my $this_function = (caller(0))[3]; |
117
|
0
|
|
|
|
|
|
my ($aln,$aln2,$name); |
118
|
|
|
|
|
|
|
|
119
|
0
|
0
|
|
|
|
|
croak "ERROR [$this_function] argument 'token' not provided" |
120
|
|
|
|
|
|
|
unless (defined($token)); |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
# create output path |
123
|
0
|
|
|
|
|
|
my $ids = join "_", @$what; |
124
|
0
|
0
|
|
|
|
|
unless (defined($alipathsegment)){$alipathsegment = "tmp"} |
|
0
|
|
|
|
|
|
|
125
|
0
|
|
|
|
|
|
my $oodir = $self->odir->subdir($alipathsegment); |
126
|
0
|
|
|
|
|
|
mkdir($oodir); |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
# create info file |
129
|
0
|
|
|
|
|
|
my $oinfofile = file($oodir,$token.".info"); |
130
|
0
|
0
|
|
|
|
|
open my $oinfo, ">", $oinfofile or die $!; |
131
|
0
|
|
|
|
|
|
foreach my $entry (@$what){ |
132
|
0
|
|
|
|
|
|
my $key = $entry-1; |
133
|
0
|
|
|
|
|
|
my $val = ${$self->next_aln}{_order}->{$key}; |
|
0
|
|
|
|
|
|
|
134
|
0
|
|
|
|
|
|
print $oinfo join "\t", ($entry, $val, "\n"); |
135
|
|
|
|
|
|
|
} |
136
|
0
|
|
|
|
|
|
close($oinfo); |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
# create subalignment in Clustal and Stockholm format |
139
|
0
|
|
|
|
|
|
my $oalifile_clustal = file($oodir,$token.".aln"); |
140
|
0
|
|
|
|
|
|
my $oalifile_stockholm = file($oodir,$token.".stk"); |
141
|
0
|
|
|
|
|
|
my $oali_clustal = Bio::AlignIO->new(-file => ">$oalifile_clustal", |
142
|
|
|
|
|
|
|
-format => "ClustalW", |
143
|
|
|
|
|
|
|
-flush => 0, |
144
|
|
|
|
|
|
|
-displayname_flat => 1 ); |
145
|
0
|
|
|
|
|
|
my $oali_stockholm = Bio::AlignIO->new(-file => ">$oalifile_stockholm", |
146
|
|
|
|
|
|
|
-format => "Stockholm", |
147
|
|
|
|
|
|
|
-flush => 0, |
148
|
|
|
|
|
|
|
-displayname_flat => 1 ); |
149
|
0
|
|
|
|
|
|
$aln = $self->next_aln->select_noncont(@$what); |
150
|
0
|
|
|
|
|
|
$oali_clustal->write_aln( $aln ); |
151
|
0
|
|
|
|
|
|
$oali_stockholm->write_aln( $aln ); |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
# create subalignment fasta file |
154
|
0
|
|
|
|
|
|
my $ofafile = file($oodir,$token.".fa"); |
155
|
0
|
|
|
|
|
|
my $ofa = Bio::AlignIO->new(-file => ">$ofafile", |
156
|
|
|
|
|
|
|
-format => "fasta", |
157
|
|
|
|
|
|
|
-flush => 0, |
158
|
|
|
|
|
|
|
-displayname_flat => 1 ); |
159
|
0
|
|
|
|
|
|
$aln2 = $aln->remove_gaps; |
160
|
0
|
|
|
|
|
|
$ofa->write_aln( $aln2 ); |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
# extract sequences from alignment and dump to .seq file |
163
|
|
|
|
|
|
|
# NOTE that these sequences do contain gap symbols, intentionally (!) |
164
|
|
|
|
|
|
|
# these can then be replaced by Ns to compute eg hamming distance |
165
|
0
|
|
|
|
|
|
my $oseqfile = file($oodir,$token.".seq"); |
166
|
0
|
0
|
|
|
|
|
open my $seqfile, ">", $oseqfile or die $!; |
167
|
0
|
|
|
|
|
|
foreach my $seq ($aln->each_seq) { |
168
|
0
|
|
|
|
|
|
print $seqfile $seq->seq,"\n"; |
169
|
|
|
|
|
|
|
} |
170
|
0
|
|
|
|
|
|
close($seqfile); |
171
|
|
|
|
|
|
|
|
172
|
0
|
|
|
|
|
|
return ( $oalifile_clustal,$oalifile_stockholm ); |
173
|
|
|
|
|
|
|
} |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
sub _hamming { |
176
|
0
|
|
|
0
|
|
|
my $self = shift; |
177
|
0
|
|
|
|
|
|
my $this_function = (caller(0))[3]; |
178
|
0
|
|
|
|
|
|
my $hamming = -1; |
179
|
0
|
0
|
|
|
|
|
croak "ERROR [$this_function] cannot compute Hamming distance for $self->next_aln->num_sequences sequences" |
180
|
|
|
|
|
|
|
if ($self->next_aln->num_sequences != 2); |
181
|
|
|
|
|
|
|
|
182
|
0
|
|
|
|
|
|
my $aln = $self->next_aln->select_noncont((1,2)); |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
# compute Hamming distance of the aligned sequences, replacing gaps with Ns |
185
|
0
|
|
|
|
|
|
my $alnN = dclone($aln); |
186
|
0
|
0
|
|
|
|
|
croak("ERROR [$this_function] cannot replace gaps with Ns") |
187
|
|
|
|
|
|
|
unless ($alnN->map_chars('-','N') == 1); |
188
|
0
|
|
|
|
|
|
my $seq1 = $alnN->get_seq_by_pos(1)->seq; |
189
|
0
|
|
|
|
|
|
my $seq2 = $alnN->get_seq_by_pos(2)->seq; |
190
|
0
|
0
|
|
|
|
|
croak "ERROR [$this_function] sequence length differs" |
191
|
|
|
|
|
|
|
unless(length($seq1)==length($seq2)); |
192
|
0
|
|
|
|
|
|
my $hammingN = ($seq1 ^ $seq2) =~ tr/\001-\255//; |
193
|
0
|
|
|
|
|
|
$self->hammingdistN($hammingN); |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
# print $self->infilebasename,":\n"; |
196
|
|
|
|
|
|
|
# print ">>s1: $seq1\n"; |
197
|
|
|
|
|
|
|
# print ">>s2: $seq2\n"; |
198
|
|
|
|
|
|
|
# print "** dhN = ".$self->hammingdistN."\n"; |
199
|
|
|
|
|
|
|
# print "+++\n"; |
200
|
|
|
|
|
|
|
} |
201
|
|
|
|
|
|
|
|
202
|
1
|
|
|
1
|
|
13
|
no Moose; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
9
|
|
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
=head1 NAME |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
Bio::RNA::RNAaliSplit - Split and deconvolute structural RNA multiple |
209
|
|
|
|
|
|
|
sequence alignments |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
=head1 VERSION |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
Version 0.04 |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
=cut |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
=head1 SYNOPSIS |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
This module is a L<Moose> handler for horizontal splitting of |
220
|
|
|
|
|
|
|
structural RNA multiple sequence alignments (MSA). It employs third |
221
|
|
|
|
|
|
|
party tools (RNAalifold, RNAz, R-scape) for classification of |
222
|
|
|
|
|
|
|
subalignments, each folding into a common consensus structure. |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
=head1 AUTHOR |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
Michael T. Wolfinger, C<< <michael at wolfinger.eu> >> |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
=head1 BUGS |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
Please report any bugs or feature requests to |
231
|
|
|
|
|
|
|
C<bug-bio-rna-rnaalisplit at rt.cpan.org>, or through the web |
232
|
|
|
|
|
|
|
interface at |
233
|
|
|
|
|
|
|
L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Bio-RNA-RNAaliSplit>. |
234
|
|
|
|
|
|
|
I will be notified, and then you'll automatically be notified of |
235
|
|
|
|
|
|
|
progress on your bug as I make changes. |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
=head1 SUPPORT |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
You can find documentation for this module with the perldoc command. |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
perldoc Bio::RNA::RNAaliSplit |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
You can also look for information at: |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
=over 4 |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
=item * RT: CPAN's request tracker (report bugs here) |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=Bio-RNA-RNAaliSplit> |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
=item * AnnoCPAN: Annotated CPAN documentation |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
L<http://annocpan.org/dist/Bio-RNA-RNAaliSplit> |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
=item * CPAN Ratings |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
L<http://cpanratings.perl.org/d/Bio-RNA-RNAaliSplit> |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
=item * Search CPAN |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
L<http://search.cpan.org/dist/Bio-RNA-RNAaliSplit/> |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
=back |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
=head1 LICENSE AND COPYRIGHT |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
Copyright 2017 Michael T. Wolfinger <michael@wolfinger.eu> and <michael.wolfinger@univie.ac.at> |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or |
273
|
|
|
|
|
|
|
modify it under the terms of the GNU Affero General Public |
274
|
|
|
|
|
|
|
License as published by the Free Software Foundation; either |
275
|
|
|
|
|
|
|
version 3 of the License, or (at your option) any later version. |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful, |
278
|
|
|
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of |
279
|
|
|
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
280
|
|
|
|
|
|
|
Affero General Public License for more details. |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
You should have received a copy of the GNU Affero General Public |
283
|
|
|
|
|
|
|
License along with this program. If not, see |
284
|
|
|
|
|
|
|
L<http://www.gnu.org/licenses/>. |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
=cut |
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
1; # End of Bio::RNA::RNAaliSplit |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
|