File Coverage

blib/lib/Bio/RNA/RNAaliSplit.pm
Criterion Covered Total %
statement 42 107 39.2
branch 0 22 0.0
condition n/a
subroutine 14 17 82.3
pod 0 2 0.0
total 56 148 37.8


line stmt bran cond sub pod time code
1             # -*-CPerl-*-
2             # Last changed Time-stamp: <2017-03-09 20:50:15 michl>
3              
4             # Bio::RNA::RNAaliSplit.pm: Handler for horizontally splitting alignments
5              
6             package Bio::RNA::RNAaliSplit;
7              
8 1     1   101496 use version; our $VERSION = qv('0.04');
  1         1  
  1         7  
9 1     1   72 use Carp;
  1         1  
  1         47  
10 1     1   487 use Data::Dumper;
  1         6671  
  1         56  
11 1     1   473 use Moose;
  1         324777  
  1         6  
12 1     1   4675 use Moose::Util::TypeConstraints;
  1         1  
  1         8  
13 1     1   1708 use Path::Class::File;
  1         19445  
  1         24  
14 1     1   7 use Path::Class::Dir;
  1         1  
  1         14  
15 1     1   375 use Path::Class;
  1         257  
  1         40  
16 1     1   6 use File::Basename;
  1         1  
  1         102  
17 1     1   4 use IPC::Cmd qw(can_run run);
  1         1  
  1         40  
18 1     1   453 use Bio::AlignIO;
  1         90748  
  1         27  
19 1     1   572 use Storable 'dclone';
  1         2443  
  1         60  
20 1     1   7 use File::Path qw(make_path);
  1         1  
  1         833  
21              
22             subtype 'MyAln' => as class_type('Bio::AlignIO');
23              
24             coerce 'MyAln'
25             => from 'HashRef'
26             => via { Bio::AlignIO->new( %{ $_ } ) };
27              
28             has 'format' => (
29             is => 'ro',
30             isa => 'Str',
31             predicate => 'has_format',
32             default => 'ClustalW',
33             required => 1,
34             );
35              
36             has 'infilebasename' => (
37             is => 'rw',
38             isa => 'Str',
39             predicate => 'has_basename',
40             init_arg => undef,
41             );
42              
43             has 'alignment' => (
44             is => 'rw',
45             isa => 'MyAln',
46             predicate => 'has_aln',
47             coerce => 1,
48             );
49              
50             has 'next_aln' => (
51             is => 'rw',
52             isa => 'Bio::SimpleAlign',
53             predicate => 'has_next_aln',
54             init_arg => undef,
55             );
56              
57             has 'dump' => (
58             is => 'rw',
59             isa => 'Num',
60             predicate => 'has_dump_flag',
61             );
62              
63             has 'hammingdistN' => (
64             is => 'rw',
65             isa => 'Num',
66             default => '-1',
67             predicate => 'has_hammingN',
68             init_arg => undef,
69             );
70              
71             has 'hammingdistX' => (
72             is => 'rw',
73             isa => 'Num',
74             default => '-1',
75             predicate => 'has_hammingX',
76             init_arg => undef,
77             );
78              
79             with 'Bio::RNA::RNAaliSplit::FileDir';
80              
81             sub BUILD {
82 0     0 0   my $self = shift;
83 0           my $this_function = (caller(0))[3];
84 0 0         confess "ERROR [$this_function] \$self->ifile not available"
85             unless ($self->has_ifile);
86 0           $self->alignment({-file => $self->ifile,
87             -format => $self->format,
88             -displayname_flat => 1} ); # discard position in sequence IDs
89 0           $self->next_aln($self->alignment->next_aln);
90 0           $self->odir( [$self->ifile->dir,$self->odirn] );
91 0           my @created = make_path($self->odir, {error => \my $err});
92 0 0         if (@$err) {
93 0           confess "ERROR [$this_function] could not create output directory $self->odir";
94             }
95 0           $self->infilebasename(fileparse($self->ifile->basename, qr/\.[^.]*/));
96              
97 0 0         if ($self->has_dump_flag){
98             # dump ifile as aln in ClustalW format to odir/input
99 0           my $iodir = $self->odir->subdir('input');
100 0           mkdir($iodir);
101 0           my $ialnfile = file($iodir,$self->infilebasename.".aln");
102 0           my $alnio = Bio::AlignIO->new(-file => ">$ialnfile",
103             -format => "ClustalW",
104             -flush => 0,
105             -displayname_flat => 1 );
106 0           my $aln2 = $self->next_aln->select_noncont((1..$self->next_aln->num_sequences));
107 0           $alnio->write_aln($aln2);
108             # end dump input aln file
109             }
110              
111 0 0         if ($self->next_aln->num_sequences == 2){ $self->_hamming() }
  0            
112             }
113              
114             sub dump_subalignment {
115 0     0 0   my ($self,$alipathsegment,$token,$what) = @_;
116 0           my $this_function = (caller(0))[3];
117 0           my ($aln,$aln2,$name);
118              
119 0 0         croak "ERROR [$this_function] argument 'token' not provided"
120             unless (defined($token));
121              
122             # create output path
123 0           my $ids = join "_", @$what;
124 0 0         unless (defined($alipathsegment)){$alipathsegment = "tmp"}
  0            
125 0           my $oodir = $self->odir->subdir($alipathsegment);
126 0           mkdir($oodir);
127              
128             # create info file
129 0           my $oinfofile = file($oodir,$token.".info");
130 0 0         open my $oinfo, ">", $oinfofile or die $!;
131 0           foreach my $entry (@$what){
132 0           my $key = $entry-1;
133 0           my $val = ${$self->next_aln}{_order}->{$key};
  0            
134 0           print $oinfo join "\t", ($entry, $val, "\n");
135             }
136 0           close($oinfo);
137              
138             # create subalignment in Clustal and Stockholm format
139 0           my $oalifile_clustal = file($oodir,$token.".aln");
140 0           my $oalifile_stockholm = file($oodir,$token.".stk");
141 0           my $oali_clustal = Bio::AlignIO->new(-file => ">$oalifile_clustal",
142             -format => "ClustalW",
143             -flush => 0,
144             -displayname_flat => 1 );
145 0           my $oali_stockholm = Bio::AlignIO->new(-file => ">$oalifile_stockholm",
146             -format => "Stockholm",
147             -flush => 0,
148             -displayname_flat => 1 );
149 0           $aln = $self->next_aln->select_noncont(@$what);
150 0           $oali_clustal->write_aln( $aln );
151 0           $oali_stockholm->write_aln( $aln );
152              
153             # create subalignment fasta file
154 0           my $ofafile = file($oodir,$token.".fa");
155 0           my $ofa = Bio::AlignIO->new(-file => ">$ofafile",
156             -format => "fasta",
157             -flush => 0,
158             -displayname_flat => 1 );
159 0           $aln2 = $aln->remove_gaps;
160 0           $ofa->write_aln( $aln2 );
161              
162             # extract sequences from alignment and dump to .seq file
163             # NOTE that these sequences do contain gap symbols, intentionally (!)
164             # these can then be replaced by Ns to compute eg hamming distance
165 0           my $oseqfile = file($oodir,$token.".seq");
166 0 0         open my $seqfile, ">", $oseqfile or die $!;
167 0           foreach my $seq ($aln->each_seq) {
168 0           print $seqfile $seq->seq,"\n";
169             }
170 0           close($seqfile);
171              
172 0           return ( $oalifile_clustal,$oalifile_stockholm );
173             }
174              
175             sub _hamming {
176 0     0     my $self = shift;
177 0           my $this_function = (caller(0))[3];
178 0           my $hamming = -1;
179 0 0         croak "ERROR [$this_function] cannot compute Hamming distance for $self->next_aln->num_sequences sequences"
180             if ($self->next_aln->num_sequences != 2);
181              
182 0           my $aln = $self->next_aln->select_noncont((1,2));
183              
184             # compute Hamming distance of the aligned sequences, replacing gaps with Ns
185 0           my $alnN = dclone($aln);
186 0 0         croak("ERROR [$this_function] cannot replace gaps with Ns")
187             unless ($alnN->map_chars('-','N') == 1);
188 0           my $seq1 = $alnN->get_seq_by_pos(1)->seq;
189 0           my $seq2 = $alnN->get_seq_by_pos(2)->seq;
190 0 0         croak "ERROR [$this_function] sequence length differs"
191             unless(length($seq1)==length($seq2));
192 0           my $hammingN = ($seq1 ^ $seq2) =~ tr/\001-\255//;
193 0           $self->hammingdistN($hammingN);
194              
195             # print $self->infilebasename,":\n";
196             # print ">>s1: $seq1\n";
197             # print ">>s2: $seq2\n";
198             # print "** dhN = ".$self->hammingdistN."\n";
199             # print "+++\n";
200             }
201              
202 1     1   13 no Moose;
  1         1  
  1         9  
203              
204              
205              
206             =head1 NAME
207              
208             Bio::RNA::RNAaliSplit - Split and deconvolute structural RNA multiple
209             sequence alignments
210              
211             =head1 VERSION
212              
213             Version 0.04
214              
215             =cut
216              
217             =head1 SYNOPSIS
218              
219             This module is a L<Moose> handler for horizontal splitting of
220             structural RNA multiple sequence alignments (MSA). It employs third
221             party tools (RNAalifold, RNAz, R-scape) for classification of
222             subalignments, each folding into a common consensus structure.
223              
224             =head1 AUTHOR
225              
226             Michael T. Wolfinger, C<< <michael at wolfinger.eu> >>
227              
228             =head1 BUGS
229              
230             Please report any bugs or feature requests to
231             C<bug-bio-rna-rnaalisplit at rt.cpan.org>, or through the web
232             interface at
233             L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Bio-RNA-RNAaliSplit>.
234             I will be notified, and then you'll automatically be notified of
235             progress on your bug as I make changes.
236              
237              
238             =head1 SUPPORT
239              
240             You can find documentation for this module with the perldoc command.
241              
242             perldoc Bio::RNA::RNAaliSplit
243              
244              
245             You can also look for information at:
246              
247             =over 4
248              
249             =item * RT: CPAN's request tracker (report bugs here)
250              
251             L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=Bio-RNA-RNAaliSplit>
252              
253             =item * AnnoCPAN: Annotated CPAN documentation
254              
255             L<http://annocpan.org/dist/Bio-RNA-RNAaliSplit>
256              
257             =item * CPAN Ratings
258              
259             L<http://cpanratings.perl.org/d/Bio-RNA-RNAaliSplit>
260              
261             =item * Search CPAN
262              
263             L<http://search.cpan.org/dist/Bio-RNA-RNAaliSplit/>
264              
265             =back
266              
267              
268             =head1 LICENSE AND COPYRIGHT
269              
270             Copyright 2017 Michael T. Wolfinger <michael@wolfinger.eu> and <michael.wolfinger@univie.ac.at>
271              
272             This program is free software; you can redistribute it and/or
273             modify it under the terms of the GNU Affero General Public
274             License as published by the Free Software Foundation; either
275             version 3 of the License, or (at your option) any later version.
276              
277             This program is distributed in the hope that it will be useful,
278             but WITHOUT ANY WARRANTY; without even the implied warranty of
279             MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
280             Affero General Public License for more details.
281              
282             You should have received a copy of the GNU Affero General Public
283             License along with this program. If not, see
284             L<http://www.gnu.org/licenses/>.
285              
286             =cut
287              
288             1; # End of Bio::RNA::RNAaliSplit
289              
290