File Coverage

blib/lib/Bio/RNA/Barriers/Minimum.pm
Criterion Covered Total %
statement 62 83 74.7
branch 11 18 61.1
condition 3 6 50.0
subroutine 18 22 81.8
pod 7 8 87.5
total 101 137 73.7


line stmt bran cond sub pod time code
1             package Bio::RNA::Barriers::Minimum;
2             our $VERSION = '0.03';
3              
4 12     12   196 use 5.012;
  12         43  
5 12     12   101 use strict;
  12         43  
  12         313  
6 12     12   73 use warnings;
  12         26  
  12         392  
7              
8 12     12   6853 use Moose;
  12         5763197  
  12         74  
9 12     12   101216 use MooseX::StrictConstructor;
  12         386396  
  12         52  
10 12     12   120855 use Moose::Util::TypeConstraints;
  12         33  
  12         105  
11 12     12   28693 use namespace::autoclean;
  12         31  
  12         63  
12              
13 12     12   7570 use autodie qw(:all);
  12         175982  
  12         57  
14 12     12   239122 use overload q{""} => 'stringify';
  12         31  
  12         51  
15              
16 12     12   857 use Scalar::Util qw(blessed);
  12         28  
  12         610  
17 12     12   89 use List::Util qw(max);
  12         24  
  12         592  
18 12     12   7340 use List::MoreUtils qw(zip);
  12         164699  
  12         84  
19              
20             #### Special types for attribute checking.
21             subtype 'RNAStruct' => (
22             as 'Str',
23             where { m{ ^ [(.)]+ $ }x },
24             message {
25             "Only '(', ')', and '.' allowed in structure string, found '$_'"
26             },
27             );
28              
29             subtype 'DisconSaddle' => (
30             as 'Str',
31             where { m{ ^ ~+ $ }x },
32             message {
33             "Only '~' allowed in disconnected saddle string, found '$_'"
34             },
35             );
36              
37             # index - index of basins ordered by energy; 1 is lowest
38             # struct - struct of lowest energy in minimums
39             # mfe - free energy of the basin's local minimum
40             # father_index - index of father basin (the basin this one is merged to)
41             # barrier_height - height of energy barrier (in kcal/mol) to minimum this
42             # one is merged to (relative to this minimum)
43             my @default_attribs = qw( index struct mfe father_index barrier_height);
44             my @default_attrib_args = (is => 'rw', required => 1);
45             my %default_attrib_isa
46             = &zip(\@default_attribs, [qw(Int RNAStruct Num Int Num)]);
47             has $_ => (@default_attrib_args, isa => $default_attrib_isa{$_})
48             foreach @default_attribs;
49              
50             # Return true iff this is the mfe basin 1.
51             sub is_global_min {
52 0     0 1 0 my $self = shift;
53 0         0 my $is_global_min = $self->index == 1;
54 0         0 return $is_global_min;
55             }
56              
57             # Optional attributes generated by Barriers options --bsize and --saddle.
58             # Descriptions in quotes are from Barriers tutorial at
59             # https://www.tbi.univie.ac.at/RNA/tutorial/#sec4_2
60             # merged_struct_count - 'numbers of structures in the basin we merge with'
61             # Given is the number of structures in the *current* basin
62             # (including merged ones) *at the time of merging*. For minimum 1,
63             # this is close to the total number of input structures (except for
64             # disconnected structures and other missing ones (???).
65             # father_struct_count - 'number of basin which we merge to'
66             # Actually, it's the number of *structures* in the basin that we
67             # merge to (father basin) *at the time of merging*.
68             # merged_basin_energy - 'free energy of the basin'
69             # This seems to be the free energy of (the partition function of)
70             # the basin---including all merged basins---at the time this basin
71             # is merged. For minimum 1, this corresponds to the ensemble free
72             # energy as far as it was enumerated by RNAsubopt (excluding
73             # disconnected structures).
74             # grad_struct_count - 'number of structures in this basin using gradient walk'
75             # This seems to be the actual number of structures only in this
76             # basin, excluding merged basins. What about --minh merging? Why
77             # doesn't this column sum up to exactly to the total number of
78             # structs if --max==Inf (some are missing)? Issues due to degenerate
79             # energies?
80             # grad_basin_energy - 'gradient basin (consisting of all structures where
81             # gradientwalk ends in the minimum)'
82             # This seems to be free energy of the basin without any merged
83             # basins. Summing up the partition functions corresponding to these
84             # energies, one obtains a free energy almost equal to the ensemble
85             # energy (up to rounding errors due to 6 digit precision).
86             my @bsize_attributes = qw(
87             merged_struct_count father_struct_count merged_basin_energy
88             grad_struct_count grad_basin_energy
89             );
90             my @opt_attributes = (@bsize_attributes, qw(saddle_struct));
91             # Define a bsize predicate only for the first bsize attribute. Ensure in
92             # BUILD that either all or none of the attributes are set.
93             my @common_attrib_args = (
94             is => 'ro',
95             lazy => 1,
96             default => sub { confess 'attribute undefined, did you use --bsize/--saddle?' },
97             );
98             # has $bsize_attributes[0] => (is => 'ro', predicate => 'has_bsize' );
99             # has $_ => (is => 'ro') foreach @bsize_attributes[1..$#bsize_attributes];
100             # has 'saddle_struct' => (is => 'ro', predicate => 'has_saddle_struct');
101             has $bsize_attributes[0] => (
102             @common_attrib_args,
103             isa => 'Num',
104             predicate => 'has_bsize',
105             writer => "_$bsize_attributes[0]", # private writer
106             );
107             has $_ => (@common_attrib_args, writer => "_$_") # private writer
108             foreach @bsize_attributes[1..$#bsize_attributes];
109             has 'saddle_struct' => (
110             @common_attrib_args,
111             isa => 'RNAStruct | DisconSaddle',
112             predicate => 'has_saddle_struct',
113             );
114              
115             # Optional reference to the father minimum.
116             has 'father' => (
117             is => 'rw',
118             trigger => \&_check_father,
119             # Use name 'has_father_REF' because has_father==false reads as if the
120             # min does not have a father at all.
121             # We don't need this, we always have it if we have a father.
122             # predicate => 'has_father_ref',
123             );
124              
125             sub _check_father {
126 526     526   961 my ($self, $father) = @_;
127 526 50 33     3023 confess 'Need a reference to another minimum to set father attribute'
128             unless blessed $father and $father->isa( __PACKAGE__ );
129 526 50       16171 confess "Father's index does not match the index used during construction"
130             unless $self->father_index == $father->index;
131             }
132              
133             # Returns true iff the minimum has a father minimum it has been merged to.
134             sub has_father {
135 566     566 1 938 my $self = shift;
136 566         16749 my $has_father = $self->father_index > 0;
137 566         1981 return $has_father;
138             }
139              
140              
141             # Minimum is connected to basin 1 (mfe).
142             has 'is_connected' => (
143             is => 'ro',
144             lazy => 1,
145             init_arg => undef, # cannot be set manually
146             builder => '_build_is_connected',
147             );
148              
149             sub _build_is_connected {
150 12     12   21 my $self = shift;
151              
152 12 100       365 return 1 if $self->index == 1; # this is the mfe basin
153 11 100       88 return 0 unless $self->has_father; # basin has no father
154              
155             # confess 'Reference to father minimum has not been set, cannot proceed.'
156             # unless $self->has_father_ref;
157              
158 10         307 my $is_connected = $self->father->is_connected;
159 10         312 return $is_connected;
160             }
161              
162             # Parse passed line read from barriers file.
163             around BUILDARGS => sub {
164             my $orig = shift;
165             my $class = shift;
166              
167             my @args; # as passed to the constructor
168             if ( @_ == 1 && !ref $_[0] ) { # process line from bar file
169             my $input_line = shift;
170             my @fields = split /\s+/, $input_line;
171             shift @fields if $fields[0] eq q{}; # drop empty first field
172              
173             if (@fields < @default_attribs) {
174             confess "Input line has not enough fields: $input_line";
175             }
176              
177             # Add default args
178             push @args, $_ => shift @fields foreach @default_attribs;
179             # @args = map { $_ => shift @fields } @attributes;
180              
181             # Add saddle struct if present
182             if (@fields == 1 or @fields == @opt_attributes) {
183             push @args, saddle_struct => shift @fields;
184             }
185              
186             # Add bsize attributes if present
187             if (@fields == @bsize_attributes) {
188             push @args, $_ => shift @fields foreach @bsize_attributes;
189             }
190              
191             confess "Unrecognized number of fields on input line:\n$input_line"
192             unless @fields == 0; # all fields used up?
193             }
194             else {
195             @args = @_;
196             }
197             return $class->$orig(@args);
198             };
199              
200             sub BUILD {
201 578     578 0 1020 my $self = shift;
202              
203             # Ensure presence or absence of all bsize attributes
204 578         1114 my $defined_count = grep {defined $self->{$_}} @bsize_attributes;
  2890         6249  
205 578 50 66     18025 confess "Need to define all or none of the --bsize attributes ",
206             join q{, }, @bsize_attributes
207             unless $defined_count == 0 or $defined_count == @bsize_attributes;
208             }
209              
210             # Determine all ancestor minima of this minimum, i.e. this minimum's
211             # father, grand father, grand grand father etc. in this order.
212             # Returns list of all ancestors (may be empty if min is disconnected).
213             sub ancestors {
214 0     0 1 0 my ($self) = @_;
215              
216 0         0 my $ancestor = $self;
217 0         0 my @ancestors;
218 0         0 while ($ancestor->has_father) {
219             # confess 'Need father reference to determine ancestors'
220             # unless $ancestor->has_father_ref;
221 0         0 push @ancestors, $ancestor->father;
222 0         0 $ancestor = $ancestor->father;
223             }
224              
225 0         0 return @ancestors;
226             }
227              
228             # Stringify minimum to equal an entry line in the barriers output file.
229             # Format strings are taken from Barriers' C source code.
230             sub stringify {
231 528     528 1 1027 my $self = shift;
232              
233             # Default attributes
234 528         1004 my $min_string = $self->brief;
235              
236             # Add saddle struct if defined.
237 528 100       19765 if ($self->has_saddle_struct) {
238 264         7955 $min_string .= q{ } . $self->saddle_struct;
239             }
240              
241             # Add bsize attributes if defined.
242 528 100       18495 if ($self->has_bsize) {
243             $min_string .= sprintf " %12ld %8ld %10.6f %8ld %10.6f",
244 264         564 map {$self->{$_}} @bsize_attributes;
  1320         5309  
245             }
246              
247 528         2964 return $min_string;
248             }
249              
250             # Stringification method returning a more brief representation of the
251             # minimum containing only the index, min struct, its energy, the father's
252             # index, and the barrier height. This equals the output of Barriers if
253             # neither --bsize nor --saddle is given.
254             sub brief {
255 528     528 1 782 my $self = shift;
256              
257             # Default attributes
258             my $brief_string = sprintf "%4d %s %6.2f %4d %6.2f",
259 528         962 map {$self->{$_}} @default_attribs;
  2640         10358  
260              
261 528         1927 return $brief_string;
262             }
263              
264             # ABSOLUTE energy of the lowest structure connecting this basin to another
265             # one. The barrier height, in contrast, is the RELATIVE energy of the same
266             # structure w.r.t. to the basin's (local) mfe.
267             # BEWARE: if the basin does not have a father (father == 0), then the
268             # barrier height is (as reported by Barriers) given with respect to the global exploration
269             # threshold.
270             # Since this gives unexpected results, the saddle height is set to the
271             # global mfe for basin 1, and to Inf for disconnected basins.
272             sub saddle_height {
273 0     0 1   my $self = shift;
274              
275             # Mfe basin is connected to itself with a barrier of 0. Other
276             # fatherless basins are disconnected and thus have an unknown saddle
277             # height -- set to Inf.
278 0 0         my $barrier_height = $self->has_father ? $self->barrier_height
    0          
279             : $self->is_global_min ? 0
280             : 'Inf'
281             ;
282              
283             # Energy values from Bar file have only 2 digits precision.
284 0           my $saddle_height = sprintf "%.2f", $self->mfe + $barrier_height;
285 0           return $saddle_height;
286             }
287              
288             # Saddle height as described for saddle_height(), but with respect to the
289             # global mfe structure (basin 1).
290             sub global_saddle_height {
291 0     0 1   my $self = shift;
292              
293             # Move up in barrier tree until reachin basin 1 or realizing we are
294             # disconnected. Global saddle height is maximal encountered height.
295 0           my $ancestor = $self;
296 0           my $glob_sadd_height = $self->saddle_height;
297 0           while ($ancestor->father_index > 1) {
298 0           $ancestor = $ancestor->father;
299 0           $glob_sadd_height
300             = max $glob_sadd_height, $ancestor->saddle_height;
301             }
302              
303 0           return $glob_sadd_height;
304             }
305              
306             __PACKAGE__->meta->make_immutable;
307              
308             1;
309              
310              
311             __END__
312              
313             =pod
314              
315             =encoding UTF-8
316              
317             =head1 NAME
318              
319             Bio::RNA::Barriers::Minimum - Store a single local minimum
320             (macrostate) from the output of I<Barriers>
321              
322             =head1 SYNOPSIS
323              
324             use Bio::RNA::Barriers;
325              
326             my $min_string = '...'; # single line from .bar file
327             my $min = Bio::RNA::Barriers::Minimum->new($min_string);
328             # my $min2 = $results->get_min(3) # usually used like this
329              
330             print "$min\n"; # prints minimum as in the results file
331              
332             if ($min->has_bsize and $min->is_connected)
333             print "Minimum contributes an energy of ", $min->grad_basin_energy(),
334             " to the partition function."
335              
336              
337             =head1 DESCRIPTION
338              
339             Objects of this class repesent the individual local minima (macrostates) from
340             the results file of I<Barriers>. The construction is usually done
341             automatically by the results objects (cf. L<Bio::RNA::Barriers::Results>). The
342             methods can be used for various queries.
343              
344              
345             =head1 METHODS
346              
347             =head3 $min->new($results_file_line)
348              
349             Construct a minimum object from a single line of the I<Barriers> results file.
350              
351             =head3 $min->ancestors()
352              
353             Determine all ancestor minima of this minimum, i.e. this minimum's father,
354             grand father, grand grand father etc. in this order. Returns a list of all
355             ancestors (may be empty if min is disconnected).
356              
357             =head3 $min->has_bsize()
358              
359             Boolean. True iff the minimum provides information about the basin size as
360             computed by the I<Barriers> option C<--bsize>.
361              
362             =head3 $min->merged_struct_count()
363              
364             Given is the number of structures in the current basin (including merged ones)
365             B<at the time of merging>. For minimum 1, this is close to the total number of
366             input structures (except for disconnected structures and other missing ones
367             (???)).
368              
369             This attribute is only available if Barriers was used with the C<--bsize>
370             option. Use C<$min-E<gt>has_bsize()> to query this.
371              
372             =head3 $min->father_struct_count()
373              
374             The number of structures in the basin that we merge into (father basin) B<at
375             the time of merging>.
376              
377             This attribute is only available if Barriers was used with the C<--bsize>
378             option. Use C<$min-E<gt>has_bsize()> to query this.
379              
380             =head3 $min->merged_basin_energy()
381              
382             The free energy of (the partition function of) the basin -- including all
383             merged basins -- at the time B<this> basin is merged. For minimum 1, this
384             corresponds to the ensemble's free energy as far as it was enumerated by
385             RNAsubopt (excluding disconnected structures).
386              
387             This attribute is only available if Barriers was used with the C<--bsize>
388             option. Use C<$min-E<gt>has_bsize()> to query this.
389              
390             =head3 $min->grad_struct_count()
391              
392             The number of structures only in this gradient basin, excluding merged basins.
393              
394             Open questions: What about --minh merging? Why doesn't this column sum up to
395             exactly to the total number of structs if --max==Inf (some are missing)?
396             Issues due to degenerate energies?
397              
398             This attribute is only available if Barriers was used with the C<--bsize>
399             option. Use C<$min-E<gt>has_bsize()> to query this.
400              
401             =head3 $min->grad_basin_energy()
402              
403             Free energy of the basin without any merged basins. Summing
404             up the partition functions corresponding to these energies, one obtains a free
405             energy almost equal to the ensemble energy (up to rounding errors due to 6
406             digit precision, and of course up to the enumeration threshold used for
407             I<RNAsubopt>).
408              
409             This attribute is only available if Barriers was used with the C<--bsize>
410             option. Use C<$min-E<gt>has_bsize()> to query this.
411              
412             =head3 $min->is_global_min()
413              
414             Returns true iff this is the global minimum (i.e. basin 1).
415              
416             =head3 $min->index()
417              
418             1-based index of the minimum (as is the Barriers file).
419              
420             =head3 $min->struct()
421              
422             Returns the dot-bracket structure string of the minimum.
423              
424             =head3 $min->mfe()
425              
426             B<Local> minimum free energy of the basin (i.e. the minimum's energy).
427              
428             =head3 $min->father_index()
429              
430             Returns the index of the father minimum (i.e. the one this minimum has been
431             merged to).
432              
433             =head3 $min->barrier_height()
434              
435             Returns the barrier height (B<relative> energy difference of the saddle point
436             to the local minimum). For the B<absolute> energy of the saddle point, see
437             C<saddle_height()>.
438              
439             =head3 $min->saddle_height()
440              
441             B<Absolute> energy of the lowest structure connecting this basin to another
442             one. The barrier height, in contrast, is the B<relative> energy of the same
443             structure w.r.t. to the basin's (local) mfe.
444              
445             B<Beware>: if the basin does not have a father (father == 0), then the
446             reported saddle height is given with respect to the global exploration
447             threshold. This is strange but consistent with original Barriers files.
448              
449             =head3 $min->global_saddle_height()
450              
451             Saddle height as described for saddle_height(), but not with respect to
452             any neighbor minimum, but to the global mfe structure (basin 1).
453              
454             =head3 $min->saddle_struct()
455              
456             Returns the saddle structure via which it was merged to its father minimum.
457             If this attribute was not set (i.e. I<Barriers> was run without the
458             C<--saddle> option), it croaks when accessed.
459              
460             Use the C<has_saddle()> predicate to query the status of this attribute.
461              
462             =head3 $min->has_saddle_struct()
463              
464             Predicate for the C<saddle> attribute. True iff the minimum provides the
465             saddle structure via which it was merged to its father minimum, as computed by
466             the I<Barriers> option C<--saddle>. It can be queried via the C<saddle_struct>
467             method.
468              
469             =head3 $min->father()
470              
471             Returns (a reference to) the father minimum object.
472              
473             =head3 $min->has_father()
474              
475             Returns true iff the minimum has a father minimum it has been merged to.
476              
477             =head3 $min->is_connected()
478              
479             Boolean. True iff the minimum is connected to basin 1 (the mfe basin).
480              
481             =head3 $min->ancestors()
482              
483             Determine all ancestor minima of this minimum, i.e. this minimum's father,
484             grand father, grand grand father etc. in this order.
485              
486             =head3 $min->stringify()
487              
488             Stringify minimum to equal an entry line in the barriers output file.
489             Format strings are taken from Barriers' C source code.
490              
491             =head3 $min->brief()
492              
493             Stringification method returning a more brief representation of the
494             minimum containing only the index, min struct, its energy, the father's
495             index, and the barrier height. This equals the output of Barriers if
496             neither C<--bsize> nor C<--saddle> is given.
497              
498              
499             =head1 AUTHOR
500              
501             Felix Kuehnl, C<< <felix at bioinf.uni-leipzig.de> >>
502              
503             =head1 BUGS
504              
505             Please report any bugs or feature requests by raising an issue at
506             L<https://github.com/xileF1337/Bio-RNA-Barriers/issues>.
507              
508             You can also do so by mailing to C<bug-bio-rna-barmap at rt.cpan.org>,
509             or through the web interface at
510             L<https://rt.cpan.org/NoAuth/ReportBug.html?Queue=Bio-RNA-BarMap>. I will be
511             notified, and then you'll automatically be notified of progress on your bug as
512             I make changes.
513              
514              
515             =head1 SUPPORT
516              
517             You can find documentation for this module with the perldoc command.
518              
519             perldoc Bio::RNA::Barriers
520              
521              
522             You can also look for information at the official Barriers website:
523              
524             L<https://www.tbi.univie.ac.at/RNA/Barriers/>
525              
526              
527             =over 4
528              
529             =item * Github: the official repository
530              
531             L<https://github.com/xileF1337/Bio-RNA-Barriers>
532              
533              
534             =item * RT: CPAN's request tracker (report bugs here)
535              
536             L<https://rt.cpan.org/NoAuth/Bugs.html?Dist=Bio-RNA-Barriers>
537              
538             =item * AnnoCPAN: Annotated CPAN documentation
539              
540             L<http://annocpan.org/dist/Bio-RNA-Barriers>
541              
542             =item * CPAN Ratings
543              
544             L<https://cpanratings.perl.org/d/Bio-RNA-Barriers>
545              
546             =item * Search CPAN
547              
548             L<https://metacpan.org/release/Bio-RNA-Barriers>
549              
550             =back
551              
552              
553             =head1 LICENSE AND COPYRIGHT
554              
555             Copyright 2019-2021 Felix Kuehnl.
556              
557             This program is free software: you can redistribute it and/or modify
558             it under the terms of the GNU General Public License as published by
559             the Free Software Foundation, either version 3 of the License, or
560             (at your option) any later version.
561              
562             This program is distributed in the hope that it will be useful,
563             but WITHOUT ANY WARRANTY; without even the implied warranty of
564             MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
565             GNU General Public License for more details.
566              
567             You should have received a copy of the GNU General Public License
568             along with this program. If not, see L<http://www.gnu.org/licenses/>.
569              
570              
571             =cut
572              
573              
574             # End of Bio::RNA::Barriers::Minimum