File Coverage

Bio/Seq/Meta.pm
Criterion Covered Total %
statement 103 125 82.4
branch 46 70 65.7
condition 15 37 40.5
subroutine 17 23 73.9
pod 16 17 94.1
total 197 272 72.4


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Seq::Meta
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Heikki Lehvaslaiho
7             #
8             # Copyright Heikki Lehvaslaiho
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Seq::Meta - Generic superclass for sequence objects with
17             residue-based meta information
18              
19             =head1 SYNOPSIS
20              
21             use Bio::LocatableSeq;
22             use Bio::Seq::Meta;
23             use Bio::Tools::OddCodes;
24             use Bio::SeqIO;
25              
26             my $seq = Bio::Seq::Meta->new(-id=>'test',
27             -seq=>'ACTGCTAGCT',
28             -start=>2434,
29             -end=>2443,
30             -strand=>1,
31             -verbose=>1, # to see warnings
32             );
33              
34             # the existing sequence object can be a Bio::PrimarySeq, too
35              
36             # to test this is a meta seq object
37             $seq->isa("Bio::Seq::Meta")
38             || $seq->throw("$seq is not a Bio::Seq::Meta");
39              
40              
41             $seq->meta('1234567890');
42             $seq = Bio::Seq::Meta->new(-id=>'test',
43             -seq=>'HACILMIFGT',
44             -start=>2434,
45             -end=>2443,
46             -strand=>1,
47             -meta=>'1234567890',
48             -verbose=>1, # to see warnings
49             );
50              
51             # accessors
52             $string = $seq->meta_text();
53             $substring = $seq->submeta_text(2,5);
54             $unique_key = $seq->accession_number();
55              
56             # storing output from Bio::Tools::OddCodes as meta data
57             my $protcodes = Bio::Tools::OddCodes->new(-seq => $seq);
58             my @codes = qw(structural chemical functional charge hydrophobic);
59             map { $seq->named_meta($_, ${$protcodes->$_($seq) } )} @codes;
60              
61             my $out = Bio::SeqIO->new(-format=>'metafasta');
62             $out->write_seq($seq);
63              
64             =head1 DESCRIPTION
65              
66             This class implements generic methods for sequences with residue-based
67             meta information. Meta sequences with meta data are Bio::LocatableSeq
68             objects with additional methods to store that meta information. See
69             L and L.
70              
71             The meta information in this class is always one character per residue
72             long and blank values are space characters (ASCII 32).
73              
74             After the latest rewrite, the meta information no longer covers all
75             the residues automatically. Methods to check the length of meta
76             information (L)and to see if the ends are flushed to the
77             sequence have been added (L). To force the old
78             functionality, set L to true.
79              
80             It is assumed that meta data values do not depend on the nucleotide
81             sequence strand value.
82              
83             Application specific implementations should inherit from this class to
84             override and add to these methods.
85              
86             L allows for more complex meta values (scalars
87             or objects) to be used.
88              
89             =head2 Method naming
90              
91             Character based meta data is read and set by method meta() and its
92             variants. These are the suffixes and prefixes used in the variants:
93              
94             [named_] [sub] meta [_text]
95              
96             =over 3
97              
98             =item _text
99              
100             Suffix B<_text> guaranties that output is a string. Note that it does
101             not limit the input.
102              
103             In this implementation, the output is always text, so these methods
104             are redundant.
105              
106             =item sub
107              
108             Prefix B, like in subseq(), means that the method applies to sub
109             region of the sequence range and takes start and end as arguments.
110             Unlike subseq(), these methods are able to set values. If the range
111             is not defined, it defaults to the complete sequence.
112              
113             =item named
114              
115             Prefix B in method names allows the used to attach multiple
116             meta strings to one sequence by explicitly naming them. The name is
117             always the first argument to the method. The "unnamed" methods use the
118             class wide default name for the meta data and are thus special cases
119             "named" methods.
120              
121             Note that internally names are keys in a hash and any misspelling of a
122             name will silently store the data under a wrong name. The used names
123             (keys) can be retrieved using method meta_names(). See L.
124              
125             =back
126              
127             =head1 NOTE
128              
129             This Bio::Seq::MetaI implementation inherits from Bio::LocatableSeq, which
130             itself inherits from Bio::PrimarySeq. It is not a Bio::SeqI, so bless-ing
131             objects of this class into a Bio::SeqI or vice versa and will not work as
132             expected (see bug 2262). This may be addressed in a future refactor of
133             Bio::LocatableSeq.
134              
135              
136             =head1 SEE ALSO
137              
138             L,
139             L,
140             L
141              
142             =head1 FEEDBACK
143              
144             =head2 Mailing Lists
145              
146             User feedback is an integral part of the evolution of this and other
147             Bioperl modules. Send your comments and suggestions preferably to one
148             of the Bioperl mailing lists. Your participation is much appreciated.
149              
150             bioperl-l@bioperl.org - General discussion
151             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
152              
153             =head2 Support
154              
155             Please direct usage questions or support issues to the mailing list:
156              
157             I
158              
159             rather than to the module maintainer directly. Many experienced and
160             reponsive experts will be able look at the problem and quickly
161             address it. Please include a thorough description of the problem
162             with code and data examples if at all possible.
163              
164             =head2 Reporting Bugs
165              
166             Report bugs to the Bioperl bug tracking system to help us keep track
167             the bugs and their resolution. Bug reports can be submitted via the
168             web:
169              
170             https://github.com/bioperl/bioperl-live/issues
171              
172             =head1 AUTHOR - Heikki Lehvaslaiho
173              
174             Email heikki-at-bioperl-dot-org
175              
176             =head1 CONTRIBUTORS
177              
178             Chad Matsalla, bioinformatics@dieselwurks.com
179              
180             Aaron Mackey, amackey@virginia.edu
181              
182             =head1 APPENDIX
183              
184             The rest of the documentation details each of the object methods.
185             Internal methods are usually preceded with a _
186              
187             =cut
188              
189              
190             # Let the code begin...
191              
192              
193             package Bio::Seq::Meta;
194 9     9   470 use vars qw($DEFAULT_NAME $GAP $META_GAP);
  9         11  
  9         434  
195 9     9   28 use strict;
  9         10  
  9         168  
196              
197             #use overload '""' => \&to_string;
198              
199 9     9   24 use base qw(Bio::LocatableSeq Bio::Seq::MetaI);
  9         12  
  9         3617  
200              
201              
202             BEGIN {
203              
204 9     9   15 $DEFAULT_NAME = 'DEFAULT';
205 9         13 $GAP = '-';
206 9         11336 $META_GAP = ' ';
207             }
208              
209             =head2 new
210              
211             Title : new
212             Usage : $metaseq = Bio::Seq::Meta->new
213             ( -meta => 'aaaaaaaabbbbbbbb',
214             -seq => 'TKLMILVSHIVILSRM'
215             -id => 'human_id',
216             -accession_number => 'S000012',
217             );
218             Function: Constructor for Bio::Seq::Meta class, meta data being in a
219             string. Note that you can provide an empty quality string.
220             Returns : a new Bio::Seq::Meta object
221              
222             =cut
223              
224              
225             sub new {
226 195     195 1 1339 my ($class, @args) = @_;
227              
228 195         443 my $self = $class->SUPER::new(@args);
229              
230 195         490 my($meta, $forceflush, $nm) =
231             $self->_rearrange([qw(META
232             FORCE_FLUSH
233             NAMED_META)],
234             @args);
235              
236             #$self->{'_meta'} = {};
237 195         460 $self->{'_meta'}->{$DEFAULT_NAME} = "";
238              
239 195 50       276 $meta && $self->meta($meta);
240 195 100 66     320 if ($nm && ref($nm) eq 'HASH') {
241 1         5 while (my ($name, $meta) = each %$nm) {
242 2         3 $self->named_meta($name, $meta);
243             }
244             }
245 195 100       250 $forceflush && $self->force_flush($forceflush);
246              
247 195         409 return $self;
248             }
249              
250              
251             =head2 meta
252              
253             Title : meta
254             Usage : $meta_values = $obj->meta($values_string);
255             Function:
256              
257             Get and set method for the meta data starting from residue
258             position one. Since it is dependent on the length of the
259             sequence, it needs to be manipulated after the sequence.
260              
261             The length of the returned value always matches the length
262             of the sequence, if force_flush() is set. See L.
263              
264             Returns : meta data in a string
265             Args : new value, string, optional
266              
267             =cut
268              
269             sub meta {
270 15     15 1 33 shift->named_meta($DEFAULT_NAME, shift);
271             }
272              
273             =head2 meta_text
274              
275             Title : meta_text
276             Usage : $meta_values = $obj->meta_text($values_arrayref);
277             Function: Variant of meta() guarantied to return a textual
278             representation of meta data. For details, see L.
279             Returns : a string
280             Args : new value, optional
281              
282             =cut
283              
284             sub meta_text {
285 0     0 1 0 shift->meta(shift);
286             }
287              
288             =head2 named_meta
289              
290             Title : named_meta()
291             Usage : $meta_values = $obj->named_meta($name, $values_arrayref);
292             Function: A more general version of meta(). Each meta data set needs
293             to be named. See also L.
294             Returns : a string
295             Args : scalar, name of the meta data set
296             new value, optional
297              
298             =cut
299              
300             sub named_meta {
301 73     73 1 3412 my ($self, $name, $value) = @_;
302              
303 73   33     139 $name ||= $DEFAULT_NAME;
304 73 100       135 if( defined $value) {
305              
306 45 50       85 $self->throw("I need a scalar value, not [". ref($value). "]")
307             if ref($value);
308              
309             # test for length
310 45         109 my $diff = $self->length - CORE::length($value);
311 45 100       73 if ($diff > 0) {
312 3         7 $value .= (" " x $diff);
313             }
314              
315 45         71 $self->{'_meta'}->{$name} = $value;
316              
317             #$self->_test_gap_positions($name) if $self->verbose > 0;
318             }
319              
320             return " " x $self->length
321 73 50 66     126 if $self->force_flush && not defined $self->{'_meta'}->{$name};
322              
323              
324 73 100       96 $self->_do_flush if $self->force_flush;
325              
326 73         171 return $self->{'_meta'}->{$name};
327             }
328              
329             =head2 _test_gap_positions
330              
331             Title : _test_gap_positions
332             Usage : $meta_values = $obj->_test_gap_positions($name);
333             Function: Internal test for correct position of gap characters.
334             Gap being only '-' this time.
335              
336             This method is called from named_meta() when setting meta
337             data but only if verbose is positive as this can be an
338             expensive process on very long sequences. Set verbose(1) to
339             see warnings when gaps do not align in sequence and meta
340             data and turn them into errors by setting verbose(2).
341              
342             Returns : true on success, prints warnings
343             Args : none
344              
345             =cut
346              
347             sub _test_gap_positions {
348 0     0   0 my $self = shift;
349 0         0 my $name = shift;
350 0         0 my $success = 1;
351              
352 0 0       0 $self->seq || return $success;
353 0         0 my $len = CORE::length($self->seq);
354 0         0 for (my $i=0; $i < $len; $i++) {
355 0         0 my $s = substr $self->{seq}, $i, 1;
356 0         0 my $m = substr $self->{_meta}->{$name}, $i, 1;
357 0 0 0     0 $self->warn("Gap mismatch [$m/$s] in column [". ($i+1). "] of [$name] meta data in seq [". $self->id. "]")
      0        
358             and $success = 0
359             if ($s eq $META_GAP) && $s ne $m;
360             }
361 0         0 return $success;
362             }
363              
364             =head2 named_meta_text
365              
366             Title : named_meta_text()
367             Usage : $meta_values = $obj->named_meta_text($name, $values_arrayref);
368             Function: Variant of named_meta() guarantied to return a textual
369             representation of the named meta data.
370             For details, see L.
371             Returns : a string
372             Args : scalar, name of the meta data set
373             new value, optional
374              
375             =cut
376              
377             sub named_meta_text {
378 0     0 1 0 shift->named_meta(@_);
379             }
380              
381             =head2 submeta
382              
383             Title : submeta
384             Usage : $subset_of_meta_values = $obj->submeta(10, 20, $value_string);
385             $subset_of_meta_values = $obj->submeta(10, undef, $value_string);
386             Function:
387              
388             Get and set method for meta data for subsequences.
389              
390             Numbering starts from 1 and the number is inclusive, ie 1-2
391             are the first two residue of the sequence. Start cannot be
392             larger than end but can be equal.
393              
394             If the second argument is missing the returned values
395             should extend to the end of the sequence.
396              
397             The return value may be a string or an array reference,
398             depending on the implementation. If in doubt, use
399             submeta_text() which is a variant guarantied to return a
400             string. See L.
401              
402             Returns : A reference to an array or a string
403             Args : integer, start position
404             integer, end position, optional when a third argument present
405             new value, optional
406              
407             =cut
408              
409             sub submeta {
410 5     5 1 15 shift->named_submeta($DEFAULT_NAME, @_);
411             }
412              
413             =head2 submeta_text
414              
415             Title : submeta_text
416             Usage : $meta_values = $obj->submeta_text(20, $value_string);
417             Function: Variant of submeta() guarantied to return a textual
418             representation of meta data. For details, see L.
419             Returns : a string
420             Args : new value, optional
421              
422              
423             =cut
424              
425             sub submeta_text {
426 0     0 1 0 shift->submeta(@_);
427             }
428              
429             =head2 named_submeta
430              
431             Title : named_submeta
432             Usage : $subset_of_meta_values = $obj->named_submeta($name, 10, 20, $value_string);
433             $subset_of_meta_values = $obj->named_submeta($name, 10);
434             Function: Variant of submeta() guarantied to return a textual
435             representation of meta data. For details, see L.
436             Returns : A reference to an array or a string
437             Args : scalar, name of the meta data set
438             integer, start position
439             integer, end position, optional when a third argument present
440             new value, optional
441              
442             =cut
443              
444             sub named_submeta {
445 8     8 1 12 my ($self, $name, $start, $end, $value) = @_;
446              
447 8   33     14 $name ||= $DEFAULT_NAME;
448 8   100     16 $start ||=1;
449              
450              
451 8 50 33     55 $start =~ /^[+]?\d+$/ and $start > 0 or
452             $self->throw("Need at least a positive integer start value");
453              
454 8 100       12 if ($value) {
455 3   66     9 $end ||= $start+length($value)-1;
456 3 50       8 $self->warn("You are setting meta values beyond the length of the sequence\n".
457             "[$start > ". length($self->seq)."] in sequence ". $self->id)
458             if $start > length $self->seq;
459              
460             # pad meta data if needed
461 3 50       7 $self->{_meta}->{$name} = () unless defined $self->{_meta}->{$name};
462 3 100       6 if (length($self->{_meta}->{$name}) < $start) {
463 1         5 $self->{'_meta'}->{$name} .= " " x ( $start - length($self->{'_meta'}->{$name}) -1);
464             }
465              
466 3         5 my $tail = '';
467             $tail = substr ($self->{_meta}->{$name}, $start-1+length($value))
468 3 100       10 if length($self->{_meta}->{$name}) >= $start-1+length($value);
469            
470 3         6 substr ($self->{_meta}->{$name}, --$start) = $value;
471 3         4 $self->{_meta}->{$name} .= $tail;
472              
473 3         14 return substr ($self->{_meta}->{$name}, $start, $end - $start + 1);
474              
475             } else {
476              
477 5 100       13 $end or $end = length $self->seq;
478              
479             # pad meta data if needed
480 5 50       10 if (length($self->{_meta}->{$name}) < $end) {
481 0         0 $self->{'_meta'}->{$name} .= " " x ( $start - length($self->{'_meta'}->{$name}));
482             }
483              
484 5         24 return substr ($self->{_meta}->{$name}, $start-1, $end - $start + 1)
485             }
486             }
487              
488              
489             =head2 named_submeta_text
490              
491             Title : named_submeta_text
492             Usage : $meta_values = $obj->named_submeta_text($name, 20, $value_string);
493             Function: Variant of submeta() guarantied to return a textual
494             representation of meta data. For details, see L.
495             Returns : a string
496             Args : scalar, name of the meta data
497             Args : integer, start position, optional
498             integer, end position, optional
499             new value, optional
500              
501             =cut
502              
503             sub named_submeta_text {
504 0     0 1 0 shift->named_submeta(@_);
505             }
506              
507             =head2 meta_names
508              
509             Title : meta_names
510             Usage : @meta_names = $obj->meta_names()
511             Function: Retrieves an array of meta data set names. The default
512             (unnamed) set name is guarantied to be the first name.
513             Returns : an array of names
514             Args : none
515              
516             =cut
517              
518             sub meta_names {
519 57     57 1 750 my ($self) = @_;
520              
521 57         39 my @r;
522 57         37 foreach ( sort keys %{$self->{'_meta'}} ) {
  57         139  
523 74 100       129 push (@r, $_) unless $_ eq $DEFAULT_NAME;
524             }
525 57 100       102 unshift @r, $DEFAULT_NAME if $self->{'_meta'}->{$DEFAULT_NAME};
526 57         92 return @r;
527             }
528              
529              
530             =head2 meta_length
531              
532             Title : meta_length()
533             Usage : $meeta_len = $obj->meta_length();
534             Function: return the number of elements in the meta set
535             Returns : integer
536             Args : -
537              
538             =cut
539              
540             sub meta_length {
541 2     2 1 4 my ($self) = @_;
542 2         4 return $self->named_meta_length($DEFAULT_NAME);
543             }
544              
545              
546             =head2 named_meta_length
547              
548             Title : named_meta_length()
549             Usage : $meta_len = $obj->named_meta_length($name);
550             Function: return the number of elements in the named meta set
551             Returns : integer
552             Args : -
553              
554             =cut
555              
556             sub named_meta_length {
557 48     48 1 33 my ($self, $name) = @_;
558 48   33     55 $name ||= $DEFAULT_NAME;
559 48         110 return length ($self->{'_meta'}->{$name});
560             }
561              
562              
563             =head2 force_flush
564              
565             Title : force_flush()
566             Usage : $force_flush = $obj->force_flush(1);
567             Function: Automatically pad with empty values or truncate meta values
568             to sequence length. Not done by default.
569             Returns : boolean 1 or 0
570             Args : optional boolean value
571              
572             Note that if you turn this forced padding off, the previously padded
573             values are not changed.
574              
575             =cut
576              
577             sub force_flush {
578 155     155 1 117 my ($self, $value) = @_;
579              
580 155 100       200 if (defined $value) {
581 6 100       9 if ($value) {
582 4         6 $self->{force_flush} = 1;
583 4         6 $self->_do_flush;
584             } else {
585 2         3 $self->{force_flush} = 0;
586             }
587             }
588              
589 155         321 return $self->{force_flush};
590             }
591              
592              
593             =head2 _do_flush
594              
595             Title : _do_flush
596             Usage :
597             Function: internal method to do the force that meta values are same
598             length as the sequence . Called from L
599             Returns :
600             Args :
601              
602             =cut
603              
604              
605             sub _do_flush {
606 12     12   11 my ($self) = @_;
607              
608 12         20 foreach my $name ( ('DEFAULT', $self->meta_names) ) {
609              
610             # elongnation
611 22 100       33 if ($self->length > $self->named_meta_length($name)) {
    100          
612 6         12 $self->{'_meta'}->{$name} .= $META_GAP x ($self->length - $self->named_meta_length($name)) ;
613             }
614             # truncation
615             elsif ( $self->length < $self->named_meta_length($name) ) {
616 2         4 $self->{_meta}->{$name} = substr($self->{_meta}->{$name}, 0, $self->length-1);
617             }
618             }
619              
620             }
621              
622              
623             =head2 is_flush
624              
625             Title : is_flush
626             Usage : $is_flush = $obj->is_flush()
627             or $is_flush = $obj->is_flush($my_meta_name)
628             Function: Boolean to tell if all meta values are in
629             flush with the sequence length.
630             Returns true if force_flush() is set
631             Set verbosity to a positive value to see failed meta sets
632             Returns : boolean 1 or 0
633             Args : optional name of the meta set
634              
635             =cut
636              
637             sub is_flush {
638              
639 3     3 1 5 my ($self, $name) = shift;
640              
641 3 50       5 return 1 if $self->force_flush;
642              
643 3         3 my $sticky = '';
644              
645              
646 3 50       5 if ($name) {
647 0 0       0 $sticky .= "$name " if $self->length != $self->named_meta_length($name);
648             } else {
649 3         4 foreach my $m ($self->meta_names) {
650 1 50 33     3 $sticky .= "$m " if ($self->named_meta_length($m) > 0) && ($self->length != $self->named_meta_length($m));
651             }
652             }
653              
654 3 50       5 if ($sticky) {
655 0 0       0 print "These meta set are not flush: $sticky\n" if $self->verbose;
656 0         0 return 0;
657             }
658              
659 3         9 return 1;
660             }
661              
662              
663             =head1 Bio::PrimarySeqI methods
664              
665             =head2 revcom
666              
667             Title : revcom
668             Usage : $newseq = $seq->revcom();
669             Function: Produces a new Bio::Seq::MetaI implementing object where
670             the order of residues and their meta information is reversed.
671             Returns : A new (fresh) Bio::Seq::Meta object
672             Args : none
673             Throws : if the object returns false on is_flush()
674              
675             Note: The method does nothing to meta values, it reorders them, only.
676              
677             =cut
678              
679             sub revcom {
680 2     2 1 4 my $self = shift;
681              
682 2 50       4 $self->throw("Can not get a reverse complement. The object is not flush.")
683             unless $self->is_flush;
684              
685 2         8 my $new = $self->SUPER::revcom;
686 2         2 foreach (keys %{$self->{_meta}}) {
  2         6  
687 2         5 $new->named_meta($_, scalar reverse $self->{_meta}->{$_} );
688             };
689 2         7 return $new;
690             }
691              
692             =head2 trunc
693              
694             Title : trunc
695             Usage : $subseq = $seq->trunc(10,100);
696             Function: Provides a truncation of a sequence together with meta data
697             Returns : a fresh Bio::Seq::Meta implementing object
698             Args : Two integers denoting first and last residue of the sub-sequence.
699              
700             =cut
701              
702             sub trunc {
703 1     1 1 1 my ($self, $start, $end) = @_;
704              
705             # test arguments
706 1 50 33     10 $start =~ /^[+]?\d+$/ and $start > 0 or
707             $self->throw("Need at least a positive integer start value as start");
708 1 50 33     7 $end =~ /^[+]?\d+$/ and $end > 0 or
709             $self->throw("Need at least a positive integer start value as end");
710 1 50       2 $end >= $start or
711             $self->throw("End position has to be larger or equal to start");
712 1 50       3 $end <= $self->length or
713             $self->throw("End position can not be larger than sequence length");
714              
715 1         5 my $new = $self->SUPER::trunc($start, $end);
716 1         2 $start--;
717 1         1 foreach (keys %{$self->{_meta}}) {
  1         3  
718             $new->named_meta($_,
719 1         3 substr($self->{_meta}->{$_}, $start, $end - $start)
720             );
721             };
722 1         4 return $new;
723             }
724              
725              
726             sub to_string {
727 0     0 0   my ($self) = @_;
728 0           my $out = Bio::SeqIO->new(-format=>'metafasta');
729 0           $out->write_seq($self);
730 0           return 1;
731             }
732              
733             1;