| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | # | 
| 2 |  |  |  |  |  |  | # BioPerl module for Bio::AlignIO::selex | 
| 3 |  |  |  |  |  |  |  | 
| 4 |  |  |  |  |  |  | #   based on the Bio::SeqIO::selex module | 
| 5 |  |  |  |  |  |  | #       by Ewan Birney | 
| 6 |  |  |  |  |  |  | #       and Lincoln Stein | 
| 7 |  |  |  |  |  |  | # | 
| 8 |  |  |  |  |  |  | #       and the SimpleAlign.pm module of Ewan Birney | 
| 9 |  |  |  |  |  |  | # | 
| 10 |  |  |  |  |  |  | # Copyright Peter Schattner | 
| 11 |  |  |  |  |  |  | # | 
| 12 |  |  |  |  |  |  | # You may distribute this module under the same terms as perl itself | 
| 13 |  |  |  |  |  |  | # _history | 
| 14 |  |  |  |  |  |  | # September 5, 2000 | 
| 15 |  |  |  |  |  |  | # POD documentation - main docs before the code | 
| 16 |  |  |  |  |  |  |  | 
| 17 |  |  |  |  |  |  | =head1 NAME | 
| 18 |  |  |  |  |  |  |  | 
| 19 |  |  |  |  |  |  | Bio::AlignIO::selex - selex sequence input/output stream | 
| 20 |  |  |  |  |  |  |  | 
| 21 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 22 |  |  |  |  |  |  |  | 
| 23 |  |  |  |  |  |  | # Do not use this module directly.  Use it via the L<Bio::AlignIO> class. | 
| 24 |  |  |  |  |  |  |  | 
| 25 |  |  |  |  |  |  | use Bio::AlignIO; | 
| 26 |  |  |  |  |  |  | use strict; | 
| 27 |  |  |  |  |  |  |  | 
| 28 |  |  |  |  |  |  | my $in = Bio::AlignIO->new(-format => 'selex', | 
| 29 |  |  |  |  |  |  | -file   => 't/data/testaln.selex'); | 
| 30 |  |  |  |  |  |  | while( my $aln = $in->next_aln ) { | 
| 31 |  |  |  |  |  |  |  | 
| 32 |  |  |  |  |  |  | } | 
| 33 |  |  |  |  |  |  |  | 
| 34 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 35 |  |  |  |  |  |  |  | 
| 36 |  |  |  |  |  |  | This object can transform L<Bio::SimpleAlign> objects to and from selex flat | 
| 37 |  |  |  |  |  |  | file databases. | 
| 38 |  |  |  |  |  |  |  | 
| 39 |  |  |  |  |  |  | =head1 FEEDBACK | 
| 40 |  |  |  |  |  |  |  | 
| 41 |  |  |  |  |  |  | =head2 Support | 
| 42 |  |  |  |  |  |  |  | 
| 43 |  |  |  |  |  |  | Please direct usage questions or support issues to the mailing list: | 
| 44 |  |  |  |  |  |  |  | 
| 45 |  |  |  |  |  |  | I<bioperl-l@bioperl.org> | 
| 46 |  |  |  |  |  |  |  | 
| 47 |  |  |  |  |  |  | rather than to the module maintainer directly. Many experienced and | 
| 48 |  |  |  |  |  |  | responsive experts will be able to look at the problem and quickly | 
| 49 |  |  |  |  |  |  | address it. Please include a thorough description of the problem | 
| 50 |  |  |  |  |  |  | with code and data examples if at all possible. | 
| 51 |  |  |  |  |  |  |  | 
| 52 |  |  |  |  |  |  | =head2 Reporting Bugs | 
| 53 |  |  |  |  |  |  |  | 
| 54 |  |  |  |  |  |  | Report bugs to the Bioperl bug tracking system to help us keep track | 
| 55 |  |  |  |  |  |  | of the bugs and their resolution. Bug reports can be submitted via the | 
| 56 |  |  |  |  |  |  | web: | 
| 57 |  |  |  |  |  |  |  | 
| 58 |  |  |  |  |  |  | https://github.com/bioperl/bioperl-live/issues | 
| 59 |  |  |  |  |  |  |  | 
| 60 |  |  |  |  |  |  | =head1 AUTHORS - Peter Schattner | 
| 61 |  |  |  |  |  |  |  | 
| 62 |  |  |  |  |  |  | Email: schattner@alum.mit.edu | 
| 63 |  |  |  |  |  |  |  | 
| 64 |  |  |  |  |  |  | =head1 CONTRIBUTORS | 
| 65 |  |  |  |  |  |  |  | 
| 66 |  |  |  |  |  |  | Jason Stajich, jason-at-bioperl.org | 
| 67 |  |  |  |  |  |  |  | 
| 68 |  |  |  |  |  |  | =head1 APPENDIX | 
| 69 |  |  |  |  |  |  |  | 
| 70 |  |  |  |  |  |  | The rest of the documentation details each of the object | 
| 71 |  |  |  |  |  |  | methods. Internal methods are usually preceded with a _ | 
| 72 |  |  |  |  |  |  |  | 
| 73 |  |  |  |  |  |  | =cut | 
| 74 |  |  |  |  |  |  |  | 
| 75 |  |  |  |  |  |  | # Let the code begin... | 
| 76 |  |  |  |  |  |  |  | 
| 77 |  |  |  |  |  |  | package Bio::AlignIO::selex; | 
| 78 | 3 |  |  | 3 |  | 407 | use strict; | 
|  | 3 |  |  |  |  | 5 |  | 
|  | 3 |  |  |  |  | 93 |  | 
| 79 |  |  |  |  |  |  |  | 
| 80 | 3 |  |  | 3 |  | 13 | use base qw(Bio::AlignIO); | 
|  | 3 |  |  |  |  | 6 |  | 
|  | 3 |  |  |  |  | 1260 |  | 
| 81 |  |  |  |  |  |  |  | 
| 82 |  |  |  |  |  |  | =head2 next_aln | 
| 83 |  |  |  |  |  |  |  | 
| 84 |  |  |  |  |  |  | Title   : next_aln | 
| 85 |  |  |  |  |  |  | Usage   : $aln = $stream->next_aln() | 
| 86 |  |  |  |  |  |  | Function: returns the next alignment in the stream. Tries to read *all* of the | 
| 87 |  |  |  |  |  |  | selex input. It reads all non-whitespace characters in the alignment | 
| 88 |  |  |  |  |  |  | area. For selex files with unusual gap characters (e.g. ~~~), map them by using | 
| 89 |  |  |  |  |  |  | $al->map_chars('~','-') | 
| 90 |  |  |  |  |  |  | Returns : L<Bio::SimpleAlign> object, or nothing if no sequences were read | 
| 91 |  |  |  |  |  |  | Args    : NONE | 
| 92 |  |  |  |  |  |  |  | 
| 93 |  |  |  |  |  |  | =cut | 
| 94 |  |  |  |  |  |  |  | 
| 95 |  |  |  |  |  |  | sub next_aln { | 
| 96 | 3 |  |  | 3 | 1 | 10 | my $self = shift; | 
| 97 | 3 |  |  |  |  | 4 | my $entry; | 
| 98 | 3 |  |  |  |  | 6 | my ($start,$end,%align,$name,$seqname,%hash,@c2name, %accession,%desc); | 
| 99 | 3 |  |  |  |  | 21 | my $aln =  Bio::SimpleAlign->new(-source => 'selex'); | 
| 100 |  |  |  |  |  |  |  | 
| 101 |  |  |  |  |  |  | # in selex format, every non-blank line that does not start | 
| 102 |  |  |  |  |  |  | # with '#=' is an alignment segment; the '#=' lines are markup lines. | 
| 103 |  |  |  |  |  |  | # Of particular interest are the '#=GS <name> AC <accession>' | 
| 104 |  |  |  |  |  |  | # lines, which give the accession number for each named sequence | 
| 105 | 3 |  |  |  |  | 23 | while( $entry = $self->_readline) { | 
| 106 | 375 | 50 |  |  |  | 1335 | if( $entry =~ /^\#=GS\s+(\S+)\s+AC\s+(\S+)/ ) { | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 107 | 0 |  |  |  |  | 0 | $accession{ $1 } = $2; | 
| 108 | 0 |  |  |  |  | 0 | next; | 
| 109 |  |  |  |  |  |  | } elsif( $entry =~ /^\#=GS\s+(\S+)\s+DE\s+(.+)\s*$/ ) { | 
| 110 | 0 |  |  |  |  | 0 | $desc{$1} .= $2; | 
| 111 |  |  |  |  |  |  | } elsif ( $entry =~ /^([^\#]\S+)\s+([A-Za-z\.\-\*]+)\s*/ ) { | 
| 112 | 156 |  |  |  |  | 279 | my ($name,$seq) = ($1,$2); | 
| 113 |  |  |  |  |  |  |  | 
| 114 | 156 | 100 |  |  |  | 259 | if( ! defined $align{$name}  ) { | 
| 115 | 6 |  |  |  |  | 11 | push @c2name, $name; | 
| 116 |  |  |  |  |  |  | } | 
| 117 | 156 |  |  |  |  | 310 | $align{$name} .= $seq; | 
| 118 |  |  |  |  |  |  | } | 
| 119 |  |  |  |  |  |  | } | 
| 120 |  |  |  |  |  |  | # ok... now we can make the sequences | 
| 121 |  |  |  |  |  |  |  | 
| 122 | 3 |  |  |  |  | 10 | foreach my $name ( @c2name ) { | 
| 123 |  |  |  |  |  |  |  | 
| 124 | 6 | 50 |  |  |  | 15 | if( $name =~ /(\S+)\/(\d+)-(\d+)/ ) { | 
| 125 | 0 |  |  |  |  | 0 | $seqname = $1; | 
| 126 | 0 |  |  |  |  | 0 | $start = $2; | 
| 127 | 0 |  |  |  |  | 0 | $end = $3; | 
| 128 |  |  |  |  |  |  | } else { | 
| 129 | 6 |  |  |  |  | 10 | $seqname=$name; | 
| 130 | 6 |  |  |  |  | 9 | $start = 1; | 
| 131 | 6 |  |  |  |  | 10 | $end = length($align{$name}); | 
| 132 |  |  |  |  |  |  | } | 
| 133 |  |  |  |  |  |  | my $seq = Bio::LocatableSeq->new | 
| 134 |  |  |  |  |  |  | ('-seq'              => $align{$name}, | 
| 135 |  |  |  |  |  |  | '-display_id'       => $seqname, | 
| 136 |  |  |  |  |  |  | '-start'            => $start, | 
| 137 |  |  |  |  |  |  | '-end'              => $end, | 
| 138 |  |  |  |  |  |  | '-description'      => $desc{$name}, | 
| 139 | 6 |  |  |  |  | 28 | '-accession_number' => $accession{$name}, | 
| 140 |  |  |  |  |  |  | '-alphabet'         => $self->alphabet, | 
| 141 |  |  |  |  |  |  | ); | 
| 142 |  |  |  |  |  |  |  | 
| 143 | 6 |  |  |  |  | 23 | $aln->add_seq($seq); | 
| 144 |  |  |  |  |  |  | } | 
| 145 |  |  |  |  |  |  |  | 
| 146 | 3 | 50 |  |  |  | 12 | return $aln if $aln->num_sequences; | 
| 147 | 0 |  |  |  |  | 0 | return; | 
| 148 |  |  |  |  |  |  | } | 
| 149 |  |  |  |  |  |  |  | 
| 150 |  |  |  |  |  |  |  | 
| 151 |  |  |  |  |  |  | =head2 write_aln | 
| 152 |  |  |  |  |  |  |  | 
| 153 |  |  |  |  |  |  | Title   : write_aln | 
| 154 |  |  |  |  |  |  | Usage   : $stream->write_aln(@aln) | 
| 155 |  |  |  |  |  |  | Function: writes the $aln object into the stream in selex format | 
| 156 |  |  |  |  |  |  | Returns : 1 for success; an empty return (undef) if printing fails | 
| 157 |  |  |  |  |  |  | Args    : one or more L<Bio::SimpleAlign> objects | 
| 158 |  |  |  |  |  |  |  | 
| 159 |  |  |  |  |  |  |  | 
| 160 |  |  |  |  |  |  | =cut | 
| 161 |  |  |  |  |  |  |  | 
| 162 |  |  |  |  |  |  | sub write_aln { | 
| 163 | 2 |  |  | 2 | 1 | 9 | my ($self,@aln) = @_; | 
| 164 | 2 |  |  |  |  | 5 | my ($namestr,$seq,$add); | 
| 165 | 2 |  |  |  |  | 0 | my ($maxn); | 
| 166 | 2 |  |  |  |  | 5 | foreach my $aln (@aln) { | 
| 167 | 2 |  |  |  |  | 8 | $maxn = $aln->maxdisplayname_length(); | 
| 168 | 2 |  |  |  |  | 5 | foreach $seq ( $aln->each_seq() ) { | 
| 169 | 8 |  |  |  |  | 16 | $namestr = $aln->displayname($seq->get_nse()); | 
| 170 | 8 |  |  |  |  | 16 | $add = $maxn - length($namestr) + 2; | 
| 171 | 8 |  |  |  |  | 13 | $namestr .= " " x $add; | 
| 172 | 8 | 50 |  |  |  | 16 | $self->_print (sprintf("%s  %s\n",$namestr,$seq->seq())) or return; | 
| 173 |  |  |  |  |  |  | } | 
| 174 |  |  |  |  |  |  | } | 
| 175 | 2 | 50 | 33 |  |  | 6 | $self->flush if $self->_flush_on_write && defined $self->_fh; | 
| 176 | 2 |  |  |  |  | 6 | return 1; | 
| 177 |  |  |  |  |  |  | } | 
| 178 |  |  |  |  |  |  |  | 
| 179 |  |  |  |  |  |  | 1; |