File Coverage

Bio/Tools/Alignment/Consed.pm
Criterion Covered Total %
statement 265 593 44.6
branch 81 200 40.5
condition 20 46 43.4
subroutine 22 53 41.5
pod 43 43 100.0
total 431 935 46.1


line stmt bran cond sub pod time code
1             # Bio::Tools::Alignment::Consed
2             #
3             # Please direct questions and support issues to
4             #
5             # Cared for by Chad Matsalla
6             #
7             # Copyright Chad Matsalla
8             #
9             # You may distribute this module under the same terms as perl itself
10              
11             # POD documentation - main docs before the code
12              
13             =head1 NAME
14              
15             Bio::Tools::Alignment::Consed - A module to work with objects from consed .ace files
16              
17             =head1 SYNOPSIS
18              
19             # a report for sequencing stuff
20             my $o_consed = Bio::Tools::Alignment::Consed->new(
21             -acefile => "/path/to/an/acefile.ace.1",
22             -verbose => 1);
23             my $foo = $o_consed->set_reverse_designator("r");
24             my $bar = $o_consed->set_forward_designator("f");
25              
26             # get the contig numbers
27             my @keys = $o_consed->get_contigs();
28              
29             # construct the doublets
30             my $setter_doublets = $o_consed->choose_doublets();
31              
32             # get the doublets
33             my @doublets = $o_consed->get_doublets();
34              
35             =head1 DESCRIPTION
36              
37             L provides methods and objects to deal
38             with the output from the Consed software suite. Specifically,
39             takes an C<.ace> file and provides objects for the results.
40              
41             A word about doublets: This module was written to accommodate a large
42             EST sequencing operation. In this case, EST's were sequenced from the
43             3' and from the 5' end of the EST. The objective was to find a
44             consensus sequence for these two reads. Thus, a contig of two is what
45             we wanted, and this contig should consist of the forward and reverse
46             reads of a getn clone. For example, for a forward designator of "F"
47             and a reverse designator of "R", if the two reads chad1F and chad1R
48             were in a single contig (for example Contig 5) it will be determined
49             that the consensus sequence for Contig 5 will be the sequence for
50             clone chad1.
51              
52             Doublets are good!
53              
54             This module parses C<.ace> and related files. A detailed list of methods
55             can be found at the end of this document.
56              
57             I wrote a detailed rationale for design that may explain the reasons
58             why some things were done the way they were done. That document is
59             beyond the scope of this pod and can probably be found in the
60             directory from which this module was 'made' or at
61             L.
62              
63             Note that the POD in that document might be old but the original
64             rationale still stands.
65              
66             =head1 FEEDBACK
67              
68             =head2 Mailing Lists
69              
70             User feedback is an integral part of the evolution of this and other
71             Bioperl modules. Send your comments and suggestions preferably to one
72             of the Bioperl mailing lists. Your participation is much appreciated.
73              
74             bioperl-l@bioperl.org - General discussion
75             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
76              
77             =head2 Support
78              
79             Please direct usage questions or support issues to the mailing list:
80              
81             I
82              
83             rather than to the module maintainer directly. Many experienced and
84             reponsive experts will be able look at the problem and quickly
85             address it. Please include a thorough description of the problem
86             with code and data examples if at all possible.
87              
88             =head2 Reporting Bugs
89              
90             Report bugs to the Bioperl bug tracking system to help us keep track
91             the bugs and their resolution. Bug reports can be submitted via the
92             web:
93              
94             https://github.com/bioperl/bioperl-live/issues
95              
96             =head1 AUTHOR - Chad Matsalla
97              
98             Email chad-at-dieselwurks.com
99              
100             =head1 APPENDIX
101              
102             The rest of the documentation details each of the object
103             methods. Internal methods are usually preceded with a _
104              
105             =cut
106              
107             #'
108              
109             package Bio::Tools::Alignment::Consed;
110              
111 1     1   643 use strict;
  1         1  
  1         24  
112              
113 1     1   354 use FileHandle;
  1         1528  
  1         4  
114 1     1   597 use Dumpvalue qw(dumpValue);
  1         3042  
  1         22  
115 1     1   313 use Bio::Tools::Alignment::Trim;
  1         1  
  1         23  
116 1     1   4 use File::Spec;
  1         2  
  1         16  
117              
118 1     1   3 use base qw(Bio::Root::Root Bio::Root::IO);
  1         1  
  1         3201  
119              
120             our %DEFAULTS = ( 'f_designator' => 'f',
121             'r_designator' => 'r');
122              
123             =head2 new()
124              
125             Title : new(-acefile => $path_to_some_acefile, -verbose => "1")
126             Usage : $o_consed = Bio::Tools::Alignment::Consed->
127             new(-acefile => $path_to_some_acefile, -verbose => "1");
128             Function: Construct the Bio::Tools::Alignment::Consed object. Sets
129             verbosity for the following procedures, if necessary:
130             1. Construct a new Bio::Tools::Alignment::Trim object, to
131             handle quality trimming 2. Read in the acefile and parse it
132              
133             Returns : A reference to a Bio::Tools::Alignment::Consed object.
134             Args : A hash. (-acefile) is the filename of an acefile. If a full path
135             is not specified "./" is prepended to the filename and used from
136             instantiation until destruction. If you want
137             Bio::Tools::Alignment::Consed to be noisy during parsing of
138             the acefile, specify some value for (-verbose).
139              
140             =cut
141              
142             sub new {
143 1     1 1 10 my ($class,%args) = @_;
144 1         7 my $self = $class->SUPER::new(%args);
145              
146 1         2 $self->{'filename'} = $args{'-acefile'};
147              
148             # this is special to UNIX and should probably use catfile : DONE!
149             # if (!($self->{'filename'} =~ m{/})) {
150             # $self->{'filename'} = "./".$self->{'filename'};
151             # }
152             # $self->{'filename'} =~ s#\\#\/#g if $^O =~ m/mswin/i;
153             # $self->{'filename'} =~ m/(.*\/)(.*)ace.*$/;
154             # $self->{'path'} = $1;
155              
156             # this is more generic and should work on most systems
157 1         17 (undef, $self->{'path'}, undef) = File::Spec->splitpath($self->{'filename'});
158              
159 1         7 $self->_initialize_io('-file'=>$self->{'filename'});
160 1         3 $self->{'o_trim'} = Bio::Tools::Alignment::Trim->new(-verbose => $self->verbose());
161 1         3 $self->set_forward_designator($DEFAULTS{'f_designator'});
162 1         3 $self->set_reverse_designator($DEFAULTS{'r_designator'});
163              
164 1         2 $self->_read_file();
165 1         5 return $self;
166             }
167              
168             =head2 set_verbose()
169              
170             Title : set_verbose()
171             Usage : $o_consed->set_verbose(1);
172             Function: Set the verbosity level for debugging messages. On instantiation
173             of the Bio::Tools::Alignment::Consed object the verbosity level
174             is set to 0 (quiet).
175             Returns : 1 or 0.
176             Args : The verbosity levels are:
177             0 - quiet
178             1 - noisy
179             2 - noisier
180             3 - annoyingly noisy
181              
182             This method for setting verbosity has largely been superseded by a
183             sub-by-sub way, where for every sub you can provide a (-verbose)
184             switch. I am doing converting this bit-by-bit so do not be surprised
185             if some subs do not honour this.
186              
187             =cut
188              
189             # from RootI
190              
191             # backwards compat
192 0     0 1 0 sub set_verbose { (shift)->verbose(@_) }
193              
194             =head2 get_filename()
195              
196             Title : get_filename()
197             Usage : $o_consed->get_filename();
198             Function: Returns the name of the acefile being used by the
199             Bio::Tools::Alignment::Consed object.
200             Returns : A scalar containing the name of a file.
201             Args : None.
202              
203             =cut
204              
205              
206             sub get_filename {
207 0     0 1 0 my $self = shift;
208 0         0 return $self->{'filename'};
209             }
210              
211             =head2 count_sequences_with_grep()
212              
213             Title : count_sequences_with_grep()
214             Usage : $o_consed->count_sequences_with_grep();
215             Function: Use /bin/grep to scan through the files in the ace project dir
216             and count sequences in those files. I used this method in the
217             development of this module to verify that I was getting all of the
218             sequences. It works, but it is (I think) unix-like platform
219             dependent.
220             Returns : A scalar containing the number of sequences in the ace project
221             directory.
222             Args : None.
223              
224             If you are on a non-UNIX platform, you really do not have to use
225             this. It is more of a debugging routine designed to address very
226             specific problems.
227              
228             This method was reimplemented to be platform independent with a pure
229             perl implementation. The above note can be ignored.
230              
231             =cut
232              
233             sub count_sequences_with_grep {
234 1     1 1 426 my $self = shift;
235 1         2 my ($working_dir,$grep_cli,@total_grep_sequences);
236             # this should be migrated to a pure perl implementation ala
237             # Tom Christiansen's 'tcgrep'
238             # http://www.cpan.org/modules/by-authors/id/TOMC/scripts/tcgrep.gz
239              
240 1 50       31 open my $FILE, '<', $self->{'filename'} or do {
241 0         0 $self->warn("Could not read file '$self->{'filename'}' for grepping: $!");
242             return
243 0         0 };
244 1         2 my $counter =0;
245 1 100       17 while(<$FILE>) { $counter++ if(/^AF/); }
  7661         12480  
246 1         27 close $FILE;
247              
248 1         39 opendir my $SINGLETS, $self->{'path'};
249 1         745 foreach my $f ( readdir($SINGLETS) ) {
250 582 100       648 next unless ($f =~ /\.singlets$/);
251              
252 1         27 my $singlet_file = File::Spec->catfile($self->{'path'}, $f);
253 1 50       30 open my $S_FILE, '<', $singlet_file or do {
254 0         0 $self->warn("Could not read file '$singlet_file': $!");
255             next
256 0         0 };
257 1 100       20 while(<$S_FILE>) { $counter++ if(/^>/) }
  1291         2193  
258 1         8 close $S_FILE;
259             }
260 1         43 closedir $SINGLETS;
261 1         7 return $counter;
262             }
263              
264             =head2 get_path()
265              
266             Title : get_path()
267             Usage : $o_consed->get_path();
268             Function: Returns the path to the acefile this object is working with.
269             Returns : Scalar. The path to the working acefile.
270             Args : None.
271              
272             =cut
273              
274             sub get_path {
275 0     0 1 0 my $self = shift;
276 0         0 return $self->{'path'};
277             }
278              
279             =head2 get_contigs()
280              
281             Title : get_contigs()
282             Usage : $o_consed->get_contigs();
283             Function: Return the keys to the Bio::Tools::Alignment::Consed object.
284             Returns : An array containing the keynames in the
285             Bio::Tools::Alignment::Consed object.
286             Args : None.
287              
288             This would normally be used to get the keynames for some sort of
289             iterator. These keys are worthless in general day-to-day use because
290             in the Consed acefile they are simply Contig1, Contig2, ...
291              
292             =cut
293              
294             sub get_contigs {
295 0     0 1 0 my ($self,$contig) = @_;
296 0         0 my @contigs = sort keys %{$self->{'contigs'}};
  0         0  
297 0         0 return @contigs;
298             }
299              
300             =head2 get_class($contig_keyname)
301              
302             Title : get_class($contig_keyname)
303             Usage : $o_consed->get_class($contig_keyname);
304             Function: Return the class name for this contig
305             Returns : A scalar representing the class of this contig.
306             Args : None.
307             Notes :
308              
309             =cut
310              
311             sub get_class {
312 0     0 1 0 my ($self,$contig) = @_;
313 0         0 return $self->{contigs}->{$contig}->{class};
314             }
315              
316             =head2 get_quality_array($contig_keyname)
317              
318             Title : get_quality_array($contig_keyname)
319             Usage : $o_consed->get_quality_array($contig_keyname);
320             Function: Returns the quality for the consensus sequence for the given
321             contig as an array. See get_quality_scalar to get this as a scalar.
322             Returns : An array containing the quality for the consensus sequence with
323             the given keyname.
324             Args : The keyname of a contig. Note: This is a keyname. The key would
325             normally come from get_contigs.
326              
327             Returns an array, not a reference. Is this a bug? I No.
328             Well, maybe. Why was this developed like this? I was using FreezeThaw
329             for object persistence, and when it froze out these arrays it took a
330             long time to thaw it. Much better as a scalar.
331              
332             See L
333              
334             =cut
335              
336             sub get_quality_array {
337 0     0 1 0 my ($self,$contig) = @_;
338 0         0 return split ' ', $self->{contigs}->{$contig}->{quality};
339             }
340              
341             =head2 get_quality_scalar($contig_keyname)
342              
343             Title : get_quality_scalar($contig_keyname)
344             Usage : $o_consed->get_quality_scalar($contig_keyname);
345             Function: Returns the quality for the consensus sequence for the given
346             contig as a scalar. See get_quality_array to get this as an array.
347             Returns : An scalar containing the quality for the consensus sequence with
348             the given keyname.
349             Args : The keyname of a contig. Note this is a _keyname_. The key would
350             normally come from get_contigs.
351              
352             Why was this developed like this? I was using FreezeThaw for object
353             persistence, and when it froze out these arrays it took a coon's age
354             to thaw it. Much better as a scalar.
355              
356             See L
357              
358             =cut
359              
360             #'
361             sub get_quality_scalar {
362 0     0 1 0 my ($self,$contig) = @_;
363 0         0 return $self->{'contigs'}->{$contig}->{'quality'};
364             }
365              
366             =head2 freeze_hash()
367              
368             Title : freeze_hash()
369             Usage : $o_consed->freeze_hash();
370              
371             Function: Use Ilya's FreezeThaw module to create a persistent data
372             object for this Bio::Tools::Alignment::Consed data
373             structure. In the case of AAFC, we use
374             Bio::Tools::Alignment::Consed to pre-process bunches of
375             sequences, freeze the structures, and send in a harvesting
376             robot later to do database stuff.
377             Returns : 0 or 1;
378             Args : None.
379              
380             This procedure was removed so Consed.pm won't require FreezeThaw.
381              
382             =cut
383              
384             #'
385             sub freeze_hash {
386 0     0 1 0 my $self = shift;
387 0         0 $self->warn("This method (freeze_hash) was removed ".
388             "from the bioperl consed.pm. Sorry.\n");
389 0         0 if (1==2) {
390             $self->debug("Bio::Tools::Alignment::Consed::freeze_hash:".
391             " \$self->{path} is $self->{path}\n");
392             my $filename = $self->{'path'}."frozen";
393             my %contigs = %{$self->{'contigs'}};
394             my $frozen = freeze(%contigs);
395             umask 0001;
396             open my $FREEZE, '>', $filename or do {
397             $self->warn( "Bio::Tools::Alignment::Consed could not ".
398             "freeze the contig hash because the file ".
399             "($filename) could not be opened: $!");
400             return 1;
401             };
402             print $FREEZE $frozen;
403             return 0;
404             }
405             }
406              
407             =head2 get_members($contig_keyname)
408              
409             Title : get_members($contig_keyname)
410             Usage : $o_consed->get_members($contig_keyname);
411             Function: Return the _names_ of the reads in this contig.
412             Returns : An array containing the names of the reads in this contig.
413             Args : The keyname of a contig. Note this is a keyname. The keyname
414             would normally come from get_contigs.
415              
416             See L
417              
418             =cut
419              
420             sub get_members {
421 0     0 1 0 my ($self,$contig) = @_;
422 0 0       0 if (!$contig) {
423 0         0 $self->warn("You need to provide the name of a contig to ".
424             "use Bio::Tools::Alignment::Consed::get_members!\n");
425 0         0 return;
426             }
427 0         0 return @{$self->{'contigs'}->{$contig}->{'member_array'}};
  0         0  
428             }
429              
430             =head2 get_members_by_name($some_arbitrary_name)
431              
432             Title : get_members_by_name($some_arbitrary_name)
433             Usage : $o_consed->get_members_by_name($some_arbitrary_name);
434             Function: Return the names of the reads in a contig. This is the name given
435             to $contig{key} based on what is in the contig. This is different
436             from the keys retrieved through get_contigs().
437             Returns : An array containing the names of the reads in the contig with this
438             name.
439             Args : The name of a contig. Not a key, but a name.
440              
441             Highly inefficient. use some other method if possible.
442             See L
443              
444             =cut
445              
446             sub get_members_by_name {
447 0     0 1 0 my ($self,$name) = @_;
448             # build a list to try to screen for redundancy
449 0         0 my @contigs_with_that_name;
450 0         0 foreach my $currkey ( sort keys %{$self->{'contigs'}} ) {
  0         0  
451 0 0       0 next if (!$self->{'contigs'}->{$currkey}->{'name'});
452 0 0       0 if ($self->{'contigs'}->{$currkey}->{'name'} eq "$name") {
453 0         0 push @contigs_with_that_name,$currkey;
454             }
455             }
456 0         0 my $count = @contigs_with_that_name;
457 0 0       0 if ($count == 1) {
458 0         0 my $contig_num = $contigs_with_that_name[0];
459 0         0 return @{$self->{'contigs'}->{$contig_num}->{'member_array'}};
  0         0  
460             }
461             }
462              
463             =head2 get_contig_number_by_name($some_arbitrary_name)
464              
465             Title : get_contig_number_by_name($some_arbitrary_name)
466             Usage : $o_consed->get_contig_number_by_name($some_arbitrary_name);
467             Function: Return the names of the reads in a contig. This is the name given
468             to $contig{key} based on what is in the contig. This is different
469             from the keys retrieved through get_contigs().
470             Returns : An array containing the names of the reads in the contig with this
471             name.
472             Args : The name of a contig. Not a key, but a name.
473              
474             See L
475              
476             =cut
477              
478             sub get_contig_number_by_name {
479 0     0 1 0 my ($self,$name) = @_;
480 0         0 foreach my $currkey (sort keys %{$self->{'contigs'}}) {
  0         0  
481 0 0 0     0 if ($self->{'contigs'}->{$currkey}->{'name'} &&
482             $self->{'contigs'}->{$currkey}->{'name'} eq "$name") {
483 0         0 return $currkey;
484             }
485             }
486             }
487              
488             =head2 get_sequence($contig_keyname)
489              
490             Title : get_sequence($contig_keyname)
491             Usage : $o_consed->get_sequence($contig_keyname);
492             Function: Returns the consensus sequence for a given contig.
493             Returns : A scalar containing a sequence.
494             Args : The keyname of a contig. Note this is a key. The key would
495             normally come from get_contigs.
496              
497             See L
498              
499             =cut
500              
501             sub get_sequence {
502 0     0 1 0 my ($self,$contig) = @_;
503 0         0 return $self->{'contigs'}->{$contig}->{'consensus'};
504             }
505              
506             =head2 set_final_sequence($some_sequence)
507              
508             Title : set_final_sequence($name,$some_sequence)
509             Usage : $o_consed->set_final_sequence($name,$some_sequence);
510             Function: Provides a manual way to set the sequence for a given key in the
511             contig hash. Rarely used.
512             Returns : 0 or 1;
513             Args : The name (not the keyname) of a contig and an arbitrary string.
514              
515             A method with a questionable and somewhat mysterious origin. May raise
516             the dead or something like that.
517              
518             =cut
519              
520             sub set_final_sequence {
521 0     0 1 0 my ($self,$name,$sequence) = @_;
522 0 0       0 if (!$self->{'contigs'}->{$name}) {
523 0         0 $self->warn("You cannot set the final sequence for ".
524             "$name because it doesn't exist!\n");
525 0         0 return 1;
526             }
527             else {
528 0         0 $self->{'contigs'}->{$name}->{'final_sequence'} = $sequence;
529             }
530 0         0 return 0;
531             }
532              
533             =head2 _read_file()
534              
535             Title : _read_file();
536             Usage : _read_file();
537             Function: An internal subroutine used to read in an acefile and parse it
538             into a Bio::Tools::Alignment::Consed object.
539             Returns : 0 or 1.
540             Args : Nothing.
541              
542             This routine creates and saves the filhandle for reading the files in
543             {fh}
544              
545             =cut
546              
547             sub _read_file {
548 1     1   2 my ($self) = @_;
549 1         1 my ($line,$in_contig,$in_quality,$contig_number,$top);
550             # make it easier to type $fhl
551 1         4 while (defined($line=$self->_readline()) ) {
552 7661         5851 chomp $line;
553             # check if there is anything on this line
554             # if not, you can stop gathering consensus sequence
555 7661 100       25616 if (!$line) {
    100          
    100          
    100          
    100          
    50          
    100          
556             # if the line is blank you are no longer to gather consensus
557             # sequence or quality values
558 488         337 $in_contig = 0;
559 488         707 $in_quality = 0;
560             }
561             # you are currently gathering consensus sequence
562             elsif ($in_contig) {
563 910 50       1163 if ($in_contig == 1) {
564 910         1967 $self->debug("Adding $line to consensus of contig number $contig_number.\n");
565 910         2157 $self->{'contigs'}->{$contig_number}->{'consensus'} .= $line;
566             }
567             }
568             elsif ($in_quality) {
569 910 50       871 if (!$line) {
570 0         0 $in_quality = undef;
571             }
572             else {
573              
574             # I wrote this in here because acefiles produced by
575             # cap3 do not have a leading space like the acefiles
576             # produced by phrap and there is the potential to have
577             # concatenated quality values like this: 2020 rather
578             # then 20 20 whre lines collide. Thanks Andrew for
579             # noticing.
580              
581 910 100 66     2791 if ($self->{'contigs'}->{$contig_number}->{'quality'} &&
582             !($self->{'contigs'}->{$contig_number}->{'quality'} =~ m/\ $/)) {
583 857         833 $self->{'contigs'}->{$contig_number}->{'quality'} .= " ";
584             }
585 910         1982 $self->{'contigs'}->{$contig_number}->{'quality'} .= $line;
586             }
587             }
588             elsif ($line =~ /^BQ/) {
589 53         90 $in_quality = 1;
590             }
591              
592             # the line /^CO/ like this:
593             # CO Contig1 796 1 1 U
594             # can be broken down as follows:
595             # CO - Contig!
596             # Contig1 - the name of this contig
597             # 796 - Number of bases in this contig
598             # 1 - Number of reads in this contig
599             # 1 - number of base segments in this contig
600             # U - Uncomplemented
601              
602             elsif ($line =~ /^CO/) {
603 53         127 $line =~ m/^CO\ Contig(\d+)\ \d+\ \d+\ \d+\ (\w)/;
604 53         95 $contig_number = $1;
605 53 50       110 if ($2 eq "C") {
606 0         0 $self->debug("Contig $contig_number is complemented!\n");
607             }
608 53         151 $self->{'contigs'}->{$contig_number}->{'member_array'} = [];
609 53         100 $self->{'contigs'}->{$contig_number}->{'contig_direction'} = "$2";
610 53         96 $in_contig = 1;
611             }
612              
613             # 000713
614             # this BS is deprecated, I think.
615             # haha, I am really witty.
616              
617             elsif ($line =~ /^BSDEPRECATED/) {
618 0         0 $line =~ m/^BS\s+\d+\s+\d+\s+(.+)/;
619 0         0 my $member = $1;
620 0         0 $self->{'contigs'}->{$contig_number}->{$member}++;
621             }
622             # the members of the contigs are determined by the AF line in the ace file
623             elsif ($line =~ /^AF/) {
624 114         205 $self->debug("I see an AF line here.\n");
625 114         264 $line =~ /^AF\ (\S+)\ (\w)\ (\S+)/;
626              
627             # push the name of the current read onto the member array for this contig
628 114         89 push @{$self->{'contigs'}->{$contig_number}->{'member_array'}},$1;
  114         297  
629              
630             # the first read in the contig will be named the "top" read
631 114 100       140 if (!$top) {
632 57         85 $self->debug("\$top is not set.\n");
633 57 50       96 if ($self->{'contigs'}->{$contig_number}->{'contig_direction'} eq "C") {
634 0         0 $self->debug("Reversing the order of the reads. The bottom will be $1\n");
635              
636             # if the contig sequence is marked as the
637             # complement, the top becomes the bottom and$
638 0         0 $self->{'contigs'}->{$contig_number}->{'bottom_name'} = $1;
639 0         0 $self->{'contigs'}->{$contig_number}->{'bottom_complement'} = $2;
640 0         0 $self->{'contigs'}->{$contig_number}->{'bottom_start'} = $3;
641             }
642             else {
643 57         136 $self->debug("NOT reversing the order of the reads. ".
644             "The top_name will be $1\n");
645             # if the contig sequence is marked as the
646             # complement, the top becomes the bottom and$
647 57         136 $self->{'contigs'}->{$contig_number}->{'top_name'} = $1;
648 57         79 $self->{'contigs'}->{$contig_number}->{'top_complement'} = $2;
649 57         90 $self->{'contigs'}->{$contig_number}->{'top_start'} = $3;
650             }
651 57         102 $top = 1;
652             }
653             else {
654              
655             # if the contig sequence is marked as the complement,
656             # the top becomes the bottom and the bottom becomes
657             # the top
658 57 50       84 if ($self->{'contigs'}->{$contig_number}->{'contig_direction'} eq "C") {
659 0         0 $self->debug("Reversing the order of the reads. The top will be $1\n");
660 0         0 $self->{'contigs'}->{$contig_number}->{'top_name'} = $1;
661 0         0 $self->{'contigs'}->{$contig_number}->{'top_complement'} = $2;
662 0         0 $self->{'contigs'}->{$contig_number}->{'top_start'} = $3;
663             }
664             else {
665 57         140 $self->debug("NOT reversing the order of the reads. The bottom will be $1\n");
666 57         123 $self->{'contigs'}->{$contig_number}->{'bottom_name'} = $1;
667 57         84 $self->{'contigs'}->{$contig_number}->{'bottom_complement'} = $2;
668 57         89 $self->{'contigs'}->{$contig_number}->{'bottom_start'} = $3;
669             }
670 57         99 $top = undef;
671             }
672             }
673             }
674 1         2 return 0;
675             }
676              
677             =head2 set_reverse_designator($some_string)
678              
679             Title : set_reverse_designator($some_string)
680             Usage : $o_consed->set_reverse_designator($some_string);
681             Function: Set the designator for the reverse read of contigs in this
682             Bio::Tools::Alignment::Consed object. Used to determine if
683             contigs containing two reads can be named.
684             Returns : The value of $o_consed->{reverse_designator} so you can check
685             to see that it was set properly.
686             Args : An arbitrary string.
687              
688             May be useful only to me. I
689              
690             =cut
691              
692             sub set_reverse_designator {
693 1     1 1 1 my ($self,$reverse_designator) = @_;
694 1         2 $self->{'reverse_designator'} = $reverse_designator;
695 1         2 $self->{'o_trim'}->set_reverse_designator($reverse_designator);
696 1         1 return $self->{'reverse_designator'};
697             } # end set_reverse_designator
698              
699             =head2 set_forward_designator($some_string)
700              
701             Title : set_forward_designator($some_string)
702             Usage : $o_consed->set_forward_designator($some_string);
703             Function: Set the designator for the forward read of contigs in this
704             Bio::Tools::Alignment::Consed object. Used to determine if
705             contigs containing two reads can be named.
706             Returns : The value of $o_consed->{forward_designator} so you can check
707             to see that it was set properly.
708             Args : An arbitrary string.
709              
710             May be useful only to me. I
711              
712             =cut
713              
714             sub set_forward_designator {
715 1     1 1 2 my ($self,$forward_designator) = @_;
716 1         1 $self->{'forward_designator'} = $forward_designator;
717 1         4 $self->{'o_trim'}->set_forward_designator($forward_designator);
718 1         1 return $self->{'forward_designator'};
719             } # end set_forward_designator
720              
721             =head2 set_designator_ignore_case("yes")
722              
723             Title : set_designator_ignore_case("yes")
724             Usage : $o_consed->set_designator_ignore_case("yes");
725             Function: Deprecated.
726             Returns : Deprecated.
727             Args : Deprecated.
728              
729             Deprecated. Really. Trust me.
730              
731             =cut
732              
733             sub set_designator_ignore_case {
734 0     0 1 0 my ($self,$ignore_case) = @_;
735 0 0       0 if ($ignore_case eq "yes") {
736 0         0 $self->{'designator_ignore_case'} = 1;
737             }
738 0         0 return $self->{'designator_ignore_case'};
739             } # end set_designator_ignore_case
740              
741             =head2 set_trim_points_singlets_and_singletons()
742              
743             Title : set_trim_points_singlets_and_singletons()
744             Usage : $o_consed->set_trim_points_singlets_and_singletons();
745             Function: Set the trim points for singlets and singletons based on
746             quality. Uses the Bio::Tools::Alignment::Trim object. Use
747             at your own risk because the Bio::Tools::Alignment::Trim
748             object was designed specifically for me and is mysterious
749             in its ways. Every time somebody other then me uses it a
750             swarm of locusts decends on a small Central American
751             village so do not say you weren't warned.
752             Returns : Nothing.
753             Args : None.
754              
755             Working on exceptions and warnings here.
756              
757             See L for more information
758              
759             =cut
760              
761             #' to make my emacs happy
762              
763             sub set_trim_points_singlets_and_singletons {
764 0     0 1 0 my ($self) = @_;
765 0         0 $self->debug("Consed.pm : \$self is $self\n");
766 0         0 my (@points,$trimmed_sequence);
767 0 0       0 if (!$self->{'doublets_set'}) {
768 0         0 $self->debug("You need to set the doublets before you use ".
769             "set_trim_points_singlets_and_doublets. Doing that now.");
770 0         0 $self->set_doublets();
771             }
772 0         0 foreach (sort keys %{$self->{'contigs'}}) {
  0         0  
773 0 0       0 if ($self->{'contigs'}->{$_}->{'class'} eq "singlet") {
774 0         0 $self->debug("Singlet $_\n");
775             # this is what Warehouse wants
776             # my ($self,$sequence,$quality,$name) = @_;
777             # this is what Bio::Tools::Alignment::Trim::trim_singlet wants:
778             # my ($self,$sequence,$quality,$name,$class) = @_;
779             # the following several lines are to make the parameter passing legible.
780 0         0 my ($sequence,$quality,$name,$class);
781 0         0 $sequence = $self->{'contigs'}->{$_}->{'consensus'};
782 0 0       0 if (!$self->{'contigs'}->{$_}->{'quality'}) { $quality = "unset"; }
  0         0  
783 0         0 else { $quality = $self->{'contigs'}->{$_}->{'quality'}; }
784 0         0 $name = $self->{'contigs'}->{$_}->{'name'};
785 0         0 $class = $self->{'contigs'}->{$_}->{'class'};
786 0         0 @points = @{$self->{'o_trim'}->trim_singlet($sequence,$quality,$name,$class)};
  0         0  
787 0         0 $self->{'contigs'}->{$_}->{'start_point'} = $points[0];
788 0         0 $self->{'contigs'}->{$_}->{'end_point'} = $points[1];
789             $self->{'contigs'}->{$_}->{'sequence_trimmed'} =
790 0         0 substr($self->{contigs}->{$_}->{'consensus'},$points[0],$points[1]-$points[0]);
791             }
792             }
793 0         0 $self->debug("Bio::Tools::Alignment::Consed::set_trim_points_singlets".
794             "_and_singletons: Done setting the quality trimpoints.\n");
795 0         0 return;
796             } # end set_trim_points_singlet
797              
798             =head2 set_trim_points_doublets()
799              
800             Title : set_trim_points_doublets()
801             Usage : $o_consed->set_trim_points_doublets();
802             Function: Set the trim points for doublets based on quality. Uses the
803             Bio::Tools::Alignment::Trim object. Use at your own risk because
804             the Bio::Tools::Alignment::Trim object was designed specifically
805             for me and is mysterious in its ways. Every time somebody other
806             then me uses it you risk a biblical plague being loosed on your
807             city.
808             Returns : Nothing.
809             Args : None.
810             Notes : Working on exceptions here.
811              
812             See L for more information
813              
814             =cut
815              
816             sub set_trim_points_doublets {
817 0     0 1 0 my $self = shift;
818 0         0 my @points;
819 0         0 $self->debug("Bio::Tools::Alignment::Consed::set_trim_points_doublets:".
820             " Restoring zeros for doublets.\n");
821             # &show_missing_sequence($self);
822 0         0 $self->debug("Bio::Tools::Alignment::Consed::set_trim_points_doublets:".
823             " Setting doublet trim points.\n");
824 0         0 foreach (sort keys %{$self->{'contigs'}}) {
  0         0  
825 0 0       0 if ($self->{'contigs'}->{$_}->{'class'} eq "doublet") {
826             # my ($self,$sequence,$quality,$name,$class) = @_;
827 0         0 my @quals = split(' ',$self->{'contigs'}->{$_}->{'quality'});
828              
829             @points = $self->{o_trim}->trim_doublet
830             ($self->{'contigs'}->{$_}->{'consensus'},
831             $self->{'contigs'}->{$_}->{'quality'},
832             $self->{'contigs'}->{$_}->{name},
833 0         0 $self->{'contigs'}->{$_}->{'class'});
834 0         0 $self->{'contigs'}->{$_}->{'start_point'} = $points[0];
835 0         0 $self->{'contigs'}->{$_}->{'end_point'} = $points[1];
836             # now set this
837             $self->{'contigs'}->{$_}->{'sequence_trimmed'} =
838 0         0 substr($self->{contigs}->{$_}->{'consensus'},
839             $points[0],$points[1]-$points[0]);
840             # 010102 the deprecated way to do things:
841             }
842             }
843 0         0 $self->debug("Bio::Tools::Alignment::Consed::set_trim_points_doublets:".
844             " Done setting doublet trim points.\n");
845 0         0 return;
846             } # end set_trim_points_doublets
847              
848             =head2 get_trimmed_sequence_by_name($name)
849              
850             Title : get_trimmed_sequence_by_name($name)
851             Usage : $o_consed->get_trimmed_sequence_by_name($name);
852             Function: Returns the trimmed_sequence of a contig with {name} eq $name.
853             Returns : A scalar- the trimmed sequence.
854             Args : The {name} of a contig.
855             Notes :
856              
857             =cut
858              
859             sub get_trimmed_sequence_by_name {
860 0     0 1 0 my ($self,$name) = @_;
861 0         0 my $trimmed_sequence;
862 0         0 my $contigname = &get_contig_number_by_name($self,$name);
863 0         0 my $class = $self->{'contigs'}->{$contigname}->{'class'};
864             # what is this business and who was smoking crack while writing this?
865             # if ($class eq "singlet") {
866             # send the sequence, the quality, and the name
867             # $trimmed_sequence = $self->{o_trim}->trim_singlet
868             # ($self->{'contigs'}->{$contigname}->{consensus},
869             # $self->{'contigs'}->{$contigname}->{'quality'},$name);
870             # }
871 0         0 return $self->{'contigs'}->{$contigname}->{'sequence_trimmed'};
872             }
873              
874             =head2 set_dash_present_in_sequence_name("yes")
875              
876             Title : set_dash_present_in_sequence_name("yes")
877             Usage : $o_consed->set_dash_present_in_sequence_name("yes");
878             Function: Deprecated. Part of an uncompleted thought. ("Oooh! Shiny!")
879             Returns : Nothing.
880             Args : "yes" to set {dash_present_in_sequence_name} to 1
881             Notes :
882              
883             =cut
884              
885             sub set_dash_present_in_sequence_name {
886 0     0 1 0 my ($self,$dash_present) = @_;
887 0 0       0 if ($dash_present eq "yes") {
888 0         0 $self->{'dash_present_in_sequence_name'} = 1;
889             }
890             else {
891 0         0 $self->{'dash_present_in_sequence_name'} = 0;
892             }
893 0         0 return $self->{'dash_present_in_sequence_name'};
894             } # end set_dash_present_in_sequence_name
895              
896             =head2 set_doublets()
897              
898             Title : set_doublets()
899             Usage : $o_consed->set_doublets();
900             Function: Find pairs that have similar names and mark them as doublets
901             and set the {name}.
902             Returns : 0 or 1.
903             Args : None.
904              
905             A complicated subroutine that iterates over the
906             Bio::Tools::Alignment::Consed looking for contigs of 2. If the forward
907             and reverse designator are removed from each of the reads in
908             {'member_array'} and the remaining reads are the same, {name} is set
909             to that name and the contig's class is set as "doublet". If any of
910             those cases fail the contig is marked as a "pair".
911              
912             =cut
913              
914             #' make my emacs happy
915              
916             sub set_doublets {
917 1     1 1 443 my ($self) = @_;
918             # set the designators in the Bio::Tools::Alignment::Trim object
919              
920             $self->{'o_trim'}->set_designators($self->{'reverse_designator'},
921 1         7 $self->{'forward_designator'});
922 1         2 foreach my $key_contig (sort keys %{$self->{'contigs'}}) {
  1         35  
923              
924             # if there is a member array (why would there not be? This should be a die()able offence
925             # but for now I will leave it
926 118 50       203 if ($self->{'contigs'}->{$key_contig}->{'member_array'}) {
927             # if there are two reads in this contig
928             # i am pretty sure that this is wrong but i am keeping it for reference
929             # if (@{$self->{'contigs'}->{$key_contig}->{'member_array'}} == 2 || !$self->{'contigs'}->{$key_contig}->{'class'}) {
930             #
931             # WRONG. Was I on crack?
932 118 100       63 if (@{$self->{'contigs'}->{$key_contig}->{'member_array'}} == 2) {
  118 100       131  
    50          
933 46         36 $self->{'contigs'}->{$key_contig}->{'num_members'} = 2;
934 46         54 $self->debug("\tThere are 2 members! Looking for the contig name...\n");
935 46         65 my $name = _get_contig_name($self,$self->{'contigs'}->{$key_contig}->{'member_array'});
936 46 100       111 $self->debug("The name is $name\n") if defined $name;
937 46 100       51 if ($name) {
938 45         55 $self->{'contigs'}->{$key_contig}->{'name'} = $name;
939 45         41 $self->{'contigs'}->{$key_contig}->{'class'} = "doublet";
940             } else {
941 1         3 $self->debug("$key_contig is a pair.\n");
942 1         2 $self->{'contigs'}->{$key_contig}->{'class'} = "pair";
943             }
944             }
945             # this is all fair and good but what about singlets?
946             # they have one reads in the member_array but certainly are not singletons
947 72         74 elsif (@{$self->{'contigs'}->{$key_contig}->{'member_array'}} == 1) {
948             # set the name to be the name of the read
949 68         40 $self->{'contigs'}->{$key_contig}->{name} = @{$self->{'contigs'}->{$key_contig}->{'member_array'}}[0];
  68         75  
950             # set the number of members to be one
951 68         47 $self->{'contigs'}->{$key_contig}->{num_members} = 1;
952             # if this was a singlet, it would already belong to the class "singlet"
953             # so leave it alone
954             # if it is not a singlet, it is a singleton! lablel it appropriately
955 68 100       86 unless ($self->{'contigs'}->{$key_contig}->{'class'}) {
956 3         4 $self->{'contigs'}->{$key_contig}->{'class'} = "singleton";
957             }
958             }
959             # set the multiplet characteristics
960 4         7 elsif (@{$self->{'contigs'}->{$key_contig}->{'member_array'}} >= 3) {
961 4         3 $self->{'contigs'}->{$key_contig}->{'num_members'} = @{$self->{'contigs'}->{$key_contig}->{'member_array'}};
  4         9  
962 4         5 $self->{'contigs'}->{$key_contig}->{'class'} = "multiplet";
963             }
964 118         65 $self->{'contigs'}->{$key_contig}->{'num_members'} = @{$self->{'contigs'}->{$key_contig}->{'member_array'}};
  118         131  
965              
966             }
967             }
968 1         6 $self->{'doublets_set'} = "done";
969 1         3 return 0;
970             } # end set_doublets
971              
972             =head2 set_singlets
973              
974             Title : set_singlets
975             Usage : $o_consed->set_singlets();
976             Function: Read in a singlets file and place them into the
977             Bio::Tools::Alignment::Consed object.
978             Returns : Nothing.
979             Args : A scalar to turn on verbose parsing of the singlets file.
980             Notes :
981              
982             =cut
983              
984             sub set_singlets {
985             # parse out the contents of the singlets file
986 1     1 1 340 my ($self) = @_;
987 1         5 $self->debug("Bio::Tools::Alignment::Consed Adding singlets to the contig hash...\n");
988 1         3 my $full_filename = $self->{'filename'};
989 1         5 $self->debug("Bio::Tools::Alignment::Consed::set_singlets: \$full_filename is $full_filename\n");
990 1 50       6 $full_filename =~ s#\\#\/#g if $^O =~ m/mswin/i;
991 1         5 $full_filename =~ m/(.*\/)(.*ace.*)$/;
992 1         2 my ($base_path,$filename) = ($1,$2);
993 1         5 $self->debug("Bio::Tools::Alignment::Consed::set_singlets: singlets filename is $filename and \$base_path is $base_path\n");
994 1         3 $filename =~ m/(.*)ace.*$/;
995 1         3 my $singletsfile = $base_path.$1."singlets";
996 1         8 $self->debug("\$singletsfile is $singletsfile\n");
997 1 50       38 if (!-f $singletsfile) {
998             # there is no singlets file.
999 0         0 $self->{'singlets_set'} = "done";
1000 0         0 return;
1001             }
1002 1         5 $self->debug("$singletsfile is indeed a file. Trying to open it...\n");
1003 1         6 my $singlets_fh = Bio::Root::IO->new(-file => $singletsfile);
1004 1         2 my ($sequence,$name,$count);
1005 1         3 while ($_ = $singlets_fh->_readline()) {
1006 1291         865 chomp $_;
1007 1291 100       1349 if (/\>/) {
1008 65 100 66     161 if ($name && $sequence) {
1009 64         213 $self->debug("Adding $name with sequence $sequence to hash...\n");
1010 64         57 push @{$self->{'contigs'}->{$name}->{'member_array'}},$name;
  64         237  
1011 64         85 $self->{'contigs'}->{$name}->{'consensus'} = $sequence;
1012 64         66 $self->{'contigs'}->{$name}->{'name'} = $name;
1013 64         60 $self->{'contigs'}->{$name}->{"singlet"} = 1;
1014 64         53 $self->{'contigs'}->{$name}->{'class'} = "singlet";
1015             }
1016 65         45 $sequence = $name = undef;
1017 65         46 $count++;
1018 65         299 m/^\>(.*)\s\sCHROMAT/;
1019 65         83 $name = $1;
1020 65 50       134 if (!$name) {
1021 0         0 m/\>(\S+)\s/;
1022 0         0 $name = $1;
1023             }
1024             }
1025 1226         1708 else { $sequence .= $_; }
1026             }
1027 1 50 33     9 if ($name && $sequence) {
1028 1         4 $self->debug("Pushing the last of the singlets ($name)\n");
1029 1         1 @{$self->{'contigs'}->{$name}->{'member_array'}} = $name;
  1         4  
1030 1         3 $self->{'contigs'}->{$name}->{'consensus'} = $sequence;
1031 1         2 $self->{'contigs'}->{$name}->{'name'} = $name;
1032 1         1 $self->{'contigs'}->{$name}->{"singlet"} = 1;
1033 1         2 $self->{'contigs'}->{$name}->{'class'} = "singlet";
1034             }
1035 1         2 $self->debug("Bio::Tools::Alignment::Consed::set_singlets: Done adding singlets to the singlets hash.\n");
1036 1         2 $self->{'singlets_set'} = "done";
1037 1         10 return 0;
1038             } # end sub set_singlets
1039              
1040             =head2 get_singlets()
1041              
1042             Title : get_singlets()
1043             Usage : $o_consed->get_singlets();
1044             Function: Return the keynames of the singlets.
1045             Returns : An array containing the keynames of all
1046             Bio::Tools::Alignment::Consed sequences in the class "singlet".
1047             Args : None.
1048             Notes :
1049              
1050             =cut
1051              
1052             sub get_singlets {
1053             # returns an array of singlet names
1054             # singlets have "singlet"=1 in the hash
1055 3     3 1 4 my $self = shift;
1056 3 50       8 if (!$self->{singlets_set}) {
1057 0         0 $self->debug("You need to set the singlets before you get them. Doing that now.");
1058 0         0 $self->set_singlets();
1059             }
1060              
1061 3         4 my (@singlets,@array);
1062 3         2 foreach my $key (sort keys %{$self->{'contigs'}}) {
  3         120  
1063             # @array = @{$Consed::contigs{$key}->{'member_array'}};
1064             # somethimes a user will try to get a list of singlets before the classes for the rest of the
1065             # contigs has been set (see t/test.t for how I figured this out.
1066             # so either way, just return class=singlets
1067 354 100       556 if (!$self->{'contigs'}->{$key}->{'class'}) {
    100          
1068             # print("$key has no class. why?\n");
1069             }
1070             elsif ($self->{'contigs'}->{$key}->{'class'} eq "singlet") {
1071 195         149 push @singlets,$key;
1072             }
1073             }
1074 3         24 return @singlets;
1075             }
1076              
1077             =head2 set_quality_by_name($name,$quality)
1078              
1079             Title : set_quality_by_name($name,$quality)
1080             Usage : $o_consed->set_quality_by_name($name,$quality);
1081             Function: Deprecated. Make the contig with {name} have {'quality'} $quality.
1082             Probably used for testing.
1083             Returns : Nothing.
1084             Args : The name of a contig and a scalar for its quality.
1085             Notes : Deprecated.
1086              
1087             =cut
1088              
1089             sub set_quality_by_name {
1090             # this is likely deprecated
1091 0     0 1 0 my ($self,$name,$quality) = shift;
1092 0         0 my $return;
1093 0         0 foreach (sort keys %{$self->{'contigs'}}) {
  0         0  
1094 0 0 0     0 if ($self->{'contigs'} eq "$name" || $self->{'contigs'}->{'name'} eq "$name") {
1095 0         0 $self->{'contigs'}->{'quality'} = $quality;
1096 0         0 $return=1;
1097             }
1098             }
1099 0 0       0 if ($return) { return "0"; } else { return "1"; }
  0         0  
  0         0  
1100             } # end set quality by name
1101              
1102             =head2 set_singlet_quality()
1103              
1104             Title : set_singlet_quality()
1105             Usage : $o_consed->set_singlet_quality();
1106             Function: For each singlet, go to the appropriate file in phd_dir and read
1107             in the phred quality for that read and place it into {'quality'}
1108             Returns : 0 or 1.
1109             Args : None.
1110             Notes : This is the next subroutine that will receive substantial revision
1111             in the next little while. It really should eval the creation of
1112             Bio::Tools::Alignment::Phred objects and go from there.
1113              
1114             =cut
1115              
1116             sub set_singlet_quality {
1117 0     0 1 0 my $self = shift;
1118 0         0 my $full_filename = $self->{'filename'};
1119 0 0       0 $full_filename =~ s#\\#\/#g if $^O =~ m/mswin/i;
1120 0         0 $full_filename =~ m/(.*\/)(.*)ace.*$/;
1121 0         0 my ($base_path,$filename) = ($1,"$2"."qual");
1122 0         0 my $singletsfile = $base_path.$filename;
1123 0 0       0 if (-f $singletsfile) {
1124             # print("$singletsfile is indeed a file. Trying to open it...\n");
1125             }
1126             else {
1127 0         0 $self->warn("$singletsfile is not a file. Sorry.\n");
1128 0         0 return;
1129             }
1130 0         0 my $singlets_fh = Bio::Root::IO->new(-file => $singletsfile);
1131 0         0 my ($sequence,$name,$count);
1132 0         0 my ($identity,$line,$quality,@qline);
1133 0         0 while ($line = $singlets_fh->_readline()) {
1134 0         0 chomp $line;
1135 0 0       0 if ($line =~ /^\>/) {
1136 0         0 $quality = undef;
1137 0         0 $line =~ m/\>(\S*)\s/;
1138 0         0 $identity = $1;
1139             }
1140             else {
1141 0 0       0 if ($self->{'contigs'}->{$identity}) {
1142 0         0 $self->{'contigs'}->{$identity}->{'quality'} .= "$line ";
1143             }
1144             }
1145              
1146             }
1147 0         0 return 0;
1148             }
1149              
1150             =head2 set_contig_quality()
1151              
1152             Title : set_contig_quality()
1153             Usage : $o_consed->set_contig_quality();
1154             Function: Deprecated.
1155             Returns : Deprecated.
1156             Args : Deprecated.
1157             Notes : Deprecated. Really. Trust me.
1158              
1159             =cut
1160              
1161             sub set_contig_quality {
1162             # note: contigs _include_ singletons but _not_ singlets
1163 0     0 1 0 my ($self) = shift;
1164             # the unexpected results I am referring to here are a doubling of quality values.
1165             # the profanity I uttered on discovering this reminded me of the simpsons:
1166             # Ned Flanders: "That is the loudest profanity I have ever heard!"
1167 0         0 $self->warn("set_contig_quality is deprecated and will likely produce unexpected results");
1168 0         0 my $full_filename = $self->{'filename'};
1169             # Run_SRC3700_2000-08-01_73+74.fasta.screen.contigs.qual
1170             # from Consed.pm
1171 0 0       0 $full_filename =~ s#\\#\/#g if $^O =~ m/mswin/i;
1172 0         0 $full_filename =~ m/(.*\/)(.*)ace.*$/;
1173 0         0 my ($base_path,$filename) = ($1,"$2"."contigs.qual");
1174 0         0 my $singletsfile = $base_path.$filename;
1175 0 0       0 if (-f $singletsfile) {
1176             # print("$singletsfile is indeed a file. Trying to open it...\n");
1177             }
1178             else {
1179 0         0 $self->warn("Bio::Tools::Alignment::Consed::set_contig_quality $singletsfile is not a file. Sorry.\n");
1180 0         0 return;
1181             }
1182 0         0 my $contig_quality_fh = Bio::Root::IO->new(-file => $singletsfile);
1183              
1184 0         0 my ($sequence,$name,$count,$identity,$line,$quality);
1185 0         0 while ($line = $contig_quality_fh->_readline()) {
1186 0         0 chomp $line;
1187 0 0       0 if ($line =~ /^\>/) {
1188 0         0 $quality = undef;
1189 0         0 $line =~ m/\>.*Contig(\d+)\s/;
1190 0         0 $identity = $1;
1191             }
1192             else {
1193 0 0       0 if ($self->{'contigs'}->{$identity} ) {
1194 0         0 $self->{'contigs'}->{$identity}->{'quality'} .= " $line";
1195             }
1196             }
1197             }
1198             } # end set_contig_quality
1199              
1200             =head2 get_multiplets()
1201              
1202             Title : get_multiplets()
1203             Usage : $o_consed->get_multiplets();
1204             Function: Return the keynames of the multiplets.
1205             Returns : Returns an array containing the keynames of all
1206             Bio::Tools::Alignment::Consed sequences in the class "multiplet".
1207             Args : None.
1208             Notes :
1209              
1210             =cut
1211              
1212             sub get_multiplets {
1213             # returns an array of multiplet names
1214             # multiplets have # members > 2
1215 3     3 1 427 my $self = shift;
1216 3         3 my (@multiplets,@array);
1217 3         3 foreach my $key (sort keys %{$self->{'contigs'}}) {
  3         87  
1218 354 50       395 if ($self->{'contigs'}->{$key}->{'class'}) {
1219 354 100       435 if ($self->{'contigs'}->{$key}->{'class'} eq "multiplet") {
1220 12         12 push @multiplets,$key;
1221             }
1222             }
1223             }
1224 3         16 return @multiplets;
1225             }
1226              
1227             =head2 get_all_members()
1228              
1229             Title : get_all_members()
1230             Usage : @all_members = $o_consed->get_all_members();
1231             Function: Return a list of all of the read names in the
1232             Bio::Tools::Alignment::Consed object.
1233             Returns : An array containing all of the elements in all of the
1234             {'member_array'}s.
1235             Args : None.
1236             Notes :
1237              
1238             =cut
1239              
1240             sub get_all_members {
1241 0     0 1 0 my $self = shift;
1242 0         0 my @members;
1243 0         0 foreach my $key (sort keys %{$self->{'contigs'}}) {
  0         0  
1244 0 0       0 if ($key =~ /^singlet/) {
    0          
1245 0         0 push @members,$self->{'contigs'}->{$key}->{'member_array'}[0];
1246             }
1247             elsif ($self->{'contigs'}->{$key}->{'member_array'}) {
1248 0         0 push @members,@{$self->{'contigs'}->{$key}->{'member_array'}};
  0         0  
1249             }
1250             # else {
1251             # print("Bio::Tools::Alignment::Consed: $key is _not_ an array. Pushing $self->{'contigs'}->{$key}->{'member_array'} onto \@members\n");
1252             # push @members,$self->{'contigs'}->{$key}->{'member_array'};
1253             # }
1254             }
1255 0         0 return @members;
1256             }
1257              
1258             =head2 sum_lets($total_only)
1259              
1260             Title : sum_lets($total_only)
1261             Usage : $statistics = $o_consed->sum_lets($total_only);
1262             Function: Provide numbers for how many sequences were accounted for in the
1263             Bio::Tools::Alignment::Consed object.
1264             Returns : If a scalar is present, returns the total number of
1265             sequences accounted for in all classes. If no scalar passed
1266             then returns a string that looks like this:
1267             Singt/singn/doub/pair/mult/total : 2,0,1(2),0(0),0(0),4
1268             This example means the following: There were 1 singlets.
1269             There were 0 singletons. There were 1 doublets for a total
1270             of 2 sequences in this class. There were 0 pairs for a
1271             total of 0 sequences in this class. There were 0
1272             multiplets for a total of 0 sequences in this class. There
1273             were a total of 4 sequences accounted for in the
1274             Bio::Tools::Alignment::Consed object.
1275             Args : A scalar is optional to change the way the numbers are returned.
1276             Notes:
1277              
1278             =cut
1279              
1280             sub sum_lets {
1281 2     2 1 3 my ($self,$total_only) = @_;
1282 2         3 my ($count,$count_multiplets,$multiplet_count);
1283 2         3 my $singlets = &get_singlets($self); $count += $singlets;
  2         3  
1284 2         3 my $doublets = &get_doublets($self); $count += ($doublets * 2);
  2         5  
1285 2         4 my $pairs = &get_pairs($self); $count += ($pairs * 2);
  2         3  
1286 2         3 my $singletons = &get_singletons($self); $count += $singletons;
  2         3  
1287 2         5 my @multiplets = &get_multiplets($self);
1288 2         3 $count_multiplets = @multiplets;
1289 2         1 my $return_string;
1290 2         3 foreach (@multiplets) {
1291 8         6 my $number_members = $self->{'contigs'}->{$_}->{num_members};
1292 8         8 $multiplet_count += $number_members;
1293             }
1294 2 50       5 if ($multiplet_count) {
1295 2         2 $count += $multiplet_count;
1296             }
1297 2         2 foreach (qw(multiplet_count singlets doublets pairs singletons
1298             multiplets count_multiplets)) {
1299 1     1   7 no strict 'refs'; # renege for the block
  1         2  
  1         810  
1300 14 50       9 if (!${$_}) {
  14         28  
1301 14         10 ${$_} = 0;
  14         15  
1302             }
1303             }
1304 2 50       5 if (!$multiplet_count) { $multiplet_count = 0; }
  0         0  
1305 2 100       3 if ($total_only) {
1306 1         5 return $count;
1307             }
1308 1         9 $return_string = "Singt/singn/doub/pair/mult/total : ".
1309             "$singlets,$singletons,$doublets(".
1310             ($doublets*2)."),$pairs(".($pairs*2).
1311             "),$count_multiplets($multiplet_count),$count";
1312 1         4 return $return_string;
1313             }
1314              
1315             =head2 write_stats()
1316              
1317             Title : write_stats()
1318             Usage : $o_consed->write_stats();
1319             Function: Write a file called "statistics" containing numbers similar to
1320             those provided in sum_lets().
1321             Returns : Nothing. Write a file in $o_consed->{path} containing something
1322             like this:
1323              
1324             0,0,50(100),0(0),0(0),100
1325              
1326             Where the numbers provided are in the format described in the
1327             documentation for sum_lets().
1328             Args : None.
1329             Notes : This might break platform independence, I do not know.
1330              
1331             See L
1332              
1333             =cut
1334              
1335             sub write_stats {
1336             # worry about platform dependence here?
1337             # oh shucksdarn.
1338 0     0 1 0 my $self = shift;
1339 0         0 my $stats_filename = $self->{'path'}."statistics";
1340 0         0 my $statistics_raw = $self->sum_lets;
1341 0         0 my ($statsfilecontents) = $statistics_raw =~ s/.*\ \:\ //g;
1342 0         0 umask 0001;
1343 0         0 my $fh = Bio::Root::IO->new(-file=>"$stats_filename");
1344             # open my $STATSFILE, '>', $stats_filename or print "Could not write the statsfile: $!\n");
1345 0         0 $fh->_print("$statsfilecontents");
1346             # close $STATSFILE;
1347 0         0 $fh->close();
1348             }
1349              
1350             =head2 get_singletons()
1351              
1352             Title : get_singletons()
1353             Usage : @singletons = $o_consed->get_singletons();
1354             Function: Return the keynames of the singletons.
1355             Returns : Returns an array containing the keynames of all
1356             Bio::Tools::Alignment::Consed sequences in the class "singleton".
1357             Args : None.
1358             Notes :
1359              
1360             =cut
1361              
1362             sub get_singletons {
1363             # returns an array of singleton names
1364             # singletons are contigs with one member (see consed documentation)
1365 3     3 1 453 my $self = shift;
1366 3         2 my (@singletons,@array);
1367 3         3 foreach my $key (sort keys %{$self->{'contigs'}}) {
  3         86  
1368 354 50       353 if ($self->{'contigs'}->{$key}->{'class'}) {
1369             # print ("$key class: $self->{'contigs'}->{$key}->{'class'}\n");
1370             }
1371             else {
1372             # print("$key belongs to no class. why?\n");
1373             }
1374 354 50       391 if ($self->{'contigs'}->{$key}->{'member_array'}) {
1375 354         216 @array = @{$self->{'contigs'}->{$key}->{'member_array'}};
  354         448  
1376             }
1377 354         230 my $num_array_elem = @array;
1378 354 100 66     818 if ($num_array_elem == 1 && $self->{'contigs'}->{$key}->{'class'} && $self->{'contigs'}->{$key}->{'class'} eq "singleton") { push @singletons,$key; }
  9   66     9  
1379             }
1380 3         17 return @singletons;
1381             }
1382              
1383             =head2 get_pairs()
1384              
1385             Title : get_pairs()
1386             Usage : @pairs = $o_consed->get_pairs();
1387             Function: Return the keynames of the pairs.
1388             Returns : Returns an array containing the keynames of all
1389             Bio::Tools::Alignment::Consed sequences in the class "pair".
1390             Args : None.
1391             Notes :
1392              
1393             =cut
1394              
1395             sub get_pairs {
1396             # returns an array of pair contig names
1397             # a pair is a contig of two where the names do not match
1398 3     3 1 3 my $self = shift;
1399 3         3 my (@pairs,@array);
1400 3         3 foreach my $key (sort keys %{$self->{'contigs'}}) {
  3         89  
1401 354 50       406 if ($self->{'contigs'}->{$key}->{'member_array'}) {
1402 354 100 100     185 if (@{$self->{'contigs'}->{$key}->{'member_array'}} == 2 &&
  354         701  
1403             $self->{'contigs'}->{$key}->{'class'} eq "pair") {
1404 3         6 push @pairs,$key;
1405             }
1406             }
1407             }
1408 3         16 return @pairs;
1409             }
1410              
1411             =head2 get_name($contig_keyname)
1412              
1413             Title : get_name($contig_keyname)
1414             Usage : $name = $o_consed->get_name($contig_keyname);
1415             Function: Return the {name} for $contig_keyname.
1416             Returns : A string. ({name})
1417             Args : A contig keyname.
1418             Notes :
1419              
1420             =cut
1421              
1422             sub get_name {
1423 0     0 1 0 my ($self,$contig) = @_;
1424 0         0 return $self->{'contigs'}->{$contig}->{'name'};
1425             }
1426              
1427             =head2 _get_contig_name(\@array_containing_reads)
1428              
1429             Title : _get_contig_name(\@array_containing_reads)
1430             Usage : $o_consed->_get_contig_name(\@array_containing_reads);
1431             Function: The logic for the set_doublets subroutine.
1432             Returns : The name for this contig.
1433             Args : A reference to an array containing read names.
1434             Notes : Depends on reverse_designator. Be sure this is set the way you
1435             intend.
1436              
1437             =cut
1438              
1439             sub _get_contig_name {
1440 46     46   31 my ($self,$r_array) = @_;
1441 46         70 my @contig_members = @$r_array;
1442 46         26 my @name_nodir;
1443 46         39 foreach (@contig_members) {
1444             # how can I distinguish the clone name from the direction label?
1445             # look for $Consed::reverse_designator and $Consed::forward_designator
1446             # what if you do not find _any_ of those?
1447 92   50     115 my $forward_designator = $self->{'forward_designator'} || "f";
1448 92   50     98 my $reverse_designator = $self->{'reverse_designator'} || "r";
1449 92   33     372 my $any_hits = /(.+)($forward_designator.*)/ || /(.+)($reverse_designator.*)/||/(.+)(_.+)/;
1450 92         101 my $name = $1;
1451 92         69 my $suffix = $2;
1452 92 50       106 if ($name) {
1453             # print("\t\$name is $name ");
1454             }
1455 92 50       93 if ($suffix) {
1456             # print("and \$suffix is $suffix.\n");
1457             }
1458             # Jee, I hope we get a naming convention soon
1459 92 50       91 if ($suffix) {
1460 92 50 66     291 if ($suffix =~ /^$forward_designator/ || $suffix =~ /^$reverse_designator/) {
1461 92         148 push @name_nodir,$name;
1462             }
1463             # bugwatch here! should this be unnested?
1464             else {
1465 0         0 push @name_nodir,"$name$suffix";
1466             }
1467             }
1468             }
1469             # print("\@name_nodir: @name_nodir\n");
1470 46         30 my $mismatch = 0;
1471 46         72 for (my $counter=0; $counter<@name_nodir;$counter++) {
1472 92 100       169 next if ($name_nodir[0] eq $name_nodir[$counter]);
1473 1         2 $mismatch = 1;
1474             }
1475 46 100       46 if ($mismatch == 0) {
1476             # print("\tYou have a cohesive contig named $name_nodir[0].\n\n");
1477 45         78 return $name_nodir[0];
1478             } else {
1479             # print("\tYou have mixed names in this contig.\n\n");
1480             }
1481             } # end _get_contig_name
1482              
1483             =head2 get_doublets()
1484              
1485             Title : get_doublets()
1486             Usage : @doublets = $o_consed->get_doublets();
1487             Function: Return the keynames of the doublets.
1488             Returns : Returns an array containing the keynames of all
1489             Bio::Tools::Alignment::Consed sequences in the class "doublet".
1490             Args : None.
1491             Notes :
1492              
1493             =cut
1494              
1495             sub get_doublets {
1496 3     3 1 4 my $self = shift;
1497 3 50       7 if (!$self->{doublets_set}) {
1498 0         0 $self->warn("You need to set the doublets before you can get them. Doing that now.");
1499 0         0 $self->set_doublets();
1500             }
1501 3         4 my @doublets;
1502 3         1 foreach (sort keys %{$self->{'contigs'}}) {
  3         91  
1503 354 100 100     876 if ($self->{'contigs'}->{$_}->{name} && $self->{'contigs'}->{$_}->{'class'} eq "doublet") {
1504 135         103 push @doublets,$_;
1505             }
1506             }
1507 3         23 return @doublets;
1508             } # end get_doublets
1509              
1510             =head2 dump_hash()
1511              
1512             Title : dump_hash()
1513             Usage : $o_consed->dump_hash();
1514             Function: Use dumpvar.pl to dump out the Bio::Tools::Alignment::Consed
1515             object to STDOUT.
1516             Returns : Nothing.
1517             Args : None.
1518             Notes : I used this a lot in debugging.
1519              
1520             =cut
1521              
1522             sub dump_hash {
1523 0     0 1   my $self = shift;
1524 0           my $dumper = Dumpvalue->new();
1525 0           $self->debug( "Bio::Tools::Alignment::Consed::dump_hash - ".
1526             "The following is the contents of the contig hash...\n");
1527 0           $dumper->dumpValue($self->{'contigs'});
1528             }
1529              
1530             =head2 dump_hash_compact()
1531              
1532             Title : dump_hash_compact()
1533             Usage : $o_consed->dump_hash_compact();
1534             Function: Dump out the Bio::Tools::Alignment::Consed object in a compact way.
1535             Returns : Nothing.
1536             Args : Nothing.
1537             Notes : Cleaner then dumpValue(), dumpHash(). I used this a lot in
1538             debugging.
1539              
1540             =cut
1541              
1542             sub dump_hash_compact {
1543 1     1   5 no strict 'refs'; # renege for the block
  1         1  
  1         1012  
1544 0     0 1   my ($self,$sequence) = @_;
1545             # get the classes
1546 0           my @singlets = $self->get_singlets();
1547 0           my @singletons = $self->get_singletons();
1548 0           my @doublets = $self->get_doublets();
1549 0           my @pairs = $self->get_pairs();
1550 0           my @multiplets = $self->get_multiplets();
1551 0           print("Name\tClass\tMembers\tQuality?\n");
1552 0           foreach (@singlets) {
1553 0           my @members = $self->get_members($_);
1554 0           print($self->get_name($_)."\tsinglets\t".(join',',@members)."\t");
1555 0 0         if ($self->{'contigs'}->{$_}->{'quality'}) {
1556 0           print("qualities found here\n");
1557             } else {
1558 0           print("no qualities found here\n");
1559             }
1560              
1561             }
1562 0           foreach (@singletons) {
1563 0           my @members = $self->get_members($_);
1564 0           print($self->get_name($_)."\tsingletons\t".(join',',@members)."\t");
1565 0 0         if ($self->{'contigs'}->{$_}->{'quality'}) {
1566 0           print("qualities found here\n");
1567             } else {
1568 0           print("no qualities found here\n");
1569             }
1570             }
1571 0           foreach my $pair (@pairs) {
1572 0           my @members = $self->get_members($pair);
1573 0           my $name;
1574 0 0         if (!$self->get_name($pair)) {
1575 0           $name = "BLANK";
1576             } else {
1577 0           $name = $self->get_name($pair);
1578             }
1579 0           print("$name\tpairs\t".(join',',@members)."\n");
1580             }
1581 0           foreach (@doublets) {
1582 0           my @members = $self->get_members_by_name($_);
1583 0           print("$_\tdoublets\t".(join',',@members)."\t");
1584 0           my $contig_number = &get_contig_number_by_name($self,$_);
1585 0 0         if ($self->{'contigs'}->{$contig_number}->{'quality'}) {
1586 0           print("qualities found here\n");
1587             } else {
1588 0           print("no qualities found here\n");
1589             }
1590             # print($_."\tdoublets\t".(join',',@members)."\n");
1591             }
1592 0           foreach (@multiplets) {
1593 0           my @members = $self->get_members($_);
1594 0           print("Contig $_"."\tmultiplets\t".(join',',@members)."\n");
1595             }
1596             } # end dump_hash_compact
1597              
1598             =head2 get_phreds()
1599              
1600             Title : get_phreds()
1601             Usage : @phreds = $o_consed->get_phreds();
1602             Function: For each doublet in the Bio::Tools::Alignment::Consed hash, go
1603             and get the phreds for the top and bottom reads. Place them into
1604             {top_phreds} and {bottom_phreds}.
1605             Returns : Nothing.
1606             Args : Nothing.
1607              
1608             Requires parse_phd() and reverse_and_complement(). I realize that it
1609             would be much more elegant to pull qualities as required but there
1610             were certain "features" in the acefile that required a bit more
1611             detailed work be done to get the qualities for certain parts of the
1612             consensus sequence. In order to make _sure_ that this was done
1613             properly I wrote things to do all steps and then I used dump_hash()
1614             and checked each one to ensure expected behavior. I have never changed
1615             this, so there you are.
1616              
1617             =cut
1618              
1619             sub get_phreds {
1620             # this subroutine is the target of a rewrite to use the Bio::Tools::Alignment::Phred object.
1621 0     0 1   my $self = shift;
1622 0           my $current_contig;
1623 0           foreach $current_contig (sort keys %{$self->{'contigs'}}) {
  0            
1624 0 0         if ($self->{'contigs'}->{$current_contig}->{'class'} eq "doublet") {
1625 0           $self->debug("$current_contig is a doublet. Going to parse_phd for top($self->{'contigs'}->{$current_contig}->{'top_name'}) and bottom($self->{'contigs'}->{$current_contig}->{'bottom_name'})\n");
1626 0           my $r_phreds_top = &parse_phd($self,$self->{'contigs'}->{$current_contig}->{'top_name'});
1627 0           my $r_phreds_bottom = &parse_phd($self,$self->{'contigs'}->{$current_contig}->{'bottom_name'});
1628 0 0         if ($self->{'contigs'}->{$current_contig}->{'top_complement'} eq "C") {
1629             # print("Reversing and complementing...\n");
1630 0           $r_phreds_top = &reverse_and_complement($r_phreds_top);
1631             }
1632 0 0         if ($self->{'contigs'}->{$current_contig}->{'bottom_complement'} eq "C") {
1633 0           $r_phreds_bottom = &reverse_and_complement($r_phreds_bottom);
1634             }
1635 0           $self->{'contigs'}->{$current_contig}->{'top_phreds'} = $r_phreds_top;
1636 0           $self->{'contigs'}->{$current_contig}->{'bottom_phreds'} = $r_phreds_bottom;
1637             }
1638             }
1639             }
1640              
1641             =head2 parse_phd($read_name)
1642              
1643             Title : parse_phd($read_name)
1644             Usage : $o_consed->parse_phd($read_name);
1645             Function: Suck in the contents of a .phd file.
1646             Returns : A reference to an array containing the quality values for the read.
1647             Args : The name of a read.
1648             Notes : This is a significantly weak subroutine because it was always
1649             intended that these functions, along with the functions provided by
1650             get_phreds() be put into the Bio::SeqIO:phd module. This is done
1651             now but the Bio::Tools::Alignment::Consed module has not be
1652             rewritten to reflect this change.
1653              
1654             See L for more information.
1655              
1656             =cut
1657              
1658             sub parse_phd {
1659 0     0 1   my ($self,$sequence_name) = @_;
1660 0           $self->debug("Parsing phd for $sequence_name\n");
1661 0           my $in_dna = 0;
1662 0           my $base_number = 0;
1663 0           my (@bases,@current_line);
1664             # print("parse_phd: $sequence_name\n");
1665 0           my $fh = Bio::Root::IO->new
1666             (-file=>"$self->{path}/../phd_dir/$sequence_name.phd.1");
1667 0           while ($fh->_readline()) {
1668             # print("Reading a line from a phredfile!\n");
1669 0           chomp;
1670 0 0         if (/^BEGIN_DNA/) { $in_dna = 1; next}
  0            
  0            
1671 0 0         if (/^END_DNA/) { last; }
  0            
1672 0 0         if (!$in_dna) { next; }
  0            
1673 0           push(@bases,$_);
1674             }
1675 0           return \@bases;
1676             }
1677              
1678             =head2 reverse_and_complement(\@source)
1679              
1680             Title : reverse_and_complement(\@source)
1681             Usage : $reference_to_array = $o_consed->reverse_and_complement(\@source);
1682             Function: A stub for the recursive routine reverse_recurse().
1683             Returns : A reference to a reversed and complemented array of phred data.
1684             Args : A reference to an array of phred data.
1685             Notes :
1686              
1687             =cut
1688              
1689             sub reverse_and_complement {
1690 0     0 1   my $r_source = shift;
1691 0           my $r_destination;
1692 0           $r_destination = &reverse_recurse($r_source,$r_destination);
1693 0           return $r_destination;
1694             }
1695              
1696             =head2 reverse_recurse($r_source,$r_destination)
1697              
1698             Title : reverse_recurse(\@source,\@destination)
1699             Usage : $o_consed->reverse_recurse(\@source,\@destination);
1700             Function: A recursive routine to reverse and complement an array of
1701             phred data.
1702             Returns : A reference to an array containing reversed phred data.
1703             Args : A reference to a source array and a reverence to a destination
1704             array.
1705              
1706             Recursion is kewl, but this sub should likely be _reverse_recurse.
1707              
1708             =cut
1709              
1710              
1711             sub reverse_recurse($$) {
1712 0     0 1   my ($r_source,$r_destination) = @_;
1713 0 0         if (!@$r_source) {
1714 0           return $r_destination;
1715             }
1716 0           $_=pop(@$r_source);
1717 0 0 0       s/c/g/ || s/g/c/ || s/a/t/ || s/t/a/;
      0        
1718 0           push(@$r_destination,$_);
1719 0           &reverse_recurse($r_source,$r_destination);
1720             }
1721              
1722             =head2 show_missing_sequence()
1723              
1724             Title : show_missing_sequence();
1725             Usage : $o_consed->show_missing_sequence();
1726             Function: Used by set_trim_points_doublets() to fill in quality values where
1727             consed (phrap?) set them to 0 at the beginning and/or end of the
1728             consensus sequences.
1729             Returns : Nothing.
1730             Args : None.
1731              
1732             Acts on doublets only. Really very somewhat quite ugly. A disgusting
1733             kludge. I It was written stepwise with no real plan
1734             because it was not really evident why consed (phrap?) was doing this.
1735              
1736             =cut
1737              
1738             sub show_missing_sequence() {
1739              
1740             # decide which sequence should not have been clipped at consensus
1741             # position = 0
1742              
1743 0     0 1   my $self = shift;
1744 0           &get_phreds($self);
1745 0           my ($current_contig,@qualities);
1746 0           foreach $current_contig (sort keys %{$self->{'contigs'}}) {
  0            
1747 0 0         if ($self->{'contigs'}->{$current_contig}->{'class'} eq "doublet") {
1748 0           my $number_leading_xs = 0;
1749 0           my $number_trailing_xs = 0;
1750 0           my $measurer = $self->{'contigs'}->{$current_contig}->{'quality'};
1751 0           while ($measurer =~ s/^\ 0\ /\ /) {
1752 0           $number_leading_xs++;
1753             }
1754 0           while ($measurer =~ s/\ 0(\s*)$/$1/) {
1755 0           $number_trailing_xs++;
1756             }
1757 0           @qualities = split(' ',$self->{'contigs'}->{$current_contig}->{'quality'});
1758 0           my $in_initial_zeros = 0;
1759 0           for (my $count=0;$count
1760 0 0         if ($qualities[$count] == 0) {
1761 0           my ($quality,$top_phred_position,$bottom_phred_position,$top_phred_data,$bottom_phred_data);
1762             # print("The quality of the consensus at ".($count+1)." is zero. Retrieving the real quality value.\n");
1763             # how do I know which strand to get these quality values from????
1764             # boggle
1765 0           my $top_quality_here = $self->{'contigs'}->{$current_contig}->{'top_phreds'}->[0-$self->{'contigs'}->{$current_contig}->{'top_start'}+$count+1];
1766 0           my $bottom_quality_here = $self->{'contigs'}->{$current_contig}->{'bottom_phreds'}->[1-$self->{'contigs'}->{$current_contig}->{'bottom_start'}+$count];
1767 0 0 0       if (!$bottom_quality_here || (1-$self->{'contigs'}->{$current_contig}->{'bottom_start'}+$count)<0) {
1768 0           $bottom_quality_here = "not found";
1769             }
1770 0 0         if (!$top_quality_here) {
1771 0           $top_quality_here = "not found";
1772             }
1773             # print("Looking for quals at position $count of $current_contig: top position ".(0-$self->{'contigs'}->{$current_contig}->{top_start}+$count)." ($self->{'contigs'}->{$current_contig}->{top_name}) $top_quality_here , bottom position ".(1-$self->{'contigs'}->{$current_contig}->{bottom_start}+$count)." ($self->{'contigs'}->{$current_contig}->{bottom_name}) $bottom_quality_here\n");
1774 0 0         if ($count<$number_leading_xs) {
1775             # print("$count is less then $number_leading_xs so I will get the quality from the top strand\n");
1776             # print("retrieved quality is ".$self->{'contigs'}->{$current_contig}->{top_phreds}[0-$self->{'contigs'}->{$current_contig}->{top_start}+$count+1]."\n");
1777 0           my $quality = $top_quality_here;
1778 0           $quality =~ /\S+\s(\d+)\s+/;
1779 0           $quality = $1;
1780             # print("retrieved quality for leading zero $count is $quality\n");
1781             # t 9 9226
1782 0           $qualities[$count] = $quality;
1783             } else {
1784             # this part is tricky
1785             # if the contig is like this
1786             # cccccccccccccccc
1787             # ffffffffffffffffff
1788             # rrrrrrrrrrrrrrrrr
1789             # then take the quality value for the trailing zeros in the cons. seq from the r
1790             #
1791             # but if the contig is like this
1792             # cccccccccccccccccc
1793             # ffffffffffffffffffffffffffffffff
1794             # rrrrrrrrrrrrrrrrrrrrrrrxxxxxxxxr
1795             # ^^^
1796             # then any zeros that fall in the positions (^) must be decided whether the quality
1797             # is the qual from the f or r strand. I will use the greater number
1798             # does a similar situation exist for the leading zeros? i dunno
1799             #
1800             # print("$count is greater then $number_leading_xs so I will get the quality from the bottom strand\n");
1801             # print("retrieved quality is ".$contigs->{$current_contig}->{top_phreds}[0-$contigs->{$current_contig}->{top_start}+$count+1]."\n");
1802             # my ($quality,$top_phred_position,$bottom_phred_position,$top_phred_data,$bottom_phred_data);
1803 0 0         if ($bottom_quality_here eq "not found") {
    0          
1804             # $top_phred_position = 1-$contigs->{$current_contig}->{bottom_start}+$count;
1805             # print("Going to get quality from here: $top_phred_position of the top.\n");
1806             # my $temp_quality - $contigs->{$current_contig}->{top_phreds}
1807             # $quality = $contigs->{$current_contig}->{top_phreds}[$top_phred_position];
1808 0           $top_quality_here =~ /\w+\s(\d+)\s/;
1809 0           $quality = $1;
1810             } elsif ($top_quality_here eq "not found") {
1811             # $bottom_phred_position = 1+$contigs->{$current_contig}->{bottom_start}+$count;
1812             # print("Going to get quality from here: $bottom_phred_position of the bottom.\n");
1813             # $quality = $contigs->{$current_contig}->{bottom_phreds}[$bottom_phred_position];
1814             # print("Additional: no top quality but bottom is $quality\n");
1815 0           $bottom_quality_here =~ /\w+\s(\d+)\s/;
1816 0           $quality = $1;
1817             } else {
1818             # print("Oh jeepers, there are 2 qualities to choose from at this position.\n");
1819             # print("Going to compare these phred qualities: top: #$top_quality_here# bottom: #$bottom_quality_here#\n");
1820             # now you have to compare them
1821             # my $top_quality_phred = $contigs->{$current_contig}->{top_phreds}[$top_phred_position];
1822             # #t 40 875#
1823             # print("regexing #$top_quality_here#... ");
1824 0           $top_quality_here =~ /\w\ (\d+)\s/;
1825 0           my $top_quality = $1;
1826             # print("$top_quality\nregexing #$bottom_quality_here#... ");
1827 0           $bottom_quality_here =~ /\w\ (\d+)\s/;
1828 0           my $bottom_quality = $1;
1829             # print("$bottom_quality\n");
1830             # print("top_quality: $top_quality bottom quality: $bottom_quality\n");
1831 0 0         if ($bottom_quality > $top_quality) {
1832             # print("Chose to take the bottom quality: $bottom_quality\n");
1833 0           $quality = $bottom_quality;
1834             } else {
1835             # print("Chose to take the top quality: $top_quality\n");
1836 0           $quality = $top_quality;
1837             }
1838             }
1839 0 0         if (!$quality) {
1840             # print("Warning: no quality value for $current_contig, position $count!\n");
1841             # print("Additional data: top quality phred: $top_quality_here\n");
1842             # print("Additional data: bottom quality phred: $bottom_quality_here\n");
1843             } else {
1844 0           $qualities[$count] = $quality;
1845             }
1846             }
1847             }
1848              
1849             }
1850 0 0         unless (!@qualities) {
1851 0           $self->{'contigs'}->{$current_contig}->{'quality'} = join(" ",@qualities);
1852             }
1853 0           $self->{'contigs'}->{$current_contig}->{'bottom_phreds'} = undef;
1854 0           $self->{'contigs'}->{$current_contig}->{'top_phreds'} = undef;
1855 0           my $count = 1;
1856             } # end foreach key
1857             }
1858             }
1859              
1860              
1861             1;