File Coverage

Bio/Tools/Lucy.pm
Criterion Covered Total %
statement 139 159 87.4
branch 38 82 46.3
condition 1 3 33.3
subroutine 22 22 100.0
pod 16 16 100.0
total 216 282 76.6


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Tools::Lucy
3             #
4             # Copyright Her Majesty the Queen of England
5             # written by Andrew Walsh (paeruginosa@hotmail.com) during employment with
6             # Agriculture and Agri-food Canada, Cereal Research Centre, Winnipeg, MB
7             #
8             # You may distribute this module under the same terms as perl itself
9             # POD documentation - main docs before the code
10              
11             =head1 NAME
12              
13             Bio::Tools::Lucy - Object for analyzing the output from Lucy,
14             a vector and quality trimming program from TIGR
15              
16             =head1 SYNOPSIS
17              
18             # Create the Lucy object from an existing Lucy output file
19             @params = ('seqfile' => 'lucy.seq', 'lucy_verbose' => 1);
20             $lucyObj = Bio::Tools::Lucy->new(@params);
21              
22             # Get names of all sequences
23             $names = $lucyObj->get_sequence_names();
24              
25             # Print seq and qual values for sequences >400 bp in order to run CAP3
26             foreach $name (@$names) {
27             next unless $lucyObj->length_clear($name) > 400;
28             print SEQ ">$name\n", $lucyObj->sequence($name), "\n";
29             print QUAL ">$name\n", $lucyObj->quality($name), "\n";
30             }
31              
32             # Get an array of Bio::PrimarySeq objects
33             @seqObjs = $lucyObj->get_Seq_Objs();
34              
35              
36             =head1 DESCRIPTION
37              
38             Bio::Tools::Lucy.pm provides methods for analyzing the sequence and
39             quality values generated by Lucy program from TIGR.
40              
41             Lucy will identify vector, poly-A/T tails, and poor quality regions in
42             a sequence. (www.genomics.purdue.edu/gcg/other/lucy.pdf)
43              
44             The input to Lucy can be the Phred sequence and quality files
45             generated from running Phred on a set of chromatograms.
46              
47             Lucy can be obtained (free of charge to academic users) from
48             www.tigr.org/softlab
49              
50             There are a few methods that will only be available if you make some
51             minor changes to the source for Lucy and then recompile. The changes
52             are in the 'lucy.c' file and there is a diff between the original and
53             the modified file in the Appendix
54              
55             Please contact the author of this module if you have any problems
56             making these modifications.
57              
58             You do not have to make these modifications to use this module.
59              
60             =head2 Creating a Lucy object
61              
62             @params = ('seqfile' => 'lucy.seq', 'adv_stderr' => 1,
63             'fwd_desig' => '_F', 'rev_desig' => '_R');
64             $lucyObj = Bio::Tools::Lucy->new(@params);
65              
66             =head2 Using a Lucy object
67              
68             You should get an array with the sequence names in order to use
69             accessor methods. Note: The Lucy binary program will fail unless
70             the sequence names provided as input are unique.
71              
72             $names_ref = $lucyObj->get_sequence_names();
73              
74             This code snippet will produce a Fasta format file with sequence
75             lengths and %GC in the description line.
76              
77             foreach $name (@$names) {
78             print FILE ">$name\t",
79             $lucyObj->length_clear($name), "\t",
80             $lucyObj->per_GC($name), "\n",
81             $lucyObj->sequence($name), "\n";
82             }
83              
84              
85             Print seq and qual values for sequences >400 bp in order to assemble
86             them with CAP3 (or other assembler).
87              
88             foreach $name (@$names) {
89             next unless $lucyObj->length_clear($name) > 400;
90             print SEQ ">$name\n", $lucyObj->sequence($name), "\n";
91             print QUAL ">$name\n", $lucyObj->quality($name), "\n";
92             }
93              
94             Get all the sequences as Bio::PrimarySeq objects (eg., for use with
95             Bio::Tools::Run::StandaloneBlast to perform BLAST).
96              
97             @seqObjs = $lucyObj->get_Seq_Objs();
98              
99             Or use only those sequences that are full length and have a Poly-A
100             tail.
101              
102             foreach $name (@$names) {
103             next unless ($lucyObj->full_length($name) and $lucy->polyA($name));
104             push @seqObjs, $lucyObj->get_Seq_Obj($name);
105             }
106              
107              
108             Get the names of those sequences that were rejected by Lucy.
109              
110             $rejects_ref = $lucyObj->get_rejects();
111              
112             Print the names of the rejects and 1 letter code for reason they
113             were rejected.
114              
115             foreach $key (sort keys %$rejects_ref) {
116             print "$key: ", $rejects_ref->{$key};
117             }
118              
119             There is a lot of other information available about the sequences
120             analyzed by Lucy (see APPENDIX). This module can be used with the
121             DBI module to store this sequence information in a database.
122              
123             =head1 FEEDBACK
124              
125             =head2 Mailing Lists
126              
127             User feedback is an integral part of the evolution of this and other
128             Bioperl modules. Send your comments and suggestions preferably to one
129             of the Bioperl mailing lists. Your participation is much appreciated.
130              
131             bioperl-l@bioperl.org - General discussion
132             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
133              
134             =head2 Support
135              
136             Please direct usage questions or support issues to the mailing list:
137              
138             I
139              
140             rather than to the module maintainer directly. Many experienced and
141             reponsive experts will be able look at the problem and quickly
142             address it. Please include a thorough description of the problem
143             with code and data examples if at all possible.
144              
145             =head2 Reporting Bugs
146              
147             Report bugs to the Bioperl bug tracking system to help us keep track
148             the bugs and their resolution. Bug reports can be submitted via the web:
149              
150             https://github.com/bioperl/bioperl-live/issues
151              
152             =head1 AUTHOR
153              
154             Andrew G. Walsh paeruginosa@hotmail.com
155              
156             =head1 APPENDIX
157              
158             Methods available to Lucy objects are described below. Please note
159             that any method beginning with an underscore is considered internal
160             and should not be called directly.
161              
162             =cut
163              
164              
165             package Bio::Tools::Lucy;
166              
167 1     1   425 use vars qw($AUTOLOAD @ATTR %OK_FIELD);
  1         1  
  1         44  
168 1     1   3 use strict;
  1         1  
  1         14  
169 1     1   347 use Bio::PrimarySeq;
  1         1  
  1         26  
170              
171 1     1   4 use base qw(Bio::Root::Root Bio::Root::IO);
  1         1  
  1         1547  
172             @ATTR = qw(seqfile qualfile stderrfile infofile lucy_verbose fwd_desig rev_desig adv_stderr);
173             foreach my $attr (@ATTR) {
174             $OK_FIELD{$attr}++
175             }
176              
177             sub AUTOLOAD {
178 6     6   588 my $self = shift;
179 6         7 my $attr = $AUTOLOAD;
180 6         20 $attr =~ s/.*:://;
181 6         7 $attr = lc $attr;
182 6 50       12 $self->throw("Unallowed parameter: $attr !") unless $OK_FIELD{$attr};
183 6 100       12 $self->{$attr} = shift if @_;
184 6         8 return $self->{$attr};
185             }
186              
187             =head2 new
188              
189             Title : new
190             Usage : $lucyObj = Bio::Tools::Lucy->new(seqfile => lucy.seq, rev_desig => '_R',
191             fwd_desig => '_F')
192             Function: creates a Lucy object from Lucy analysis files
193             Returns : reference to Bio::Tools::Lucy object
194             Args : seqfile Fasta sequence file generated by Lucy
195             qualfile Quality values file generated by Lucy
196             infofile Info file created when Lucy is run with -debug
197             'infofile' option
198             stderrfile Standard error captured from Lucy when Lucy is run
199             with -info option and STDERR is directed to stderrfile
200             (ie. lucy ... 2> stderrfile).
201             Info in this file will include sequences dropped for low
202             quality. If you've modified Lucy source (see adv_stderr below),
203             it will also include info on which sequences were dropped because
204             they were vector, too short, had no insert, and whether a poly-A
205             tail was found (if Lucy was run with -cdna option).
206             lucy_verbose verbosity level (0-1).
207             fwd_desig The string used to determine whether sequence is a
208             forward read.
209             The parser will assume that this match will occus at the
210             end of the sequence name string.
211             rev_desig As above, for reverse reads.
212             adv_stderr Can be set to a true value (1). Will only work if
213             you have modified
214             the Lucy source code as outlined in DESCRIPTION and capture
215             the standard error from Lucy.
216              
217             If you don't provide filenames for qualfile, infofile or stderrfile,
218             the module will assume that .qual, .info, and .stderr are the file
219             extensions and search in the same directory as the .seq file for these
220             files.
221              
222             For example, if you create a Lucy object with $lucyObj =
223             Bio::Tools::Lucy-Enew(seqfile =Elucy.seq), the module will
224             find lucy.qual, lucy.info and lucy.stderr.
225              
226             You can omit any or all of the quality, info or stderr files, but you
227             will not be able to use all of the object methods (see method
228             documentation below).
229              
230             =cut
231              
232             sub new {
233 1     1 1 9 my ($class,@args) = @_;
234 1         8 my $self = $class->SUPER::new(@args);
235 1         1 my ($attr, $value);
236 1         3 while (@args) {
237 3         2 $attr = shift @args;
238 3         3 $attr = lc $attr;
239 3         3 $value = shift @args;
240 3         6 $self->{$attr} = $value;
241             }
242 1         3 &_parse($self);
243 1         3 return $self;
244             }
245              
246             =head2 _parse
247              
248             Title : _parse
249             Usage : n/a (internal function)
250             Function: called by new() to parse Lucy output files
251             Returns : nothing
252             Args : none
253              
254             =cut
255              
256             sub _parse {
257 1     1   1 my $self = shift;
258 1         5 $self->{seqfile} =~ /^(\S+)\.\S+$/;
259 1         1 my $file = $1;
260              
261 1 50       3 $self->warn("Opening $self->{seqfile} for parsing...\n") if $self->{lucy_verbose};
262 1 50       32 open my $SEQ, '<', $self->{seqfile} or $self->throw("Could not read sequence file '$self->{seqfile}': $!");
263 1         2 my ($name, $line);
264 1         1 my $seq = "";
265 1         17 my @lines = <$SEQ>;
266 1         3 while ($line = pop @lines) {
267 23         12 chomp $line;
268 23 100       25 if ($line =~ /^>(\S+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/) {
269 1         2 $name = $1;
270 1 50       4 if ($self->{fwd_desig}) {
271 0 0       0 $self->{sequences}{$name}{direction} = "F" if $name =~ /^(\S+)($self->{fwd_desig})$/;
272             }
273 1 50       3 if ($self->{rev_desig}) {
274 1 50       21 $self->{sequences}{$name}{direction} = "R" if $name =~ /^(\S+)($self->{rev_desig})$/;
275             }
276 1         3 $self->{sequences}{$name}{min_clone_len} = $2; # this is used for TIGR Assembler, as are $3 and $4
277 1         2 $self->{sequences}{$name}{max_clone_len} = $3;
278 1         3 $self->{sequences}{$name}{med_clone_len} = $4;
279 1         2 $self->{sequences}{$name}{beg_clear} = $5;
280 1         2 $self->{sequences}{$name}{end_clear} = $6;
281 1         4 $self->{sequences}{$name}{length_raw} = $seq =~ tr/[AGCTN]//; # from what I've seen, these are the bases Phred calls. Please let me know if I'm wrong.
282 1         2 my $beg = $5-1; # substr function begins with index 0
283 1         5 $seq = $self->{sequences}{$name}{sequence} = substr ($seq, $beg, $6-$beg);
284 1         2 my $count = $self->{sequences}{$name}{length_clear} = $seq =~ tr/[AGCTN]//;
285 1         1 my $countGC = $seq =~ tr/[GC]//;
286 1         3 $self->{sequences}{$name}{per_GC} = $countGC/$count * 100;
287 1         3 $seq = "";
288             }
289             else {
290 22         45 $seq = $line.$seq;
291             }
292             }
293              
294             # now parse quality values (check for presence of quality file first)
295 1 50       60 if ($self->{qualfile}) {
    50          
296 0 0       0 open my $QUAL, '<', $self->{qualfile} or $self->throw("Could not read quality file '$self->{qualfile}': $!");
297 0         0 @lines = <$QUAL>;
298             }
299             elsif (-e "$file.qual") {
300 1 50       3 $self->warn("You did not set qualfile, but I'm opening $file.qual\n") if $self->{lucy_verbose};
301 1         12 $self->qualfile("$file.qual");
302 1 50       24 open my $QUAL, '<', "$file.qual" or $self->throw("Could not read quality file '$file.qual': $!");
303 1         30 @lines = <$QUAL>;
304             }
305             else {
306 0 0       0 $self->warn("I did not find a quality file. You will not be able to use all of the accessor methods.\n") if $self->{lucy_verbose};
307 0         0 @lines = ();
308             }
309              
310 1         2 my (@vals, @slice, $num, $tot, $vals);
311 1         2 my $qual = "";
312 1         3 while ($line = pop @lines) {
313 55         33 chomp $line;
314 55 100       51 if ($line =~ /^>(\S+)/) {
315 1         2 $name = $1;
316 1         210 @vals = split /\s/ , $qual;
317 1         51 @slice = @vals[$self->{sequences}{$name}{beg_clear} - 1 .. $self->{sequences}{$name}{end_clear} - 1];
318 1         22 $vals = join "\t", @slice;
319 1         2 $self->{sequences}{$name}{quality} = $vals;
320 1         1 $qual = "";
321 1         2 foreach $num (@slice) {
322 420         271 $tot += $num;
323             }
324 1         1 $num = @slice;
325 1         2 $self->{sequences}{$name}{avg_quality} = $tot/$num;
326 1         3 $tot = 0;
327             }
328             else {
329 54         111 $qual = $line.$qual;
330             }
331             }
332              
333             # determine whether reads are full length
334 1 50       15 if ($self->{infofile}) {
    50          
335 0 0       0 open my $INFO, '<', $self->{infofile} or $self->throw("Could not read info file '$self->{infofile}': $!");
336 0         0 @lines = <$INFO>;
337             }
338             elsif (-e "$file.info") {
339 1 50       5 $self->warn("You did not set infofile, but I'm opening $file.info\n") if $self->{lucy_verbose};
340 1         10 $self->infofile("$file.info");
341 1 50       21 open my $INFO, '<', "$file.info" or $self->throw("Could not read info file '$file.info': $!");
342 1         12 @lines = <$INFO>;
343             }
344             else {
345 0 0       0 $self->warn("I did not find an info file. You will not be able to use all of the accessor methods.\n") if $self->{lucy_verbose};
346 0         0 @lines = ();
347             }
348              
349 1         2 foreach (@lines) {
350 1         5 /^(\S+).+CLV\s+(\d+)\s+(\d+)$/;
351 1 50 33     7 if ($2>0 && $3>0) {
352 1 50       5 $self->{sequences}{$1}{full_length} = 1 if $self->{sequences}{$1}; # will show cleavage info for rejected sequences too
353             }
354             }
355              
356              
357             # parse rejects (and presence of poly-A if Lucy has been modified)
358 1 50       12 if ($self->{stderrfile}) {
    50          
359 0 0       0 open my $STDERR_LUCY, '<', $self->{stderrfile} or $self->throw("Could not read quality file '$self->{stderrfile}': $!");
360 0         0 @lines = <$STDERR_LUCY>;
361             }
362             elsif (-e "$file.stderr") {
363 1 50       3 $self->warn("You did not set stderrfile, but I'm opening $file.stderr\n") if $self->{lucy_verbose};
364 1         9 $self->stderrfile("$file.stderr");
365 1 50       20 open my $STDERR_LUCY, '<', "$file.stderr" or $self->throw("Could not read quality file '$file.stderr': $!");
366 1         11 @lines = <$STDERR_LUCY>;
367             }
368             else {
369 0 0       0 $self->warn("I did not find a standard error file. You will not be able to use all of the accessor methods.\n") if $self->{lucy_verbose};
370 0         0 @lines = ();
371             }
372              
373 1 50       3 if ($self->{adv_stderr}) {
374 1         2 foreach (@lines) {
375 2 100       9 $self->{reject}{$1} = "Q" if /dropping\s+(\S+)/;
376 2 50       5 $self->{reject}{$1} = "V" if /Vector: (\S+)/;
377 2 50       4 $self->{reject}{$1} = "E" if /Empty: (\S+)/;
378 2 50       3 $self->{reject}{$1} = "S" if m{Short/ no insert: (\S+)};
379 2 100       7 $self->{sequences}{$1}{polyA} = 1 if /(\S+) has PolyA/;
380 2 50       58 if (/Dropped PolyA: (\S+)/) {
381 0         0 $self->{reject}{$1} = "P";
382 0         0 delete $self->{sequences}{$1};
383             }
384             }
385             }
386             else {
387 0         0 foreach (@lines) {
388 0 0       0 $self->{reject}{$1} = "R" if /dropping\s+(\S+)/;
389             }
390             }
391             }
392              
393             =head2 get_Seq_Objs
394              
395             Title : get_Seq_Objs
396             Usage : $lucyObj->get_Seq_Objs()
397             Function: returns an array of references to Bio::PrimarySeq objects
398             where -id = 'sequence name' and -seq = 'sequence'
399              
400             Returns : array of Bio::PrimarySeq objects
401             Args : none
402              
403             =cut
404              
405             sub get_Seq_Objs {
406 1     1 1 252 my $self = shift;
407 1         1 my($seqobj, @seqobjs);
408 1         2 foreach my $key (sort keys %{$self->{sequences}}) {
  1         3  
409 1         6 $seqobj = Bio::PrimarySeq->new( -seq => "$self->{sequences}{$key}{sequence}",
410             -id => "$key");
411 1         3 push @seqobjs, $seqobj;
412             }
413 1         3 return \@seqobjs;
414             }
415              
416             =head2 get_Seq_Obj
417              
418             Title : get_Seq_Obj
419             Usage : $lucyObj->get_Seq_Obj($seqname)
420             Function: returns reference to a Bio::PrimarySeq object where -id = 'sequence name'
421             and -seq = 'sequence'
422             Returns : reference to Bio::PrimarySeq object
423             Args : name of a sequence
424              
425             =cut
426              
427             sub get_Seq_Obj {
428 1     1 1 3 my ($self, $key) = @_;
429 1         8 my $seqobj = Bio::PrimarySeq->new( -seq => "$self->{sequences}{$key}{sequence}",
430             -id => "$key");
431 1         2 return $seqobj;
432             }
433              
434             =head2 get_sequence_names
435              
436             Title : get_sequence_names
437             Usage : $lucyObj->get_sequence_names
438             Function: returns reference to an array of names of the sequences analyzed by Lucy.
439             These names are required for most of the accessor methods.
440             Note: The Lucy binary will fail unless sequence names are unique.
441             Returns : array reference
442             Args : none
443              
444             =cut
445              
446             sub get_sequence_names {
447 1     1 1 250 my $self = shift;
448 1         2 my @keys = sort keys %{$self->{sequences}};
  1         4  
449 1         2 return \@keys;
450             }
451              
452             =head2 sequence
453              
454             Title : sequence
455             Usage : $lucyObj->sequence($seqname)
456             Function: returns the DNA sequence of one of the sequences analyzed by Lucy.
457             Returns : string
458             Args : name of a sequence
459              
460             =cut
461              
462             sub sequence {
463 1     1 1 2 my ($self, $key) = @_;
464 1         3 return $self->{sequences}{$key}{sequence};
465             }
466              
467             =head2 quality
468              
469             Title : quality
470             Usage : $lucyObj->quality($seqname)
471             Function: returns the quality values of one of the sequences analyzed by Lucy.
472             This method depends on the user having provided a quality file.
473             Returns : string
474             Args : name of a sequence
475              
476             =cut
477              
478             sub quality {
479 1     1 1 2 my($self, $key) = @_;
480 1         3 return $self->{sequences}{$key}{quality};
481             }
482              
483             =head2 avg_quality
484              
485             Title : avg_quality
486             Usage : $lucyObj->avg_quality($seqname)
487             Function: returns the average quality value for one of the sequences analyzed by Lucy.
488             Returns : float
489             Args : name of a sequence
490              
491             =cut
492              
493             sub avg_quality {
494 1     1 1 2 my($self, $key) = @_;
495 1         3 return $self->{sequences}{$key}{avg_quality};
496             }
497              
498             =head2 direction
499              
500             Title : direction
501             Usage : $lucyObj->direction($seqname)
502             Function: returns the direction for one of the sequences analyzed by Lucy
503             providing that 'fwd_desig' or 'rev_desig' were set when the
504             Lucy object was created.
505             Strings returned are: 'F' for forward, 'R' for reverse.
506             Returns : string
507             Args : name of a sequence
508              
509             =cut
510              
511             sub direction {
512 1     1 1 1 my($self, $key) = @_;
513 1 50       5 return $self->{sequences}{$key}{direction} if $self->{sequences}{$key}{direction};
514 0         0 return "";
515             }
516              
517             =head2 length_raw
518              
519             Title : length_raw
520             Usage : $lucyObj->length_raw($seqname)
521             Function: returns the length of a DNA sequence prior to quality/ vector
522             trimming by Lucy.
523             Returns : integer
524             Args : name of a sequence
525              
526             =cut
527              
528             sub length_raw {
529 1     1 1 712 my($self, $key) = @_;
530 1         4 return $self->{sequences}{$key}{length_raw};
531             }
532              
533             =head2 length_clear
534              
535             Title : length_clear
536             Usage : $lucyObj->length_clear($seqname)
537             Function: returns the length of a DNA sequence following quality/ vector
538             trimming by Lucy.
539             Returns : integer
540             Args : name of a sequence
541              
542             =cut
543              
544             sub length_clear {
545 1     1 1 1 my($self, $key) = @_;
546 1         4 return $self->{sequences}{$key}{length_clear};
547             }
548              
549             =head2 start_clear
550              
551             Title : start_clear
552             Usage : $lucyObj->start_clear($seqname)
553             Function: returns the beginning position of good quality, vector free DNA sequence
554             determined by Lucy.
555             Returns : integer
556             Args : name of a sequence
557              
558             =cut
559              
560             sub start_clear {
561 1     1 1 2 my($self, $key) = @_;
562 1         3 return $self->{sequences}{$key}{beg_clear};
563             }
564              
565              
566             =head2 end_clear
567              
568             Title : end_clear
569             Usage : $lucyObj->end_clear($seqname)
570             Function: returns the ending position of good quality, vector free DNA sequence
571             determined by Lucy.
572             Returns : integer
573             Args : name of a sequence
574              
575             =cut
576              
577             sub end_clear {
578 1     1 1 1 my($self, $key) = @_;
579 1         3 return $self->{sequences}{$key}{end_clear};
580             }
581              
582             =head2 per_GC
583              
584             Title : per_GC
585             Usage : $lucyObj->per_GC($seqname)
586             Function: returns the percente of the good quality, vector free DNA sequence
587             determined by Lucy.
588             Returns : float
589             Args : name of a sequence
590              
591             =cut
592              
593             sub per_GC {
594 1     1 1 2 my($self, $key) = @_;
595 1         3 return $self->{sequences}{$key}{per_GC};
596             }
597              
598             =head2 full_length
599              
600             Title : full_length
601             Usage : $lucyObj->full_length($seqname)
602             Function: returns the truth value for whether or not the sequence read was
603             full length (ie. vector present on both ends of read). This method
604             depends on the user having provided the 'info' file (Lucy must be
605             run with the -debug 'info_filename' option to get this file).
606             Returns : boolean
607             Args : name of a sequence
608              
609             =cut
610              
611             sub full_length {
612 1     1 1 2 my($self, $key) = @_;
613 1 50       5 return 1 if $self->{sequences}{$key}{full_length};
614 0         0 return 0;
615             }
616              
617             =head2 polyA
618              
619             Title : polyA
620             Usage : $lucyObj->polyA($seqname)
621             Function: returns the truth value for whether or not a poly-A tail was detected
622             and clipped by Lucy. This method depends on the user having modified
623             the source for Lucy as outlined in DESCRIPTION and invoking Lucy with
624             the -cdna option and saving the standard error.
625             Note, the final sequence will not show the poly-A/T region.
626             Returns : boolean
627             Args : name of a sequence
628              
629             =cut
630              
631             sub polyA {
632 1     1 1 2 my($self, $key) = @_;
633 1 50       5 return 1 if $self->{sequences}{$key}{polyA};
634 0         0 return 0;
635             }
636              
637             =head2 get_rejects
638              
639             Title : get_rejects
640             Usage : $lucyObj->get_rejects()
641             Function: returns a hash containing names of rejects and a 1 letter code for the
642             reason Lucy rejected the sequence.
643             Q- rejected because of low quality values
644             S- sequence was short
645             V- sequence was vector
646             E- sequence was empty
647             P- poly-A/T trimming caused sequence to be too short
648             In order to get the rejects, you must provide a file with the standard
649             error from Lucy. You will only get the quality category rejects unless
650             you have modified the source and recompiled Lucy as outlined in DESCRIPTION.
651             Returns : hash reference
652             Args : none
653              
654             =cut
655              
656             sub get_rejects {
657 1     1 1 253 my $self = shift;
658 1         2 return $self->{reject};
659             }
660              
661             =head2 Diff for Lucy source code
662              
663             352a353,354
664             > /* AGW added next line */
665             > fprintf(stderr, "Empty: %s\n", seqs[i].name);
666             639a642,643
667             > /* AGW added next line */
668             > fprintf(stderr, "Short/ no insert: %s\n", seqs[i].name);
669             678c682,686
670             < if (left) seqs[i].left+=left;
671             ---
672             > if (left) {
673             > seqs[i].left+=left;
674             > /* AGW added next line */
675             > fprintf(stderr, "%s has PolyA (left).\n", seqs[i].name);
676             > }
677             681c689,693
678             < if (right) seqs[i].right-=right;
679             ---
680             > if (right) {
681             > seqs[i].right-=right;
682             > /* AGW added next line */
683             > fprintf(stderr, "%s has PolyA (right).\n", seqs[i].name);
684             > }
685             682a695,696
686             > /* AGW added next line */
687             > fprintf(stderr, "Dropped PolyA: %s\n", seqs[i].name);
688             734a749,750
689             > /* AGW added next line */
690             > fprintf(stderr, "Vector: %s\n", seqs[i].name);
691              
692             =cut
693              
694             =head2 This patch is to be applied to lucy.c from the lucy-1.19p release
695              
696             277a278,279
697             > /* AGW added next line */
698             > fprintf(stderr, "Short/ no insert: %s\n", seqs[i].name);
699             588c590,592
700             < if ((seqs[i].len=bases)<=0)
701             ---
702             > if ((seqs[i].len=bases)<=0) {
703             > /* AGW added next line */
704             > fprintf(stderr, "Empty: %s\n", seqs[i].name);
705             589a594
706             > }
707             893c898,902
708             < if (left) seqs[i].left+=left;
709             ---
710             > if (left) {
711             > seqs[i].left+=left;
712             > /* AGW added next line */
713             > fprintf(stderr, "%s has PolyA (left).\n", seqs[i].name);
714             > }
715             896c905,909
716             < if (right) seqs[i].right-=right;
717             ---
718             > if (right) {
719             > seqs[i].right-=right;
720             > /* AGW added next line */
721             > fprintf(stderr, "%s has PolyA (right).\n", seqs[i].name);
722             > }
723             898a912,913
724             > /* AGW added next line */
725             > fprintf(stderr, "Dropped PolyA: %s\n", seqs[i].name);
726             949a965,966
727             > /* AGW added next line */
728             > fprintf(stderr, "Vector: %s\n", seqs[i].name);
729              
730             =cut
731              
732             1;