File Coverage

Bio/Seq/SeqBuilder.pm
Criterion Covered Total %
statement 108 117 92.3
branch 30 40 75.0
condition 6 9 66.6
subroutine 18 19 94.7
pod 17 17 100.0
total 179 202 88.6


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Seq::SeqBuilder
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Hilmar Lapp
7             #
8             # Copyright Hilmar Lapp
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             #
13             # (c) Hilmar Lapp, hlapp at gmx.net, 2002.
14             # (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
15             #
16             # You may distribute this module under the same terms as perl itself.
17             # Refer to the Perl Artistic License (see the license accompanying this
18             # software package, or see http://www.perl.com/language/misc/Artistic.html)
19             # for the terms under which you may use, modify, and redistribute this module.
20             #
21             # THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
22             # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
23             # MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
24             #
25              
26             # POD documentation - main docs before the code
27              
28             =head1 NAME
29              
30             Bio::Seq::SeqBuilder - Configurable object builder for sequence stream parsers
31              
32             =head1 SYNOPSIS
33              
34             use Bio::SeqIO;
35              
36             # usually you won't instantiate this yourself - a SeqIO object -
37             # you will have one already
38             my $seqin = Bio::SeqIO->new(-fh => \*STDIN, -format => "genbank");
39             my $builder = $seqin->sequence_builder();
40              
41             # if you need only sequence, id, and description (e.g. for
42             # conversion to FASTA format):
43             $builder->want_none();
44             $builder->add_wanted_slot('display_id','desc','seq');
45              
46             # if you want everything except the sequence and features
47             $builder->want_all(1); # this is the default if it's untouched
48             $builder->add_unwanted_slot('seq','features');
49              
50             # if you want only human sequences shorter than 5kb and skip all
51             # others
52             $builder->add_object_condition(sub {
53             my $h = shift;
54             return 0 if $h->{'-length'} > 5000;
55             return 0 if exists($h->{'-species'}) &&
56             ($h->{'-species'}->binomial() ne "Homo sapiens");
57             return 1;
58             });
59              
60             # when you are finished with configuring the builder, just use
61             # the SeqIO API as you would normally
62             while(my $seq = $seqin->next_seq()) {
63             # do something
64             }
65              
66             =head1 DESCRIPTION
67              
68             This is an implementation of L used by
69             parsers of rich sequence streams. It provides for a relatively
70             easy-to-use configurator of the parsing flow.
71              
72             Configuring the parsing process may be for you if you need much less
73             information, or much less sequence, than the stream actually
74             contains. Configuration can in both cases speed up the parsing time
75             considerably, because unwanted sections or the rest of unwanted
76             sequences are skipped over by the parser. This configuration could
77             also conserve memory if you're running out of available RAM.
78              
79             See the methods of the class-specific implementation section for
80             further documentation of what can be configured.
81              
82             =head1 FEEDBACK
83              
84             =head2 Mailing Lists
85              
86             User feedback is an integral part of the evolution of this and other
87             Bioperl modules. Send your comments and suggestions preferably to
88             the Bioperl mailing list. Your participation is much appreciated.
89              
90             bioperl-l@bioperl.org - General discussion
91             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
92              
93             =head2 Support
94              
95             Please direct usage questions or support issues to the mailing list:
96              
97             I
98              
99             rather than to the module maintainer directly. Many experienced and
100             reponsive experts will be able look at the problem and quickly
101             address it. Please include a thorough description of the problem
102             with code and data examples if at all possible.
103              
104             =head2 Reporting Bugs
105              
106             Report bugs to the Bioperl bug tracking system to help us keep track
107             of the bugs and their resolution. Bug reports can be submitted via
108             the web:
109              
110             https://github.com/bioperl/bioperl-live/issues
111              
112             =head1 AUTHOR - Hilmar Lapp
113              
114             Email hlapp at gmx.net
115              
116             =head1 APPENDIX
117              
118             The rest of the documentation details each of the object methods.
119             Internal methods are usually preceded with a _
120              
121             =cut
122              
123              
124             # Let the code begin...
125              
126              
127             package Bio::Seq::SeqBuilder;
128 82     82   287 use strict;
  82         90  
  82         2269  
129              
130             # Object preamble - inherits from Bio::Root::Root
131              
132              
133 82     82   286 use base qw(Bio::Root::Root Bio::Factory::ObjectBuilderI);
  82         91  
  82         25196  
134              
135             my %slot_param_map = ("add_SeqFeature" => "features",
136             );
137             my %param_slot_map = ("features" => "add_SeqFeature",
138             );
139              
140             =head2 new
141              
142             Title : new
143             Usage : my $obj = Bio::Seq::SeqBuilder->new();
144             Function: Builds a new Bio::Seq::SeqBuilder object
145             Returns : an instance of Bio::Seq::SeqBuilder
146             Args :
147              
148             =cut
149              
150             sub new {
151 435     435 1 814 my($class,@args) = @_;
152              
153 435         1431 my $self = $class->SUPER::new(@args);
154              
155 435         899 $self->{'wanted_slots'} = [];
156 435         758 $self->{'unwanted_slots'} = [];
157 435         671 $self->{'object_conds'} = [];
158 435         917 $self->{'_objhash'} = {};
159 435         1238 $self->want_all(1);
160              
161 435         768 return $self;
162             }
163              
164             =head1 Methods for implementing L
165              
166             =cut
167              
168             =head2 want_slot
169              
170             Title : want_slot
171             Usage :
172             Function: Whether or not the object builder wants to populate the
173             specified slot of the object to be built.
174              
175             The slot can be specified either as the name of the
176             respective method, or the initialization parameter that
177             would be otherwise passed to new() of the object to be
178             built.
179              
180             Note that usually only the parser will call this
181             method. Use add_wanted_slots and add_unwanted_slots for
182             configuration.
183              
184             Example :
185             Returns : TRUE if the object builder wants to populate the slot, and
186             FALSE otherwise.
187             Args : the name of the slot (a string)
188              
189              
190             =cut
191              
192             sub want_slot{
193 439     439 1 582 my ($self,$slot) = @_;
194 439         441 my $ok = 0;
195              
196 439 50       968 $slot = substr($slot,1) if substr($slot,0,1) eq '-';
197 439 100       821 if($self->want_all()) {
198 419         853 foreach ($self->get_unwanted_slots()) {
199             # this always overrides in want-all mode
200 29 100       89 return 0 if($slot eq $_);
201             }
202 411 100       788 if(! exists($self->{'_objskel'})) {
203 88         165 $self->{'_objskel'} = $self->sequence_factory->create_object();
204             }
205 411 100       1165 if(exists($param_slot_map{$slot})) {
206 103         568 $ok = $self->{'_objskel'}->can($param_slot_map{$slot});
207             } else {
208 308         1192 $ok = $self->{'_objskel'}->can($slot);
209             }
210 411 50       2240 return $ok if $ok;
211             # even if the object 'cannot' do this slot, it might have been
212             # added to the list of wanted slot, so carry on
213             }
214 20         24 foreach ($self->get_wanted_slots()) {
215 68 100       93 if($slot eq $_) {
216 2         3 $ok = 1;
217 2         2 last;
218             }
219             }
220 20         76 return $ok;
221             }
222              
223             =head2 add_slot_value
224              
225             Title : add_slot_value
226             Usage :
227             Function: Adds one or more values to the specified slot of the object
228             to be built.
229              
230             Naming the slot is the same as for want_slot().
231              
232             The object builder may further filter the content to be
233             set, or even completely ignore the request.
234              
235             If this method reports failure, the caller should not add
236             more values to the same slot. In addition, the caller may
237             find it appropriate to abandon the object being built
238             altogether.
239              
240             This implementation will allow the caller to overwrite the
241             return value from want_slot(), because the slot is not
242             checked against want_slot().
243              
244             Note that usually only the parser will call this method,
245             but you may call it from anywhere if you know what you are
246             doing. A derived class may be used to further manipulate
247             the value to be added.
248              
249             Example :
250             Returns : TRUE on success, and FALSE otherwise
251             Args : the name of the slot (a string)
252             parameters determining the value to be set
253              
254             OR
255              
256             alternatively, a list of slotname/value pairs in the style
257             of named parameters as they would be passed to new(), where
258             each element at an even index is the parameter (slot) name
259             starting with a dash, and each element at an odd index is
260             the value of the preceding name.
261              
262             =cut
263              
264             sub add_slot_value{
265 839     839 1 3418 my ($self,$slot,@args) = @_;
266              
267 839         1123 my $h = $self->{'_objhash'};
268 839 50       1531 return unless $h;
269             # multiple named parameter variant of calling?
270 839 100 66     4157 if((@args > 1) && (@args % 2) && (substr($slot,0,1) eq '-')) {
      66        
271 325         668 unshift(@args, $slot);
272 325         792 while(@args) {
273 2424         1910 my $key = shift(@args);
274 2424         4557 $h->{$key} = shift(@args);
275             }
276             } else {
277 514 50       923 if($slot eq 'add_SeqFeature') {
278 0         0 $slot = '-'.$slot_param_map{$slot};
279 0 0       0 $h->{$slot} = [] unless $h->{$slot};
280 0         0 push(@{$h->{$slot}}, @args);
  0         0  
281             } else {
282 514 50       1240 $slot = '-'.$slot unless substr($slot,0,1) eq '-';
283 514         1285 $h->{$slot} = $args[0];
284             }
285             }
286 839         1510 return 1;
287             }
288              
289             =head2 want_object
290              
291             Title : want_object
292             Usage :
293             Function: Whether or not the object builder is still interested in
294             continuing with the object being built.
295              
296             If this method returns FALSE, the caller should not add any
297             more values to slots, or otherwise risks that the builder
298             throws an exception. In addition, make_object() is likely
299             to return undef after this method returned FALSE.
300              
301             Note that usually only the parser will call this
302             method. Use add_object_condition for configuration.
303              
304             Example :
305             Returns : TRUE if the object builder wants to continue building
306             the present object, and FALSE otherwise.
307             Args : none
308              
309             =cut
310              
311             sub want_object{
312 335     335 1 496 my $self = shift;
313              
314 335         389 my $ok = 1;
315 335         800 foreach my $cond ($self->get_object_conditions()) {
316 21         41 $ok = &$cond($self->{'_objhash'});
317 21 100       120 last unless $ok;
318             }
319 335 100       630 delete $self->{'_objhash'} unless $ok;
320 335         834 return $ok;
321             }
322              
323             =head2 make_object
324              
325             Title : make_object
326             Usage :
327             Function: Get the built object.
328              
329             This method is allowed to return undef if no value has ever
330             been added since the last call to make_object(), or if
331             want_object() returned FALSE (or would have returned FALSE)
332             before calling this method.
333              
334             For an implementation that allows consecutive building of
335             objects, a caller must call this method once, and only
336             once, between subsequent objects to be built. I.e., a call
337             to make_object implies 'end_object.'
338              
339             Example :
340             Returns : the object that was built
341             Args : none
342              
343             =cut
344              
345             sub make_object{
346 214     214 1 282 my $self = shift;
347              
348 214         227 my $obj;
349 214 100 66     654 if(exists($self->{'_objhash'}) && %{$self->{'_objhash'}}) {
  212         1543  
350 212         631 $obj = $self->sequence_factory->create_object(%{$self->{'_objhash'}});
  212         1848  
351             }
352 214         662 $self->{'_objhash'} = {}; # reset
353 214         1232 return $obj;
354             }
355              
356             =head1 Implementation specific methods
357              
358             These methods allow to conveniently configure this sequence object
359             builder as to which slots are desired, and under which circumstances a
360             sequence object should be abandoned altogether. The default mode is
361             want_all(1), which means the builder will report all slots as wanted
362             that the object created by the sequence factory supports.
363              
364             You can add specific slots you want through add_wanted_slots(). In
365             most cases, you will want to call want_none() before in order to relax
366             zero acceptance through a list of wanted slots.
367              
368             Alternatively, you can add specific unwanted slots through
369             add_unwanted_slots(). In this case, you will usually want to call
370             want_all(1) before (which is the default if you never touched the
371             builder) to restrict unrestricted acceptance.
372              
373             I.e., want_all(1) means want all slots except for the unwanted, and
374             want_none() means only those explicitly wanted.
375              
376             If a slot is in both the unwanted and the wanted list, the following
377             rules hold. In want-all mode, the unwanted list overrules. In
378             want-none mode, the wanted list overrides the unwanted list. If this
379             is confusing to you, just try to avoid having slots at the same time
380             in the wanted and the unwanted lists.
381              
382             =cut
383              
384             =head2 get_wanted_slots
385              
386             Title : get_wanted_slots
387             Usage : $obj->get_wanted_slots($newval)
388             Function: Get the list of wanted slots
389             Example :
390             Returns : a list of strings
391             Args :
392              
393              
394             =cut
395              
396             sub get_wanted_slots{
397 21     21 1 22 my $self = shift;
398              
399 21         15 return @{$self->{'wanted_slots'}};
  21         67  
400             }
401              
402             =head2 add_wanted_slot
403              
404             Title : add_wanted_slot
405             Usage :
406             Function: Adds the specified slots to the list of wanted slots.
407             Example :
408             Returns : TRUE
409             Args : an array of slot names (strings)
410              
411             =cut
412              
413             sub add_wanted_slot{
414 2     2 1 4 my ($self,@slots) = @_;
415              
416 2         4 my $myslots = $self->{'wanted_slots'};
417 2         3 foreach my $slot (@slots) {
418 4 50       8 if(! grep { $slot eq $_; } @$myslots) {
  6         10  
419 4         7 push(@$myslots, $slot);
420             }
421             }
422 2         5 return 1;
423             }
424              
425             =head2 remove_wanted_slots
426              
427             Title : remove_wanted_slots
428             Usage :
429             Function: Removes all wanted slots added previously through
430             add_wanted_slots().
431             Example :
432             Returns : the previous list of wanted slot names
433             Args : none
434              
435             =cut
436              
437             sub remove_wanted_slots{
438 1     1 1 2 my $self = shift;
439 1         3 my @slots = $self->get_wanted_slots();
440 1         2 $self->{'wanted_slots'} = [];
441 1         2 return @slots;
442             }
443              
444             =head2 get_unwanted_slots
445              
446             Title : get_unwanted_slots
447             Usage : $obj->get_unwanted_slots($newval)
448             Function: Get the list of unwanted slots.
449             Example :
450             Returns : a list of strings
451             Args : none
452              
453             =cut
454              
455             sub get_unwanted_slots{
456 421     421 1 430 my $self = shift;
457              
458 421         378 return @{$self->{'unwanted_slots'}};
  421         1007  
459             }
460              
461             =head2 add_unwanted_slot
462              
463             Title : add_unwanted_slot
464             Usage :
465             Function: Adds the specified slots to the list of unwanted slots.
466             Example :
467             Returns : TRUE
468             Args : an array of slot names (strings)
469              
470             =cut
471              
472             sub add_unwanted_slot{
473 2     2 1 9 my ($self,@slots) = @_;
474              
475 2         4 my $myslots = $self->{'unwanted_slots'};
476 2         5 foreach my $slot (@slots) {
477 3 50       7 if(! grep { $slot eq $_; } @$myslots) {
  1         3  
478 3         7 push(@$myslots, $slot);
479             }
480             }
481 2         7 return 1;
482             }
483              
484             =head2 remove_unwanted_slots
485              
486             Title : remove_unwanted_slots
487             Usage :
488             Function: Removes the list of unwanted slots added previously through
489             add_unwanted_slots().
490             Example :
491             Returns : the previous list of unwanted slot names
492             Args : none
493              
494             =cut
495              
496             sub remove_unwanted_slots{
497 2     2 1 4 my $self = shift;
498 2         6 my @slots = $self->get_unwanted_slots();
499 2         3 $self->{'unwanted_slots'} = [];
500 2         4 return @slots;
501             }
502              
503             =head2 want_none
504              
505             Title : want_none
506             Usage :
507             Function: Disables all slots. After calling this method, want_slot()
508             will return FALSE regardless of slot name.
509              
510             This is different from removed_wanted_slots() in that it
511             also sets want_all() to FALSE. Note that it also resets the
512             list of unwanted slots in order to avoid slots being in
513             both lists.
514              
515             Example :
516             Returns : TRUE
517             Args : none
518              
519             =cut
520              
521             sub want_none{
522 1     1 1 2 my $self = shift;
523              
524 1         3 $self->want_all(0);
525 1         4 $self->remove_wanted_slots();
526 1         2 $self->remove_unwanted_slots();
527 1         1 return 1;
528             }
529              
530             =head2 want_all
531              
532             Title : want_all
533             Usage : $obj->want_all($newval)
534             Function: Whether or not this sequence object builder wants to
535             populate all slots that the object has. Whether an object
536             supports a slot is generally determined by what can()
537             returns. You can add additional 'virtual' slots by calling
538             add_wanted_slot.
539              
540             This will be ON by default. Call $obj->want_none() to
541             disable all slots.
542              
543             Example :
544             Returns : TRUE if this builder wants to populate all slots, and
545             FALSE otherwise.
546             Args : on set, new value (a scalar or undef, optional)
547              
548             =cut
549              
550             sub want_all{
551 875     875 1 1018 my $self = shift;
552              
553 875 100       2324 return $self->{'want_all'} = shift if @_;
554 439         906 return $self->{'want_all'};
555             }
556              
557             =head2 get_object_conditions
558              
559             Title : get_object_conditions
560             Usage :
561             Function: Get the list of conditions an object must meet in order to
562             be 'wanted.' See want_object() for where this is used.
563              
564             Conditions in this implementation are closures (anonymous
565             functions) which are passed one parameter, a hash reference
566             the keys of which are equal to initialization
567             paramaters. The closure must return TRUE to make the object
568             'wanted.'
569              
570             Conditions will be implicitly ANDed.
571              
572             Example :
573             Returns : a list of closures
574             Args : none
575              
576             =cut
577              
578             sub get_object_conditions{
579 335     335 1 335 my $self = shift;
580              
581 335         310 return @{$self->{'object_conds'}};
  335         873  
582             }
583              
584             =head2 add_object_condition
585              
586             Title : add_object_condition
587             Usage :
588             Function: Adds a condition an object must meet in order to be 'wanted.'
589             See want_object() for where this is used.
590              
591             Conditions in this implementation must be closures
592             (anonymous functions). These will be passed one parameter,
593             which is a hash reference with the sequence object
594             initialization parameters being the keys.
595              
596             Conditions are implicitly ANDed. If you want other
597             operators, perform those tests inside of one closure
598             instead of multiple. This will also be more efficient.
599              
600             Example :
601             Returns : TRUE
602             Args : the list of conditions
603              
604             =cut
605              
606             sub add_object_condition{
607 2     2 1 2 my ($self,@conds) = @_;
608              
609 2 50       3 if(grep { ref($_) ne 'CODE'; } @conds) {
  2         7  
610 0         0 $self->throw("conditions against which to validate an object ".
611             "must be anonymous code blocks");
612             }
613 2         2 push(@{$self->{'object_conds'}}, @conds);
  2         4  
614 2         3 return 1;
615             }
616              
617             =head2 remove_object_conditions
618              
619             Title : remove_object_conditions
620             Usage :
621             Function: Removes the conditions an object must meet in order to be
622             'wanted.'
623             Example :
624             Returns : The list of previously set conditions (an array of closures)
625             Args : none
626              
627             =cut
628              
629             sub remove_object_conditions{
630 0     0 1 0 my $self = shift;
631 0         0 my @conds = $self->get_object_conditions();
632 0         0 $self->{'object_conds'} = [];
633 0         0 return @conds;
634             }
635              
636             =head1 Methods to control what type of object is built
637              
638             =cut
639              
640             =head2 sequence_factory
641              
642             Title : sequence_factory
643             Usage : $obj->sequence_factory($newval)
644             Function: Get/set the sequence factory to be used by this object
645             builder.
646             Example :
647             Returns : the Bio::Factory::SequenceFactoryI implementing object to use
648             Args : on set, new value (a Bio::Factory::SequenceFactoryI
649             implementing object or undef, optional)
650              
651             =cut
652              
653             sub sequence_factory{
654 1120     1120 1 1204 my $self = shift;
655              
656 1120 100       2157 if(@_) {
657 410         599 delete $self->{'_objskel'};
658 410         1068 return $self->{'sequence_factory'} = shift;
659             }
660 710         2303 return $self->{'sequence_factory'};
661             }
662              
663             1;