File Coverage

Bio/Tools/AmpliconSearch.pm
Criterion Covered Total %
statement 145 155 93.5
branch 56 66 84.8
condition 15 21 71.4
subroutine 20 20 100.0
pod 8 8 100.0
total 244 270 90.3


line stmt bran cond sub pod time code
1             # BioPerl module for Bio::Tools::AmpliconSearch
2             #
3             # Copyright Florent Angly
4             #
5             # You may distribute this module under the same terms as perl itself
6              
7              
8             package Bio::Tools::AmpliconSearch;
9              
10 1     1   957 use strict;
  1         2  
  1         24  
11 1     1   4 use warnings;
  1         2  
  1         26  
12 1     1   5 use Bio::Tools::IUPAC;
  1         1  
  1         16  
13 1     1   209 use Bio::SeqFeature::Amplicon;
  1         2  
  1         29  
14 1     1   275 use Bio::Tools::SeqPattern;
  1         2  
  1         31  
15             # we require Bio::SeqIO
16             # and Bio::SeqFeature::Primer
17              
18 1     1   6 use base qw(Bio::Root::Root);
  1         2  
  1         1326  
19              
20             my $template_str;
21              
22              
23             =head1 NAME
24              
25             Bio::Tools::AmpliconSearch - Find amplicons in a template using degenerate PCR primers
26              
27             =head1 SYNOPSIS
28              
29             use Bio::PrimarySeq;
30             use Bio::Tools::AmpliconSearch;
31              
32             my $template = Bio::PrimarySeq->new(
33             -seq => 'aaaaaCCCCaaaaaaaaaaTTTTTTaaaaaCCACaaaaaTTTTTTaaaaaaaaaa',
34             );
35             my $fwd_primer = Bio::PrimarySeq->new(
36             -seq => 'CCNC',
37             );
38             my $rev_primer = Bio::PrimarySeq->new(
39             -seq => 'AAAAA',
40             );
41              
42             my $search = Bio::Tools::AmpliconSearch->new(
43             -template => $template,
44             -fwd_primer => $fwd_primer,
45             -rev_primer => $rev_primer,
46             );
47            
48             while (my $amplicon = $search->next_amplicon) {
49             print "Found amplicon at position ".$amplicon->start.'..'.$amplicon->end.":\n";
50             print $amplicon->seq->seq."\n\n";
51             }
52              
53             # Now change the template (but you could change the primers instead) and look
54             # for amplicons again
55              
56             $template = Bio::PrimarySeq->new(
57             -seq => 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
58             );
59             $search->template($template);
60              
61             while (my $amplicon = $search->next_amplicon) {
62             print "Found amplicon at position ".$amplicon->start.'..'.$amplicon->end.":\n";
63             print $amplicon->seq->seq."\n\n";
64             }
65              
66             =head1 DESCRIPTION
67              
68             Perform an in silico PCR reaction, i.e. search for amplicons in a given template
69             sequence using the specified degenerate primer.
70              
71             The template sequence is a sequence object, e.g. L, and the primers
72             can be a sequence or a L object and contain ambiguous
73             residues as defined in the IUPAC conventions. The primer sequences are converted
74             into regular expressions using L and the matching regions of
75             the template sequence, i.e. the amplicons, are returned as L
76             objects.
77              
78             AmpliconSearch will look for amplicons on both strands (forward and reverse-
79             complement) of the specified template sequence. If the reverse primer is not
80             provided, an amplicon will be returned and span a match of the forward primer to
81             the end of the template. Similarly, when no forward primer is given, match from
82             the beginning of the template sequence. When several amplicons overlap, only the
83             shortest one to more accurately represent the biases of PCR. Future improvements
84             may include modelling the effects of the number of PCR cycles or temperature on
85             the PCR products.
86              
87             =head1 TODO
88              
89             Future improvements may include:
90              
91             =over
92              
93             =item *
94              
95             Allowing a small number of primer mismatches
96              
97             =item *
98              
99             Reporting all amplicons, including overlapping ones
100              
101             =item *
102              
103             Putting a limit on the length of amplicons, in accordance with the processivity
104             of the polymerase used
105              
106             =back
107              
108             =head1 FEEDBACK
109              
110             =head2 Mailing Lists
111              
112             User feedback is an integral part of the evolution of this and other
113             Bioperl modules. Send your comments and suggestions preferably to one
114             of the Bioperl mailing lists. Your participation is much appreciated.
115              
116             bioperl-l@bioperl.org - General discussion
117             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
118              
119             =head2 Support
120              
121             Please direct usage questions or support issues to the mailing list:
122              
123             I
124              
125             rather than to the module maintainer directly. Many experienced and
126             reponsive experts will be able look at the problem and quickly
127             address it. Please include a thorough description of the problem
128             with code and data examples if at all possible.
129              
130             =head2 Reporting Bugs
131              
132             Report bugs to the Bioperl bug tracking system to help us keep track
133             the bugs and their resolution. Bug reports can be submitted via the
134             web:
135              
136             https://github.com/bioperl/bioperl-live/issues
137              
138             =head1 AUTHOR
139              
140             Florent Angly
141              
142             =head1 APPENDIX
143              
144             The rest of the documentation details each of the object
145             methods. Internal methods are usually preceded with a _
146              
147             =head2 new
148              
149             Title : new
150             Usage : my $search = Bio::Tools::AmpliconSearch->new( );
151             Function : Initialize an amplicon search
152             Args : -template Sequence object for the template sequence. This object
153             will be converted to Bio::Seq if needed in since features
154             (amplicons and primers) will be added to this object.
155             -fwd_primer A sequence object representing the forward primer
156             -rev_primer A sequence object representing the reverse primer
157             -primer_file Read primers from a sequence file. It replaces
158             -fwd_primer and -rev_primer (optional)
159             -attach_primers Whether or not to attach primers to Amplicon objects. Default: 0 (off)
160             Returns : A Bio::Tools::AmpliconSearch object
161              
162             =cut
163              
164             sub new {
165 17     17 1 322 my ($class, @args) = @_;
166 17         64 my $self = $class->SUPER::new(@args);
167 17         77 my ($template, $primer_file, $fwd_primer, $rev_primer, $attach_primers) =
168             $self->_rearrange([qw(TEMPLATE PRIMER_FILE FWD_PRIMER REV_PRIMER ATTACH_PRIMERS)],
169             @args);
170              
171             # Get primers
172 17 100       51 if (defined $primer_file) {
173 3         11 $self->primer_file($primer_file);
174             } else {
175 14   100     53 $self->fwd_primer($fwd_primer || '');
176 14   100     46 $self->rev_primer($rev_primer || '');
177             }
178              
179             # Get template sequence
180 17 100       58 $self->template($template) if defined $template;
181              
182 17 100       38 $self->attach_primers($attach_primers) if defined $attach_primers;
183              
184 17         58 return $self;
185             }
186              
187              
188             =head2 template
189              
190             Title : template
191             Usage : my $template = $search->template;
192             Function : Get/set the template sequence. Setting a new template resets any
193             search in progress.
194             Args : Optional Bio::Seq object
195             Returns : A Bio::Seq object
196              
197             =cut
198              
199             sub template {
200 83     83 1 119 my ($self, $template) = @_;
201 83 100       124 if (defined $template) {
202 17 50 33     78 if ( not(ref $template) || not $template->isa('Bio::PrimarySeqI') ) {
203             # Not a Bio::Seq or Bio::PrimarySeq
204 0         0 $self->throw("Expected a sequence object as input but got a '".ref($template)."'\n");
205             }
206 17 50       71 if (not $template->isa('Bio::SeqI')) {
207             # Convert sequence object to Bio::Seq Seq so that features can be added
208 17         19 my $primary_seq = $template;
209 17         61 $template = Bio::Seq->new();
210 17         41 $template->primary_seq($primary_seq);
211             }
212 17         28 $self->{template} = $template;
213             # Reset search in progress
214 17         26 $template_str = undef;
215             }
216 83         246 return $self->{template};
217             }
218              
219              
220             =head2 fwd_primer
221              
222             Title : fwd_primer
223             Usage : my $primer = $search->fwd_primer;
224             Function : Get/set the forward primer. Setting a new forward primer resets any
225             search in progress.
226             Args : Optional sequence object or primer object or '' to match beginning
227             of sequence.
228             Returns : A sequence object or primer object or undef
229              
230             =cut
231              
232             sub fwd_primer {
233 46     46 1 74 my ($self, $primer) = @_;
234 46 100       81 if (defined $primer) {
235 19         42 $self->_set_primer('fwd', $primer);
236             }
237 46         101 return $self->{fwd_primer};
238             }
239              
240              
241             =head2 rev_primer
242              
243             Title : rev_primer
244             Usage : my $primer = $search->rev_primer;
245             Function : Get/set the reverse primer. Setting a new reverse primer resets any
246             search in progress.
247             Args : Optional sequence object or primer object or '' to match end of
248             sequence.
249             Returns : A sequence object or primer object or undef
250              
251             =cut
252              
253             sub rev_primer {
254 29     29 1 52 my ($self, $primer) = @_;
255 29 100       50 if (defined $primer) {
256 19         32 $self->_set_primer('rev', $primer);
257             }
258 29         73 return $self->{rev_primer};
259             }
260              
261              
262             sub _set_primer {
263             # Save a primer (sequence object) and convert it to regexp. Type is 'fwd' for
264             # the forward primer or 'rev' for the reverse primer.
265 38     38   58 my ($self, $type, $primer) = @_;
266 38         41 my $re;
267 38         41 my $match_rna = 1;
268 38 100       79 if ($primer eq '') {
269 7 100       15 $re = $type eq 'fwd' ? '^' : '$';
270             } else {
271 31 50 66     147 if ( not(ref $primer) || (
      66        
272             not($primer->isa('Bio::PrimarySeqI')) &&
273             not($primer->isa('Bio::SeqFeature::Primer')) ) ) {
274 0         0 $self->throw('Expected a sequence or primer object as input but got a '.ref($primer)."\n");
275             }
276 31         180 $self->{$type.'_primer'} = $primer;
277 31 100       110 my $seq = $primer->isa('Bio::SeqFeature::Primer') ? $primer->seq : $primer;
278 31 100       182 $re = Bio::Tools::IUPAC->new(
279             -seq => $type eq 'fwd' ? $seq : $seq->revcom,
280             )->regexp($match_rna);
281             }
282 38         114 $self->{$type.'_regexp'} = $re;
283             # Reset search in progress
284 38         70 $template_str = undef;
285 38         57 $self->{regexp} = undef;
286 38         73 return $self->{$type.'_primer'};
287             }
288              
289              
290             =head2 primer_file
291              
292             Title : primer_file
293             Usage : my ($fwd, $rev) = $search->primer_file;
294             Function : Get/set a sequence file to read the primer from. The first sequence
295             must be the forward primer, and the second is the optional reverse
296             primer. After reading the file, the primers are set using fwd_primer()
297             and rev_primer() and returned.
298             Args : Sequence file
299             Returns : Array containing forward and reverse primers as sequence objects.
300              
301             =cut
302              
303             sub primer_file {
304 3     3 1 8 my ($self, $primer_file) = @_;
305             # Read primer file and convert primers into regular expressions to catch
306             # amplicons present in the database
307              
308 3 50       8 if (not defined $primer_file) {
309 0         0 $self->throw("Need to provide an input file\n");
310             }
311              
312             # Mandatory first primer
313 3         748 require Bio::SeqIO;
314 3         18 my $in = Bio::SeqIO->new( -file => $primer_file );
315 3         7 my $fwd_primer = $in->next_seq;
316 3 50       8 if (not defined $fwd_primer) {
317 0         0 $self->throw("The file '$primer_file' contains no primers\n");
318             }
319 3         12 $fwd_primer->alphabet('dna'); # Force the alphabet since degenerate primers can look like protein sequences
320              
321             # Optional reverse primers
322 3         35 my $rev_primer = $in->next_seq;
323 3 100       6 if (defined $rev_primer) {
324 2         6 $rev_primer->alphabet('dna');
325             } else {
326 1         3 $rev_primer = '';
327             }
328            
329 3         14 $in->close;
330              
331 3         11 $self->fwd_primer($fwd_primer);
332 3         9 $self->rev_primer($rev_primer);
333              
334 3         9 return ($fwd_primer, $rev_primer);
335             }
336              
337              
338             =head2 attach_primers
339              
340             Title : attach_primers
341             Usage : my $attached = $search->attach_primers;
342             Function : Get/set whether or not to attach primer objects to the amplicon
343             objects.
344             Args : Optional integer (1 for yes, 0 for no)
345             Returns : Integer (1 for yes, 0 for no)
346              
347             =cut
348              
349             sub attach_primers {
350 23     23 1 33 my ($self, $attach) = @_;
351 23 100       43 if (defined $attach) {
352 2         4 $self->{attach_primers} = $attach;
353 2         19 require Bio::SeqFeature::Primer;
354             }
355 23   100     84 return $self->{attach_primers} || 0;
356             }
357              
358              
359             =head2 next_amplicon
360              
361             Title : next_amplicon
362             Usage : my $amplicon = $search->next_amplicon;
363             Function : Get the next amplicon
364             Args : None
365             Returns : A Bio::SeqFeature::Amplicon object
366              
367             =cut
368              
369             sub next_amplicon {
370 40     40 1 1042 my ($self) = @_;
371              
372             # Initialize search
373 40 100       82 if (not defined $template_str) {
374 18         41 $self->_init;
375             }
376              
377 40         70 my $re = $self->_regexp;
378              
379 40         47 my $amplicon;
380 40 100       356 if ($template_str =~ m/$re/g) {
381 21         69 my ($match, $rev_match) = ($1, $2);
382 21 100       39 my $strand = $rev_match ? -1 : 1;
383 21   66     46 $match = $match || $rev_match;
384 21         27 my $end = pos($template_str);
385 21         33 my $start = $end - length($match) + 1;
386 21         46 $amplicon = $self->_attach_amplicon($start, $end, $strand);
387             }
388              
389             # If no more matches. Make sure calls to next_amplicon() will return undef.
390 40 100       68 if (not $amplicon) {
391 19         33 $template_str = '';
392             }
393              
394 40         146 return $amplicon;
395             }
396              
397              
398             sub _init {
399 18     18   26 my ($self) = @_;
400             # Sanity checks
401 18 50       38 if ( not $self->template ) {
402 0         0 $self->throw('Need to provide a template sequence');
403             }
404 18 50 66     33 if ( not($self->fwd_primer) && not($self->rev_primer) ) {
405 0         0 $self->throw('Need to provide at least a primer');
406             }
407             # Set the template sequence string
408 18         38 $template_str = $self->template->seq;
409             # Set the regular expression to match amplicons
410 18         45 $self->_regexp;
411              
412 18         28 return 1;
413             }
414              
415              
416             sub _regexp {
417             # Get the regexp to match amplicon. If the regexp is not set, initialize it.
418 58     58   91 my ($self, $regexp) = @_;
419              
420 58 100       108 if ( not defined $self->{regexp} ) {
421             # Build regexp that matches amplicons on both strands and reports shortest
422             # amplicon when there are several overlapping amplicons
423              
424 17         34 my $fwd_regexp = $self->_fwd_regexp;
425 17         33 my $rev_regexp = $self->_rev_regexp;
426              
427 17         27 my ($fwd_regexp_rc, $basic_fwd_match, $rev_regexp_rc, $basic_rev_match);
428 17 100       32 if ($fwd_regexp eq '^') {
429 1         2 $fwd_regexp_rc = '';
430 1         3 $basic_fwd_match = "(?:.*?$rev_regexp)";
431             } else {
432 16         76 $fwd_regexp_rc = Bio::Tools::SeqPattern->new(
433             -seq => $fwd_regexp,
434             -type => 'dna',
435             )->revcom->str;
436 16         43 $basic_fwd_match = "(?:$fwd_regexp.*?$rev_regexp)";
437             }
438              
439 17 100       46 if ($rev_regexp eq '$') {
440 2         4 $rev_regexp_rc = '';
441 2         5 $basic_rev_match = "(?:.*?$fwd_regexp_rc)";
442             } else {
443 15         45 $rev_regexp_rc = Bio::Tools::SeqPattern->new(
444             -seq => $rev_regexp,
445             -type => 'dna',
446             )->revcom->str;
447 15         38 $basic_rev_match = "(?:$rev_regexp_rc.*?$fwd_regexp_rc)";
448             }
449              
450 17 100       61 my $fwd_exclude = "(?!$basic_rev_match".
451             ($fwd_regexp eq '^' ? '' : "|$fwd_regexp").
452             ")";
453              
454 17 100       44 my $rev_exclude = "(?!$basic_fwd_match".
455             ($rev_regexp eq '$' ? '' : "|$rev_regexp_rc").
456             ')';
457              
458 17         942 $self->{regexp} = qr/
459             ( $fwd_regexp (?:$fwd_exclude.)*? $rev_regexp ) |
460             ( $rev_regexp_rc (?:$rev_exclude.)*? $fwd_regexp_rc )
461             /xi;
462             }
463              
464 58         110 return $self->{regexp};
465             }
466              
467              
468             =head2 annotate_template
469              
470             Title : annotate_template
471             Usage : my $template = $search->annotate_template;
472             Function : Search for all amplicons and attach them to the template.
473             This is equivalent to running:
474             while (my $amplicon = $self->next_amplicon) {
475             # do something
476             }
477             my $annotated = $self->template;
478             Args : None
479             Returns : A Bio::Seq object with attached Bio::SeqFeature::Amplicons (and
480             Bio::SeqFeature::Primers if you set -attach_primers to 1).
481              
482             =cut
483              
484             sub annotate_template {
485 1     1 1 2 my ($self) = @_;
486             # Search all amplicons and attach them to template
487 1         4 1 while $self->next_amplicon;
488             # Return annotated template
489 1         4 return $self->template;
490             }
491              
492              
493             sub _fwd_regexp {
494 17     17   23 my ($self) = @_;
495 17         30 return $self->{fwd_regexp};
496             }
497              
498              
499             sub _rev_regexp {
500 17     17   25 my ($self) = @_;
501 17         27 return $self->{rev_regexp};
502             }
503              
504              
505             sub _attach_amplicon {
506             # Create an amplicon object and attach it to template
507 21     21   41 my ($self, $start, $end, $strand) = @_;
508              
509             # Create Bio::SeqFeature::Amplicon feature and attach it to the template
510 21         45 my $amplicon = Bio::SeqFeature::Amplicon->new(
511             -start => $start,
512             -end => $end,
513             -strand => $strand,
514             -template => $self->template,
515             );
516              
517             # Create Bio::SeqFeature::Primer feature and attach them to the amplicon
518 21 100       57 if ($self->attach_primers) {
519 1         4 for my $type ('fwd', 'rev') {
520 2         4 my ($pstart, $pend, $pstrand, $primer_seq);
521              
522             # Coordinates relative to amplicon
523 2 100       5 if ($type eq 'fwd') {
524             # Forward primer
525 1         3 $primer_seq = $self->fwd_primer;
526 1 50       4 next if not defined $primer_seq;
527 1         1 $pstart = 1;
528 1         6 $pend = $primer_seq->length;
529 1         3 $pstrand = $amplicon->strand;
530             } else {
531             # Optional reverse primer
532 1         3 $primer_seq = $self->rev_primer;
533 1 50       3 next if not defined $primer_seq;
534 0         0 $pstart = $end - $primer_seq->length + 1;
535 0         0 $pend = $end;
536 0         0 $pstrand = -1 * $amplicon->strand;
537             }
538              
539             # Absolute coordinates needed
540 1         2 $pstart += $start - 1;
541 1         2 $pend += $start - 1;
542              
543 1         8 my $primer = Bio::SeqFeature::Primer->new(
544             -start => $pstart,
545             -end => $pend,
546             -strand => $pstrand,
547             -template => $amplicon,
548             );
549              
550             # Attach primer to amplicon
551 1 50       3 if ($type eq 'fwd') {
552 1         4 $amplicon->fwd_primer($primer);
553             } else {
554 0         0 $amplicon->rev_primer($primer);
555             }
556              
557             }
558             }
559              
560 21         44 return $amplicon;
561             }
562              
563              
564             1;