| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | # | 
| 2 |  |  |  |  |  |  | # BioPerl module for Bio::Tools::SiRNA | 
| 3 |  |  |  |  |  |  | # | 
| 4 |  |  |  |  |  |  | # Please direct questions and support issues to | 
| 5 |  |  |  |  |  |  | # | 
| 6 |  |  |  |  |  |  | # Cared for by Donald Jackson, donald.jackson@bms.com | 
| 7 |  |  |  |  |  |  | # | 
| 8 |  |  |  |  |  |  | # Copyright Bristol-Myers Squibb | 
| 9 |  |  |  |  |  |  | # | 
| 10 |  |  |  |  |  |  | # You may distribute this module under the same terms as perl itself | 
| 11 |  |  |  |  |  |  |  | 
| 12 |  |  |  |  |  |  | # POD documentation - main docs before the code | 
| 13 |  |  |  |  |  |  |  | 
| 14 |  |  |  |  |  |  | =head1 NAME | 
| 15 |  |  |  |  |  |  |  | 
| 16 |  |  |  |  |  |  | SiRNA - Perl object for designing small inhibitory RNAs. | 
| 17 |  |  |  |  |  |  |  | 
| 18 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 19 |  |  |  |  |  |  |  | 
| 20 |  |  |  |  |  |  | use Bio::Tools::SiRNA; | 
| 21 |  |  |  |  |  |  |  | 
| 22 |  |  |  |  |  |  | my $sirna_designer = Bio::Tools::SiRNA->new( -target => $bio_seq, | 
| 23 |  |  |  |  |  |  | -rules  => 'saigo' | 
| 24 |  |  |  |  |  |  | ); | 
| 25 |  |  |  |  |  |  | my @pairs = $sirna_designer->design; | 
| 26 |  |  |  |  |  |  |  | 
| 27 |  |  |  |  |  |  | foreach $pair (@pairs) { | 
| 28 |  |  |  |  |  |  | my $sense_oligo_sequence = $pair->sense->seq; | 
| 29 |  |  |  |  |  |  | my $antisense_oligo_sequence = $pair->antisense->seq; | 
| 30 |  |  |  |  |  |  |  | 
| 31 |  |  |  |  |  |  | # print out results | 
| 32 |  |  |  |  |  |  | print join ("\t", $pair->start, $pair->end, $pair->rank, | 
| 33 |  |  |  |  |  |  | $sense_oligo_sequence, $antisense_oligo_sequence), "\n"; | 
| 34 |  |  |  |  |  |  | } | 
| 35 |  |  |  |  |  |  |  | 
| 36 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 37 |  |  |  |  |  |  |  | 
| 38 |  |  |  |  |  |  | Package for designing siRNA reagents. | 
| 39 |  |  |  |  |  |  |  | 
| 40 |  |  |  |  |  |  | Input is a L-compliant object (the target). | 
| 41 |  |  |  |  |  |  |  | 
| 42 |  |  |  |  |  |  | Output is a list of Bio::SeqFeature::SiRNA::Pair objects, which are | 
| 43 |  |  |  |  |  |  | added to the feature table of the target sequence.  Each | 
| 44 |  |  |  |  |  |  | Bio::SeqFeature::SiRNA::Pair contains two subfeatures | 
| 45 |  |  |  |  |  |  | (Bio::SeqFeature::Oligo objects) which correspond to the individual | 
| 46 |  |  |  |  |  |  | oligos.  These objects provide accessors for the information on the | 
| 47 |  |  |  |  |  |  | individual reagent pairs. | 
| 48 |  |  |  |  |  |  |  | 
| 49 |  |  |  |  |  |  | This verion of Bio::Tools::SiRNA represents a major change in architecture. | 
| 50 |  |  |  |  |  |  | Specific 'rulesets' for siRNA selection as developed by various groups are | 
| 51 |  |  |  |  |  |  | implemented as Bio::Tools::SiRNA::Ruleset objects, which inherit from | 
| 52 |  |  |  |  |  |  | Bio::Tools::SiRNA.  This will make it easier to add new rule sets or modify | 
| 53 |  |  |  |  |  |  | existing approaches. Currently the Tuschl and Ui-Tei (2004) rules are | 
| 54 |  |  |  |  |  |  | implemented. For consistency, the Tuschl rules are implemented by default. | 
| 55 |  |  |  |  |  |  |  | 
| 56 |  |  |  |  |  |  | In addition, this module provides three 'extra' rules which can be added | 
| 57 |  |  |  |  |  |  | above and beyond any ruleset. | 
| 58 |  |  |  |  |  |  |  | 
| 59 |  |  |  |  |  |  | =over 3 | 
| 60 |  |  |  |  |  |  |  | 
| 61 |  |  |  |  |  |  | =item 1. | 
| 62 |  |  |  |  |  |  |  | 
| 63 |  |  |  |  |  |  | SiRNAs that overlap known SNPs (identified as SeqFeatures with | 
| 64 |  |  |  |  |  |  | primary tag = variation) can be avoided. | 
| 65 |  |  |  |  |  |  |  | 
| 66 |  |  |  |  |  |  | =item 2. | 
| 67 |  |  |  |  |  |  |  | 
| 68 |  |  |  |  |  |  | Other regions (with primary tag = 'Excluded') can also be skipped.  I | 
| 69 |  |  |  |  |  |  | use this with Bio::Tools::Run::Mdust to avoid low-complexity regions | 
| 70 |  |  |  |  |  |  | (must be run separately), but other programs could also be used. | 
| 71 |  |  |  |  |  |  |  | 
| 72 |  |  |  |  |  |  | =item 3. | 
| 73 |  |  |  |  |  |  |  | 
| 74 |  |  |  |  |  |  | SiRNAs may also be selected in the 3 prime UTR of a gene by setting | 
| 75 |  |  |  |  |  |  | $sirna_designer-Einclude_3pr() to true. | 
| 76 |  |  |  |  |  |  |  | 
| 77 |  |  |  |  |  |  | =back | 
| 78 |  |  |  |  |  |  |  | 
| 79 |  |  |  |  |  |  | =head2 EXPORT | 
| 80 |  |  |  |  |  |  |  | 
| 81 |  |  |  |  |  |  | None. | 
| 82 |  |  |  |  |  |  |  | 
| 83 |  |  |  |  |  |  | =head1 SEE ALSO | 
| 84 |  |  |  |  |  |  |  | 
| 85 |  |  |  |  |  |  | L, L, | 
| 86 |  |  |  |  |  |  | L.. | 
| 87 |  |  |  |  |  |  |  | 
| 88 |  |  |  |  |  |  | =head1 FEEDBACK | 
| 89 |  |  |  |  |  |  |  | 
| 90 |  |  |  |  |  |  | =head2 Mailing Lists | 
| 91 |  |  |  |  |  |  |  | 
| 92 |  |  |  |  |  |  | User feedback is an integral part of the evolution of this and other | 
| 93 |  |  |  |  |  |  | Bioperl modules. Send your comments and suggestions preferably to | 
| 94 |  |  |  |  |  |  | the Bioperl mailing list.  Your participation is much appreciated. | 
| 95 |  |  |  |  |  |  |  | 
| 96 |  |  |  |  |  |  | bioperl-l@bioperl.org                  - General discussion | 
| 97 |  |  |  |  |  |  | http://bioperl.org/wiki/Mailing_lists  - About the mailing lists | 
| 98 |  |  |  |  |  |  |  | 
| 99 |  |  |  |  |  |  | =head2 Support | 
| 100 |  |  |  |  |  |  |  | 
| 101 |  |  |  |  |  |  | Please direct usage questions or support issues to the mailing list: | 
| 102 |  |  |  |  |  |  |  | 
| 103 |  |  |  |  |  |  | I | 
| 104 |  |  |  |  |  |  |  | 
| 105 |  |  |  |  |  |  | rather than to the module maintainer directly. Many experienced and | 
| 106 |  |  |  |  |  |  | reponsive experts will be able look at the problem and quickly | 
| 107 |  |  |  |  |  |  | address it. Please include a thorough description of the problem | 
| 108 |  |  |  |  |  |  | with code and data examples if at all possible. | 
| 109 |  |  |  |  |  |  |  | 
| 110 |  |  |  |  |  |  | =head2 Reporting Bugs | 
| 111 |  |  |  |  |  |  |  | 
| 112 |  |  |  |  |  |  | Report bugs to the Bioperl bug tracking system to help us keep track | 
| 113 |  |  |  |  |  |  | of the bugs and their resolution. Bug reports can be submitted via | 
| 114 |  |  |  |  |  |  | the web: | 
| 115 |  |  |  |  |  |  |  | 
| 116 |  |  |  |  |  |  | https://github.com/bioperl/bioperl-live/issues | 
| 117 |  |  |  |  |  |  |  | 
| 118 |  |  |  |  |  |  | =head1 AUTHOR | 
| 119 |  |  |  |  |  |  |  | 
| 120 |  |  |  |  |  |  | Donald Jackson (donald.jackson@bms.com) | 
| 121 |  |  |  |  |  |  |  | 
| 122 |  |  |  |  |  |  | =head1 APPENDIX | 
| 123 |  |  |  |  |  |  |  | 
| 124 |  |  |  |  |  |  | The rest of the documentation details each of the object methods. | 
| 125 |  |  |  |  |  |  | Internal methods are usually preceded with a _ | 
| 126 |  |  |  |  |  |  |  | 
| 127 |  |  |  |  |  |  | =cut | 
| 128 |  |  |  |  |  |  |  | 
| 129 |  |  |  |  |  |  | package Bio::Tools::SiRNA; | 
| 130 |  |  |  |  |  |  |  | 
| 131 | 1 |  |  | 1 |  | 401 | use strict; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 22 |  | 
| 132 | 1 |  |  | 1 |  | 3 | use warnings; | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 22 |  | 
| 133 |  |  |  |  |  |  |  | 
| 134 | 1 |  |  | 1 |  | 3 | use vars qw($AUTOLOAD); | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 32 |  | 
| 135 |  |  |  |  |  |  |  | 
| 136 | 1 |  |  | 1 |  | 265 | use Bio::Seq::RichSeq; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 29 |  | 
| 137 | 1 |  |  | 1 |  | 331 | use Bio::SeqFeature::Generic; | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 24 |  | 
| 138 | 1 |  |  | 1 |  | 285 | use Bio::SeqFeature::SiRNA::Oligo; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 25 |  | 
| 139 | 1 |  |  | 1 |  | 298 | use Bio::SeqFeature::SiRNA::Pair; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 29 |  | 
| 140 |  |  |  |  |  |  |  | 
| 141 |  |  |  |  |  |  |  | 
| 142 | 1 |  |  | 1 |  | 4 | use base qw(Bio::Root::Root); | 
|  | 1 |  |  |  |  | 0 |  | 
|  | 1 |  |  |  |  | 1096 |  | 
| 143 |  |  |  |  |  |  |  | 
| 144 |  |  |  |  |  |  |  | 
| 145 |  |  |  |  |  |  | our %COMP = ( A => 'T', | 
| 146 |  |  |  |  |  |  | T => 'A', | 
| 147 |  |  |  |  |  |  | C => 'G', | 
| 148 |  |  |  |  |  |  | G => 'C', | 
| 149 |  |  |  |  |  |  | N => 'N', | 
| 150 |  |  |  |  |  |  | ); | 
| 151 |  |  |  |  |  |  |  | 
| 152 |  |  |  |  |  |  | our @ARGNAMES = qw(RULES START_PAD END_PAD MIN_GC CUTOFF OLIGOS AVOID_SNPS | 
| 153 |  |  |  |  |  |  | GSTRING TMPDIR TARGET DEBUG); | 
| 154 |  |  |  |  |  |  |  | 
| 155 |  |  |  |  |  |  |  | 
| 156 |  |  |  |  |  |  | =head2 new | 
| 157 |  |  |  |  |  |  |  | 
| 158 |  |  |  |  |  |  | Title		: new | 
| 159 |  |  |  |  |  |  | Usage		: my $sirna_designer = Bio::Tools::SiRNA->new(); | 
| 160 |  |  |  |  |  |  | Function	: Constructor for designer object | 
| 161 |  |  |  |  |  |  | Returns	: Bio::Tools::SiRNA object | 
| 162 |  |  |  |  |  |  | Args		: target - the target sequence for the SiRNAs as a Bio::Seq::RichSeq | 
| 163 |  |  |  |  |  |  | start_pad - distance from the CDS start to skip (default 75) | 
| 164 |  |  |  |  |  |  | end_pad - distance from the CDS end to skip (default 50) | 
| 165 |  |  |  |  |  |  | include_3pr - set to true to include SiRNAs in the 3prime UTR (default false) | 
| 166 |  |  |  |  |  |  | rules - rules for selecting siRNAs, currently supporting saigo and tuschl | 
| 167 |  |  |  |  |  |  | min_gc - minimum GC fraction (NOT percent) (default 0.4) | 
| 168 |  |  |  |  |  |  | max_gc - maximum GC fraction (NOT percent) (default 0.6) | 
| 169 |  |  |  |  |  |  | cutoff - worst 'rank' accepted(default 3) | 
| 170 |  |  |  |  |  |  | avoid_snps - boolean - reject oligos that overlap a variation | 
| 171 |  |  |  |  |  |  | SeqFeature in the target (default true) | 
| 172 |  |  |  |  |  |  | gstring - maximum allowed consecutive Gs. | 
| 173 |  |  |  |  |  |  | Too many can cause problems in synthesis (default 4) | 
| 174 |  |  |  |  |  |  | Note		: All arguments can also be changed/accessed using autoloaded | 
| 175 |  |  |  |  |  |  | methods such as: | 
| 176 |  |  |  |  |  |  |  | 
| 177 |  |  |  |  |  |  | my $start_pad = $sirna_designer->start_pad(). | 
| 178 |  |  |  |  |  |  |  | 
| 179 |  |  |  |  |  |  | =cut | 
| 180 |  |  |  |  |  |  |  | 
| 181 |  |  |  |  |  |  | sub new { | 
| 182 | 1 |  |  | 1 | 1 | 698 | my ($proto, @args) = @_; | 
| 183 | 1 |  | 33 |  |  | 6 | my $pkg = ref($proto) || $proto; | 
| 184 |  |  |  |  |  |  |  | 
| 185 | 1 |  |  |  |  | 1 | my $self = {}; | 
| 186 | 1 |  |  |  |  | 2 | bless ($self, $pkg); | 
| 187 |  |  |  |  |  |  |  | 
| 188 | 1 |  |  |  |  | 1 | my %args; | 
| 189 |  |  |  |  |  |  |  | 
| 190 | 1 |  |  |  |  | 7 | @args{@ARGNAMES} = $self->_rearrange(\@ARGNAMES, @args); | 
| 191 |  |  |  |  |  |  |  | 
| 192 | 1 | 50 |  |  |  | 3 | if ($args{'RULES'}) { | 
| 193 | 0 |  |  |  |  | 0 | $self->rules($args{'RULES'}); | 
| 194 |  |  |  |  |  |  | } | 
| 195 |  |  |  |  |  |  |  | 
| 196 | 1 |  | 50 |  |  | 8 | $self->{'start_pad'} = $args{'START_PAD'} || 75; # nt from start to mask | 
| 197 | 1 |  | 50 |  |  | 8 | $self->{'end_pad'} = $args{'END_PAD'} || 50; # nt from end to mask | 
| 198 | 1 |  | 50 |  |  | 5 | $self->{'include_3pr'} = $args{'INCLUDE_3PR'} || 0; # look for oligos in 3prime UTR | 
| 199 | 1 |  | 50 |  |  | 4 | $self->{'min_gc'} = $args{'MIN_GC'} || 0.40; | 
| 200 | 1 |  | 50 |  |  | 5 | $self->{'max_gc'} = $args{'MAX_GC'} || 0.60; | 
| 201 | 1 |  | 50 |  |  | 4 | $self->{'cutoff'} = $args{'CUTOFF'} || 3; # highest (worst) rank wanted | 
| 202 | 1 |  |  |  |  | 4 | $self->{'oligos'} = []; | 
| 203 |  |  |  |  |  |  | defined($args{'AVOID_SNPS'}) ? $self->{'avoid_snps'} = $args{'AVOID_SNPS'} : | 
| 204 | 1 | 50 |  |  |  | 5 | $self->{'avoid_snps'} = 1; # (t/f to avoid or include reagents that cover SNPs) | 
| 205 | 1 |  | 50 |  |  | 4 | $self->{'gstring'} = $args{'GSTRING'} || 4; # maximum allowed consecutive Gs - too many can cause problems in oligo synthesis | 
| 206 | 1 |  | 50 |  |  | 9 | $self->{'tmpdir'} = $args{'TMPDIR'}  || $ENV{'TMPDIR'} || $ENV{'TMP'} || ''; | 
| 207 | 1 |  | 50 |  |  | 4 | $self->{'debug'} = $args{'DEBUG'} || 0; | 
| 208 |  |  |  |  |  |  |  | 
| 209 | 1 | 50 |  |  |  | 5 | $self->target($args{'TARGET'}) if ($args{'TARGET'}); | 
| 210 |  |  |  |  |  |  |  | 
| 211 | 1 |  |  |  |  | 3 | return $self; | 
| 212 |  |  |  |  |  |  | } | 
| 213 |  |  |  |  |  |  |  | 
| 214 |  |  |  |  |  |  |  | 
| 215 |  |  |  |  |  |  | =head2 target | 
| 216 |  |  |  |  |  |  |  | 
| 217 |  |  |  |  |  |  | Title		: target | 
| 218 |  |  |  |  |  |  | Usage		: my $target_seq = $sirna_designer->target(); # get the current target | 
| 219 |  |  |  |  |  |  | OR | 
| 220 |  |  |  |  |  |  | $sirna_designer->target($new_target_seq); # set a new target | 
| 221 |  |  |  |  |  |  | Function	: Set/get the target as a Bio::SeqI-compliant object | 
| 222 |  |  |  |  |  |  | Returns	: a Bio::SeqI-compliant object | 
| 223 |  |  |  |  |  |  | Args		: a Bio::SeqI-compliant object (optional) | 
| 224 |  |  |  |  |  |  |  | 
| 225 |  |  |  |  |  |  | =cut | 
| 226 |  |  |  |  |  |  |  | 
| 227 |  |  |  |  |  |  | sub target { | 
| 228 | 338 |  |  | 338 | 1 | 638 | my ($self, $target) = @_; | 
| 229 |  |  |  |  |  |  |  | 
| 230 | 338 | 100 |  |  |  | 578 | if ($target) { | 
|  |  | 50 |  |  |  |  |  | 
| 231 | 2 | 50 |  |  |  | 9 | unless ($target->isa('Bio::SeqI')) { | 
| 232 | 0 |  |  |  |  | 0 | $self->throw(  -class => 'Bio::Root::BadParameter', | 
| 233 |  |  |  |  |  |  | -text  => "Target must be passed as a Bio::Seq object" ); | 
| 234 |  |  |  |  |  |  | } | 
| 235 | 2 | 100 |  |  |  | 18 | if ($target->can('molecule')) { | 
| 236 | 1 | 50 |  |  |  | 1 | ( grep { uc($target->molecule) eq $_ } qw(DNA MRNA CDNA)) or | 
|  | 3 |  |  |  |  | 5 |  | 
| 237 |  |  |  |  |  |  | $self->throw(  -class => 'Bio::Root::BadParameter', | 
| 238 |  |  |  |  |  |  | -text  =>  "Sequences of type ". $target->molecule. " are not supported" | 
| 239 |  |  |  |  |  |  | ); | 
| 240 |  |  |  |  |  |  | } | 
| 241 |  |  |  |  |  |  | else { | 
| 242 | 1 | 50 |  |  |  | 4 | ($target->alphabet eq 'dna') or | 
| 243 |  |  |  |  |  |  | $self->throw(  -class => 'Bio::Root::BadParameter', | 
| 244 |  |  |  |  |  |  | -text  =>  "Sequences of alphabet ". $target->alphabet. " are not supported" | 
| 245 |  |  |  |  |  |  | ); | 
| 246 |  |  |  |  |  |  | } | 
| 247 |  |  |  |  |  |  |  | 
| 248 | 2 |  |  |  |  | 4 | $self->{'target'} = $target; | 
| 249 | 2 |  |  |  |  | 7 | return 1; | 
| 250 |  |  |  |  |  |  |  | 
| 251 |  |  |  |  |  |  | } | 
| 252 |  |  |  |  |  |  | elsif ($self->{'target'}) { | 
| 253 | 336 |  |  |  |  | 836 | return $self->{'target'}; | 
| 254 |  |  |  |  |  |  | } | 
| 255 |  |  |  |  |  |  | else { | 
| 256 | 0 |  |  |  |  | 0 | $self->throw("Target sequence not defined"); | 
| 257 |  |  |  |  |  |  | } | 
| 258 |  |  |  |  |  |  | } | 
| 259 |  |  |  |  |  |  |  | 
| 260 |  |  |  |  |  |  | =head2 rules | 
| 261 |  |  |  |  |  |  |  | 
| 262 |  |  |  |  |  |  | Title	: rules | 
| 263 |  |  |  |  |  |  | Usage	: $sirna->rules('ruleset') | 
| 264 |  |  |  |  |  |  | Purpose	: set/get ruleset to use for selecting SiRNA oligo pairs. | 
| 265 |  |  |  |  |  |  | Returns	: not sure yet | 
| 266 |  |  |  |  |  |  | Args	: a ruleset name (currently supported: Tuschl, Saigo) | 
| 267 |  |  |  |  |  |  | or a Bio::Tools::SiRNA::RulesetI compliant object | 
| 268 |  |  |  |  |  |  |  | 
| 269 |  |  |  |  |  |  | =cut | 
| 270 |  |  |  |  |  |  |  | 
| 271 |  |  |  |  |  |  | sub rules { | 
| 272 | 3 |  |  | 3 | 1 | 3 | my ($self, $rules) = @_; | 
| 273 |  |  |  |  |  |  |  | 
| 274 | 3 | 50 |  |  |  | 9 | if ($rules) { | 
| 275 | 0 |  |  |  |  | 0 | $self->_load_ruleset($rules); | 
| 276 |  |  |  |  |  |  | } | 
| 277 |  |  |  |  |  |  | # default: use tuschl rules | 
| 278 | 3 | 100 |  |  |  | 8 | unless ($self->{_rules}) { | 
| 279 | 1 |  |  |  |  | 3 | $self->_load_ruleset('tuschl'); | 
| 280 |  |  |  |  |  |  | } | 
| 281 | 3 |  |  |  |  | 15 | return $self->{_rules}; | 
| 282 |  |  |  |  |  |  | } | 
| 283 |  |  |  |  |  |  |  | 
| 284 |  |  |  |  |  |  | sub _load_ruleset { | 
| 285 | 1 |  |  | 1 |  | 2 | my ($self, $ruleset) = @_; | 
| 286 |  |  |  |  |  |  |  | 
| 287 | 1 |  |  |  |  | 3 | my $rule_module = join('::', ref($self), 'Ruleset', lc($ruleset)); | 
| 288 |  |  |  |  |  |  |  | 
| 289 | 1 |  |  |  |  | 84 | eval "require $rule_module"; | 
| 290 |  |  |  |  |  |  |  | 
| 291 | 1 | 50 |  |  |  | 5 | if ($@) { | 
| 292 |  |  |  |  |  |  | #warn join("\n", '@INC contains:', @INC, undef); | 
| 293 | 0 |  |  |  |  | 0 | $self->throw("Unable to load $rule_module: $@"); | 
| 294 | 0 |  |  |  |  | 0 | return; | 
| 295 |  |  |  |  |  |  | } | 
| 296 |  |  |  |  |  |  |  | 
| 297 |  |  |  |  |  |  | else { | 
| 298 | 1 |  |  |  |  | 3 | $self->{_rules} = $rule_module; | 
| 299 | 1 |  |  |  |  | 2 | bless($self, $rule_module); # recast as subclass | 
| 300 |  |  |  |  |  |  | } | 
| 301 |  |  |  |  |  |  |  | 
| 302 | 1 |  |  |  |  | 1 | return 1; | 
| 303 |  |  |  |  |  |  | } | 
| 304 |  |  |  |  |  |  |  | 
| 305 |  |  |  |  |  |  | =head2 design | 
| 306 |  |  |  |  |  |  |  | 
| 307 |  |  |  |  |  |  | Title		: design | 
| 308 |  |  |  |  |  |  | Usage		: my @pairs = $sirna_designer->design(); | 
| 309 |  |  |  |  |  |  | Purpose	: Design SiRNA oligo pairs. | 
| 310 |  |  |  |  |  |  | Returns	: A list of SiRNA pairs as Bio::SeqFeature::SiRNA::Pair objects | 
| 311 |  |  |  |  |  |  | Args		: none | 
| 312 |  |  |  |  |  |  |  | 
| 313 |  |  |  |  |  |  | =cut | 
| 314 |  |  |  |  |  |  |  | 
| 315 |  |  |  |  |  |  | sub design { | 
| 316 | 3 |  |  | 3 | 1 | 357 | my ($self) = @_; | 
| 317 |  |  |  |  |  |  |  | 
| 318 | 3 | 50 |  |  |  | 9 | ($self->rules) or $self->throw('Unable to design siRNAs: no rule set specified'); | 
| 319 |  |  |  |  |  |  |  | 
| 320 |  |  |  |  |  |  | #     unless ( grep { $_->primary_tag eq 'Target' } $self->target->top_SeqFeatures ) { | 
| 321 |  |  |  |  |  |  | # 	$self->_define_target(); | 
| 322 |  |  |  |  |  |  | #     } | 
| 323 |  |  |  |  |  |  |  | 
| 324 | 3 |  |  |  |  | 8 | my @oligos = $self->_get_oligos(); | 
| 325 |  |  |  |  |  |  |  | 
| 326 | 3 |  |  |  |  | 8 | return ( grep { $_->isa('Bio::SeqFeature::SiRNA::Pair') } $self->target->top_SeqFeatures ); | 
|  | 329 |  |  |  |  | 534 |  | 
| 327 |  |  |  |  |  |  | } | 
| 328 |  |  |  |  |  |  |  | 
| 329 |  |  |  |  |  |  | sub _define_target { | 
| 330 | 3 |  |  | 3 |  | 3 | my ($self) = @_; | 
| 331 | 3 |  |  |  |  | 4 | my ($feat, $cds, $left, $right); | 
| 332 |  |  |  |  |  |  |  | 
| 333 | 3 | 50 |  |  |  | 5 | my $target = $self->target or | 
| 334 |  |  |  |  |  |  | $self->throw("Unable to design oligos - no target provided"); | 
| 335 |  |  |  |  |  |  |  | 
| 336 | 3 | 50 |  |  |  | 15 | ($cds) = grep { $_->primary_tag eq 'CDS' } $target->top_SeqFeatures if ($target->can('top_SeqFeatures')); | 
|  | 14 |  |  |  |  | 17 |  | 
| 337 |  |  |  |  |  |  |  | 
| 338 | 3 | 100 |  |  |  | 7 | if ($cds) { | 
| 339 | 2 |  |  |  |  | 5 | $left = $cds->start + $self->start_pad; | 
| 340 | 2 | 100 |  |  |  | 7 | if (!$self->include_3pr) { | 
| 341 | 1 |  |  |  |  | 3 | $right = $cds->end - $self->end_pad; | 
| 342 |  |  |  |  |  |  | } | 
| 343 |  |  |  |  |  |  | else { | 
| 344 | 1 |  |  |  |  | 7 | $right = $target->length - $self->end_pad; | 
| 345 |  |  |  |  |  |  | } | 
| 346 |  |  |  |  |  |  | } | 
| 347 |  |  |  |  |  |  | else { | 
| 348 | 1 |  |  |  |  | 6 | $left = 0 + $self->start_pad; | 
| 349 | 1 |  |  |  |  | 4 | $right = $target->length - $self->end_pad; | 
| 350 |  |  |  |  |  |  | } | 
| 351 |  |  |  |  |  |  |  | 
| 352 |  |  |  |  |  |  |  | 
| 353 |  |  |  |  |  |  | # is there anything left? | 
| 354 | 3 | 50 |  |  |  | 10 | if (($right - $left) < 20) { | 
| 355 | 0 |  |  |  |  | 0 | $self->throw("There isn't enough sequence to design oligos.  Please reduce start_pad and end_pad or supply more sequence"); | 
| 356 |  |  |  |  |  |  | } | 
| 357 |  |  |  |  |  |  | # define target region | 
| 358 | 3 |  |  |  |  | 18 | my $targregion = Bio::SeqFeature::Generic->new( -start 		=> $left, | 
| 359 |  |  |  |  |  |  | -end 		=> $right, | 
| 360 |  |  |  |  |  |  | -primary		=> 'Target' ); | 
| 361 | 3 |  |  |  |  | 8 | $self->target->add_SeqFeature($targregion); | 
| 362 |  |  |  |  |  |  |  | 
| 363 |  |  |  |  |  |  | # locate excluded regions | 
| 364 | 3 |  |  |  |  | 7 | my @excluded = grep { $_->primary_tag eq 'Excluded' } $self->target->top_SeqFeatures; | 
|  | 17 |  |  |  |  | 22 |  | 
| 365 |  |  |  |  |  |  |  | 
| 366 | 3 | 50 |  |  |  | 17 | if ($self->avoid_snps) { | 
| 367 | 3 |  |  |  |  | 8 | my @snps =  grep { $_->primary_tag eq 'variation' } $self->target->top_SeqFeatures; | 
|  | 17 |  |  |  |  | 20 |  | 
| 368 | 3 |  |  |  |  | 6 | push(@excluded, @snps); | 
| 369 |  |  |  |  |  |  | } | 
| 370 |  |  |  |  |  |  |  | 
| 371 | 3 |  |  |  |  | 13 | $self->excluded(\@excluded); | 
| 372 |  |  |  |  |  |  |  | 
| 373 | 3 |  |  |  |  | 8 | return $targregion; | 
| 374 |  |  |  |  |  |  | } | 
| 375 |  |  |  |  |  |  |  | 
| 376 |  |  |  |  |  |  | sub _get_targetregion { | 
| 377 | 9 |  |  | 9 |  | 12 | my ($self) = @_; | 
| 378 |  |  |  |  |  |  |  | 
| 379 | 9 |  |  |  |  | 24 | my ($targregion) = grep { $_->primary_tag eq 'Target' } $self->target->top_SeqFeatures; | 
|  | 177 |  |  |  |  | 204 |  | 
| 380 | 9 |  | 66 |  |  | 30 | $targregion ||= $self->_define_target; | 
| 381 |  |  |  |  |  |  |  | 
| 382 | 9 | 50 |  |  |  | 16 | $self->throw("Target region for SiRNA design not defined") unless ($targregion); | 
| 383 |  |  |  |  |  |  |  | 
| 384 | 9 |  |  |  |  | 28 | my $seq = $targregion->seq->seq; | 
| 385 |  |  |  |  |  |  | # but this way I loose start info | 
| 386 | 9 |  |  |  |  | 34 | my $targstart = $targregion->start; | 
| 387 |  |  |  |  |  |  |  | 
| 388 | 9 |  |  |  |  | 20 | return ($seq, $targstart); | 
| 389 |  |  |  |  |  |  | } | 
| 390 |  |  |  |  |  |  |  | 
| 391 |  |  |  |  |  |  | # MOVE to SiRNA::Ruleset::tuschl | 
| 392 |  |  |  |  |  |  | # sub _regex { | 
| 393 |  |  |  |  |  |  | #     my ($self, $rank) = @_; | 
| 394 |  |  |  |  |  |  | #     return $PATTERNS{$rank}; | 
| 395 |  |  |  |  |  |  | # } | 
| 396 |  |  |  |  |  |  |  | 
| 397 |  |  |  |  |  |  | # sub _get_oligos { | 
| 398 |  |  |  |  |  |  | #     # use regular expressions to pull out oligos | 
| 399 |  |  |  |  |  |  |  | 
| 400 |  |  |  |  |  |  | #     my ($self, $rank) = @_; | 
| 401 |  |  |  |  |  |  | #     my $regex = $self->_regex($rank); | 
| 402 |  |  |  |  |  |  | #     my @exclude; | 
| 403 |  |  |  |  |  |  |  | 
| 404 |  |  |  |  |  |  |  | 
| 405 |  |  |  |  |  |  | #     my ($targregion) = grep { $_->primary_tag eq 'Target' } $self->target->top_SeqFeatures; | 
| 406 |  |  |  |  |  |  | #     my $seq = $targregion->seq->seq; | 
| 407 |  |  |  |  |  |  | #     # but this way I loose start info | 
| 408 |  |  |  |  |  |  | #     my $targstart = $targregion->start; | 
| 409 |  |  |  |  |  |  |  | 
| 410 |  |  |  |  |  |  | #     # exclude masked region | 
| 411 |  |  |  |  |  |  | #     push(@exclude, grep { $_->primary_tag eq 'Excluded' } $self->target->top_SeqFeatures); | 
| 412 |  |  |  |  |  |  |  | 
| 413 |  |  |  |  |  |  | #     # add SNP checking | 
| 414 |  |  |  |  |  |  | #     if ($self->avoid_snps) { | 
| 415 |  |  |  |  |  |  | # 	my @snps =  grep { $_->primary_tag eq 'variation' } $self->target->top_SeqFeatures; | 
| 416 |  |  |  |  |  |  | # 	push(@exclude, @snps); | 
| 417 |  |  |  |  |  |  | #     } | 
| 418 |  |  |  |  |  |  |  | 
| 419 |  |  |  |  |  |  | #     while ( $seq =~ /$regex/gi ) { | 
| 420 |  |  |  |  |  |  | # 	my $target = $1; | 
| 421 |  |  |  |  |  |  |  | 
| 422 |  |  |  |  |  |  | # 	# check for too many Gs (or Cs on the other strand) | 
| 423 |  |  |  |  |  |  | # 	next if ( $target =~ /G{ $self->gstring,}/io ); | 
| 424 |  |  |  |  |  |  | # 	next if ( $target =~ /C{ $self->gstring,}/io ); | 
| 425 |  |  |  |  |  |  | # 	# skip Ns (for filtering) | 
| 426 |  |  |  |  |  |  | # 	next if ( $target =~ /N/i); | 
| 427 |  |  |  |  |  |  |  | 
| 428 |  |  |  |  |  |  | # 	my $start = length($`) + $targstart; | 
| 429 |  |  |  |  |  |  | # 	my $stop = $start + length($target) -1; | 
| 430 |  |  |  |  |  |  |  | 
| 431 |  |  |  |  |  |  | # 	my @gc = ( $target =~ /G|C/gi); | 
| 432 |  |  |  |  |  |  | # 	my $fxGC = sprintf("%2.2f", (scalar(@gc) / length($target))); | 
| 433 |  |  |  |  |  |  | # 	next if ($fxGC < $self->min_gc); | 
| 434 |  |  |  |  |  |  | # 	next if ($fxGC > $self->max_gc); | 
| 435 |  |  |  |  |  |  |  | 
| 436 |  |  |  |  |  |  | # 	my $sense = Bio::SeqFeature::SiRNA::Oligo->new( -start 		=> $start, | 
| 437 |  |  |  |  |  |  | # 							-end 		=> $stop, | 
| 438 |  |  |  |  |  |  | # 							-strand 	=> 1, | 
| 439 |  |  |  |  |  |  | # 							-seq 		=> _get_sense($target), | 
| 440 |  |  |  |  |  |  | # 							-source_tag	=> ref($self), | 
| 441 |  |  |  |  |  |  | # 						       ); | 
| 442 |  |  |  |  |  |  |  | 
| 443 |  |  |  |  |  |  | # 	my $asense = Bio::SeqFeature::SiRNA::Oligo->new( -start 	=> $start, | 
| 444 |  |  |  |  |  |  | # 							 -end		=> $stop, | 
| 445 |  |  |  |  |  |  | # 							 -strand	=> -1, | 
| 446 |  |  |  |  |  |  | # 							 -seq 		=> _get_anti($target), | 
| 447 |  |  |  |  |  |  | # 							 -source_tag	=> ref($self), | 
| 448 |  |  |  |  |  |  | # 							 ); | 
| 449 |  |  |  |  |  |  |  | 
| 450 |  |  |  |  |  |  | #   	my $sirna = Bio::SeqFeature::SiRNA::Pair->new( -rank 		=> $rank, | 
| 451 |  |  |  |  |  |  | # 						       -fxGC		=> $fxGC, | 
| 452 |  |  |  |  |  |  | # 						       -sense 		=> $sense, | 
| 453 |  |  |  |  |  |  | # 						       -antisense 	=> $asense, | 
| 454 |  |  |  |  |  |  | # 						       -source_tag	=> ref($self), | 
| 455 |  |  |  |  |  |  | # 						       ); | 
| 456 |  |  |  |  |  |  |  | 
| 457 |  |  |  |  |  |  | # 	unless ($self->_has_overlap($sirna, \@exclude)) { | 
| 458 |  |  |  |  |  |  | # 	    $self->target->add_SeqFeature($sirna); | 
| 459 |  |  |  |  |  |  | # 	} | 
| 460 |  |  |  |  |  |  | #     } | 
| 461 |  |  |  |  |  |  | # } | 
| 462 |  |  |  |  |  |  |  | 
| 463 |  |  |  |  |  |  | =head2 add_oligos | 
| 464 |  |  |  |  |  |  |  | 
| 465 |  |  |  |  |  |  | Title		: add_oligos | 
| 466 |  |  |  |  |  |  | Usage	 	: $sirna_designer->add_oligos($sequence, $start, $rank); | 
| 467 |  |  |  |  |  |  | Purpose	: Add SiRNA olgos to target Bio::Seq as Bio::SeqFeature::SiRNA::Pair objects | 
| 468 |  |  |  |  |  |  | Args		: Oligo sequence and start position (required), rank/score (optional) | 
| 469 |  |  |  |  |  |  |  | 
| 470 |  |  |  |  |  |  | =cut | 
| 471 |  |  |  |  |  |  |  | 
| 472 |  |  |  |  |  |  | sub add_oligos { | 
| 473 | 312 |  |  | 312 | 1 | 345 | my ($self, $seq, $start, $rank) = @_; | 
| 474 |  |  |  |  |  |  |  | 
| 475 | 312 | 50 |  |  |  | 365 | ($seq) or throw ('No sequence supplied for add_oligos'); | 
| 476 | 312 | 50 |  |  |  | 399 | (defined $start) or throw ('No start position specified for  add_oligos'); | 
| 477 |  |  |  |  |  |  |  | 
| 478 | 312 |  |  |  |  | 278 | my ($end) = $start + length($seq); | 
| 479 |  |  |  |  |  |  |  | 
| 480 | 312 |  |  |  |  | 495 | my ($sseq) = $self->_get_sense($seq); | 
| 481 | 312 |  |  |  |  | 1321 | my $sense = Bio::SeqFeature::SiRNA::Oligo->new( -start 		=> $start, | 
| 482 |  |  |  |  |  |  | -end 		=> ($start + length($sseq)), | 
| 483 |  |  |  |  |  |  | -strand 	=> 1, | 
| 484 |  |  |  |  |  |  | -seq 		=> $sseq, | 
| 485 |  |  |  |  |  |  | -source_tag	=> ref($self), | 
| 486 |  |  |  |  |  |  | ); | 
| 487 |  |  |  |  |  |  |  | 
| 488 | 312 |  |  |  |  | 724 | my $aseq = $self->_get_anti($seq); | 
| 489 | 312 |  |  |  |  | 1206 | my $asense = Bio::SeqFeature::SiRNA::Oligo->new( -start 		=> $end, | 
| 490 |  |  |  |  |  |  | -end		=> ($end - length($aseq)), | 
| 491 |  |  |  |  |  |  | -strand		=> -1, | 
| 492 |  |  |  |  |  |  | -seq 		=> $aseq, | 
| 493 |  |  |  |  |  |  | -source_tag	=> ref($self), | 
| 494 |  |  |  |  |  |  | ); | 
| 495 |  |  |  |  |  |  |  | 
| 496 | 312 |  |  |  |  | 1142 | my $sirna = Bio::SeqFeature::SiRNA::Pair->new( -rank 		=> $rank, | 
| 497 |  |  |  |  |  |  | # -fxGC		=> $fxGC, | 
| 498 |  |  |  |  |  |  | -sense 		=> $sense, | 
| 499 |  |  |  |  |  |  | -antisense 	=> $asense, | 
| 500 |  |  |  |  |  |  | -source_tag	=> ref($self), | 
| 501 |  |  |  |  |  |  | ); | 
| 502 |  |  |  |  |  |  |  | 
| 503 | 312 | 50 |  |  |  | 1615 | unless ($self->_has_overlap($sirna, $self->excluded)) { | 
| 504 | 312 |  |  |  |  | 451 | $self->target->add_SeqFeature($sirna); | 
| 505 |  |  |  |  |  |  | } | 
| 506 |  |  |  |  |  |  | } | 
| 507 |  |  |  |  |  |  |  | 
| 508 |  |  |  |  |  |  | sub _has_overlap { | 
| 509 |  |  |  |  |  |  | # flag any pairs that overlap an UNDESIRED feature (eg SNP) | 
| 510 |  |  |  |  |  |  | # return true if there is overlap, false if not | 
| 511 |  |  |  |  |  |  |  | 
| 512 | 312 |  |  | 312 |  | 257 | my ($self, $test, $flist) = @_; | 
| 513 | 312 | 50 |  |  |  | 529 | print STDERR "Checking oligo at ", $test->start, " to ",$test->end, "\n" | 
| 514 |  |  |  |  |  |  | if ($self->debug); | 
| 515 |  |  |  |  |  |  |  | 
| 516 | 312 |  |  |  |  | 553 | foreach my $feat (@$flist) { | 
| 517 | 0 | 0 | 0 |  |  | 0 | if (($test->start <= $feat->end) and ($test->end >= $feat->start)) { | 
| 518 | 0 | 0 |  |  |  | 0 | print STDERR "Overlaps ", $feat->primary_tag, " at ", | 
| 519 |  |  |  |  |  |  | $feat->start, " to ", $feat->end, "\n" if ($self->debug); | 
| 520 | 0 |  |  |  |  | 0 | return 1; | 
| 521 |  |  |  |  |  |  | } | 
| 522 |  |  |  |  |  |  | } | 
| 523 | 312 |  |  |  |  | 485 | return 0; # default - no overlap | 
| 524 |  |  |  |  |  |  | } | 
| 525 |  |  |  |  |  |  |  | 
| 526 |  |  |  |  |  |  | # MOVE to SiRNA::Ruleset::tuschl | 
| 527 |  |  |  |  |  |  |  | 
| 528 |  |  |  |  |  |  | # sub _get_sense { | 
| 529 |  |  |  |  |  |  | #     my ($target) = @_; | 
| 530 |  |  |  |  |  |  | #     # trim off 1st 2 nt to get overhang | 
| 531 |  |  |  |  |  |  | #     $target =~ s/^..//; | 
| 532 |  |  |  |  |  |  | #     # convert T's to U's (transcribe) | 
| 533 |  |  |  |  |  |  | #     $target =~ s/T/U/gi; | 
| 534 |  |  |  |  |  |  | #     # force last 2 nt to be T's | 
| 535 |  |  |  |  |  |  | #     $target =~ s/..$/TT/; | 
| 536 |  |  |  |  |  |  |  | 
| 537 |  |  |  |  |  |  | #     return $target; | 
| 538 |  |  |  |  |  |  | # } | 
| 539 |  |  |  |  |  |  |  | 
| 540 |  |  |  |  |  |  | # sub _get_anti { | 
| 541 |  |  |  |  |  |  | #     my ($target) = @_; | 
| 542 |  |  |  |  |  |  | #     my @target = split(//, $target); | 
| 543 |  |  |  |  |  |  | #     my ($nt,@antitarget); | 
| 544 |  |  |  |  |  |  |  | 
| 545 |  |  |  |  |  |  | #     while ($nt = pop @target) { | 
| 546 |  |  |  |  |  |  | # 	push(@antitarget, $COMP{$nt}); | 
| 547 |  |  |  |  |  |  | #     } | 
| 548 |  |  |  |  |  |  | #     my $anti = join('', @antitarget); | 
| 549 |  |  |  |  |  |  | #     # trim off 1st 2 nt to get overhang | 
| 550 |  |  |  |  |  |  | #     $anti =~ s/^..//; | 
| 551 |  |  |  |  |  |  | #     # convert T's to U's | 
| 552 |  |  |  |  |  |  | #     $anti =~ s/T/U/gi; | 
| 553 |  |  |  |  |  |  | #     # convert last 2 NT's to T | 
| 554 |  |  |  |  |  |  | #     $anti =~ s/..$/TT/; | 
| 555 |  |  |  |  |  |  |  | 
| 556 |  |  |  |  |  |  | #     return $anti; | 
| 557 |  |  |  |  |  |  | # } | 
| 558 |  |  |  |  |  |  |  | 
| 559 |  |  |  |  |  |  |  | 
| 560 |  |  |  |  |  |  | sub AUTOLOAD { | 
| 561 | 2047 |  |  | 2047 |  | 1655 | my ($self, $value) = @_; | 
| 562 | 2047 |  |  |  |  | 1493 | my $name = $AUTOLOAD; | 
| 563 | 2047 |  |  |  |  | 4399 | $name =~ s/.+:://; | 
| 564 |  |  |  |  |  |  |  | 
| 565 | 2047 | 50 |  |  |  | 3038 | return if ($name eq 'DESTROY'); | 
| 566 |  |  |  |  |  |  |  | 
| 567 |  |  |  |  |  |  |  | 
| 568 | 2047 | 100 |  |  |  | 2248 | if (defined $value) { | 
| 569 | 4 |  |  |  |  | 9 | $self->{$name} = $value; | 
| 570 |  |  |  |  |  |  | } | 
| 571 |  |  |  |  |  |  |  | 
| 572 | 2047 | 50 |  |  |  | 2632 | unless (exists $self->{$name}) { | 
| 573 | 0 |  |  |  |  | 0 | $self->throw("Attribute $name not defined for ". ref($self)); | 
| 574 |  |  |  |  |  |  | } | 
| 575 |  |  |  |  |  |  |  | 
| 576 | 2047 |  |  |  |  | 5294 | return $self->{$name}; | 
| 577 |  |  |  |  |  |  | } | 
| 578 |  |  |  |  |  |  |  | 
| 579 |  |  |  |  |  |  | sub _comp { | 
| 580 | 7176 |  |  | 7176 |  | 4782 | my ($self, $char) = @_; | 
| 581 |  |  |  |  |  |  |  | 
| 582 | 7176 | 50 |  |  |  | 7688 | return unless ($char); | 
| 583 | 7176 |  |  |  |  | 4300 | $char = uc($char); | 
| 584 | 7176 |  |  |  |  | 12335 | return $COMP{ $char }; | 
| 585 |  |  |  |  |  |  | } | 
| 586 |  |  |  |  |  |  | 1; |