| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# |
|
2
|
|
|
|
|
|
|
# BioPerl module for Bio::Tools::SiRNA |
|
3
|
|
|
|
|
|
|
# |
|
4
|
|
|
|
|
|
|
# Please direct questions and support issues to |
|
5
|
|
|
|
|
|
|
# |
|
6
|
|
|
|
|
|
|
# Cared for by Donald Jackson, donald.jackson@bms.com |
|
7
|
|
|
|
|
|
|
# |
|
8
|
|
|
|
|
|
|
# Copyright Bristol-Myers Squibb |
|
9
|
|
|
|
|
|
|
# |
|
10
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
SiRNA - Perl object for designing small inhibitory RNAs. |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
use Bio::Tools::SiRNA; |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
my $sirna_designer = Bio::Tools::SiRNA->new( -target => $bio_seq, |
|
23
|
|
|
|
|
|
|
-rules => 'saigo' |
|
24
|
|
|
|
|
|
|
); |
|
25
|
|
|
|
|
|
|
my @pairs = $sirna_designer->design; |
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
foreach $pair (@pairs) { |
|
28
|
|
|
|
|
|
|
my $sense_oligo_sequence = $pair->sense->seq; |
|
29
|
|
|
|
|
|
|
my $antisense_oligo_sequence = $pair->antisense->seq; |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# print out results |
|
32
|
|
|
|
|
|
|
print join ("\t", $pair->start, $pair->end, $pair->rank, |
|
33
|
|
|
|
|
|
|
$sense_oligo_sequence, $antisense_oligo_sequence), "\n"; |
|
34
|
|
|
|
|
|
|
} |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
Package for designing siRNA reagents. |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
Input is a L-compliant object (the target). |
|
41
|
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
Output is a list of Bio::SeqFeature::SiRNA::Pair objects, which are |
|
43
|
|
|
|
|
|
|
added to the feature table of the target sequence. Each |
|
44
|
|
|
|
|
|
|
Bio::SeqFeature::SiRNA::Pair contains two subfeatures |
|
45
|
|
|
|
|
|
|
(Bio::SeqFeature::Oligo objects) which correspond to the individual |
|
46
|
|
|
|
|
|
|
oligos. These objects provide accessors for the information on the |
|
47
|
|
|
|
|
|
|
individual reagent pairs. |
|
48
|
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
This verion of Bio::Tools::SiRNA represents a major change in architecture. |
|
50
|
|
|
|
|
|
|
Specific 'rulesets' for siRNA selection as developed by various groups are |
|
51
|
|
|
|
|
|
|
implemented as Bio::Tools::SiRNA::Ruleset objects, which inherit from |
|
52
|
|
|
|
|
|
|
Bio::Tools::SiRNA. This will make it easier to add new rule sets or modify |
|
53
|
|
|
|
|
|
|
existing approaches. Currently the Tuschl and Ui-Tei (2004) rules are |
|
54
|
|
|
|
|
|
|
implemented. For consistency, the Tuschl rules are implemented by default. |
|
55
|
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
In addition, this module provides three 'extra' rules which can be added |
|
57
|
|
|
|
|
|
|
above and beyond any ruleset. |
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=over 3 |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
=item 1. |
|
62
|
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
SiRNAs that overlap known SNPs (identified as SeqFeatures with |
|
64
|
|
|
|
|
|
|
primary tag = variation) can be avoided. |
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=item 2. |
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
Other regions (with primary tag = 'Excluded') can also be skipped. I |
|
69
|
|
|
|
|
|
|
use this with Bio::Tools::Run::Mdust to avoid low-complexity regions |
|
70
|
|
|
|
|
|
|
(must be run separately), but other programs could also be used. |
|
71
|
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
=item 3. |
|
73
|
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
SiRNAs may also be selected in the 3 prime UTR of a gene by setting |
|
75
|
|
|
|
|
|
|
$sirna_designer-Einclude_3pr() to true. |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=back |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=head2 EXPORT |
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
None. |
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
84
|
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
L, L, |
|
86
|
|
|
|
|
|
|
L.. |
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
=head1 FEEDBACK |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
=head2 Mailing Lists |
|
91
|
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
|
93
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to |
|
94
|
|
|
|
|
|
|
the Bioperl mailing list. Your participation is much appreciated. |
|
95
|
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
|
97
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
|
98
|
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
=head2 Support |
|
100
|
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
|
102
|
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
I |
|
104
|
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
|
106
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
|
107
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
|
108
|
|
|
|
|
|
|
with code and data examples if at all possible. |
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=head2 Reporting Bugs |
|
111
|
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
|
113
|
|
|
|
|
|
|
of the bugs and their resolution. Bug reports can be submitted via |
|
114
|
|
|
|
|
|
|
the web: |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
|
117
|
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=head1 AUTHOR |
|
119
|
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
Donald Jackson (donald.jackson@bms.com) |
|
121
|
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=head1 APPENDIX |
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
The rest of the documentation details each of the object methods. |
|
125
|
|
|
|
|
|
|
Internal methods are usually preceded with a _ |
|
126
|
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
=cut |
|
128
|
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
package Bio::Tools::SiRNA; |
|
130
|
|
|
|
|
|
|
|
|
131
|
1
|
|
|
1
|
|
486
|
use strict; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
22
|
|
|
132
|
1
|
|
|
1
|
|
3
|
use warnings; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
23
|
|
|
133
|
|
|
|
|
|
|
|
|
134
|
1
|
|
|
1
|
|
3
|
use vars qw($AUTOLOAD); |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
28
|
|
|
135
|
|
|
|
|
|
|
|
|
136
|
1
|
|
|
1
|
|
265
|
use Bio::Seq::RichSeq; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
31
|
|
|
137
|
1
|
|
|
1
|
|
366
|
use Bio::SeqFeature::Generic; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
26
|
|
|
138
|
1
|
|
|
1
|
|
293
|
use Bio::SeqFeature::SiRNA::Oligo; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
28
|
|
|
139
|
1
|
|
|
1
|
|
300
|
use Bio::SeqFeature::SiRNA::Pair; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
28
|
|
|
140
|
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
|
|
142
|
1
|
|
|
1
|
|
4
|
use base qw(Bio::Root::Root); |
|
|
1
|
|
|
|
|
0
|
|
|
|
1
|
|
|
|
|
1148
|
|
|
143
|
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
our %COMP = ( A => 'T', |
|
146
|
|
|
|
|
|
|
T => 'A', |
|
147
|
|
|
|
|
|
|
C => 'G', |
|
148
|
|
|
|
|
|
|
G => 'C', |
|
149
|
|
|
|
|
|
|
N => 'N', |
|
150
|
|
|
|
|
|
|
); |
|
151
|
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
our @ARGNAMES = qw(RULES START_PAD END_PAD MIN_GC CUTOFF OLIGOS AVOID_SNPS |
|
153
|
|
|
|
|
|
|
GSTRING TMPDIR TARGET DEBUG); |
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
=head2 new |
|
157
|
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
Title : new |
|
159
|
|
|
|
|
|
|
Usage : my $sirna_designer = Bio::Tools::SiRNA->new(); |
|
160
|
|
|
|
|
|
|
Function : Constructor for designer object |
|
161
|
|
|
|
|
|
|
Returns : Bio::Tools::SiRNA object |
|
162
|
|
|
|
|
|
|
Args : target - the target sequence for the SiRNAs as a Bio::Seq::RichSeq |
|
163
|
|
|
|
|
|
|
start_pad - distance from the CDS start to skip (default 75) |
|
164
|
|
|
|
|
|
|
end_pad - distance from the CDS end to skip (default 50) |
|
165
|
|
|
|
|
|
|
include_3pr - set to true to include SiRNAs in the 3prime UTR (default false) |
|
166
|
|
|
|
|
|
|
rules - rules for selecting siRNAs, currently supporting saigo and tuschl |
|
167
|
|
|
|
|
|
|
min_gc - minimum GC fraction (NOT percent) (default 0.4) |
|
168
|
|
|
|
|
|
|
max_gc - maximum GC fraction (NOT percent) (default 0.6) |
|
169
|
|
|
|
|
|
|
cutoff - worst 'rank' accepted(default 3) |
|
170
|
|
|
|
|
|
|
avoid_snps - boolean - reject oligos that overlap a variation |
|
171
|
|
|
|
|
|
|
SeqFeature in the target (default true) |
|
172
|
|
|
|
|
|
|
gstring - maximum allowed consecutive Gs. |
|
173
|
|
|
|
|
|
|
Too many can cause problems in synthesis (default 4) |
|
174
|
|
|
|
|
|
|
Note : All arguments can also be changed/accessed using autoloaded |
|
175
|
|
|
|
|
|
|
methods such as: |
|
176
|
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
my $start_pad = $sirna_designer->start_pad(). |
|
178
|
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
=cut |
|
180
|
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
sub new { |
|
182
|
1
|
|
|
1
|
1
|
511
|
my ($proto, @args) = @_; |
|
183
|
1
|
|
33
|
|
|
6
|
my $pkg = ref($proto) || $proto; |
|
184
|
|
|
|
|
|
|
|
|
185
|
1
|
|
|
|
|
1
|
my $self = {}; |
|
186
|
1
|
|
|
|
|
2
|
bless ($self, $pkg); |
|
187
|
|
|
|
|
|
|
|
|
188
|
1
|
|
|
|
|
2
|
my %args; |
|
189
|
|
|
|
|
|
|
|
|
190
|
1
|
|
|
|
|
6
|
@args{@ARGNAMES} = $self->_rearrange(\@ARGNAMES, @args); |
|
191
|
|
|
|
|
|
|
|
|
192
|
1
|
50
|
|
|
|
8
|
if ($args{'RULES'}) { |
|
193
|
0
|
|
|
|
|
0
|
$self->rules($args{'RULES'}); |
|
194
|
|
|
|
|
|
|
} |
|
195
|
|
|
|
|
|
|
|
|
196
|
1
|
|
50
|
|
|
9
|
$self->{'start_pad'} = $args{'START_PAD'} || 75; # nt from start to mask |
|
197
|
1
|
|
50
|
|
|
6
|
$self->{'end_pad'} = $args{'END_PAD'} || 50; # nt from end to mask |
|
198
|
1
|
|
50
|
|
|
4
|
$self->{'include_3pr'} = $args{'INCLUDE_3PR'} || 0; # look for oligos in 3prime UTR |
|
199
|
1
|
|
50
|
|
|
4
|
$self->{'min_gc'} = $args{'MIN_GC'} || 0.40; |
|
200
|
1
|
|
50
|
|
|
5
|
$self->{'max_gc'} = $args{'MAX_GC'} || 0.60; |
|
201
|
1
|
|
50
|
|
|
5
|
$self->{'cutoff'} = $args{'CUTOFF'} || 3; # highest (worst) rank wanted |
|
202
|
1
|
|
|
|
|
2
|
$self->{'oligos'} = []; |
|
203
|
|
|
|
|
|
|
defined($args{'AVOID_SNPS'}) ? $self->{'avoid_snps'} = $args{'AVOID_SNPS'} : |
|
204
|
1
|
50
|
|
|
|
4
|
$self->{'avoid_snps'} = 1; # (t/f to avoid or include reagents that cover SNPs) |
|
205
|
1
|
|
50
|
|
|
4
|
$self->{'gstring'} = $args{'GSTRING'} || 4; # maximum allowed consecutive Gs - too many can cause problems in oligo synthesis |
|
206
|
1
|
|
50
|
|
|
12
|
$self->{'tmpdir'} = $args{'TMPDIR'} || $ENV{'TMPDIR'} || $ENV{'TMP'} || ''; |
|
207
|
1
|
|
50
|
|
|
4
|
$self->{'debug'} = $args{'DEBUG'} || 0; |
|
208
|
|
|
|
|
|
|
|
|
209
|
1
|
50
|
|
|
|
5
|
$self->target($args{'TARGET'}) if ($args{'TARGET'}); |
|
210
|
|
|
|
|
|
|
|
|
211
|
1
|
|
|
|
|
4
|
return $self; |
|
212
|
|
|
|
|
|
|
} |
|
213
|
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
=head2 target |
|
216
|
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
Title : target |
|
218
|
|
|
|
|
|
|
Usage : my $target_seq = $sirna_designer->target(); # get the current target |
|
219
|
|
|
|
|
|
|
OR |
|
220
|
|
|
|
|
|
|
$sirna_designer->target($new_target_seq); # set a new target |
|
221
|
|
|
|
|
|
|
Function : Set/get the target as a Bio::SeqI-compliant object |
|
222
|
|
|
|
|
|
|
Returns : a Bio::SeqI-compliant object |
|
223
|
|
|
|
|
|
|
Args : a Bio::SeqI-compliant object (optional) |
|
224
|
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
=cut |
|
226
|
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
sub target { |
|
228
|
338
|
|
|
338
|
1
|
517
|
my ($self, $target) = @_; |
|
229
|
|
|
|
|
|
|
|
|
230
|
338
|
100
|
|
|
|
586
|
if ($target) { |
|
|
|
50
|
|
|
|
|
|
|
231
|
2
|
50
|
|
|
|
11
|
unless ($target->isa('Bio::SeqI')) { |
|
232
|
0
|
|
|
|
|
0
|
$self->throw( -class => 'Bio::Root::BadParameter', |
|
233
|
|
|
|
|
|
|
-text => "Target must be passed as a Bio::Seq object" ); |
|
234
|
|
|
|
|
|
|
} |
|
235
|
2
|
100
|
|
|
|
15
|
if ($target->can('molecule')) { |
|
236
|
1
|
50
|
|
|
|
2
|
( grep { uc($target->molecule) eq $_ } qw(DNA MRNA CDNA)) or |
|
|
3
|
|
|
|
|
9
|
|
|
237
|
|
|
|
|
|
|
$self->throw( -class => 'Bio::Root::BadParameter', |
|
238
|
|
|
|
|
|
|
-text => "Sequences of type ". $target->molecule. " are not supported" |
|
239
|
|
|
|
|
|
|
); |
|
240
|
|
|
|
|
|
|
} |
|
241
|
|
|
|
|
|
|
else { |
|
242
|
1
|
50
|
|
|
|
5
|
($target->alphabet eq 'dna') or |
|
243
|
|
|
|
|
|
|
$self->throw( -class => 'Bio::Root::BadParameter', |
|
244
|
|
|
|
|
|
|
-text => "Sequences of alphabet ". $target->alphabet. " are not supported" |
|
245
|
|
|
|
|
|
|
); |
|
246
|
|
|
|
|
|
|
} |
|
247
|
|
|
|
|
|
|
|
|
248
|
2
|
|
|
|
|
4
|
$self->{'target'} = $target; |
|
249
|
2
|
|
|
|
|
6
|
return 1; |
|
250
|
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
} |
|
252
|
|
|
|
|
|
|
elsif ($self->{'target'}) { |
|
253
|
336
|
|
|
|
|
828
|
return $self->{'target'}; |
|
254
|
|
|
|
|
|
|
} |
|
255
|
|
|
|
|
|
|
else { |
|
256
|
0
|
|
|
|
|
0
|
$self->throw("Target sequence not defined"); |
|
257
|
|
|
|
|
|
|
} |
|
258
|
|
|
|
|
|
|
} |
|
259
|
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
=head2 rules |
|
261
|
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
Title : rules |
|
263
|
|
|
|
|
|
|
Usage : $sirna->rules('ruleset') |
|
264
|
|
|
|
|
|
|
Purpose : set/get ruleset to use for selecting SiRNA oligo pairs. |
|
265
|
|
|
|
|
|
|
Returns : not sure yet |
|
266
|
|
|
|
|
|
|
Args : a ruleset name (currently supported: Tuschl, Saigo) |
|
267
|
|
|
|
|
|
|
or a Bio::Tools::SiRNA::RulesetI compliant object |
|
268
|
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
=cut |
|
270
|
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
sub rules { |
|
272
|
3
|
|
|
3
|
1
|
5
|
my ($self, $rules) = @_; |
|
273
|
|
|
|
|
|
|
|
|
274
|
3
|
50
|
|
|
|
10
|
if ($rules) { |
|
275
|
0
|
|
|
|
|
0
|
$self->_load_ruleset($rules); |
|
276
|
|
|
|
|
|
|
} |
|
277
|
|
|
|
|
|
|
# default: use tuschl rules |
|
278
|
3
|
100
|
|
|
|
7
|
unless ($self->{_rules}) { |
|
279
|
1
|
|
|
|
|
10
|
$self->_load_ruleset('tuschl'); |
|
280
|
|
|
|
|
|
|
} |
|
281
|
3
|
|
|
|
|
14
|
return $self->{_rules}; |
|
282
|
|
|
|
|
|
|
} |
|
283
|
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
sub _load_ruleset { |
|
285
|
1
|
|
|
1
|
|
3
|
my ($self, $ruleset) = @_; |
|
286
|
|
|
|
|
|
|
|
|
287
|
1
|
|
|
|
|
5
|
my $rule_module = join('::', ref($self), 'Ruleset', lc($ruleset)); |
|
288
|
|
|
|
|
|
|
|
|
289
|
1
|
|
|
|
|
92
|
eval "require $rule_module"; |
|
290
|
|
|
|
|
|
|
|
|
291
|
1
|
50
|
|
|
|
4
|
if ($@) { |
|
292
|
|
|
|
|
|
|
#warn join("\n", '@INC contains:', @INC, undef); |
|
293
|
0
|
|
|
|
|
0
|
$self->throw("Unable to load $rule_module: $@"); |
|
294
|
0
|
|
|
|
|
0
|
return; |
|
295
|
|
|
|
|
|
|
} |
|
296
|
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
else { |
|
298
|
1
|
|
|
|
|
3
|
$self->{_rules} = $rule_module; |
|
299
|
1
|
|
|
|
|
3
|
bless($self, $rule_module); # recast as subclass |
|
300
|
|
|
|
|
|
|
} |
|
301
|
|
|
|
|
|
|
|
|
302
|
1
|
|
|
|
|
2
|
return 1; |
|
303
|
|
|
|
|
|
|
} |
|
304
|
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
=head2 design |
|
306
|
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
Title : design |
|
308
|
|
|
|
|
|
|
Usage : my @pairs = $sirna_designer->design(); |
|
309
|
|
|
|
|
|
|
Purpose : Design SiRNA oligo pairs. |
|
310
|
|
|
|
|
|
|
Returns : A list of SiRNA pairs as Bio::SeqFeature::SiRNA::Pair objects |
|
311
|
|
|
|
|
|
|
Args : none |
|
312
|
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
=cut |
|
314
|
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
sub design { |
|
316
|
3
|
|
|
3
|
1
|
216
|
my ($self) = @_; |
|
317
|
|
|
|
|
|
|
|
|
318
|
3
|
50
|
|
|
|
12
|
($self->rules) or $self->throw('Unable to design siRNAs: no rule set specified'); |
|
319
|
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
# unless ( grep { $_->primary_tag eq 'Target' } $self->target->top_SeqFeatures ) { |
|
321
|
|
|
|
|
|
|
# $self->_define_target(); |
|
322
|
|
|
|
|
|
|
# } |
|
323
|
|
|
|
|
|
|
|
|
324
|
3
|
|
|
|
|
11
|
my @oligos = $self->_get_oligos(); |
|
325
|
|
|
|
|
|
|
|
|
326
|
3
|
|
|
|
|
22
|
return ( grep { $_->isa('Bio::SeqFeature::SiRNA::Pair') } $self->target->top_SeqFeatures ); |
|
|
329
|
|
|
|
|
525
|
|
|
327
|
|
|
|
|
|
|
} |
|
328
|
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
sub _define_target { |
|
330
|
3
|
|
|
3
|
|
4
|
my ($self) = @_; |
|
331
|
3
|
|
|
|
|
4
|
my ($feat, $cds, $left, $right); |
|
332
|
|
|
|
|
|
|
|
|
333
|
3
|
50
|
|
|
|
6
|
my $target = $self->target or |
|
334
|
|
|
|
|
|
|
$self->throw("Unable to design oligos - no target provided"); |
|
335
|
|
|
|
|
|
|
|
|
336
|
3
|
50
|
|
|
|
14
|
($cds) = grep { $_->primary_tag eq 'CDS' } $target->top_SeqFeatures if ($target->can('top_SeqFeatures')); |
|
|
14
|
|
|
|
|
16
|
|
|
337
|
|
|
|
|
|
|
|
|
338
|
3
|
100
|
|
|
|
8
|
if ($cds) { |
|
339
|
2
|
|
|
|
|
5
|
$left = $cds->start + $self->start_pad; |
|
340
|
2
|
100
|
|
|
|
12
|
if (!$self->include_3pr) { |
|
341
|
1
|
|
|
|
|
4
|
$right = $cds->end - $self->end_pad; |
|
342
|
|
|
|
|
|
|
} |
|
343
|
|
|
|
|
|
|
else { |
|
344
|
1
|
|
|
|
|
10
|
$right = $target->length - $self->end_pad; |
|
345
|
|
|
|
|
|
|
} |
|
346
|
|
|
|
|
|
|
} |
|
347
|
|
|
|
|
|
|
else { |
|
348
|
1
|
|
|
|
|
6
|
$left = 0 + $self->start_pad; |
|
349
|
1
|
|
|
|
|
5
|
$right = $target->length - $self->end_pad; |
|
350
|
|
|
|
|
|
|
} |
|
351
|
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
# is there anything left? |
|
354
|
3
|
50
|
|
|
|
9
|
if (($right - $left) < 20) { |
|
355
|
0
|
|
|
|
|
0
|
$self->throw("There isn't enough sequence to design oligos. Please reduce start_pad and end_pad or supply more sequence"); |
|
356
|
|
|
|
|
|
|
} |
|
357
|
|
|
|
|
|
|
# define target region |
|
358
|
3
|
|
|
|
|
25
|
my $targregion = Bio::SeqFeature::Generic->new( -start => $left, |
|
359
|
|
|
|
|
|
|
-end => $right, |
|
360
|
|
|
|
|
|
|
-primary => 'Target' ); |
|
361
|
3
|
|
|
|
|
10
|
$self->target->add_SeqFeature($targregion); |
|
362
|
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
# locate excluded regions |
|
364
|
3
|
|
|
|
|
7
|
my @excluded = grep { $_->primary_tag eq 'Excluded' } $self->target->top_SeqFeatures; |
|
|
17
|
|
|
|
|
20
|
|
|
365
|
|
|
|
|
|
|
|
|
366
|
3
|
50
|
|
|
|
17
|
if ($self->avoid_snps) { |
|
367
|
3
|
|
|
|
|
10
|
my @snps = grep { $_->primary_tag eq 'variation' } $self->target->top_SeqFeatures; |
|
|
17
|
|
|
|
|
20
|
|
|
368
|
3
|
|
|
|
|
6
|
push(@excluded, @snps); |
|
369
|
|
|
|
|
|
|
} |
|
370
|
|
|
|
|
|
|
|
|
371
|
3
|
|
|
|
|
16
|
$self->excluded(\@excluded); |
|
372
|
|
|
|
|
|
|
|
|
373
|
3
|
|
|
|
|
9
|
return $targregion; |
|
374
|
|
|
|
|
|
|
} |
|
375
|
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
sub _get_targetregion { |
|
377
|
9
|
|
|
9
|
|
14
|
my ($self) = @_; |
|
378
|
|
|
|
|
|
|
|
|
379
|
9
|
|
|
|
|
23
|
my ($targregion) = grep { $_->primary_tag eq 'Target' } $self->target->top_SeqFeatures; |
|
|
177
|
|
|
|
|
213
|
|
|
380
|
9
|
|
66
|
|
|
30
|
$targregion ||= $self->_define_target; |
|
381
|
|
|
|
|
|
|
|
|
382
|
9
|
50
|
|
|
|
13
|
$self->throw("Target region for SiRNA design not defined") unless ($targregion); |
|
383
|
|
|
|
|
|
|
|
|
384
|
9
|
|
|
|
|
25
|
my $seq = $targregion->seq->seq; |
|
385
|
|
|
|
|
|
|
# but this way I loose start info |
|
386
|
9
|
|
|
|
|
38
|
my $targstart = $targregion->start; |
|
387
|
|
|
|
|
|
|
|
|
388
|
9
|
|
|
|
|
27
|
return ($seq, $targstart); |
|
389
|
|
|
|
|
|
|
} |
|
390
|
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
# MOVE to SiRNA::Ruleset::tuschl |
|
392
|
|
|
|
|
|
|
# sub _regex { |
|
393
|
|
|
|
|
|
|
# my ($self, $rank) = @_; |
|
394
|
|
|
|
|
|
|
# return $PATTERNS{$rank}; |
|
395
|
|
|
|
|
|
|
# } |
|
396
|
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
# sub _get_oligos { |
|
398
|
|
|
|
|
|
|
# # use regular expressions to pull out oligos |
|
399
|
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
# my ($self, $rank) = @_; |
|
401
|
|
|
|
|
|
|
# my $regex = $self->_regex($rank); |
|
402
|
|
|
|
|
|
|
# my @exclude; |
|
403
|
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
# my ($targregion) = grep { $_->primary_tag eq 'Target' } $self->target->top_SeqFeatures; |
|
406
|
|
|
|
|
|
|
# my $seq = $targregion->seq->seq; |
|
407
|
|
|
|
|
|
|
# # but this way I loose start info |
|
408
|
|
|
|
|
|
|
# my $targstart = $targregion->start; |
|
409
|
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
# # exclude masked region |
|
411
|
|
|
|
|
|
|
# push(@exclude, grep { $_->primary_tag eq 'Excluded' } $self->target->top_SeqFeatures); |
|
412
|
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
# # add SNP checking |
|
414
|
|
|
|
|
|
|
# if ($self->avoid_snps) { |
|
415
|
|
|
|
|
|
|
# my @snps = grep { $_->primary_tag eq 'variation' } $self->target->top_SeqFeatures; |
|
416
|
|
|
|
|
|
|
# push(@exclude, @snps); |
|
417
|
|
|
|
|
|
|
# } |
|
418
|
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
# while ( $seq =~ /$regex/gi ) { |
|
420
|
|
|
|
|
|
|
# my $target = $1; |
|
421
|
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
# # check for too many Gs (or Cs on the other strand) |
|
423
|
|
|
|
|
|
|
# next if ( $target =~ /G{ $self->gstring,}/io ); |
|
424
|
|
|
|
|
|
|
# next if ( $target =~ /C{ $self->gstring,}/io ); |
|
425
|
|
|
|
|
|
|
# # skip Ns (for filtering) |
|
426
|
|
|
|
|
|
|
# next if ( $target =~ /N/i); |
|
427
|
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
# my $start = length($`) + $targstart; |
|
429
|
|
|
|
|
|
|
# my $stop = $start + length($target) -1; |
|
430
|
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
# my @gc = ( $target =~ /G|C/gi); |
|
432
|
|
|
|
|
|
|
# my $fxGC = sprintf("%2.2f", (scalar(@gc) / length($target))); |
|
433
|
|
|
|
|
|
|
# next if ($fxGC < $self->min_gc); |
|
434
|
|
|
|
|
|
|
# next if ($fxGC > $self->max_gc); |
|
435
|
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
# my $sense = Bio::SeqFeature::SiRNA::Oligo->new( -start => $start, |
|
437
|
|
|
|
|
|
|
# -end => $stop, |
|
438
|
|
|
|
|
|
|
# -strand => 1, |
|
439
|
|
|
|
|
|
|
# -seq => _get_sense($target), |
|
440
|
|
|
|
|
|
|
# -source_tag => ref($self), |
|
441
|
|
|
|
|
|
|
# ); |
|
442
|
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
# my $asense = Bio::SeqFeature::SiRNA::Oligo->new( -start => $start, |
|
444
|
|
|
|
|
|
|
# -end => $stop, |
|
445
|
|
|
|
|
|
|
# -strand => -1, |
|
446
|
|
|
|
|
|
|
# -seq => _get_anti($target), |
|
447
|
|
|
|
|
|
|
# -source_tag => ref($self), |
|
448
|
|
|
|
|
|
|
# ); |
|
449
|
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
# my $sirna = Bio::SeqFeature::SiRNA::Pair->new( -rank => $rank, |
|
451
|
|
|
|
|
|
|
# -fxGC => $fxGC, |
|
452
|
|
|
|
|
|
|
# -sense => $sense, |
|
453
|
|
|
|
|
|
|
# -antisense => $asense, |
|
454
|
|
|
|
|
|
|
# -source_tag => ref($self), |
|
455
|
|
|
|
|
|
|
# ); |
|
456
|
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
# unless ($self->_has_overlap($sirna, \@exclude)) { |
|
458
|
|
|
|
|
|
|
# $self->target->add_SeqFeature($sirna); |
|
459
|
|
|
|
|
|
|
# } |
|
460
|
|
|
|
|
|
|
# } |
|
461
|
|
|
|
|
|
|
# } |
|
462
|
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
=head2 add_oligos |
|
464
|
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
Title : add_oligos |
|
466
|
|
|
|
|
|
|
Usage : $sirna_designer->add_oligos($sequence, $start, $rank); |
|
467
|
|
|
|
|
|
|
Purpose : Add SiRNA olgos to target Bio::Seq as Bio::SeqFeature::SiRNA::Pair objects |
|
468
|
|
|
|
|
|
|
Args : Oligo sequence and start position (required), rank/score (optional) |
|
469
|
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
=cut |
|
471
|
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
sub add_oligos { |
|
473
|
312
|
|
|
312
|
1
|
311
|
my ($self, $seq, $start, $rank) = @_; |
|
474
|
|
|
|
|
|
|
|
|
475
|
312
|
50
|
|
|
|
369
|
($seq) or throw ('No sequence supplied for add_oligos'); |
|
476
|
312
|
50
|
|
|
|
344
|
(defined $start) or throw ('No start position specified for add_oligos'); |
|
477
|
|
|
|
|
|
|
|
|
478
|
312
|
|
|
|
|
261
|
my ($end) = $start + length($seq); |
|
479
|
|
|
|
|
|
|
|
|
480
|
312
|
|
|
|
|
483
|
my ($sseq) = $self->_get_sense($seq); |
|
481
|
312
|
|
|
|
|
1187
|
my $sense = Bio::SeqFeature::SiRNA::Oligo->new( -start => $start, |
|
482
|
|
|
|
|
|
|
-end => ($start + length($sseq)), |
|
483
|
|
|
|
|
|
|
-strand => 1, |
|
484
|
|
|
|
|
|
|
-seq => $sseq, |
|
485
|
|
|
|
|
|
|
-source_tag => ref($self), |
|
486
|
|
|
|
|
|
|
); |
|
487
|
|
|
|
|
|
|
|
|
488
|
312
|
|
|
|
|
688
|
my $aseq = $self->_get_anti($seq); |
|
489
|
312
|
|
|
|
|
1145
|
my $asense = Bio::SeqFeature::SiRNA::Oligo->new( -start => $end, |
|
490
|
|
|
|
|
|
|
-end => ($end - length($aseq)), |
|
491
|
|
|
|
|
|
|
-strand => -1, |
|
492
|
|
|
|
|
|
|
-seq => $aseq, |
|
493
|
|
|
|
|
|
|
-source_tag => ref($self), |
|
494
|
|
|
|
|
|
|
); |
|
495
|
|
|
|
|
|
|
|
|
496
|
312
|
|
|
|
|
1535
|
my $sirna = Bio::SeqFeature::SiRNA::Pair->new( -rank => $rank, |
|
497
|
|
|
|
|
|
|
# -fxGC => $fxGC, |
|
498
|
|
|
|
|
|
|
-sense => $sense, |
|
499
|
|
|
|
|
|
|
-antisense => $asense, |
|
500
|
|
|
|
|
|
|
-source_tag => ref($self), |
|
501
|
|
|
|
|
|
|
); |
|
502
|
|
|
|
|
|
|
|
|
503
|
312
|
50
|
|
|
|
1406
|
unless ($self->_has_overlap($sirna, $self->excluded)) { |
|
504
|
312
|
|
|
|
|
449
|
$self->target->add_SeqFeature($sirna); |
|
505
|
|
|
|
|
|
|
} |
|
506
|
|
|
|
|
|
|
} |
|
507
|
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
sub _has_overlap { |
|
509
|
|
|
|
|
|
|
# flag any pairs that overlap an UNDESIRED feature (eg SNP) |
|
510
|
|
|
|
|
|
|
# return true if there is overlap, false if not |
|
511
|
|
|
|
|
|
|
|
|
512
|
312
|
|
|
312
|
|
301
|
my ($self, $test, $flist) = @_; |
|
513
|
312
|
50
|
|
|
|
572
|
print STDERR "Checking oligo at ", $test->start, " to ",$test->end, "\n" |
|
514
|
|
|
|
|
|
|
if ($self->debug); |
|
515
|
|
|
|
|
|
|
|
|
516
|
312
|
|
|
|
|
578
|
foreach my $feat (@$flist) { |
|
517
|
0
|
0
|
0
|
|
|
0
|
if (($test->start <= $feat->end) and ($test->end >= $feat->start)) { |
|
518
|
0
|
0
|
|
|
|
0
|
print STDERR "Overlaps ", $feat->primary_tag, " at ", |
|
519
|
|
|
|
|
|
|
$feat->start, " to ", $feat->end, "\n" if ($self->debug); |
|
520
|
0
|
|
|
|
|
0
|
return 1; |
|
521
|
|
|
|
|
|
|
} |
|
522
|
|
|
|
|
|
|
} |
|
523
|
312
|
|
|
|
|
543
|
return 0; # default - no overlap |
|
524
|
|
|
|
|
|
|
} |
|
525
|
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
# MOVE to SiRNA::Ruleset::tuschl |
|
527
|
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
# sub _get_sense { |
|
529
|
|
|
|
|
|
|
# my ($target) = @_; |
|
530
|
|
|
|
|
|
|
# # trim off 1st 2 nt to get overhang |
|
531
|
|
|
|
|
|
|
# $target =~ s/^..//; |
|
532
|
|
|
|
|
|
|
# # convert T's to U's (transcribe) |
|
533
|
|
|
|
|
|
|
# $target =~ s/T/U/gi; |
|
534
|
|
|
|
|
|
|
# # force last 2 nt to be T's |
|
535
|
|
|
|
|
|
|
# $target =~ s/..$/TT/; |
|
536
|
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
# return $target; |
|
538
|
|
|
|
|
|
|
# } |
|
539
|
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
# sub _get_anti { |
|
541
|
|
|
|
|
|
|
# my ($target) = @_; |
|
542
|
|
|
|
|
|
|
# my @target = split(//, $target); |
|
543
|
|
|
|
|
|
|
# my ($nt,@antitarget); |
|
544
|
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
# while ($nt = pop @target) { |
|
546
|
|
|
|
|
|
|
# push(@antitarget, $COMP{$nt}); |
|
547
|
|
|
|
|
|
|
# } |
|
548
|
|
|
|
|
|
|
# my $anti = join('', @antitarget); |
|
549
|
|
|
|
|
|
|
# # trim off 1st 2 nt to get overhang |
|
550
|
|
|
|
|
|
|
# $anti =~ s/^..//; |
|
551
|
|
|
|
|
|
|
# # convert T's to U's |
|
552
|
|
|
|
|
|
|
# $anti =~ s/T/U/gi; |
|
553
|
|
|
|
|
|
|
# # convert last 2 NT's to T |
|
554
|
|
|
|
|
|
|
# $anti =~ s/..$/TT/; |
|
555
|
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
# return $anti; |
|
557
|
|
|
|
|
|
|
# } |
|
558
|
|
|
|
|
|
|
|
|
559
|
|
|
|
|
|
|
|
|
560
|
|
|
|
|
|
|
sub AUTOLOAD { |
|
561
|
2047
|
|
|
2047
|
|
1684
|
my ($self, $value) = @_; |
|
562
|
2047
|
|
|
|
|
1485
|
my $name = $AUTOLOAD; |
|
563
|
2047
|
|
|
|
|
4528
|
$name =~ s/.+:://; |
|
564
|
|
|
|
|
|
|
|
|
565
|
2047
|
50
|
|
|
|
3133
|
return if ($name eq 'DESTROY'); |
|
566
|
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
|
|
568
|
2047
|
100
|
|
|
|
2313
|
if (defined $value) { |
|
569
|
4
|
|
|
|
|
5
|
$self->{$name} = $value; |
|
570
|
|
|
|
|
|
|
} |
|
571
|
|
|
|
|
|
|
|
|
572
|
2047
|
50
|
|
|
|
2591
|
unless (exists $self->{$name}) { |
|
573
|
0
|
|
|
|
|
0
|
$self->throw("Attribute $name not defined for ". ref($self)); |
|
574
|
|
|
|
|
|
|
} |
|
575
|
|
|
|
|
|
|
|
|
576
|
2047
|
|
|
|
|
5387
|
return $self->{$name}; |
|
577
|
|
|
|
|
|
|
} |
|
578
|
|
|
|
|
|
|
|
|
579
|
|
|
|
|
|
|
sub _comp { |
|
580
|
7176
|
|
|
7176
|
|
4649
|
my ($self, $char) = @_; |
|
581
|
|
|
|
|
|
|
|
|
582
|
7176
|
50
|
|
|
|
7399
|
return unless ($char); |
|
583
|
7176
|
|
|
|
|
4278
|
$char = uc($char); |
|
584
|
7176
|
|
|
|
|
12360
|
return $COMP{ $char }; |
|
585
|
|
|
|
|
|
|
} |
|
586
|
|
|
|
|
|
|
1; |