line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# BioPerl module for Bio::Tools::SiRNA |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Please direct questions and support issues to <bioperl-l@bioperl.org> |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Cared for by Donald Jackson, donald.jackson@bms.com |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# Copyright Bristol-Myers Squibb |
9
|
|
|
|
|
|
|
# |
10
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
SiRNA - Perl object for designing small inhibitory RNAs. |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
use Bio::Tools::SiRNA; |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
my $sirna_designer = Bio::Tools::SiRNA->new( -target => $bio_seq, |
23
|
|
|
|
|
|
|
-rules => 'saigo' |
24
|
|
|
|
|
|
|
); |
25
|
|
|
|
|
|
|
my @pairs = $sirna_designer->design; |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
foreach $pair (@pairs) { |
28
|
|
|
|
|
|
|
my $sense_oligo_sequence = $pair->sense->seq; |
29
|
|
|
|
|
|
|
my $antisense_oligo_sequence = $pair->antisense->seq; |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# print out results |
32
|
|
|
|
|
|
|
print join ("\t", $pair->start, $pair->end, $pair->rank, |
33
|
|
|
|
|
|
|
$sense_oligo_sequence, $antisense_oligo_sequence), "\n"; |
34
|
|
|
|
|
|
|
} |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
=head1 DESCRIPTION |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
Package for designing siRNA reagents. |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
Input is a L<Bio::SeqI>-compliant object (the target). |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
Output is a list of Bio::SeqFeature::SiRNA::Pair objects, which are |
43
|
|
|
|
|
|
|
added to the feature table of the target sequence. Each |
44
|
|
|
|
|
|
|
Bio::SeqFeature::SiRNA::Pair contains two subfeatures |
45
|
|
|
|
|
|
|
(Bio::SeqFeature::Oligo objects) which correspond to the individual |
46
|
|
|
|
|
|
|
oligos. These objects provide accessors for the information on the |
47
|
|
|
|
|
|
|
individual reagent pairs. |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
This version of Bio::Tools::SiRNA represents a major change in architecture. |
50
|
|
|
|
|
|
|
Specific 'rulesets' for siRNA selection as developed by various groups are |
51
|
|
|
|
|
|
|
implemented as Bio::Tools::SiRNA::Ruleset objects, which inherit from |
52
|
|
|
|
|
|
|
Bio::Tools::SiRNA. This will make it easier to add new rule sets or modify |
53
|
|
|
|
|
|
|
existing approaches. Currently the Tuschl and Ui-Tei (2004) rules are |
54
|
|
|
|
|
|
|
implemented. For consistency, the Tuschl rules are implemented by default. |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
In addition, this module provides three 'extra' rules which can be added |
57
|
|
|
|
|
|
|
above and beyond any ruleset. |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=over 3 |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
=item 1. |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
SiRNAs that overlap known SNPs (identified as SeqFeatures with |
64
|
|
|
|
|
|
|
primary tag = variation) can be avoided. |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=item 2. |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
Other regions (with primary tag = 'Excluded') can also be skipped. I |
69
|
|
|
|
|
|
|
use this with Bio::Tools::Run::Mdust to avoid low-complexity regions |
70
|
|
|
|
|
|
|
(must be run separately), but other programs could also be used. |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
=item 3. |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
SiRNAs may also be selected in the 3 prime UTR of a gene by setting |
75
|
|
|
|
|
|
|
$sirna_designer-E<gt>include_3pr() to true. |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=back |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=head2 EXPORT |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
None. |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
=head1 SEE ALSO |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
L<Bio::Tools::Run::Mdust>, L<Bio::SeqFeature::SiRNA::Pair>, |
86
|
|
|
|
|
|
|
L<Bio::SeqFeature::SiRNA::Oligo>. |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
=head1 FEEDBACK |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
=head2 Mailing Lists |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
93
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to |
94
|
|
|
|
|
|
|
the Bioperl mailing list. Your participation is much appreciated. |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
97
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
=head2 Support |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
I<bioperl-l@bioperl.org> |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
106
|
|
|
|
|
|
|
responsive experts will be able to look at the problem and quickly |
107
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
108
|
|
|
|
|
|
|
with code and data examples if at all possible. |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=head2 Reporting Bugs |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
113
|
|
|
|
|
|
|
of the bugs and their resolution. Bug reports can be submitted via |
114
|
|
|
|
|
|
|
the web: |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=head1 AUTHOR |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
Donald Jackson (donald.jackson@bms.com) |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=head1 APPENDIX |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
The rest of the documentation details each of the object methods. |
125
|
|
|
|
|
|
|
Internal methods are usually preceded with a _ |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
=cut |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
package Bio::Tools::SiRNA; |
130
|
|
|
|
|
|
|
|
131
|
1
|
|
|
1
|
|
401
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
22
|
|
132
|
1
|
|
|
1
|
|
3
|
use warnings; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
22
|
|
133
|
|
|
|
|
|
|
|
134
|
1
|
|
|
1
|
|
3
|
use vars qw($AUTOLOAD); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
32
|
|
135
|
|
|
|
|
|
|
|
136
|
1
|
|
|
1
|
|
265
|
use Bio::Seq::RichSeq; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
29
|
|
137
|
1
|
|
|
1
|
|
331
|
use Bio::SeqFeature::Generic; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
24
|
|
138
|
1
|
|
|
1
|
|
285
|
use Bio::SeqFeature::SiRNA::Oligo; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
25
|
|
139
|
1
|
|
|
1
|
|
298
|
use Bio::SeqFeature::SiRNA::Pair; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
29
|
|
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
|
142
|
1
|
|
|
1
|
|
4
|
use base qw(Bio::Root::Root); |
|
1
|
|
|
|
|
0
|
|
|
1
|
|
|
|
|
1096
|
|
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
# Complement lookup for single nucleotides (upper-case keys only);
# consulted by _comp(), which upper-cases its argument first.
our %COMP = ( A => 'T',
              T => 'A',
              C => 'G',
              G => 'C',
              N => 'N',
            );

# Names of the constructor arguments that new() extracts with _rearrange().
# new() reads $args{'INCLUDE_3PR'} and $args{'MAX_GC'}, so both names must be
# listed here or the corresponding -include_3pr / -max_gc arguments are
# silently dropped.  They are appended at the end so that the positions of
# the pre-existing names do not change.
our @ARGNAMES = qw(RULES START_PAD END_PAD MIN_GC CUTOFF OLIGOS AVOID_SNPS
                   GSTRING TMPDIR TARGET DEBUG INCLUDE_3PR MAX_GC);
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
=head2 new |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
Title : new |
159
|
|
|
|
|
|
|
Usage : my $sirna_designer = Bio::Tools::SiRNA->new(); |
160
|
|
|
|
|
|
|
Function : Constructor for designer object |
161
|
|
|
|
|
|
|
Returns : Bio::Tools::SiRNA object |
162
|
|
|
|
|
|
|
Args : target - the target sequence for the SiRNAs as a Bio::Seq::RichSeq |
163
|
|
|
|
|
|
|
start_pad - distance from the CDS start to skip (default 75) |
164
|
|
|
|
|
|
|
end_pad - distance from the CDS end to skip (default 50) |
165
|
|
|
|
|
|
|
include_3pr - set to true to include SiRNAs in the 3prime UTR (default false) |
166
|
|
|
|
|
|
|
rules - rules for selecting siRNAs, currently supporting saigo and tuschl |
167
|
|
|
|
|
|
|
min_gc - minimum GC fraction (NOT percent) (default 0.4) |
168
|
|
|
|
|
|
|
max_gc - maximum GC fraction (NOT percent) (default 0.6) |
169
|
|
|
|
|
|
|
cutoff - worst 'rank' accepted(default 3) |
170
|
|
|
|
|
|
|
avoid_snps - boolean - reject oligos that overlap a variation |
171
|
|
|
|
|
|
|
SeqFeature in the target (default true) |
172
|
|
|
|
|
|
|
gstring - maximum allowed consecutive Gs. |
173
|
|
|
|
|
|
|
Too many can cause problems in synthesis (default 4) |
174
|
|
|
|
|
|
|
Note : All arguments can also be changed/accessed using autoloaded |
175
|
|
|
|
|
|
|
methods such as: |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
my $start_pad = $sirna_designer->start_pad(). |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
=cut |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
sub new {
    # Constructor.  Accepts the named arguments listed in @ARGNAMES (see the
    # POD above) and returns the new designer object.  When -rules is given
    # the object is handed to rules() before the remaining attributes are
    # filled in; _load_ruleset() re-blesses it into the ruleset class.
    my ($proto, @args) = @_;
    my $pkg = ref($proto) || $proto;

    my $self = bless({}, $pkg);

    # Only names present in @ARGNAMES are populated in %args; any other key
    # read below is simply undef and falls back to its default.
    my %args;
    @args{@ARGNAMES} = $self->_rearrange(\@ARGNAMES, @args);

    if ($args{'RULES'}) {
        $self->rules($args{'RULES'});
    }

    # Numeric settings and their defaults.  A supplied value is kept whenever
    # it is defined and non-empty, so an explicit 0 (e.g. -start_pad => 0) is
    # honored instead of being replaced by the default.
    my %default_for = (
        START_PAD => 75,      # nt from start to mask
        END_PAD   => 50,      # nt from end to mask
        MIN_GC    => 0.40,
        MAX_GC    => 0.60,
        CUTOFF    => 3,       # highest (worst) rank wanted
        GSTRING   => 4,       # maximum allowed consecutive Gs - too many can
                              # cause problems in oligo synthesis
    );
    foreach my $argname (keys %default_for) {
        my $supplied = $args{$argname};
        $self->{ lc($argname) } = (defined($supplied) and length($supplied))
                                ? $supplied
                                : $default_for{$argname};
    }

    $self->{'include_3pr'} = $args{'INCLUDE_3PR'} || 0; # look for oligos in 3prime UTR
    $self->{'oligos'}      = [];

    # t/f to avoid or include reagents that cover SNPs; defaults to true
    $self->{'avoid_snps'}  = defined($args{'AVOID_SNPS'}) ? $args{'AVOID_SNPS'} : 1;

    $self->{'tmpdir'}      = $args{'TMPDIR'} || $ENV{'TMPDIR'} || $ENV{'TMP'} || '';
    $self->{'debug'}       = $args{'DEBUG'} || 0;

    # target() validates the sequence object and throws if it is unsuitable
    $self->target($args{'TARGET'}) if ($args{'TARGET'});

    return $self;
}
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
=head2 target |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
Title : target |
218
|
|
|
|
|
|
|
Usage : my $target_seq = $sirna_designer->target(); # get the current target |
219
|
|
|
|
|
|
|
OR |
220
|
|
|
|
|
|
|
$sirna_designer->target($new_target_seq); # set a new target |
221
|
|
|
|
|
|
|
Function : Set/get the target as a Bio::SeqI-compliant object |
222
|
|
|
|
|
|
|
Returns : the current target (a Bio::SeqI-compliant object) when called without an argument; 1 after a new target has been set |
223
|
|
|
|
|
|
|
Args : a Bio::SeqI-compliant object (optional) |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
=cut |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
sub target {
    # Combined getter/setter for the target sequence.
    #   target()     - returns the stored target, throws if none is stored
    #   target($seq) - validates $seq, stores it and returns 1
    my ($self, $candidate) = @_;

    # Getter path: nothing (or a false value) was passed in.
    unless ($candidate) {
        return $self->{'target'} if ($self->{'target'});
        return $self->throw("Target sequence not defined");
    }

    # Setter path: the candidate must be a Bio::SeqI ...
    if (!$candidate->isa('Bio::SeqI')) {
        $self->throw( -class => 'Bio::Root::BadParameter',
                      -text  => "Target must be passed as a Bio::Seq object" );
    }

    # ... and must hold DNA.  Objects with a molecule() method are checked
    # against the accepted molecule names, all others by alphabet().
    if ($candidate->can('molecule')) {
        my $is_supported = grep { uc($candidate->molecule) eq $_ } qw(DNA MRNA CDNA);
        unless ($is_supported) {
            $self->throw( -class => 'Bio::Root::BadParameter',
                          -text  => "Sequences of type ". $candidate->molecule. " are not supported"
                        );
        }
    }
    elsif ($candidate->alphabet ne 'dna') {
        $self->throw( -class => 'Bio::Root::BadParameter',
                      -text  => "Sequences of alphabet ". $candidate->alphabet. " are not supported"
                    );
    }

    $self->{'target'} = $candidate;
    return 1;
}
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
=head2 rules |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
Title : rules |
263
|
|
|
|
|
|
|
Usage : $sirna->rules('ruleset') |
264
|
|
|
|
|
|
|
Purpose : set/get ruleset to use for selecting SiRNA oligo pairs. |
265
|
|
|
|
|
|
|
Returns : the package name of the loaded ruleset module |
266
|
|
|
|
|
|
|
Args : a ruleset name (currently supported: Tuschl, Saigo) |
267
|
|
|
|
|
|
|
or a Bio::Tools::SiRNA::RulesetI compliant object |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
=cut |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
sub rules {
    # Set/get the active ruleset.  A true argument is passed to
    # _load_ruleset(); if no ruleset has been recorded afterwards, 'tuschl'
    # is loaded.  Returns the value stored under $self->{_rules}.
    my ($self, $requested) = @_;

    $self->_load_ruleset($requested) if ($requested);

    # default: use tuschl rules
    $self->_load_ruleset('tuschl') if (!$self->{_rules});

    return $self->{_rules};
}
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
sub _load_ruleset {
    # Load the module <base class>::Ruleset::<lower-cased name>, record its
    # package name in $self->{_rules} and re-bless $self into it.
    #   Args    : ruleset name (a plain word such as 'tuschl' or 'saigo')
    #   Returns : 1 on success
    #   Throws  : if the name is not a plain word or the module cannot be
    #             loaded
    my ($self, $ruleset) = @_;

    # The name becomes part of a module path, so accept word characters only.
    unless (defined($ruleset) and !ref($ruleset) and $ruleset =~ /\A\w+\z/) {
        $self->throw("Invalid ruleset name '"
                     . (defined($ruleset) ? $ruleset : 'undef')
                     . "': expected a plain name such as 'tuschl' or 'saigo'");
        return;
    }

    # A previous call re-blessed $self into <base>::Ruleset::<name>; strip
    # that suffix so a second ruleset is looked up under the original base
    # class instead of under the currently loaded ruleset.
    (my $base_class = ref($self)) =~ s/::Ruleset::\w+\z//;
    my $rule_module = join('::', $base_class, 'Ruleset', lc($ruleset));

    # require by file name inside a block eval - no string eval needed
    (my $module_file = "$rule_module.pm") =~ s{::}{/}g;
    unless (eval { require $module_file; 1 }) {
        #warn join("\n", '@INC contains:', @INC, undef);
        $self->throw("Unable to load $rule_module: $@");
        return;
    }

    $self->{_rules} = $rule_module;
    bless($self, $rule_module); # recast as subclass

    return 1;
}
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
=head2 design |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
Title : design |
308
|
|
|
|
|
|
|
Usage : my @pairs = $sirna_designer->design(); |
309
|
|
|
|
|
|
|
Purpose : Design SiRNA oligo pairs. |
310
|
|
|
|
|
|
|
Returns : A list of SiRNA pairs as Bio::SeqFeature::SiRNA::Pair objects |
311
|
|
|
|
|
|
|
Args : none |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
=cut |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
sub design {
    # Run the oligo search for the active ruleset and return every top-level
    # feature of the target that is a Bio::SeqFeature::SiRNA::Pair.
    my ($self) = @_;

    unless ($self->rules) {
        $self->throw('Unable to design siRNAs: no rule set specified');
    }

    # _get_oligos is not defined in this part of the file; it is called in
    # list context and its return value is not used further here.
    my @found = $self->_get_oligos();

    my @pairs;
    foreach my $feature ($self->target->top_SeqFeatures) {
        push(@pairs, $feature) if ($feature->isa('Bio::SeqFeature::SiRNA::Pair'));
    }
    return @pairs;
}
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
sub _define_target {
    # Work out the region of the target in which oligos may be placed, add
    # it to the target as a 'Target' feature, collect the features oligos
    # must not overlap, and return the new 'Target' feature.
    my ($self) = @_;

    my $target = $self->target
        or $self->throw("Unable to design oligos - no target provided");

    # First top-level CDS feature, when the target has a feature table.
    my $cds;
    if ($target->can('top_SeqFeatures')) {
        ($cds) = grep { $_->primary_tag eq 'CDS' } $target->top_SeqFeatures;
    }

    # The left edge is measured from the CDS start (or 0 without a CDS);
    # the right edge from the CDS end, or from the end of the sequence when
    # there is no CDS or include_3pr is set.
    my $region_start = ($cds ? $cds->start : 0) + $self->start_pad;
    my $right_anchor = ($cds and !$self->include_3pr) ? $cds->end : $target->length;
    my $region_end   = $right_anchor - $self->end_pad;

    # is there anything left?
    if (($region_end - $region_start) < 20) {
        $self->throw("There isn't enough sequence to design oligos.  Please reduce start_pad and end_pad or supply more sequence");
    }

    # define target region
    my $targregion = Bio::SeqFeature::Generic->new( -start   => $region_start,
                                                    -end     => $region_end,
                                                    -primary => 'Target' );
    $self->target->add_SeqFeature($targregion);

    # locate excluded regions; variation features join them when SNPs are
    # to be avoided
    my @excluded = grep { $_->primary_tag eq 'Excluded' } $self->target->top_SeqFeatures;
    if ($self->avoid_snps) {
        push(@excluded, grep { $_->primary_tag eq 'variation' } $self->target->top_SeqFeatures);
    }

    $self->excluded(\@excluded);

    return $targregion;
}
375
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
sub _get_targetregion {
    # Return (sequence string, start coordinate) of the target's 'Target'
    # feature, creating that feature with _define_target() when the target
    # does not carry one yet.
    my ($self) = @_;

    # first top-level feature tagged 'Target', if any
    my $region;
    foreach my $feature ($self->target->top_SeqFeatures) {
        next unless ($feature->primary_tag eq 'Target');
        $region = $feature;
        last;
    }
    $region = $self->_define_target unless ($region);

    $region or $self->throw("Target region for SiRNA design not defined");

    my $region_seq = $region->seq->seq;
    # the plain string carries no coordinates, so hand back the start as well
    my $region_start = $region->start;

    return ($region_seq, $region_start);
}
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
# MOVE to SiRNA::Ruleset::tuschl |
392
|
|
|
|
|
|
|
# sub _regex { |
393
|
|
|
|
|
|
|
# my ($self, $rank) = @_; |
394
|
|
|
|
|
|
|
# return $PATTERNS{$rank}; |
395
|
|
|
|
|
|
|
# } |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
# sub _get_oligos { |
398
|
|
|
|
|
|
|
# # use regular expressions to pull out oligos |
399
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
# my ($self, $rank) = @_; |
401
|
|
|
|
|
|
|
# my $regex = $self->_regex($rank); |
402
|
|
|
|
|
|
|
# my @exclude; |
403
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
# my ($targregion) = grep { $_->primary_tag eq 'Target' } $self->target->top_SeqFeatures; |
406
|
|
|
|
|
|
|
# my $seq = $targregion->seq->seq; |
407
|
|
|
|
|
|
|
# # but this way I loose start info |
408
|
|
|
|
|
|
|
# my $targstart = $targregion->start; |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
# # exclude masked region |
411
|
|
|
|
|
|
|
# push(@exclude, grep { $_->primary_tag eq 'Excluded' } $self->target->top_SeqFeatures); |
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
# # add SNP checking |
414
|
|
|
|
|
|
|
# if ($self->avoid_snps) { |
415
|
|
|
|
|
|
|
# my @snps = grep { $_->primary_tag eq 'variation' } $self->target->top_SeqFeatures; |
416
|
|
|
|
|
|
|
# push(@exclude, @snps); |
417
|
|
|
|
|
|
|
# } |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
# while ( $seq =~ /$regex/gi ) { |
420
|
|
|
|
|
|
|
# my $target = $1; |
421
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
# # check for too many Gs (or Cs on the other strand) |
423
|
|
|
|
|
|
|
# next if ( $target =~ /G{ $self->gstring,}/io ); |
424
|
|
|
|
|
|
|
# next if ( $target =~ /C{ $self->gstring,}/io ); |
425
|
|
|
|
|
|
|
# # skip Ns (for filtering) |
426
|
|
|
|
|
|
|
# next if ( $target =~ /N/i); |
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
# my $start = length($`) + $targstart; |
429
|
|
|
|
|
|
|
# my $stop = $start + length($target) -1; |
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
# my @gc = ( $target =~ /G|C/gi); |
432
|
|
|
|
|
|
|
# my $fxGC = sprintf("%2.2f", (scalar(@gc) / length($target))); |
433
|
|
|
|
|
|
|
# next if ($fxGC < $self->min_gc); |
434
|
|
|
|
|
|
|
# next if ($fxGC > $self->max_gc); |
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
# my $sense = Bio::SeqFeature::SiRNA::Oligo->new( -start => $start, |
437
|
|
|
|
|
|
|
# -end => $stop, |
438
|
|
|
|
|
|
|
# -strand => 1, |
439
|
|
|
|
|
|
|
# -seq => _get_sense($target), |
440
|
|
|
|
|
|
|
# -source_tag => ref($self), |
441
|
|
|
|
|
|
|
# ); |
442
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
# my $asense = Bio::SeqFeature::SiRNA::Oligo->new( -start => $start, |
444
|
|
|
|
|
|
|
# -end => $stop, |
445
|
|
|
|
|
|
|
# -strand => -1, |
446
|
|
|
|
|
|
|
# -seq => _get_anti($target), |
447
|
|
|
|
|
|
|
# -source_tag => ref($self), |
448
|
|
|
|
|
|
|
# ); |
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
# my $sirna = Bio::SeqFeature::SiRNA::Pair->new( -rank => $rank, |
451
|
|
|
|
|
|
|
# -fxGC => $fxGC, |
452
|
|
|
|
|
|
|
# -sense => $sense, |
453
|
|
|
|
|
|
|
# -antisense => $asense, |
454
|
|
|
|
|
|
|
# -source_tag => ref($self), |
455
|
|
|
|
|
|
|
# ); |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
# unless ($self->_has_overlap($sirna, \@exclude)) { |
458
|
|
|
|
|
|
|
# $self->target->add_SeqFeature($sirna); |
459
|
|
|
|
|
|
|
# } |
460
|
|
|
|
|
|
|
# } |
461
|
|
|
|
|
|
|
# } |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
=head2 add_oligos |
464
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
Title : add_oligos |
466
|
|
|
|
|
|
|
Usage : $sirna_designer->add_oligos($sequence, $start, $rank); |
467
|
|
|
|
|
|
|
Purpose : Add SiRNA oligos to target Bio::Seq as Bio::SeqFeature::SiRNA::Pair objects |
468
|
|
|
|
|
|
|
Args : Oligo sequence and start position (required), rank/score (optional) |
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
=cut |
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
sub add_oligos {
    # Build the sense and antisense oligo features for one candidate site,
    # wrap them in a Bio::SeqFeature::SiRNA::Pair and add the pair to the
    # target unless it overlaps one of the excluded features.
    #   Args   : $seq   - target-site sequence (required)
    #            $start - start position of the site (required)
    #            $rank  - rank/score for the pair (optional)
    #   Throws : if $seq is false or $start is undefined
    my ($self, $seq, $start, $rank) = @_;

    # throw must be invoked as a method: a bare throw(...) is not defined in
    # this package and would be dispatched to AUTOLOAD with the message
    # string in place of the object.
    ($seq) or $self->throw('No sequence supplied for add_oligos');
    (defined $start) or $self->throw('No start position specified for add_oligos');

    # NOTE(review): end is computed as start + length, not
    # start + length - 1 -- confirm the intended coordinate convention.
    my ($end) = $start + length($seq);

    my ($sseq) = $self->_get_sense($seq);
    my $sense = Bio::SeqFeature::SiRNA::Oligo->new( -start      => $start,
                                                    -end        => ($start + length($sseq)),
                                                    -strand     => 1,
                                                    -seq        => $sseq,
                                                    -source_tag => ref($self),
                                                  );

    # the antisense oligo is described from the far end of the site backwards
    my $aseq = $self->_get_anti($seq);
    my $asense = Bio::SeqFeature::SiRNA::Oligo->new( -start      => $end,
                                                     -end        => ($end - length($aseq)),
                                                     -strand     => -1,
                                                     -seq        => $aseq,
                                                     -source_tag => ref($self),
                                                   );

    my $sirna = Bio::SeqFeature::SiRNA::Pair->new( -rank       => $rank,
#                                                  -fxGC       => $fxGC,
                                                   -sense      => $sense,
                                                   -antisense  => $asense,
                                                   -source_tag => ref($self),
                                                 );

    unless ($self->_has_overlap($sirna, $self->excluded)) {
        $self->target->add_SeqFeature($sirna);
    }
}
507
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
sub _has_overlap {
    # Check one candidate feature against a list of unwanted features
    # (e.g. SNPs).  Returns 1 at the first feature whose start..end range
    # intersects the candidate's, 0 when none does.
    #   Args : $test  - feature to check
    #          $flist - array reference of features to avoid
    my ($self, $test, $flist) = @_;

    if ($self->debug) {
        print STDERR "Checking oligo at ", $test->start, " to ",$test->end, "\n";
    }

    for my $unwanted (@{$flist}) {
        # no intersection if the candidate begins after the feature ends ...
        next if ($test->start > $unwanted->end);
        # ... or finishes before the feature begins
        next if ($test->end < $unwanted->start);

        if ($self->debug) {
            print STDERR "Overlaps ", $unwanted->primary_tag, " at ",
                $unwanted->start, " to ", $unwanted->end, "\n";
        }
        return 1;
    }

    return 0; # default - no overlap
}
525
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
# MOVE to SiRNA::Ruleset::tuschl |
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
# sub _get_sense { |
529
|
|
|
|
|
|
|
# my ($target) = @_; |
530
|
|
|
|
|
|
|
# # trim off 1st 2 nt to get overhang |
531
|
|
|
|
|
|
|
# $target =~ s/^..//; |
532
|
|
|
|
|
|
|
# # convert T's to U's (transcribe) |
533
|
|
|
|
|
|
|
# $target =~ s/T/U/gi; |
534
|
|
|
|
|
|
|
# # force last 2 nt to be T's |
535
|
|
|
|
|
|
|
# $target =~ s/..$/TT/; |
536
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
# return $target; |
538
|
|
|
|
|
|
|
# } |
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
# sub _get_anti { |
541
|
|
|
|
|
|
|
# my ($target) = @_; |
542
|
|
|
|
|
|
|
# my @target = split(//, $target); |
543
|
|
|
|
|
|
|
# my ($nt,@antitarget); |
544
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
# while ($nt = pop @target) { |
546
|
|
|
|
|
|
|
# push(@antitarget, $COMP{$nt}); |
547
|
|
|
|
|
|
|
# } |
548
|
|
|
|
|
|
|
# my $anti = join('', @antitarget); |
549
|
|
|
|
|
|
|
# # trim off 1st 2 nt to get overhang |
550
|
|
|
|
|
|
|
# $anti =~ s/^..//; |
551
|
|
|
|
|
|
|
# # convert T's to U's |
552
|
|
|
|
|
|
|
# $anti =~ s/T/U/gi; |
553
|
|
|
|
|
|
|
# # convert last 2 NT's to T |
554
|
|
|
|
|
|
|
# $anti =~ s/..$/TT/; |
555
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
# return $anti; |
557
|
|
|
|
|
|
|
# } |
558
|
|
|
|
|
|
|
|
559
|
|
|
|
|
|
|
|
560
|
|
|
|
|
|
|
sub AUTOLOAD {
    # Generic attribute accessor: a call to an otherwise undefined method
    # NAME reads $self->{NAME}, first storing the argument there when a
    # defined one is given.  Throws if the attribute does not exist.
    my ($self, $value) = @_;

    # strip the package qualifier from the requested method name
    (my $name = $AUTOLOAD) =~ s/.+:://;

    # object destruction also arrives here; nothing to do for it
    return if ($name eq 'DESTROY');

    $self->{$name} = $value if (defined $value);

    exists $self->{$name}
        or $self->throw("Attribute $name not defined for ". ref($self));

    return $self->{$name};
}
578
|
|
|
|
|
|
|
|
579
|
|
|
|
|
|
|
sub _comp {
    # Look up the complement of a single character in %COMP, ignoring case.
    # Returns nothing for a false argument and undef for characters that
    # have no entry in %COMP.
    my ($self, $char) = @_;

    return $char ? $COMP{ uc($char) } : ();
}
586
|
|
|
|
|
|
|
1; |