line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
=head1 NAME |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
Bio::Polloc::Rule::crispr - A rule of type CRISPR |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
=head1 DESCRIPTION |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
Runs CRISPRFinder v3 to search CRISPRs |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
=head1 AUTHOR - Luis M. Rodriguez-R |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
Email lmrodriguezr at gmail dot com |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
=cut |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
package Bio::Polloc::Rule::crispr; |
16
|
1
|
|
|
1
|
|
912
|
use base qw(Bio::Polloc::RuleI); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
728
|
|
17
|
1
|
|
|
1
|
|
6
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
26
|
|
18
|
1
|
|
|
1
|
|
5
|
use Bio::Polloc::Polloc::IO; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
18
|
|
19
|
1
|
|
|
1
|
|
590
|
use Bio::Polloc::LocusI; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
31
|
|
20
|
1
|
|
|
1
|
|
1350
|
use Bio::SeqIO; |
|
1
|
|
|
|
|
69433
|
|
|
1
|
|
|
|
|
57
|
|
21
|
|
|
|
|
|
|
# For CRISPRFinder: |
22
|
1
|
|
|
1
|
|
10
|
use Cwd; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
2181
|
|
23
|
|
|
|
|
|
|
our $VERSION = 1.0503; # [a-version] from Bio::Polloc::Polloc::Version |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
=head1 APPENDIX |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
Methods provided by the package |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=head2 new |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
=over |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
=item |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
Generic initialization method. |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
=back |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
=cut |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
sub new { |
43
|
0
|
|
|
0
|
1
|
0
|
my($caller,@args) = @_; |
44
|
0
|
|
|
|
|
0
|
my $self = $caller->SUPER::new(@args); |
45
|
0
|
|
|
|
|
0
|
$self->_initialize(@args); |
46
|
0
|
|
|
|
|
0
|
return $self; |
47
|
|
|
|
|
|
|
} |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
=head2 execute |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
=over |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
=item |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
Runs CRISPRfinder and parses the output. |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
=item Arguments |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=over |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
=item -seq I<Bio::Seq or Bio::SeqIO obj> |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
The sequence(s). |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
=back |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=item Returns |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
An array reference populated with L<Bio::Polloc::Locus::crispr> objects |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
=back |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=cut |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
sub execute { |
76
|
0
|
|
|
0
|
1
|
0
|
my($self,@args) = @_; |
77
|
0
|
|
|
|
|
0
|
my($seq) = $self->_rearrange([qw(SEQ)], @args); |
78
|
|
|
|
|
|
|
|
79
|
0
|
0
|
|
|
|
0
|
$self->throw("You must provide a sequence to evaluate the rule", $seq) unless $seq; |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
# For Bio::SeqIO objects |
82
|
0
|
0
|
|
|
|
0
|
if($seq->isa('Bio::SeqIO')){ |
83
|
0
|
|
|
|
|
0
|
my @feats = (); |
84
|
0
|
|
|
|
|
0
|
while(my $s = $seq->next_seq){ |
85
|
0
|
|
|
|
|
0
|
push(@feats, @{$self->execute(-seq=>$s)}) |
|
0
|
|
|
|
|
0
|
|
86
|
|
|
|
|
|
|
} |
87
|
0
|
0
|
|
|
|
0
|
return wantarray ? @feats : \@feats; |
88
|
|
|
|
|
|
|
} |
89
|
|
|
|
|
|
|
|
90
|
0
|
0
|
|
|
|
0
|
$self->throw("Illegal class of sequence '".ref($seq)."'", $seq) unless $seq->isa('Bio::Seq'); |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
# Create the IO master |
93
|
0
|
|
|
|
|
0
|
my $io = Bio::Polloc::Polloc::IO->new(); |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
# Search for CRISPRFinder |
96
|
0
|
|
|
|
|
0
|
$self->source('CRISPRFinder'); |
97
|
0
|
|
|
|
|
0
|
my $cf_script; |
98
|
0
|
0
|
|
|
|
0
|
if(defined $self->ruleset){ |
99
|
0
|
|
|
|
|
0
|
$cf_script = $self->_executable($self->ruleset->value("path")); |
100
|
0
|
|
0
|
|
|
0
|
$cf_script||= $self->_executable($self->ruleset->value("root")); |
101
|
0
|
|
0
|
|
|
0
|
$cf_script||= $self->_executable($self->ruleset->value("crisprfinder")); |
102
|
|
|
|
|
|
|
}else{ |
103
|
0
|
|
|
|
|
0
|
$cf_script = $self->_executable; |
104
|
|
|
|
|
|
|
} |
105
|
0
|
0
|
|
|
|
0
|
$cf_script or $self->throw("Could not find the CRISPRFinder executable"); |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
# Write the sequence |
108
|
0
|
|
|
|
|
0
|
my($seq_fh, $seq_file) = $io->tempfile(-suffix=>'.fasta'); # required by CRISPRFinder |
109
|
0
|
|
|
|
|
0
|
close $seq_fh; |
110
|
0
|
|
|
|
|
0
|
my $seqO = Bio::SeqIO->new(-file=>">$seq_file", -format=>'Fasta'); |
111
|
0
|
|
|
|
|
0
|
$seqO->write_seq($seq); |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
# A tmp output directory |
114
|
0
|
|
|
|
|
0
|
my $out_dir = $io->tempdir(); |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
# Run it |
117
|
0
|
|
|
|
|
0
|
$self->debug("Sequence file: $seq_file (".(-s $seq_file).")"); |
118
|
0
|
|
|
|
|
0
|
my $cwd = cwd(); |
119
|
0
|
|
|
|
|
0
|
my @run = ($cf_script, $seq_file, "result"); |
120
|
0
|
|
|
|
|
0
|
push @run, "2>&1"; |
121
|
0
|
|
|
|
|
0
|
push @run, "|"; |
122
|
0
|
0
|
|
|
|
0
|
chdir $out_dir or $self->throw("I can not move myself to the temporal directory: $!", $out_dir); |
123
|
0
|
|
|
|
|
0
|
$self->debug("Hello from ".cwd()); |
124
|
0
|
|
|
|
|
0
|
$self->debug("Running: ".join(" ",@run)); |
125
|
0
|
|
|
|
|
0
|
my $run = Bio::Polloc::Polloc::IO->new(-file=>join(" ",@run)); |
126
|
0
|
|
|
|
|
0
|
my $gff_output; |
127
|
0
|
|
|
|
|
0
|
while(my $line = $run->_readline){ |
128
|
0
|
|
|
|
|
0
|
chomp $line; |
129
|
0
|
0
|
|
|
|
0
|
if($line =~ m/^GFF results are be in (.*)/){ $gff_output = $1 } |
|
0
|
0
|
|
|
|
0
|
|
130
|
0
|
|
|
|
|
0
|
elsif($line =~ m/^failure: (.*)/){ $self->throw("CRISPRFinder error", $1) } |
131
|
|
|
|
|
|
|
} |
132
|
0
|
|
|
|
|
0
|
$run->close(); |
133
|
0
|
0
|
|
|
|
0
|
unless (-e $gff_output){ |
134
|
0
|
|
|
|
|
0
|
$self->warn("Unexistent CRISPRFinder GFF output, probably something went wrong"); |
135
|
0
|
|
|
|
|
0
|
return []; |
136
|
|
|
|
|
|
|
} |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
# Unfortunatelly, CRISPRFinder's GFF contains unsupported fields in the last column, |
139
|
|
|
|
|
|
|
# therefore I should not directly import it using Polloc::LocusIO |
140
|
0
|
|
|
|
|
0
|
$self->debug("Reading GFF at $gff_output"); |
141
|
0
|
|
|
|
|
0
|
my $gff = Bio::Polloc::Polloc::IO->new(-file=>$gff_output); |
142
|
0
|
|
|
|
|
0
|
my $loci = {}; |
143
|
0
|
|
|
|
|
0
|
while(my $line = $gff->_readline){ |
144
|
0
|
|
|
|
|
0
|
chomp $line; |
145
|
0
|
0
|
|
|
|
0
|
next if $line =~ /^#/; |
146
|
0
|
0
|
|
|
|
0
|
next if $line =~ /^\s*$/; |
147
|
0
|
|
|
|
|
0
|
my @f = split /\t/, $line; |
148
|
0
|
0
|
0
|
|
|
0
|
next if $f[2] eq 'PossibleCRISPR' and $self->_search_value("IGNOREPROBABLE"); |
149
|
0
|
|
|
|
|
0
|
my %par = map { split /=/, $_, 2 } split /;/, $f[8]; |
|
0
|
|
|
|
|
0
|
|
150
|
0
|
0
|
0
|
|
|
0
|
if($f[2] =~ /^(?:Possible)?CRISPR$/){ |
|
|
0
|
|
|
|
|
|
151
|
0
|
|
|
|
|
0
|
my $id = $self->_next_child_id; |
152
|
0
|
0
|
|
|
|
0
|
$loci->{$par{ID}} = { |
|
|
0
|
|
|
|
|
|
153
|
|
|
|
|
|
|
-type=>$self->type, -rule=>$self, -seq=>$seq, |
154
|
|
|
|
|
|
|
-from=>$f[3]+0, -to=>$f[4]+0, -strand=>'.', |
155
|
|
|
|
|
|
|
-name=>$self->name, |
156
|
|
|
|
|
|
|
-id=>(defined $id ? $id : ''), |
157
|
|
|
|
|
|
|
-score=>($f[2] eq 'CRISPR' ? 100 : 50), |
158
|
|
|
|
|
|
|
-dr=>$par{DR}, -spacers_no=>$par{Number_of_spacers}, |
159
|
|
|
|
|
|
|
-spacers=>[], |
160
|
|
|
|
|
|
|
}; |
161
|
|
|
|
|
|
|
}elsif(defined $loci->{$par{Parent}} and $f[2] eq 'CRISPRspacer'){ |
162
|
0
|
|
|
|
|
0
|
push @{$loci->{$par{Parent}}->{-spacers}}, {-from=>$f[3]+0, -to=>$f[4]+0, -raw_seq=>$par{sequence}}; |
|
0
|
|
|
|
|
0
|
|
163
|
|
|
|
|
|
|
} |
164
|
|
|
|
|
|
|
} |
165
|
0
|
|
|
|
|
0
|
$gff->close(); |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
# Clean the mess |
168
|
0
|
|
|
|
|
0
|
$self->rrmdir('result'); |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
# Back to reality |
171
|
0
|
0
|
|
|
|
0
|
chdir $cwd or $self->throw("I can not come back to the previous folder: $!", $cwd); |
172
|
0
|
|
|
|
|
0
|
$self->debug("Hello from ".cwd()); |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
# Create loci |
175
|
|
|
|
|
|
|
# This is not directly done above because of the different CWD, which could cause problems |
176
|
|
|
|
|
|
|
# while dynamically loading Bio::Polloc::Locus::crispr from Bio::Polloc::LocusI |
177
|
0
|
|
|
|
|
0
|
my $out = []; |
178
|
0
|
|
|
|
|
0
|
for my $locus (values %$loci){ |
179
|
0
|
|
|
|
|
0
|
$self->debug("Creating locus"); |
180
|
0
|
|
|
|
|
0
|
my $L = new Bio::Polloc::LocusI(%$locus); |
181
|
0
|
|
|
|
|
0
|
for my $s (@{$locus->{-spacers}}){ |
|
0
|
|
|
|
|
0
|
|
182
|
0
|
|
|
|
|
0
|
$L->add_spacer(%$s); |
183
|
|
|
|
|
|
|
} |
184
|
0
|
|
|
|
|
0
|
push @$out, $L; |
185
|
|
|
|
|
|
|
} |
186
|
|
|
|
|
|
|
|
187
|
0
|
|
|
|
|
0
|
return $out; |
188
|
|
|
|
|
|
|
} |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
=head2 stringify_value |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
=over |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
=item |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
Stringifies the requested value |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
=back |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
=cut |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
sub stringify_value { |
203
|
0
|
|
|
0
|
1
|
0
|
my ($self,@args) = @_; |
204
|
0
|
|
|
|
|
0
|
my $out = ""; |
205
|
0
|
|
|
|
|
0
|
for my $k (keys %{$self->value}){ |
|
0
|
|
|
|
|
0
|
|
206
|
0
|
0
|
|
|
|
0
|
$out.= "$k=>".(defined $self->value->{$k} ? $self->value->{$k} : "")." "; |
207
|
|
|
|
|
|
|
} |
208
|
0
|
|
|
|
|
0
|
return $out; |
209
|
|
|
|
|
|
|
} |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
=head2 value |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
=over |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
=item Arguments |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
Value (I<str> or I<hashref> or I<arrayref>). The supported keys are: |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
=over |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
=item -ignoreprobable |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
Should I ignore the I<ProbableCrispr> results? |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
=back |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
=item Returns |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
Value (I<hashref> or C<undef>). |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
=back |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
=head1 INTERNAL METHODS |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
Methods intended to be used only within the scope of Bio::Polloc::* |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
=head2 _parameters |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
=cut |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
sub _parameters { |
242
|
0
|
|
|
0
|
|
0
|
return [qw(IGNOREPROBABLE)]; |
243
|
|
|
|
|
|
|
} |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
=head2 _executable |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
Attempts to find the CRISPRfinder script. |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
=cut |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
sub _executable { |
252
|
1
|
|
|
1
|
|
53870
|
my($self, $path) = @_; |
253
|
1
|
|
|
|
|
4
|
my $exe; |
254
|
|
|
|
|
|
|
my $bin; |
255
|
1
|
|
|
|
|
3
|
my $name = 'CRISPRFinder'; |
256
|
1
|
|
|
|
|
3
|
my $io = "Bio::Polloc::Polloc::IO"; |
257
|
1
|
|
|
|
|
19
|
$self->debug("Searching the $name binary for $^O"); |
258
|
1
|
|
|
|
|
3
|
my @pre = (''); |
259
|
1
|
50
|
|
|
|
4
|
unshift @pre, $path if $path; |
260
|
1
|
|
|
|
|
3
|
for my $p (@pre){ |
261
|
|
|
|
|
|
|
# Try first WITH version, to avoid v2 |
262
|
1
|
|
|
|
|
4
|
for my $v (("-v3.1", "-v3", "-v3-LK", '')){ |
263
|
4
|
|
|
|
|
8
|
for my $e (('.pl', '')){ |
264
|
8
|
|
|
|
|
14
|
for my $n (("CRISPRFinder", "CRISPRfinder", "crisprfinder")){ |
265
|
24
|
|
|
|
|
111
|
$exe = $io->exists_exe($p . $n . $v . $e); |
266
|
24
|
50
|
|
|
|
78
|
return $exe if $exe; |
267
|
|
|
|
|
|
|
} |
268
|
|
|
|
|
|
|
} |
269
|
|
|
|
|
|
|
} |
270
|
|
|
|
|
|
|
} |
271
|
|
|
|
|
|
|
} |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
=head2 _qualify_value |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
Implements the C<_qualify_value()> from the L<Bio::Polloc::RuleI> interface |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
=head2 Return |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
Value (ref-to-hash or undef) |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
=cut |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
sub _qualify_value { |
284
|
0
|
|
|
0
|
|
|
my($self,$value) = @_; |
285
|
0
|
0
|
|
|
|
|
unless (defined $value){ |
286
|
0
|
|
|
|
|
|
$self->warn("Empty value"); |
287
|
0
|
|
|
|
|
|
return; |
288
|
|
|
|
|
|
|
} |
289
|
0
|
0
|
|
|
|
|
if(ref($value) =~ m/hash/i){ |
290
|
0
|
|
|
|
|
|
my @arr = %{$value}; |
|
0
|
|
|
|
|
|
|
291
|
0
|
|
|
|
|
|
$value = \@arr; |
292
|
|
|
|
|
|
|
} |
293
|
0
|
0
|
|
|
|
|
my @args = ref($value) =~ /array/i ? @{$value} : split/\s+/, $value; |
|
0
|
|
|
|
|
|
|
294
|
0
|
|
|
|
|
|
my $out = {}; |
295
|
|
|
|
|
|
|
|
296
|
0
|
0
|
|
|
|
|
return $out unless defined $args[0]; |
297
|
0
|
0
|
|
|
|
|
if($args[0] !~ /^-/){ |
298
|
0
|
|
|
|
|
|
$self->warn("Expecting parameters in the format -parameter value", @args); |
299
|
0
|
|
|
|
|
|
return; |
300
|
|
|
|
|
|
|
} |
301
|
0
|
0
|
|
|
|
|
unless($#args%2){ |
302
|
0
|
|
|
|
|
|
$self->warn("Unexpected (odd) number of parameters", @args); |
303
|
0
|
|
|
|
|
|
return; |
304
|
|
|
|
|
|
|
} |
305
|
|
|
|
|
|
|
|
306
|
0
|
|
|
|
|
|
my %vals = @args; |
307
|
0
|
|
|
|
|
|
for my $k ( @{$self->_parameters} ){ |
|
0
|
|
|
|
|
|
|
308
|
0
|
|
|
|
|
|
my $p = $self->_rearrange([$k], @args); |
309
|
0
|
0
|
|
|
|
|
next unless defined $p; |
310
|
0
|
0
|
|
|
|
|
if( $p !~ /^([\d\.eE+-]+|t(rue)?|f(alse)?)$/i ){ |
311
|
0
|
|
|
|
|
|
$self->warn("Unexpected value for ".$k, $p); |
312
|
0
|
|
|
|
|
|
return; |
313
|
|
|
|
|
|
|
} |
314
|
0
|
0
|
|
|
|
|
$out->{"-".lc $k} = $p=~m/^f(alse)$/i ? 0 : $p; # This is because the str 'false' evaluates as true ;-) |
315
|
|
|
|
|
|
|
} |
316
|
0
|
|
|
|
|
|
return $out; |
317
|
|
|
|
|
|
|
} |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
=head2 _initialize |
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
=cut |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
sub _initialize { |
324
|
0
|
|
|
0
|
|
|
my($self,@args) = @_; |
325
|
0
|
|
|
|
|
|
$self->type('CRISPR'); |
326
|
|
|
|
|
|
|
} |
327
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
1; |