line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::DOOP::Util::Run::Fuzznuc; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
6
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
33
|
|
4
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
30
|
|
5
|
1
|
|
|
1
|
|
5
|
use Carp qw(cluck carp verbose); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
2004
|
|
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 NAME |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
Bio::DOOP::Util::Run::Fuzznuc - Fuzznuc module |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
=head1 VERSION |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
Version 0.7 |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=cut |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
our $VERSION = '0.7'; |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=head1 SYNOPSIS |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
#!/usr/bin/perl -w |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
use Bio::DOOP::DOOP; |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
$db = Bio::DOOP::DBSQL->connect("user","pass","doop-plant-1_5","localhost"); |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
@list = ("81001020","81001110","81001200","81001225","81001230","81001290","81001470","81001580","81001610","81001620","81001680"); |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
$fuzznuc = Bio::DOOP::Util::Run::Fuzznuc->new($db,'500','M',\@list,"/data/DOOP/dummy.txt"); |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
print $fuzznuc->get_tmp_file_name,"\n"; |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
$error = $fuzznuc->run('TTGGGC' , 1 , 0); |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
if ($error == -1){ |
36
|
|
|
|
|
|
|
die "No results or error!\n"; |
37
|
|
|
|
|
|
|
} |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
@res = @{$fuzznuc->get_results}; |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
for $result (@res){ |
42
|
|
|
|
|
|
|
print $$result[0]->get_id,"| ",$$result[1]," ",$$result[2]," ",$$result[3]," ",$$result[4],"\n"; |
43
|
|
|
|
|
|
|
} |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
=head1 DESCRIPTION |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
This module is a wrapper for the EMBOSS (http://emboss.sourceforge.net) program fuzznuc. You can search |
48
|
|
|
|
|
|
|
for patterns in the promoter sequences. |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
=head1 AUTHORS |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
Tibor Nagy, Godollo, Hungary and Endre Sebestyen, Martonvasar, Hungary |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
=head1 METHODS |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
=head2 new |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
Create new Fuzznuc object. |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
Arguments: |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
1. Bio::DOOP::DBSQL object |
63
|
|
|
|
|
|
|
2. promoter type (500, 1000, 3000) |
64
|
|
|
|
|
|
|
3. subset type (depends on reference species) |
65
|
|
|
|
|
|
|
4. arrayref of clusters |
66
|
|
|
|
|
|
|
5. temporary file name (default: /tmp/fuzznuc_run.txt, will contain fasta sequences) |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
$fuzznuc = Bio::DOOP::Util::Run::Fuzznuc->new($db,500,'M',\@list,'/tmp/tmpfile'); |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
=cut |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
# Constructor: collects the sequences of the requested clusters and writes
# them, FASTA-style, into a temporary file that fuzznuc will later search.
#
# Arguments (after the invocant):
#   1. database object, passed through to Bio::DOOP::Cluster->new
#   2. promoter type, passed through to Bio::DOOP::Cluster->new
#   3. subset type, passed to get_subset_by_type
#   4. arrayref of cluster ids
#   5. temporary file name (default: /tmp/fuzznuc_run.txt)
#
# Returns the blessed object. If the temporary file cannot be written the
# problem is reported with cluck and the object is still returned, which is
# the same best-effort behaviour new_by_file has.
sub new {
    my $invocant        = shift;
    my $db              = shift;
    my $promo_type      = shift;
    my $subset_type     = shift;
    my $cluster_id_list = shift;
    my $tmp_filename    = shift;

    # Two-argument bless so that subclasses get objects of their own class.
    my $class = ref($invocant) || $invocant || __PACKAGE__;
    my $self  = {};

    # TODO use File::Temp module
    if (!$tmp_filename) { $tmp_filename = "/tmp/fuzznuc_run.txt" }

    # Lexical handle + three-argument open: the file name can never be
    # misread as an open mode, and the handle cannot clash with a global TMP.
    if (open(my $tmp_fh, '>', $tmp_filename)) {
        for my $cl_id (@{$cluster_id_list}) {
            my $cl = Bio::DOOP::Cluster->new($db, $cl_id, $promo_type);
            next if $cl == -1;      # -1 is the cluster constructor's error value

            my $subset = $cl->get_subset_by_type($subset_type);
            next if $subset == -1;  # no subset of the requested type

            for my $seq (@{ $subset->get_all_seqs }) {
                print {$tmp_fh} ">", $seq->get_id, "\n";
                print {$tmp_fh} $seq->get_raw_seq, "\n\n";
            }
        }
        # Buffered write errors only surface at close time.
        close($tmp_fh) or cluck("Can't write to the temporary file!\n");
    }
    else {
        cluck("Can't write to the temporary file!\n");
    }

    $self->{DB}       = $db;
    $self->{CLLIST}   = $cluster_id_list;
    $self->{TMP_FILE} = $tmp_filename;

    bless $self, $class;
    return($self);
}
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
=head2 new_by_file |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
Create new fuzznuc object from query file, containing cluster ids. |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
Arguments: |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
1. Bio::DOOP::DBSQL object |
111
|
|
|
|
|
|
|
2. promoter type (500, 1000, 3000) |
112
|
|
|
|
|
|
|
3. subset type (depends on reference species) |
113
|
|
|
|
|
|
|
4. file containing cluster ids |
114
|
|
|
|
|
|
|
5. temporary file name (default: /tmp/fuzznuc_run.txt, will contain fasta sequences) |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
$fuzznuc = Bio::DOOP::Util::Run::Fuzznuc->new_by_file($db,500,'M','/tmp/clusters.txt','/tmp/tmpfile'); |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=cut |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
# Constructor: like new(), but the cluster ids are read from a file, one id
# per line.
#
# Arguments (after the invocant):
#   1. database object, passed through to Bio::DOOP::Cluster->new
#   2. promoter type, passed through to Bio::DOOP::Cluster->new
#   3. subset type, passed to get_subset_by_type
#   4. name of the file containing the cluster ids
#   5. temporary file name (default: /tmp/fuzznuc_run.txt)
#
# Returns the blessed object. Unreadable input or unwritable output is
# reported with cluck and construction continues (best effort), so CLLIST
# may be empty and the temporary file may be missing.
sub new_by_file {
    my $invocant     = shift;
    my $db           = shift;
    my $promo_type   = shift;
    my $subset_type  = shift;
    my $filename     = shift;
    my $tmp_filename = shift;

    # Two-argument bless so that subclasses get objects of their own class.
    my $class = ref($invocant) || $invocant || __PACKAGE__;
    my $self  = {};
    my @cluster_id_list;

    # TODO use File::Temp module
    if (!$tmp_filename) { $tmp_filename = "/tmp/fuzznuc_run.txt" }

    # Phase 1: read the ids. The loop reads explicitly from the handle; a
    # bare while() would never terminate and never see any input.
    if (defined $filename && open(my $id_fh, '<', $filename)) {
        while (my $cl_id = <$id_fh>) {
            chomp $cl_id;
            next if $cl_id eq '';   # ignore blank lines
            push @cluster_id_list, $cl_id;
        }
        close $id_fh;
    }
    else {
        cluck("No such file or directory!\n");
    }

    # Phase 2: export the sequences of every readable cluster.
    if (open(my $tmp_fh, '>', $tmp_filename)) {
        for my $cl_id (@cluster_id_list) {
            my $cl = Bio::DOOP::Cluster->new($db, $cl_id, $promo_type);
            next if $cl == -1;      # same guard as new(): unknown cluster

            my $subset = $cl->get_subset_by_type($subset_type);
            next if $subset == -1;  # no subset of the requested type

            for my $seq (@{ $subset->get_all_seqs }) {
                print {$tmp_fh} ">", $seq->get_id, "\n";
                print {$tmp_fh} $seq->get_raw_seq, "\n\n";
            }
        }
        # Buffered write errors only surface at close time.
        close($tmp_fh) or cluck("Can't write to the temporary file!\n");
    }
    else {
        cluck("Can't write to the temporary file!\n");
    }

    $self->{DB}       = $db;
    $self->{CLLIST}   = \@cluster_id_list;
    $self->{TMP_FILE} = $tmp_filename;

    bless $self, $class;
    return($self);
}
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
=head2 new_by_tmp |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
Create new fuzznuc object from existing temporary file, |
162
|
|
|
|
|
|
|
containing query sequences in fasta format. |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
Arguments: |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
1. Bio::DOOP::DBSQL object |
167
|
|
|
|
|
|
|
2. file containing fasta sequences |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
$fuzznuc = Bio::DOOP::Util::Run::Fuzznuc->new_by_tmp($db,'/tmp/sequences.fasta'); |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
=cut |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
# Constructor: wraps an already existing file of query sequences; nothing is
# read from or written to the file here.
#
# Arguments (after the invocant):
#   1. database object (stored as DB)
#   2. name of the file containing the sequences (stored as TMP_FILE)
#
# Returns the blessed object.
sub new_by_tmp {
    my ($invocant, $db, $tmp_filename) = @_;

    # Two-argument bless so that subclasses get objects of their own class.
    my $class = ref($invocant) || $invocant || __PACKAGE__;

    my $self = {
        DB       => $db,
        TMP_FILE => $tmp_filename,
    };

    bless $self, $class;
    return($self);
}
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
=head2 get_tmp_file_name |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
Get the temporary file name. |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
$tempname = $fuzznuc->get_tmp_file_name; |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
=cut |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
# Accessor: returns the file name stored in TMP_FILE by the constructors.
sub get_tmp_file_name {
    my ($fuzznuc) = @_;
    my $name = $fuzznuc->{TMP_FILE};
    return($name);
}
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
=head2 get_emboss_version |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
Get the installed emboss version. |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
$version = $fuzznuc->get_emboss_version; |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
=cut |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
# Accessor: returns whatever is stored under EMBOSSVER.
# NOTE(review): none of the constructors visible in this file assign
# EMBOSSVER, so this presumably returns undef unless it is set elsewhere -
# confirm.
sub get_emboss_version {
    my ($fuzznuc) = @_;
    my $version = $fuzznuc->{EMBOSSVER};
    return($version);
}
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
=head2 run |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
Runs fuzznuc, returns 0 on success, otherwise -1. |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
Arguments : |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
1. query pattern |
219
|
|
|
|
|
|
|
2. mismatch number |
220
|
|
|
|
|
|
|
3. complement (0 or 1) |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
$fuzznuc_error = $fuzznuc->run('AACCAGGTT','1','1'); |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
=cut |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
# Runs fuzznuc on the object's temporary file and stores the parsed hits.
#
# Arguments:
#   1. query pattern
#   2. mismatch number
#   3. complement (0 or 1)
#
# Returns 0 on success, or -1 when fuzznuc could not be started or printed
# nothing. On success RESULT holds one string per hit:
# "seq_id start end mismatch hitseq strand".
sub run {
    my ($self, $pattern, $mismatch, $complement) = @_;

    my $file = $self->{TMP_FILE};

    # List-form pipe open: the arguments reach fuzznuc verbatim, no shell is
    # involved, so quotes or metacharacters in the pattern or the file name
    # cannot break or hijack the command line.
    my @command = (
        'fuzznuc', $file,
        "-pattern=$pattern",
        '-sformat=fasta',
        "-pmismatch=$mismatch",
        "-complement=$complement",
        '-stdout',
        '-auto',
    );
    open(my $fuzznuc_fh, '-|', @command) or return(-1);
    my @result = <$fuzznuc_fh>;
    # The exit status is deliberately not inspected: as before, only an
    # empty output counts as failure.
    close $fuzznuc_fh;

    if ($#result == -1) { return(-1) } #No results or an error happened.

    my $seq_id;
    my @parsed;
    for my $line (@result) {
        # Header line of a report section: remember which sequence the
        # following hit lines belong to.
        if ($line =~ / Sequence: (\S+)/) {
            $seq_id = $1;
        }
        # Hit line: start, end, a word column that is not used here,
        # mismatch column, hit sequence.
        if ($line =~ /\s+(\d+)\s+(\d+)\s+(\w+)\s+([0123456789.]+)\s+(\w+)/) {
            my ($start, $end, $mism, $hitseq) = ($1, $2, $4, $5);
            $mism =~ s/\./0/;                       # "." stands for zero mismatches
            my $strand = $start < $end ? 1 : -1;    # start > end means reverse strand
            push @parsed, "$seq_id $start $end $mism $hitseq $strand";
        }
    }

    $self->{RESULT} = \@parsed;
    return(0);
}
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
=head2 run_background |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
Runs fuzznuc in background, returns the process id. |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
Arguments : |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
1. query pattern |
271
|
|
|
|
|
|
|
2. mismatch number |
272
|
|
|
|
|
|
|
3. complement (0 or 1) |
273
|
|
|
|
|
|
|
4. output filename |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
$fuzznuc_pid = $fuzznuc->run_background('AACCAGGTT','1','1','/tmp/fuzznuc_result.txt'); |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
=cut |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
# Starts fuzznuc as a background process writing its report to a file.
#
# Arguments:
#   1. query pattern
#   2. mismatch number
#   3. complement (0 or 1)
#   4. output filename
#
# Returns the process id of the fuzznuc child, or undef (after a cluck) when
# the fork failed. The caller is responsible for reaping the child.
sub run_background {
    my ($self, $pattern, $mismatch, $complement, $outfile) = @_;

    my $file = $self->{TMP_FILE};

    # Loaded before forking so the child does not have to load a module.
    require POSIX;

    my $pid = fork;
    if (!defined $pid) {
        # fork failed: there is no child, and nothing must be run in the
        # parent on its behalf.
        cluck("Can't fork to run fuzznuc: $!\n");
        return($pid);
    }

    if ($pid == 0) {
        # Child. The former backtick call swallowed fuzznuc's standard
        # output, so keep it away from the parent's output stream.
        open(STDOUT, '>', '/dev/null')
            or cluck("Can't redirect fuzznuc output: $!\n");

        # List-form exec: no shell, arguments are passed verbatim.
        my @command = (
            'fuzznuc', $file,
            "-pattern=$pattern",
            "-pmismatch=$mismatch",
            '-sformat=fasta',
            "-complement=$complement",
            "-outfile=$outfile",
        );
        exec { $command[0] } @command
            or cluck("Can't exec fuzznuc: $!\n");

        # Only reached when exec failed. Leave immediately, without running
        # END blocks or destructors inherited from the parent, and never
        # fall through into the caller's code as a second process.
        POSIX::_exit(127);
    }

    return($pid);
}
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
=head2 get_raw_results |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
Returns an arrayref of arrays with the raw fuzznuc results, without Bio::DOOP objects. |
298
|
|
|
|
|
|
|
This is much faster as it does not use the database. |
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
The results contain the following: |
301
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
1. sequence ID |
303
|
|
|
|
|
|
|
2. hit start |
304
|
|
|
|
|
|
|
3. hit end |
305
|
|
|
|
|
|
|
4. mismatch number |
306
|
|
|
|
|
|
|
5. hit sequence |
307
|
|
|
|
|
|
|
6. hit strand |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
@result = @{$fuzznuc->get_raw_results}; |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
=cut |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
# Returns an arrayref with one six-element arrayref per entry of RESULT:
# sequence id, hit start, hit end, mismatch number, hit sequence, hit strand.
# Each RESULT entry is split on whitespace; no database access happens here.
sub get_raw_results {
    my ($self) = @_;

    my $stored = $self->{RESULT};
    my @hits;

    for my $record (@{$stored}) {
        # Limit 7 = six wanted fields plus a discarded remainder.
        my @field = split(/\s+/, $record, 7);
        push @hits, [ @field[0 .. 5] ];
    }

    return(\@hits);
}
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
=head2 get_results |
335
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
Returns an arrayref of arrays with sequence objects and other information of the results. |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
The results contain the following: |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
1. Bio::DOOP::Sequence object |
341
|
|
|
|
|
|
|
2. hit start |
342
|
|
|
|
|
|
|
3. hit end |
343
|
|
|
|
|
|
|
4. mismatch number |
344
|
|
|
|
|
|
|
5. hit sequence |
345
|
|
|
|
|
|
|
6. hit strand |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
@result = @{$fuzznuc->get_results}; |
348
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
=cut |
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
# Returns an arrayref with one six-element arrayref per entry of RESULT:
# sequence object, hit start, hit end, mismatch number, hit sequence,
# hit strand. The sequence object is built by
# Bio::DOOP::Sequence->new_from_dbid from the stored DB and the first field.
sub get_results {
    my ($self) = @_;

    my $stored = $self->{RESULT};
    my @hits;

    for my $record (@{$stored}) {
        # Limit 7 = six wanted fields plus a discarded remainder.
        my @field = split(/\s+/, $record, 7);
        my $seq = Bio::DOOP::Sequence->new_from_dbid($self->{DB}, $field[0]);
        push @hits, [ $seq, @field[1 .. 5] ];
    }

    return(\@hits);
}
372
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
=head2 get_results_from_file |
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
Returns an arrayref of arrays with sequence objects and other information of the results |
376
|
|
|
|
|
|
|
from a results file. With this method you can fetch the results of different fuzznuc objects. |
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
The results contain the following: |
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
1. Bio::DOOP::Sequence object |
381
|
|
|
|
|
|
|
2. hit start |
382
|
|
|
|
|
|
|
3. hit end |
383
|
|
|
|
|
|
|
4. mismatch number |
384
|
|
|
|
|
|
|
5. hit sequence |
385
|
|
|
|
|
|
|
6. hit strand |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
@result = @{$fuzznuc->get_results_from_file('/tmp/fuzznuc_result.txt')}; |
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
=cut |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
# Parses a fuzznuc report file and returns the hits as an arrayref of
# six-element arrayrefs: sequence object, hit start, hit end, mismatch
# number, hit sequence, hit strand.
#
# Arguments:
#   1. name of the fuzznuc result file
#
# Returns -1 when the file cannot be opened. The parsed list is also stored
# in RESULT.
# NOTE(review): the RESULT entries stored here are arrayrefs, whereas
# get_results and get_raw_results split each RESULT entry as a string -
# confirm that calling those after this method is not expected to work.
sub get_results_from_file {
    my ($self, $filename) = @_;

    return(-1) if !defined $filename;

    my $seq_id;
    my @parsed;

    # Lexical handle + three-argument open: the file name can never be
    # misread as an open mode.
    open(my $result_fh, '<', $filename) or return(-1);

    # Read explicitly from the handle; a bare while() would never terminate
    # and never see any input.
    while (my $line = <$result_fh>) {
        chomp $line;

        # Header line of a report section: remember which sequence the
        # following hit lines belong to.
        if ($line =~ / Sequence: (\S+)/) {
            $seq_id = $1;
        }
        # Hit line: start, end, a word column that is not used here,
        # mismatch column, hit sequence.
        if ($line =~ /\s+(\d+)\s+(\d+)\s+(\w+)\s+([0123456789.]+)\s+(\w+)/) {
            my ($start, $end, $mism, $hitseq) = ($1, $2, $4, $5);
            $mism =~ s/\./0/;                       # "." stands for zero mismatches
            my $strand = $start < $end ? 1 : -1;    # start > end means reverse strand
            my $seq = Bio::DOOP::Sequence->new($self->{DB}, $seq_id);
            push @parsed, [$seq, $start, $end, $mism, $hitseq, $strand];
        }
    }
    close $result_fh;

    $self->{RESULT} = \@parsed;
    return(\@parsed);
}
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
1; |