line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# |
3
|
|
|
|
|
|
|
# BioPerl module for Bio::Index::Abstract |
4
|
|
|
|
|
|
|
# |
5
|
|
|
|
|
|
|
# Please direct questions and support issues to |
6
|
|
|
|
|
|
|
# |
7
|
|
|
|
|
|
|
# Cared for by Ewan Birney |
8
|
|
|
|
|
|
|
# and James Gilbert |
9
|
|
|
|
|
|
|
# |
10
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Bio::Index::Abstract - Abstract interface for indexing a flat file |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
You should not be using this module directly |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=head1 USING DB_FILE |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
To use DB_File and not SDBM for this index, pass the value: |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
-dbm_package => 'DB_File' |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
to new (see below). |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=head1 DESCRIPTION |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
This object provides the basic mechanism to associate positions |
33
|
|
|
|
|
|
|
in files with names. The position and filenames are stored in DBM |
34
|
|
|
|
|
|
|
which can then be accessed later on. It is the equivalent of flat |
35
|
|
|
|
|
|
|
file indexing (eg, SRS or efetch). |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
This object is the guts to the mechanism, which will be used by the |
38
|
|
|
|
|
|
|
specific objects inheriting from it. |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
=head1 FEEDBACK |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
=head2 Mailing Lists |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
45
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to one |
46
|
|
|
|
|
|
|
of the Bioperl mailing lists. Your participation is much appreciated. |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
49
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
=head2 Support |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
I |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
58
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
59
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
60
|
|
|
|
|
|
|
with code and data examples if at all possible. |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
=head2 Reporting Bugs |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
65
|
|
|
|
|
|
|
the bugs and their resolution. Bug reports can be submitted via the |
66
|
|
|
|
|
|
|
web: |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
=head1 AUTHOR - Ewan Birney, James Gilbert |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
Email - birney@sanger.ac.uk, jgrg@sanger.ac.uk |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
=head1 APPENDIX |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
The rest of the documentation details each of the object methods. Internal |
77
|
|
|
|
|
|
|
methods are usually preceded with an "_" (underscore). |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=cut |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
# Let the code begin... |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
package Bio::Index::Abstract; |
85
|
|
|
|
|
|
|
|
86
|
2
|
|
|
2
|
|
14
|
use strict; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
54
|
|
87
|
2
|
|
|
2
|
|
8
|
use Fcntl qw( O_RDWR O_CREAT O_RDONLY ); |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
101
|
|
88
|
2
|
|
|
|
|
117
|
use vars qw( $TYPE_AND_VERSION_KEY |
89
|
2
|
|
|
2
|
|
10
|
$USE_DBM_TYPE $DB_HASH ); |
|
2
|
|
|
|
|
4
|
|
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
|
92
|
2
|
|
|
2
|
|
16
|
use Bio::Root::IO; |
|
2
|
|
|
|
|
9
|
|
|
2
|
|
|
|
|
42
|
|
93
|
2
|
|
|
2
|
|
8
|
use Symbol; |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
96
|
|
94
|
|
|
|
|
|
|
|
95
|
2
|
|
|
2
|
|
9
|
use base qw(Bio::Root::Root); |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
154
|
|
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
# Generate accessor methods for simple object fields |
98
|
|
|
|
|
|
|
BEGIN { |
99
|
2
|
|
|
2
|
|
6
|
foreach my $func (qw(filename write_flag)) { |
100
|
2
|
|
|
2
|
|
13
|
no strict 'refs'; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
202
|
|
101
|
4
|
|
|
|
|
9
|
my $field = "_$func"; |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
*$func = sub { |
104
|
20
|
|
|
20
|
|
28
|
my( $self, $value ) = @_; |
105
|
|
|
|
|
|
|
|
106
|
20
|
100
|
|
|
|
34
|
if (defined $value) { |
107
|
8
|
|
|
|
|
16
|
$self->{$field} = $value; |
108
|
|
|
|
|
|
|
} |
109
|
20
|
|
|
|
|
83
|
return $self->{$field}; |
110
|
|
|
|
|
|
|
} |
111
|
4
|
|
|
|
|
3783
|
} |
112
|
|
|
|
|
|
|
} |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=head2 new |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
Usage : $index = Bio::Index::Abstract->new( |
117
|
|
|
|
|
|
|
-filename => $dbm_file, |
118
|
|
|
|
|
|
|
-write_flag => 0, |
119
|
|
|
|
|
|
|
-dbm_package => 'DB_File', |
120
|
|
|
|
|
|
|
-verbose => 0); |
121
|
|
|
|
|
|
|
Function: Returns a new index object. If filename is |
122
|
|
|
|
|
|
|
specified, then open_dbm() is immediately called. |
123
|
|
|
|
|
|
|
Bio::Index::Abstract->new() will usually be called |
124
|
|
|
|
|
|
|
directly only when opening an existing index. |
125
|
|
|
|
|
|
|
Returns : A new index object |
126
|
|
|
|
|
|
|
Args : -filename The name of the dbm index file. |
127
|
|
|
|
|
|
|
-write_flag TRUE if write access to the dbm file is |
128
|
|
|
|
|
|
|
needed. |
129
|
|
|
|
|
|
|
-dbm_package The Perl dbm module to use for the |
130
|
|
|
|
|
|
|
index. |
131
|
|
|
|
|
|
|
-verbose Print debugging output to STDERR if |
132
|
|
|
|
|
|
|
TRUE. |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=cut |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
sub new { |
137
|
4
|
|
|
4
|
1
|
11
|
my($class, @args) = @_; |
138
|
4
|
|
|
|
|
26
|
my $self = $class->SUPER::new(@args); |
139
|
4
|
|
|
|
|
29
|
my( $filename, $write_flag, $dbm_package, $cachesize, $ffactor, $pathtype ) = |
140
|
|
|
|
|
|
|
$self->_rearrange([qw(FILENAME |
141
|
|
|
|
|
|
|
WRITE_FLAG |
142
|
|
|
|
|
|
|
DBM_PACKAGE |
143
|
|
|
|
|
|
|
CACHESIZE |
144
|
|
|
|
|
|
|
FFACTOR |
145
|
|
|
|
|
|
|
PATHTYPE |
146
|
|
|
|
|
|
|
)], @args); |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
# Store any parameters passed |
149
|
4
|
50
|
|
|
|
30
|
$self->filename($filename) if $filename; |
150
|
4
|
50
|
|
|
|
11
|
$self->cachesize($cachesize) if $cachesize; |
151
|
4
|
50
|
|
|
|
12
|
$self->ffactor($ffactor) if $ffactor; |
152
|
4
|
50
|
|
|
|
22
|
$self->write_flag($write_flag) if $write_flag; |
153
|
4
|
50
|
|
|
|
10
|
$self->dbm_package($dbm_package) if $dbm_package; |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
#If user doesn't give a path, we default it to absolute |
156
|
4
|
50
|
|
|
|
24
|
$pathtype ? $self->pathtype($pathtype) : $self->pathtype('absolute'); |
157
|
|
|
|
|
|
|
|
158
|
4
|
|
|
|
|
8
|
$self->{'_filehandle'} = []; # Array in which to cache SeqIO objects |
159
|
4
|
|
|
|
|
9
|
$self->{'_DB'} = {}; # Gets tied to the DBM file |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
# Open database |
162
|
4
|
50
|
|
|
|
21
|
$self->open_dbm() if $filename; |
163
|
4
|
|
|
|
|
14
|
return $self; |
164
|
|
|
|
|
|
|
} |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
=pod |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
=head2 filename |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
Title : filename |
171
|
|
|
|
|
|
|
Usage : $value = $self->filename(); |
172
|
|
|
|
|
|
|
$self->filename($value); |
173
|
|
|
|
|
|
|
Function: Gets or sets the name of the dbm index file. |
174
|
|
|
|
|
|
|
Returns : The current value of filename |
175
|
|
|
|
|
|
|
Args : Value of filename if setting, or none if |
176
|
|
|
|
|
|
|
getting the value. |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
=head2 write_flag |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
Title : write_flag |
181
|
|
|
|
|
|
|
Usage : $value = $self->write_flag(); |
182
|
|
|
|
|
|
|
$self->write_flag($value); |
183
|
|
|
|
|
|
|
Function: Gets or sets the value of write_flag, which |
184
|
|
|
|
|
|
|
is whether the dbm file should be opened with |
185
|
|
|
|
|
|
|
write access. |
186
|
|
|
|
|
|
|
Returns : The current value of write_flag (default 0) |
187
|
|
|
|
|
|
|
Args : Value of write_flag if setting, or none if |
188
|
|
|
|
|
|
|
getting the value. |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
=head2 dbm_package |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
Usage : $value = $self->dbm_package(); |
193
|
|
|
|
|
|
|
$self->dbm_package($value); |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
Function: Gets or sets the name of the Perl dbm module used. |
196
|
|
|
|
|
|
|
If the value is unset, then it returns the value of |
197
|
|
|
|
|
|
|
the package variable $USE_DBM_TYPE or if that is |
198
|
|
|
|
|
|
|
unset, then it chooses the best available dbm type, |
199
|
|
|
|
|
|
|
choosing 'DB_File' in preference to 'SDBM_File'. |
200
|
|
|
|
|
|
|
Bio::Abstract::Index may work with other dbm file |
201
|
|
|
|
|
|
|
types. |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
Returns : The current value of dbm_package |
204
|
|
|
|
|
|
|
Args : Value of dbm_package if setting, or none if |
205
|
|
|
|
|
|
|
getting the value. |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
=cut |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
sub dbm_package { |
210
|
7
|
|
|
7
|
1
|
14
|
my( $self, $value ) = @_; |
211
|
7
|
|
|
|
|
11
|
my $to_require = 0; |
212
|
7
|
100
|
66
|
|
|
32
|
if( $value || ! $self->{'_dbm_package'} ) { |
213
|
4
|
|
100
|
|
|
23
|
my $type = $value || $USE_DBM_TYPE || 'DB_File'; |
214
|
4
|
100
|
|
|
|
20
|
if( $type =~ /DB_File/i ) { |
215
|
2
|
|
|
|
|
4
|
eval { |
216
|
2
|
|
|
|
|
139
|
require DB_File; |
217
|
|
|
|
|
|
|
}; |
218
|
2
|
50
|
|
|
|
14
|
$type = ( $@ ) ? 'SDBM_File' : 'DB_File'; |
219
|
|
|
|
|
|
|
} |
220
|
4
|
50
|
|
|
|
14
|
if( $type ne 'DB_File' ) { |
221
|
4
|
|
|
|
|
9
|
eval { require "$type.pm"; }; |
|
4
|
|
|
|
|
513
|
|
222
|
4
|
50
|
|
|
|
2654
|
$self->throw($@) if( $@ ); |
223
|
|
|
|
|
|
|
} |
224
|
4
|
|
|
|
|
12
|
$self->{'_dbm_package'} = $type; |
225
|
4
|
100
|
|
|
|
9
|
if( ! defined $USE_DBM_TYPE ) { |
226
|
2
|
|
|
|
|
6
|
$USE_DBM_TYPE = $self->{'_dbm_package'}; |
227
|
|
|
|
|
|
|
} |
228
|
|
|
|
|
|
|
} |
229
|
7
|
|
|
|
|
20
|
return $self->{'_dbm_package'}; |
230
|
|
|
|
|
|
|
} |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
=head2 db |
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
Title : db |
235
|
|
|
|
|
|
|
Usage : $index->db |
236
|
|
|
|
|
|
|
Function: Returns a ref to the hash which is tied to the dbm |
237
|
|
|
|
|
|
|
file. Used internally when adding and retrieving |
238
|
|
|
|
|
|
|
data from the database. |
239
|
|
|
|
|
|
|
Example : $db = $index->db(); |
240
|
|
|
|
|
|
|
$db->{ $some_key } = $data |
241
|
|
|
|
|
|
|
$data = $index->db->{ $some_key }; |
242
|
|
|
|
|
|
|
Returns : ref to HASH |
243
|
|
|
|
|
|
|
Args : NONE |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
=cut |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
sub db { |
248
|
126
|
|
|
126
|
1
|
939
|
return $_[0]->{'_DB'}; |
249
|
|
|
|
|
|
|
} |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
=head2 get_stream |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
Title : get_stream |
255
|
|
|
|
|
|
|
Usage : $stream = $index->get_stream( $id ); |
256
|
|
|
|
|
|
|
Function: Returns a file handle with the file pointer |
257
|
|
|
|
|
|
|
at the approprite place |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
This provides for a way to get the actual |
260
|
|
|
|
|
|
|
file contents and not an object |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
WARNING: you must parse the record deliminter |
263
|
|
|
|
|
|
|
*yourself*. Abstract won't do this for you |
264
|
|
|
|
|
|
|
So this code |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
$fh = $index->get_stream($myid); |
267
|
|
|
|
|
|
|
while( <$fh> ) { |
268
|
|
|
|
|
|
|
# do something |
269
|
|
|
|
|
|
|
} |
270
|
|
|
|
|
|
|
will parse the entire file if you don't put in |
271
|
|
|
|
|
|
|
a last statement in, like |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
while( <$fh> ) { |
274
|
|
|
|
|
|
|
/^\/\// && last; # end of record |
275
|
|
|
|
|
|
|
# do something |
276
|
|
|
|
|
|
|
} |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
Returns : A filehandle object |
279
|
|
|
|
|
|
|
Args : string represents the accession number |
280
|
|
|
|
|
|
|
Notes : This method should not be used without forethought |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
=cut |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
#' |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
sub get_stream { |
287
|
16
|
|
|
16
|
1
|
976
|
my ($self,$id) = @_; |
288
|
|
|
|
|
|
|
|
289
|
16
|
|
|
|
|
28
|
my ($desc,$acc,$out); |
290
|
16
|
|
|
|
|
56
|
my $db = $self->db(); |
291
|
|
|
|
|
|
|
|
292
|
16
|
50
|
|
|
|
211
|
if (my $rec = $db->{ $id }) { |
293
|
16
|
|
|
|
|
24
|
my( @record ); |
294
|
|
|
|
|
|
|
|
295
|
16
|
|
|
|
|
62
|
my ($file, $begin, $end) = $self->unpack_record( $rec ); |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
# Get the (possibly cached) filehandle |
298
|
16
|
|
|
|
|
65
|
my $fh = $self->_file_handle( $file ); |
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
# move to start |
301
|
16
|
|
|
|
|
113
|
seek($fh, $begin, 0); |
302
|
|
|
|
|
|
|
|
303
|
16
|
|
|
|
|
56
|
return $fh; |
304
|
|
|
|
|
|
|
} else { |
305
|
0
|
|
|
|
|
0
|
$self->throw("Unable to find a record for $id in the flat file index"); |
306
|
|
|
|
|
|
|
} |
307
|
|
|
|
|
|
|
} |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
=head2 cachesize |
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
Usage : $index->cachesize(1000000) |
313
|
|
|
|
|
|
|
Function: Sets the dbm file cache size for the index. |
314
|
|
|
|
|
|
|
Needs to be set before the DBM file gets opened. |
315
|
|
|
|
|
|
|
Example : $index->cachesize(1000000) |
316
|
|
|
|
|
|
|
Returns : size of the curent cache |
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
=cut |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
sub cachesize { |
321
|
0
|
|
|
0
|
1
|
0
|
my( $self, $size ) = @_; |
322
|
|
|
|
|
|
|
|
323
|
0
|
0
|
|
|
|
0
|
if(defined $size){ |
324
|
0
|
|
|
|
|
0
|
$self->{'_cachesize'} = $size; |
325
|
|
|
|
|
|
|
} |
326
|
0
|
|
|
|
|
0
|
return ( $self->{'_cachesize'} ); |
327
|
|
|
|
|
|
|
} |
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
=head2 ffactor |
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
Usage : $index->ffactor(1000000) |
333
|
|
|
|
|
|
|
Function: Sets the dbm file fill factor. |
334
|
|
|
|
|
|
|
Needs to be set before the DBM file gets opened. |
335
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
Example : $index->ffactor(1000000) |
337
|
|
|
|
|
|
|
Returns : size of the curent cache |
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
=cut |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
sub ffactor { |
342
|
0
|
|
|
0
|
1
|
0
|
my( $self, $size ) = @_; |
343
|
|
|
|
|
|
|
|
344
|
0
|
0
|
|
|
|
0
|
if(defined $size){ |
345
|
0
|
|
|
|
|
0
|
$self->{'_ffactor'} = $size; |
346
|
|
|
|
|
|
|
} |
347
|
0
|
|
|
|
|
0
|
return ( $self->{'_ffactor'} ); |
348
|
|
|
|
|
|
|
} |
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
=head2 open_dbm |
352
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
Usage : $index->open_dbm() |
354
|
|
|
|
|
|
|
Function: Opens the dbm file associated with the index |
355
|
|
|
|
|
|
|
object. Write access is only given if explicitly |
356
|
|
|
|
|
|
|
asked for by calling new(-write => 1) or having set |
357
|
|
|
|
|
|
|
the write_flag(1) on the index object. The type of |
358
|
|
|
|
|
|
|
dbm file opened is that returned by dbm_package(). |
359
|
|
|
|
|
|
|
The name of the file to be is opened is obtained by |
360
|
|
|
|
|
|
|
calling the filename() method. |
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
Example : $index->_open_dbm() |
363
|
|
|
|
|
|
|
Returns : 1 on success |
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
=cut |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
sub open_dbm { |
368
|
4
|
|
|
4
|
1
|
8
|
my( $self ) = @_; |
369
|
|
|
|
|
|
|
|
370
|
4
|
50
|
|
|
|
10
|
my $filename = $self->filename() |
371
|
|
|
|
|
|
|
or $self->throw("filename() not set"); |
372
|
|
|
|
|
|
|
|
373
|
4
|
|
|
|
|
24
|
my $db = $self->db(); |
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
# Close the dbm file if already open (maybe we're getting |
376
|
|
|
|
|
|
|
# or dropping write access |
377
|
4
|
50
|
|
|
|
14
|
if (ref($db) ne 'HASH') { |
378
|
0
|
|
|
|
|
0
|
untie($db); |
379
|
|
|
|
|
|
|
} |
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
# What kind of DBM file are we going to open? |
382
|
4
|
|
|
|
|
12
|
my $dbm_type = $self->dbm_package; |
383
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
# Choose mode for opening dbm file (read/write+create or read-only). |
385
|
4
|
50
|
|
|
|
13
|
my $mode_flags = $self->write_flag ? O_RDWR|O_CREAT : O_RDONLY; |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
# Open the dbm file |
388
|
4
|
50
|
|
|
|
14
|
if ($dbm_type eq 'DB_File') { |
389
|
0
|
|
|
|
|
0
|
my $hash_inf = DB_File::HASHINFO->new(); |
390
|
0
|
|
|
|
|
0
|
my $cache = $self->cachesize(); |
391
|
0
|
|
|
|
|
0
|
my $ffactor = $self->ffactor(); |
392
|
0
|
0
|
|
|
|
0
|
if ($cache){ |
393
|
0
|
|
|
|
|
0
|
$hash_inf->{'cachesize'} = $cache; |
394
|
|
|
|
|
|
|
} |
395
|
0
|
0
|
|
|
|
0
|
if ($ffactor){ |
396
|
0
|
|
|
|
|
0
|
$hash_inf->{'ffactor'} = $ffactor; |
397
|
|
|
|
|
|
|
} |
398
|
0
|
0
|
|
|
|
0
|
tie( %$db, $dbm_type, $filename, $mode_flags, 0644, $hash_inf ) |
399
|
|
|
|
|
|
|
or $self->throw("Can't open '$dbm_type' dbm file '$filename' : $!"); |
400
|
|
|
|
|
|
|
} else { |
401
|
4
|
50
|
|
|
|
513
|
tie( %$db, $dbm_type, $filename, $mode_flags, 0644 ) |
402
|
|
|
|
|
|
|
or $self->throw("Can't open '$dbm_type' dbm file '$filename' : $!"); |
403
|
|
|
|
|
|
|
} |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
# The following methods access data in the dbm file: |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
# Now, if we're a Bio::Index::Abstract caterpillar, then we |
408
|
|
|
|
|
|
|
# transform ourselves into a Bio::Index:: butterfly! |
409
|
4
|
50
|
|
|
|
22
|
if( ref($self) eq "Bio::Index::Abstract" ) { |
410
|
0
|
|
|
|
|
0
|
my $pkg = $self->_code_base(); |
411
|
0
|
|
|
|
|
0
|
bless $self, $pkg; |
412
|
|
|
|
|
|
|
} |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
# Check or set this is the right kind and version of index |
415
|
4
|
|
|
|
|
25
|
$self->_type_and_version(); |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
# Check files haven't changed size since they were indexed |
418
|
4
|
|
|
|
|
36
|
$self->_check_file_sizes(); |
419
|
|
|
|
|
|
|
|
420
|
4
|
|
|
|
|
4
|
return 1; |
421
|
|
|
|
|
|
|
} |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
=head2 _version |
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
Title : _version |
426
|
|
|
|
|
|
|
Usage : $type = $index->_version() |
427
|
|
|
|
|
|
|
Function: Returns a string which identifes the version of an |
428
|
|
|
|
|
|
|
index module. Used to permanently identify an index |
429
|
|
|
|
|
|
|
file as having been created by a particular version |
430
|
|
|
|
|
|
|
of the index module. Must be provided by the sub class |
431
|
|
|
|
|
|
|
Example : |
432
|
|
|
|
|
|
|
Returns : |
433
|
|
|
|
|
|
|
Args : none |
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
=cut |
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
sub _version { |
438
|
0
|
|
|
0
|
|
0
|
my $self = shift; |
439
|
0
|
|
|
|
|
0
|
$self->throw("In Bio::Index::Abstract, no _version method in sub class"); |
440
|
|
|
|
|
|
|
} |
441
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
=head2 _code_base |
443
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
Title : _code_base |
445
|
|
|
|
|
|
|
Usage : $code = $db->_code_base(); |
446
|
|
|
|
|
|
|
Function: |
447
|
|
|
|
|
|
|
Example : |
448
|
|
|
|
|
|
|
Returns : Code package to be used with this |
449
|
|
|
|
|
|
|
Args : |
450
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
=cut |
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
sub _code_base { |
455
|
0
|
|
|
0
|
|
0
|
my ($self) = @_; |
456
|
0
|
|
|
|
|
0
|
my $code_key = '__TYPE_AND_VERSION'; |
457
|
0
|
|
|
|
|
0
|
my $record; |
458
|
|
|
|
|
|
|
|
459
|
0
|
|
|
|
|
0
|
$record = $self->db->{$code_key}; |
460
|
|
|
|
|
|
|
|
461
|
0
|
|
|
|
|
0
|
my($code,$version) = $self->unpack_record($record); |
462
|
0
|
0
|
|
|
|
0
|
if( wantarray ) { |
463
|
0
|
|
|
|
|
0
|
return ($code,$version); |
464
|
|
|
|
|
|
|
} else { |
465
|
0
|
|
|
|
|
0
|
return $code; |
466
|
|
|
|
|
|
|
} |
467
|
|
|
|
|
|
|
} |
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
=head2 _type_and_version |
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
Title : _type_and_version |
473
|
|
|
|
|
|
|
Usage : Called by _initalize |
474
|
|
|
|
|
|
|
Function: Checks that the index opened is made by the same index |
475
|
|
|
|
|
|
|
module and version of that module that made it. If the |
476
|
|
|
|
|
|
|
index is empty, then it adds the information to the |
477
|
|
|
|
|
|
|
database. |
478
|
|
|
|
|
|
|
Example : |
479
|
|
|
|
|
|
|
Returns : 1 or exception |
480
|
|
|
|
|
|
|
Args : none |
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
=cut |
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
sub _type_and_version { |
485
|
4
|
|
|
4
|
|
9
|
my $self = shift; |
486
|
4
|
|
|
|
|
7
|
my $key = '__TYPE_AND_VERSION'; |
487
|
4
|
|
|
|
|
17
|
my $version = $self->_version(); |
488
|
4
|
|
|
|
|
10
|
my $type = ref $self; |
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
# Run check or add type and version key if missing |
491
|
4
|
50
|
|
|
|
10
|
if (my $rec = $self->db->{ $key }) { |
492
|
0
|
|
|
|
|
0
|
my( $db_type, $db_version ) = $self->unpack_record($rec); |
493
|
0
|
0
|
|
|
|
0
|
$self->throw("This index file is from version [$db_version] - You need to rebuild it to use module version [$version]") |
494
|
|
|
|
|
|
|
unless $db_version == $version; |
495
|
0
|
0
|
|
|
|
0
|
$self->throw("This index file is type [$db_type] - Can't access it with module for [$type]") |
496
|
|
|
|
|
|
|
unless $db_type eq $type; |
497
|
|
|
|
|
|
|
} else { |
498
|
4
|
50
|
|
|
|
21
|
$self->add_record( $key, $type, $version ) |
499
|
|
|
|
|
|
|
or $self->throw("Can't add Type and Version record"); |
500
|
|
|
|
|
|
|
} |
501
|
4
|
|
|
|
|
11
|
return 1; |
502
|
|
|
|
|
|
|
} |
503
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
=head2 _check_file_sizes |
506
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
Title : _check_file_sizes |
508
|
|
|
|
|
|
|
Usage : $index->_check_file_sizes() |
509
|
|
|
|
|
|
|
Function: Verifies that the files listed in the database |
510
|
|
|
|
|
|
|
are the same size as when the database was built, |
511
|
|
|
|
|
|
|
or throws an exception. Called by the new() |
512
|
|
|
|
|
|
|
function. |
513
|
|
|
|
|
|
|
Example : |
514
|
|
|
|
|
|
|
Returns : 1 or exception |
515
|
|
|
|
|
|
|
Args : |
516
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
=cut |
518
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
sub _check_file_sizes { |
520
|
4
|
|
|
4
|
|
9
|
my $self = shift; |
521
|
4
|
|
50
|
|
|
15
|
my $num = $self->_file_count() || 0; |
522
|
|
|
|
|
|
|
|
523
|
4
|
|
|
|
|
18
|
for (my $i = 0; $i < $num; $i++) { |
524
|
0
|
|
|
|
|
0
|
my( $file, $stored_size ) = $self->unpack_record( $self->db->{"__FILE_$i"} ); |
525
|
0
|
|
|
|
|
0
|
my $size = -s $file; |
526
|
0
|
0
|
|
|
|
0
|
unless ($size == $stored_size) { |
527
|
0
|
|
|
|
|
0
|
$self->throw("file $i [ $file ] has changed size $stored_size -> $size. This probably means you need to rebuild the index."); |
528
|
|
|
|
|
|
|
} |
529
|
|
|
|
|
|
|
} |
530
|
4
|
|
|
|
|
8
|
return 1; |
531
|
|
|
|
|
|
|
} |
532
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
=head2 make_index |
535
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
Title : make_index |
537
|
|
|
|
|
|
|
Usage : $index->make_index( FILE_LIST ) |
538
|
|
|
|
|
|
|
Function: Takes a list of file names, checks that they are |
539
|
|
|
|
|
|
|
all fully qualified, and then calls _filename() on |
540
|
|
|
|
|
|
|
each. It supplies _filename() with the name of the |
541
|
|
|
|
|
|
|
file, and an integer which is stored with each record |
542
|
|
|
|
|
|
|
created by _filename(). Can be called multiple times, |
543
|
|
|
|
|
|
|
and can be used to add to an existing index file. |
544
|
|
|
|
|
|
|
Example : $index->make_index( '/home/seqs1', '/home/seqs2', '/nfs/pub/big_db' ); |
545
|
|
|
|
|
|
|
Returns : Number of files indexed |
546
|
|
|
|
|
|
|
Args : LIST OF FILES |
547
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
=cut |
549
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
sub make_index { |
551
|
4
|
|
|
4
|
1
|
12
|
my($self, @files) = @_; |
552
|
4
|
|
|
|
|
9
|
my $count = 0; |
553
|
4
|
|
|
|
|
4
|
my $recs = 0; |
554
|
|
|
|
|
|
|
# blow up if write flag is not set. EB fix |
555
|
|
|
|
|
|
|
|
556
|
4
|
50
|
|
|
|
12
|
if( !defined $self->write_flag ) { |
557
|
0
|
|
|
|
|
0
|
$self->throw("Attempting to make an index on a read-only database. What about a WRITE flag on opening the index?"); |
558
|
|
|
|
|
|
|
} |
559
|
|
|
|
|
|
|
|
560
|
|
|
|
|
|
|
# We're really fussy/lazy, expecting all file names to be fully qualified |
561
|
4
|
50
|
|
|
|
12
|
$self->throw("No files to index provided") unless @files; |
562
|
4
|
|
|
|
|
16
|
for(my $i=0;$i
|
563
|
4
|
50
|
33
|
|
|
46
|
if( $Bio::Root::IO::FILESPECLOADED && File::Spec->can('rel2abs') ) { |
564
|
4
|
50
|
33
|
|
|
45
|
if( ! File::Spec->file_name_is_absolute($files[$i]) |
565
|
|
|
|
|
|
|
&& $self->pathtype() ne 'relative') { |
566
|
4
|
|
|
|
|
99
|
$files[$i] = File::Spec->rel2abs($files[$i]); |
567
|
|
|
|
|
|
|
} |
568
|
|
|
|
|
|
|
} else { |
569
|
0
|
0
|
|
|
|
0
|
if( $^O =~ /MSWin/i ) { |
570
|
0
|
0
|
|
|
|
0
|
($files[$i] =~ m|^[A-Za-z]:/|) || |
571
|
|
|
|
|
|
|
$self->throw("Not an absolute file path '$files[$i]'"); |
572
|
|
|
|
|
|
|
} else { |
573
|
0
|
0
|
|
|
|
0
|
($files[$i] =~ m|^/|) || |
574
|
|
|
|
|
|
|
$self->throw("Not an absolute file path '$files[$i]'"); |
575
|
|
|
|
|
|
|
} |
576
|
|
|
|
|
|
|
} |
577
|
4
|
50
|
|
|
|
97
|
$self->throw("File does not exist '$files[$i]'") unless -e $files[$i]; |
578
|
|
|
|
|
|
|
} |
579
|
|
|
|
|
|
|
|
580
|
|
|
|
|
|
|
# Add each file to the index |
581
|
|
|
|
|
|
|
FILE : |
582
|
4
|
|
|
|
|
9
|
foreach my $file (@files) { |
583
|
|
|
|
|
|
|
|
584
|
4
|
|
|
|
|
6
|
my $i; # index for this file |
585
|
|
|
|
|
|
|
|
586
|
|
|
|
|
|
|
# Get new index for this file and increment file count |
587
|
4
|
50
|
|
|
|
11
|
if ( defined(my $count = $self->_file_count) ) { |
588
|
0
|
|
|
|
|
0
|
$i = $count; |
589
|
|
|
|
|
|
|
} else { |
590
|
4
|
|
|
|
|
10
|
$i = 0; $self->_file_count(0); |
|
4
|
|
|
|
|
8
|
|
591
|
|
|
|
|
|
|
} |
592
|
|
|
|
|
|
|
|
593
|
|
|
|
|
|
|
# see whether this file has been already indexed |
594
|
4
|
|
|
|
|
12
|
my ($record,$number,$size); |
595
|
|
|
|
|
|
|
|
596
|
4
|
50
|
|
|
|
9
|
if( ($record = $self->db->{"__FILENAME_$file"}) ) { |
597
|
0
|
|
|
|
|
0
|
($number,$size) = $self->unpack_record($record); |
598
|
|
|
|
|
|
|
|
599
|
|
|
|
|
|
|
# if it is the same size - fine. Otherwise die |
600
|
0
|
0
|
|
|
|
0
|
if( -s $file == $size ) { |
601
|
0
|
|
|
|
|
0
|
$self->warn("File $file already indexed. Skipping..."); |
602
|
0
|
|
|
|
|
0
|
next FILE; |
603
|
|
|
|
|
|
|
} else { |
604
|
0
|
|
|
|
|
0
|
$self->throw("In index, $file has changed size ($size). Indicates that the index is out of date"); |
605
|
|
|
|
|
|
|
} |
606
|
|
|
|
|
|
|
} |
607
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
# index this file |
609
|
4
|
|
|
|
|
19
|
$self->debug("Indexing file $file\n"); |
610
|
|
|
|
|
|
|
|
611
|
|
|
|
|
|
|
# this is supplied by the subclass and does the serious work |
612
|
4
|
|
|
|
|
22
|
$recs += $self->_index_file( $file, $i ); # Specific method for each type of index |
613
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
# Save file name and size for this index |
615
|
4
|
50
|
|
|
|
69
|
$self->add_record("__FILE_$i", $file, -s $file) |
616
|
|
|
|
|
|
|
or $self->throw("Can't add data to file: $file"); |
617
|
4
|
50
|
|
|
|
44
|
$self->add_record("__FILENAME_$file", $i, -s $file) |
618
|
|
|
|
|
|
|
or $self->throw("Can't add data to file: $file"); |
619
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
# increment file lines |
621
|
4
|
|
|
|
|
9
|
$i++; $self->_file_count($i); |
|
4
|
|
|
|
|
11
|
|
622
|
4
|
|
|
|
|
9
|
my $temp; |
623
|
4
|
|
|
|
|
8
|
$temp = $self->_file_count(); |
624
|
|
|
|
|
|
|
} |
625
|
4
|
|
|
|
|
11
|
return ($count, $recs); |
626
|
|
|
|
|
|
|
} |
627
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
=head2 pathtype |
629
|
|
|
|
|
|
|
|
630
|
|
|
|
|
|
|
Title : pathtype |
631
|
|
|
|
|
|
|
Usage : $index->pathtype($pathtype) |
632
|
|
|
|
|
|
|
Function: Set the type of the file path |
633
|
|
|
|
|
|
|
Only two values are supported, 'relative' or 'absolute'. |
634
|
|
|
|
|
|
|
If the user does not give any value, it is set to |
635
|
|
|
|
|
|
|
absolute by default. Thus it mimics the default |
636
|
|
|
|
|
|
|
behavior of Bio::Index::Abstract module. |
637
|
|
|
|
|
|
|
Example : my $index = Bio::Index::Abstract->(-pathtype => 'relative', |
638
|
|
|
|
|
|
|
-file => $file.inx, |
639
|
|
|
|
|
|
|
); |
640
|
|
|
|
|
|
|
or |
641
|
|
|
|
|
|
|
$index->pathtype('relative'); |
642
|
|
|
|
|
|
|
Returns : Type of the path. |
643
|
|
|
|
|
|
|
Args : String (relative|absolute) |
644
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
=cut |
646
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
sub pathtype { |
648
|
|
|
|
|
|
|
|
649
|
8
|
|
|
8
|
1
|
16
|
my($self, $type) = @_; |
650
|
|
|
|
|
|
|
|
651
|
8
|
100
|
|
|
|
17
|
if(defined($type)){ |
652
|
4
|
50
|
33
|
|
|
26
|
if($type ne 'absolute' && $type ne 'relative'){ |
653
|
0
|
|
|
|
|
0
|
$self->throw("Type of path can only be 'relative' or 'absolute', not [$type]."); |
654
|
|
|
|
|
|
|
} |
655
|
4
|
|
|
|
|
11
|
$self->{'_filepathtype'} = $type; |
656
|
|
|
|
|
|
|
} |
657
|
|
|
|
|
|
|
|
658
|
8
|
|
|
|
|
23
|
return $self->{'_filepathtype'}; |
659
|
|
|
|
|
|
|
} |
660
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
=head2 _filename |
663
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
Title : _filename |
665
|
|
|
|
|
|
|
Usage : $index->_filename( FILE INT ) |
666
|
|
|
|
|
|
|
Function: Indexes the file |
667
|
|
|
|
|
|
|
Example : |
668
|
|
|
|
|
|
|
Returns : |
669
|
|
|
|
|
|
|
Args : |
670
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
=cut |
672
|
|
|
|
|
|
|
|
673
|
|
|
|
|
|
|
sub _index_file { |
674
|
0
|
|
|
0
|
|
0
|
my $self = shift; |
675
|
|
|
|
|
|
|
|
676
|
0
|
|
|
|
|
0
|
my $pkg = ref($self); |
677
|
0
|
|
|
|
|
0
|
$self->throw("Error: '$pkg' does not provide the _index_file() method"); |
678
|
|
|
|
|
|
|
} |
679
|
|
|
|
|
|
|
|
680
|
|
|
|
|
|
|
|
681
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
=head2 _file_handle |
683
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
Title : _file_handle |
685
|
|
|
|
|
|
|
Usage : $fh = $index->_file_handle( INT ) |
686
|
|
|
|
|
|
|
Function: Returns an open filehandle for the file |
687
|
|
|
|
|
|
|
index INT. On opening a new filehandle it |
688
|
|
|
|
|
|
|
caches it in the @{$index->_filehandle} array. |
689
|
|
|
|
|
|
|
If the requested filehandle is already open, |
690
|
|
|
|
|
|
|
it simply returns it from the array. |
691
|
|
|
|
|
|
|
Example : $first_file_indexed = $index->_file_handle( 0 ); |
692
|
|
|
|
|
|
|
Returns : ref to a filehandle |
693
|
|
|
|
|
|
|
Args : INT |
694
|
|
|
|
|
|
|
|
695
|
|
|
|
|
|
|
=cut |
696
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
sub _file_handle { |
698
|
16
|
|
|
16
|
|
33
|
my( $self, $i ) = @_; |
699
|
|
|
|
|
|
|
|
700
|
16
|
100
|
|
|
|
75
|
unless ($self->{'_filehandle'}[$i]) { |
701
|
4
|
50
|
|
|
|
13
|
my @rec = $self->unpack_record($self->db->{"__FILE_$i"}) |
702
|
|
|
|
|
|
|
or $self->throw("Can't get filename for index : $i"); |
703
|
4
|
|
|
|
|
8
|
my $file = $rec[0]; |
704
|
4
|
50
|
|
|
|
135
|
open my $fh, '<', $file or $self->throw("Could not read file '$file': $!"); |
705
|
4
|
|
|
|
|
16
|
$self->{'_filehandle'}[$i] = $fh; # Cache filehandle |
706
|
|
|
|
|
|
|
} |
707
|
16
|
|
|
|
|
32
|
return $self->{'_filehandle'}[$i]; |
708
|
|
|
|
|
|
|
} |
709
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
|
711
|
|
|
|
|
|
|
=head2 _file_count |
712
|
|
|
|
|
|
|
|
713
|
|
|
|
|
|
|
Title : _file_count |
714
|
|
|
|
|
|
|
Usage : $index->_file_count( INT ) |
715
|
|
|
|
|
|
|
Function: Used by the index building sub in a sub class to |
716
|
|
|
|
|
|
|
track the number of files indexed. Sets or gets |
717
|
|
|
|
|
|
|
the number of files indexed when called with or |
718
|
|
|
|
|
|
|
without an argument. |
719
|
|
|
|
|
|
|
Example : |
720
|
|
|
|
|
|
|
Returns : INT |
721
|
|
|
|
|
|
|
Args : INT |
722
|
|
|
|
|
|
|
|
723
|
|
|
|
|
|
|
=cut |
724
|
|
|
|
|
|
|
|
725
|
|
|
|
|
|
|
sub _file_count { |
726
|
20
|
|
|
20
|
|
26
|
my $self = shift; |
727
|
20
|
100
|
|
|
|
40
|
if (@_) { |
728
|
8
|
|
|
|
|
14
|
$self->db->{'__FILE_COUNT'} = shift; |
729
|
|
|
|
|
|
|
} |
730
|
20
|
|
|
|
|
37
|
return $self->db->{'__FILE_COUNT'}; |
731
|
|
|
|
|
|
|
} |
732
|
|
|
|
|
|
|
|
733
|
|
|
|
|
|
|
|
734
|
|
|
|
|
|
|
=head2 add_record |
735
|
|
|
|
|
|
|
|
736
|
|
|
|
|
|
|
Title : add_record |
737
|
|
|
|
|
|
|
Usage : $index->add_record( $id, @stuff ); |
738
|
|
|
|
|
|
|
Function: Calls pack_record on @stuff, and adds the result |
739
|
|
|
|
|
|
|
of pack_record to the index database under key $id. |
740
|
|
|
|
|
|
|
If $id is a reference to an array, then a new entry |
741
|
|
|
|
|
|
|
is added under a key corresponding to each element |
742
|
|
|
|
|
|
|
of the array. |
743
|
|
|
|
|
|
|
Example : $index->add_record( $id, $fileNumber, $begin, $end ) |
744
|
|
|
|
|
|
|
Returns : TRUE on success or FALSE on failure |
745
|
|
|
|
|
|
|
Args : ID LIST |
746
|
|
|
|
|
|
|
|
747
|
|
|
|
|
|
|
=cut |
748
|
|
|
|
|
|
|
|
749
|
|
|
|
|
|
|
sub add_record { |
750
|
33
|
|
|
33
|
1
|
71
|
my( $self, $id, @rec ) = @_; |
751
|
33
|
|
|
|
|
117
|
$self->debug( "Adding key $id\n"); |
752
|
33
|
50
|
|
|
|
59
|
if( exists $self->db->{$id} ) { |
753
|
0
|
|
|
|
|
0
|
$self->warn("overwriting a current value stored for $id\n"); |
754
|
|
|
|
|
|
|
} |
755
|
33
|
|
|
|
|
83
|
$self->db->{$id} = $self->pack_record( @rec ); |
756
|
33
|
|
|
|
|
177
|
return 1; |
757
|
|
|
|
|
|
|
} |
758
|
|
|
|
|
|
|
|
759
|
|
|
|
|
|
|
|
760
|
|
|
|
|
|
|
=head2 pack_record |
761
|
|
|
|
|
|
|
|
762
|
|
|
|
|
|
|
Title : pack_record |
763
|
|
|
|
|
|
|
Usage : $packed_string = $index->pack_record( LIST ) |
764
|
|
|
|
|
|
|
Function: Packs an array of scalars into a single string |
765
|
|
|
|
|
|
|
joined by ASCII 034 (which is unlikely to be used |
766
|
|
|
|
|
|
|
in any of the strings), and returns it. |
767
|
|
|
|
|
|
|
Example : $packed_string = $index->pack_record( $fileNumber, $begin, $end ) |
768
|
|
|
|
|
|
|
Returns : STRING or undef |
769
|
|
|
|
|
|
|
Args : LIST |
770
|
|
|
|
|
|
|
|
771
|
|
|
|
|
|
|
=cut |
772
|
|
|
|
|
|
|
|
773
|
|
|
|
|
|
|
sub pack_record { |
774
|
33
|
|
|
33
|
1
|
60
|
my( $self, @args ) = @_; |
775
|
|
|
|
|
|
|
# Silence undefined warnings |
776
|
|
|
|
|
|
|
@args = map { |
777
|
33
|
50
|
|
|
|
50
|
$_ = (defined $_) ? $_ : ''; |
|
66
|
|
|
|
|
111
|
|
778
|
66
|
|
|
|
|
131
|
$_ ; |
779
|
|
|
|
|
|
|
} @args; |
780
|
33
|
|
|
|
|
113
|
return join "\034", @args; |
781
|
|
|
|
|
|
|
} |
782
|
|
|
|
|
|
|
|
783
|
|
|
|
|
|
|
=head2 unpack_record |
784
|
|
|
|
|
|
|
|
785
|
|
|
|
|
|
|
Title : unpack_record |
786
|
|
|
|
|
|
|
Usage : $index->unpack_record( STRING ) |
787
|
|
|
|
|
|
|
Function: Splits the sting provided into an array, |
788
|
|
|
|
|
|
|
splitting on ASCII 034. |
789
|
|
|
|
|
|
|
Example : ( $fileNumber, $begin, $end ) = $index->unpack_record( $self->db->{$id} ) |
790
|
|
|
|
|
|
|
Returns : A 3 element ARRAY |
791
|
|
|
|
|
|
|
Args : STRING containing ASCII 034 |
792
|
|
|
|
|
|
|
|
793
|
|
|
|
|
|
|
=cut |
794
|
|
|
|
|
|
|
|
795
|
|
|
|
|
|
|
sub unpack_record { |
796
|
20
|
|
|
20
|
1
|
51
|
my( $self, @args ) = @_; |
797
|
20
|
|
|
|
|
100
|
return split /\034/, $args[0]; |
798
|
|
|
|
|
|
|
} |
799
|
|
|
|
|
|
|
|
800
|
|
|
|
|
|
|
=head2 count_records |
801
|
|
|
|
|
|
|
|
802
|
|
|
|
|
|
|
Title : count_records |
803
|
|
|
|
|
|
|
Usage : $recs = $seqdb->count_records() |
804
|
|
|
|
|
|
|
Function: return count of all recs in the index |
805
|
|
|
|
|
|
|
Example : |
806
|
|
|
|
|
|
|
Returns : a scalar |
807
|
|
|
|
|
|
|
Args : none |
808
|
|
|
|
|
|
|
|
809
|
|
|
|
|
|
|
|
810
|
|
|
|
|
|
|
=cut |
811
|
|
|
|
|
|
|
|
812
|
|
|
|
|
|
|
sub count_records { |
813
|
0
|
|
|
0
|
1
|
0
|
my ($self,@args) = @_; |
814
|
0
|
|
|
|
|
0
|
my $db = $self->db; |
815
|
0
|
|
|
|
|
0
|
my $c = 0; |
816
|
0
|
|
|
|
|
0
|
while (my($id, $rec) = each %$db) { |
817
|
0
|
0
|
|
|
|
0
|
if( $id =~ /^__/ ) { |
818
|
|
|
|
|
|
|
# internal info |
819
|
0
|
|
|
|
|
0
|
next; |
820
|
|
|
|
|
|
|
} |
821
|
0
|
|
|
|
|
0
|
$c++; |
822
|
|
|
|
|
|
|
} |
823
|
0
|
|
|
|
|
0
|
return ($c); |
824
|
|
|
|
|
|
|
} |
825
|
|
|
|
|
|
|
|
826
|
|
|
|
|
|
|
|
827
|
|
|
|
|
|
|
=head2 DESTROY |
828
|
|
|
|
|
|
|
|
829
|
|
|
|
|
|
|
Title : DESTROY |
830
|
|
|
|
|
|
|
Usage : Called automatically when index goes out of scope |
831
|
|
|
|
|
|
|
Function: Closes connection to database and handles to |
832
|
|
|
|
|
|
|
sequence files |
833
|
|
|
|
|
|
|
Returns : NEVER |
834
|
|
|
|
|
|
|
Args : NONE |
835
|
|
|
|
|
|
|
|
836
|
|
|
|
|
|
|
|
837
|
|
|
|
|
|
|
=cut |
838
|
|
|
|
|
|
|
|
839
|
|
|
|
|
|
|
sub DESTROY { |
840
|
8
|
|
|
8
|
|
444
|
my $self = shift; |
841
|
8
|
|
|
|
|
17
|
untie($self->{'_DB'}); |
842
|
|
|
|
|
|
|
# An additional undef was the only way to force |
843
|
|
|
|
|
|
|
# the object to drop the open filehandles for ActivePerl |
844
|
8
|
|
|
|
|
225
|
undef $self->{'_DB'}; |
845
|
|
|
|
|
|
|
} |
846
|
|
|
|
|
|
|
|
847
|
|
|
|
|
|
|
1; |