| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# |
|
2
|
|
|
|
|
|
|
# |
|
3
|
|
|
|
|
|
|
# BioPerl module for Bio::Index::Abstract |
|
4
|
|
|
|
|
|
|
# |
|
5
|
|
|
|
|
|
|
# Please direct questions and support issues to |
|
6
|
|
|
|
|
|
|
# |
|
7
|
|
|
|
|
|
|
# Cared for by Ewan Birney |
|
8
|
|
|
|
|
|
|
# and James Gilbert |
|
9
|
|
|
|
|
|
|
# |
|
10
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Bio::Index::Abstract - Abstract interface for indexing a flat file |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
You should not be using this module directly |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=head1 USING DB_FILE |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
To use DB_File and not SDBM for this index, pass the value: |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
-dbm_package => 'DB_File' |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
to new (see below). |
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
31
|
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
This object provides the basic mechanism to associate positions |
|
33
|
|
|
|
|
|
|
in files with names. The position and filenames are stored in DBM |
|
34
|
|
|
|
|
|
|
which can then be accessed later on. It is the equivalent of flat |
|
35
|
|
|
|
|
|
|
file indexing (eg, SRS or efetch). |
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
This object is the guts to the mechanism, which will be used by the |
|
38
|
|
|
|
|
|
|
specific objects inheriting from it. |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
=head1 FEEDBACK |
|
41
|
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
=head2 Mailing Lists |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
|
45
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to one |
|
46
|
|
|
|
|
|
|
of the Bioperl mailing lists. Your participation is much appreciated. |
|
47
|
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
|
49
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
=head2 Support |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
I |
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
|
58
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
|
59
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
|
60
|
|
|
|
|
|
|
with code and data examples if at all possible. |
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
=head2 Reporting Bugs |
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
|
65
|
|
|
|
|
|
|
the bugs and their resolution. Bug reports can be submitted via the |
|
66
|
|
|
|
|
|
|
web: |
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
|
69
|
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
=head1 AUTHOR - Ewan Birney, James Gilbert |
|
71
|
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
Email - birney@sanger.ac.uk, jgrg@sanger.ac.uk |
|
73
|
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
=head1 APPENDIX |
|
75
|
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
The rest of the documentation details each of the object methods. Internal |
|
77
|
|
|
|
|
|
|
methods are usually preceded with an "_" (underscore). |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=cut |
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
# Let the code begin... |
|
83
|
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
package Bio::Index::Abstract; |
|
85
|
|
|
|
|
|
|
|
|
86
|
2
|
|
|
2
|
|
9
|
use strict; |
|
|
2
|
|
|
|
|
2
|
|
|
|
2
|
|
|
|
|
49
|
|
|
87
|
2
|
|
|
2
|
|
4
|
use Fcntl qw( O_RDWR O_CREAT O_RDONLY ); |
|
|
2
|
|
|
|
|
2
|
|
|
|
2
|
|
|
|
|
82
|
|
|
88
|
2
|
|
|
|
|
72
|
use vars qw( $TYPE_AND_VERSION_KEY |
|
89
|
2
|
|
|
2
|
|
6
|
$USE_DBM_TYPE $DB_HASH ); |
|
|
2
|
|
|
|
|
2
|
|
|
90
|
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
|
|
92
|
2
|
|
|
2
|
|
5
|
use Bio::Root::IO; |
|
|
2
|
|
|
|
|
2
|
|
|
|
2
|
|
|
|
|
41
|
|
|
93
|
2
|
|
|
2
|
|
6
|
use Symbol; |
|
|
2
|
|
|
|
|
1
|
|
|
|
2
|
|
|
|
|
92
|
|
|
94
|
|
|
|
|
|
|
|
|
95
|
2
|
|
|
2
|
|
7
|
use base qw(Bio::Root::Root); |
|
|
2
|
|
|
|
|
2
|
|
|
|
2
|
|
|
|
|
134
|
|
|
96
|
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
# Generate accessor methods for simple object fields |
|
98
|
|
|
|
|
|
|
BEGIN { |
|
99
|
2
|
|
|
2
|
|
3
|
foreach my $func (qw(filename write_flag)) { |
|
100
|
2
|
|
|
2
|
|
7
|
no strict 'refs'; |
|
|
2
|
|
|
|
|
2
|
|
|
|
2
|
|
|
|
|
139
|
|
|
101
|
4
|
|
|
|
|
6
|
my $field = "_$func"; |
|
102
|
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
*$func = sub { |
|
104
|
20
|
|
|
20
|
|
23
|
my( $self, $value ) = @_; |
|
105
|
|
|
|
|
|
|
|
|
106
|
20
|
100
|
|
|
|
35
|
if (defined $value) { |
|
107
|
8
|
|
|
|
|
14
|
$self->{$field} = $value; |
|
108
|
|
|
|
|
|
|
} |
|
109
|
20
|
|
|
|
|
43
|
return $self->{$field}; |
|
110
|
|
|
|
|
|
|
} |
|
111
|
4
|
|
|
|
|
3233
|
} |
|
112
|
|
|
|
|
|
|
} |
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=head2 new |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
Usage : $index = Bio::Index::Abstract->new( |
|
117
|
|
|
|
|
|
|
-filename => $dbm_file, |
|
118
|
|
|
|
|
|
|
-write_flag => 0, |
|
119
|
|
|
|
|
|
|
-dbm_package => 'DB_File', |
|
120
|
|
|
|
|
|
|
-verbose => 0); |
|
121
|
|
|
|
|
|
|
Function: Returns a new index object. If filename is |
|
122
|
|
|
|
|
|
|
specified, then open_dbm() is immediately called. |
|
123
|
|
|
|
|
|
|
Bio::Index::Abstract->new() will usually be called |
|
124
|
|
|
|
|
|
|
directly only when opening an existing index. |
|
125
|
|
|
|
|
|
|
Returns : A new index object |
|
126
|
|
|
|
|
|
|
Args : -filename The name of the dbm index file. |
|
127
|
|
|
|
|
|
|
-write_flag TRUE if write access to the dbm file is |
|
128
|
|
|
|
|
|
|
needed. |
|
129
|
|
|
|
|
|
|
-dbm_package The Perl dbm module to use for the |
|
130
|
|
|
|
|
|
|
index. |
|
131
|
|
|
|
|
|
|
-verbose Print debugging output to STDERR if |
|
132
|
|
|
|
|
|
|
TRUE. |
|
133
|
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=cut |
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
sub new { |
|
137
|
4
|
|
|
4
|
1
|
9
|
my($class, @args) = @_; |
|
138
|
4
|
|
|
|
|
22
|
my $self = $class->SUPER::new(@args); |
|
139
|
4
|
|
|
|
|
25
|
my( $filename, $write_flag, $dbm_package, $cachesize, $ffactor, $pathtype ) = |
|
140
|
|
|
|
|
|
|
$self->_rearrange([qw(FILENAME |
|
141
|
|
|
|
|
|
|
WRITE_FLAG |
|
142
|
|
|
|
|
|
|
DBM_PACKAGE |
|
143
|
|
|
|
|
|
|
CACHESIZE |
|
144
|
|
|
|
|
|
|
FFACTOR |
|
145
|
|
|
|
|
|
|
PATHTYPE |
|
146
|
|
|
|
|
|
|
)], @args); |
|
147
|
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
# Store any parameters passed |
|
149
|
4
|
50
|
|
|
|
33
|
$self->filename($filename) if $filename; |
|
150
|
4
|
50
|
|
|
|
11
|
$self->cachesize($cachesize) if $cachesize; |
|
151
|
4
|
50
|
|
|
|
9
|
$self->ffactor($ffactor) if $ffactor; |
|
152
|
4
|
50
|
|
|
|
20
|
$self->write_flag($write_flag) if $write_flag; |
|
153
|
4
|
50
|
|
|
|
17
|
$self->dbm_package($dbm_package) if $dbm_package; |
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
#If user doesn't give a path, we default it to absolute |
|
156
|
4
|
50
|
|
|
|
21
|
$pathtype ? $self->pathtype($pathtype) : $self->pathtype('absolute'); |
|
157
|
|
|
|
|
|
|
|
|
158
|
4
|
|
|
|
|
7
|
$self->{'_filehandle'} = []; # Array in which to cache SeqIO objects |
|
159
|
4
|
|
|
|
|
7
|
$self->{'_DB'} = {}; # Gets tied to the DBM file |
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
# Open database |
|
162
|
4
|
50
|
|
|
|
20
|
$self->open_dbm() if $filename; |
|
163
|
4
|
|
|
|
|
11
|
return $self; |
|
164
|
|
|
|
|
|
|
} |
|
165
|
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
=pod |
|
167
|
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
=head2 filename |
|
169
|
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
Title : filename |
|
171
|
|
|
|
|
|
|
Usage : $value = $self->filename(); |
|
172
|
|
|
|
|
|
|
$self->filename($value); |
|
173
|
|
|
|
|
|
|
Function: Gets or sets the name of the dbm index file. |
|
174
|
|
|
|
|
|
|
Returns : The current value of filename |
|
175
|
|
|
|
|
|
|
Args : Value of filename if setting, or none if |
|
176
|
|
|
|
|
|
|
getting the value. |
|
177
|
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
=head2 write_flag |
|
179
|
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
Title : write_flag |
|
181
|
|
|
|
|
|
|
Usage : $value = $self->write_flag(); |
|
182
|
|
|
|
|
|
|
$self->write_flag($value); |
|
183
|
|
|
|
|
|
|
Function: Gets or sets the value of write_flag, which |
|
184
|
|
|
|
|
|
|
is wether the dbm file should be opened with |
|
185
|
|
|
|
|
|
|
write access. |
|
186
|
|
|
|
|
|
|
Returns : The current value of write_flag (default 0) |
|
187
|
|
|
|
|
|
|
Args : Value of write_flag if setting, or none if |
|
188
|
|
|
|
|
|
|
getting the value. |
|
189
|
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
=head2 dbm_package |
|
191
|
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
Usage : $value = $self->dbm_package(); |
|
193
|
|
|
|
|
|
|
$self->dbm_package($value); |
|
194
|
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
Function: Gets or sets the name of the Perl dbm module used. |
|
196
|
|
|
|
|
|
|
If the value is unset, then it returns the value of |
|
197
|
|
|
|
|
|
|
the package variable $USE_DBM_TYPE or if that is |
|
198
|
|
|
|
|
|
|
unset, then it chooses the best available dbm type, |
|
199
|
|
|
|
|
|
|
choosing 'DB_File' in preference to 'SDBM_File'. |
|
200
|
|
|
|
|
|
|
Bio::Abstract::Index may work with other dbm file |
|
201
|
|
|
|
|
|
|
types. |
|
202
|
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
Returns : The current value of dbm_package |
|
204
|
|
|
|
|
|
|
Args : Value of dbm_package if setting, or none if |
|
205
|
|
|
|
|
|
|
getting the value. |
|
206
|
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
=cut |
|
208
|
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
sub dbm_package { |
|
210
|
7
|
|
|
7
|
1
|
10
|
my( $self, $value ) = @_; |
|
211
|
7
|
|
|
|
|
10
|
my $to_require = 0; |
|
212
|
7
|
100
|
66
|
|
|
33
|
if( $value || ! $self->{'_dbm_package'} ) { |
|
213
|
4
|
|
100
|
|
|
19
|
my $type = $value || $USE_DBM_TYPE || 'DB_File'; |
|
214
|
4
|
100
|
|
|
|
21
|
if( $type =~ /DB_File/i ) { |
|
215
|
2
|
|
|
|
|
2
|
eval { |
|
216
|
2
|
|
|
|
|
269
|
require DB_File; |
|
217
|
|
|
|
|
|
|
}; |
|
218
|
2
|
50
|
|
|
|
8
|
$type = ( $@ ) ? 'SDBM_File' : 'DB_File'; |
|
219
|
|
|
|
|
|
|
} |
|
220
|
4
|
50
|
|
|
|
14
|
if( $type ne 'DB_File' ) { |
|
221
|
4
|
|
|
|
|
6
|
eval { require "$type.pm"; }; |
|
|
4
|
|
|
|
|
679
|
|
|
222
|
4
|
50
|
|
|
|
2700
|
$self->throw($@) if( $@ ); |
|
223
|
|
|
|
|
|
|
} |
|
224
|
4
|
|
|
|
|
9
|
$self->{'_dbm_package'} = $type; |
|
225
|
4
|
100
|
|
|
|
15
|
if( ! defined $USE_DBM_TYPE ) { |
|
226
|
2
|
|
|
|
|
5
|
$USE_DBM_TYPE = $self->{'_dbm_package'}; |
|
227
|
|
|
|
|
|
|
} |
|
228
|
|
|
|
|
|
|
} |
|
229
|
7
|
|
|
|
|
13
|
return $self->{'_dbm_package'}; |
|
230
|
|
|
|
|
|
|
} |
|
231
|
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
=head2 db |
|
233
|
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
Title : db |
|
235
|
|
|
|
|
|
|
Usage : $index->db |
|
236
|
|
|
|
|
|
|
Function: Returns a ref to the hash which is tied to the dbm |
|
237
|
|
|
|
|
|
|
file. Used internally when adding and retrieving |
|
238
|
|
|
|
|
|
|
data from the database. |
|
239
|
|
|
|
|
|
|
Example : $db = $index->db(); |
|
240
|
|
|
|
|
|
|
$db->{ $some_key } = $data |
|
241
|
|
|
|
|
|
|
$data = $index->db->{ $some_key }; |
|
242
|
|
|
|
|
|
|
Returns : ref to HASH |
|
243
|
|
|
|
|
|
|
Args : NONE |
|
244
|
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
=cut |
|
246
|
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
sub db { |
|
248
|
126
|
|
|
126
|
1
|
916
|
return $_[0]->{'_DB'}; |
|
249
|
|
|
|
|
|
|
} |
|
250
|
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
=head2 get_stream |
|
253
|
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
Title : get_stream |
|
255
|
|
|
|
|
|
|
Usage : $stream = $index->get_stream( $id ); |
|
256
|
|
|
|
|
|
|
Function: Returns a file handle with the file pointer |
|
257
|
|
|
|
|
|
|
at the approprite place |
|
258
|
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
This provides for a way to get the actual |
|
260
|
|
|
|
|
|
|
file contents and not an object |
|
261
|
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
WARNING: you must parse the record deliminter |
|
263
|
|
|
|
|
|
|
*yourself*. Abstract wont do this for you |
|
264
|
|
|
|
|
|
|
So this code |
|
265
|
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
$fh = $index->get_stream($myid); |
|
267
|
|
|
|
|
|
|
while( <$fh> ) { |
|
268
|
|
|
|
|
|
|
# do something |
|
269
|
|
|
|
|
|
|
} |
|
270
|
|
|
|
|
|
|
will parse the entire file if you don't put in |
|
271
|
|
|
|
|
|
|
a last statement in, like |
|
272
|
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
while( <$fh> ) { |
|
274
|
|
|
|
|
|
|
/^\/\// && last; # end of record |
|
275
|
|
|
|
|
|
|
# do something |
|
276
|
|
|
|
|
|
|
} |
|
277
|
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
Returns : A filehandle object |
|
279
|
|
|
|
|
|
|
Args : string represents the accession number |
|
280
|
|
|
|
|
|
|
Notes : This method should not be used without forethought |
|
281
|
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
=cut |
|
283
|
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
#' |
|
285
|
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
sub get_stream { |
|
287
|
16
|
|
|
16
|
1
|
793
|
my ($self,$id) = @_; |
|
288
|
|
|
|
|
|
|
|
|
289
|
16
|
|
|
|
|
26
|
my ($desc,$acc,$out); |
|
290
|
16
|
|
|
|
|
45
|
my $db = $self->db(); |
|
291
|
|
|
|
|
|
|
|
|
292
|
16
|
50
|
|
|
|
167
|
if (my $rec = $db->{ $id }) { |
|
293
|
16
|
|
|
|
|
18
|
my( @record ); |
|
294
|
|
|
|
|
|
|
|
|
295
|
16
|
|
|
|
|
58
|
my ($file, $begin, $end) = $self->unpack_record( $rec ); |
|
296
|
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
# Get the (possibly cached) filehandle |
|
298
|
16
|
|
|
|
|
59
|
my $fh = $self->_file_handle( $file ); |
|
299
|
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
# move to start |
|
301
|
16
|
|
|
|
|
70
|
seek($fh, $begin, 0); |
|
302
|
|
|
|
|
|
|
|
|
303
|
16
|
|
|
|
|
45
|
return $fh; |
|
304
|
|
|
|
|
|
|
} else { |
|
305
|
0
|
|
|
|
|
0
|
$self->throw("Unable to find a record for $id in the flat file index"); |
|
306
|
|
|
|
|
|
|
} |
|
307
|
|
|
|
|
|
|
} |
|
308
|
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
=head2 cachesize |
|
311
|
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
Usage : $index->cachesize(1000000) |
|
313
|
|
|
|
|
|
|
Function: Sets the dbm file cache size for the index. |
|
314
|
|
|
|
|
|
|
Needs to be set before the DBM file gets opened. |
|
315
|
|
|
|
|
|
|
Example : $index->cachesize(1000000) |
|
316
|
|
|
|
|
|
|
Returns : size of the curent cache |
|
317
|
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
=cut |
|
319
|
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
sub cachesize { |
|
321
|
0
|
|
|
0
|
1
|
0
|
my( $self, $size ) = @_; |
|
322
|
|
|
|
|
|
|
|
|
323
|
0
|
0
|
|
|
|
0
|
if(defined $size){ |
|
324
|
0
|
|
|
|
|
0
|
$self->{'_cachesize'} = $size; |
|
325
|
|
|
|
|
|
|
} |
|
326
|
0
|
|
|
|
|
0
|
return ( $self->{'_cachesize'} ); |
|
327
|
|
|
|
|
|
|
} |
|
328
|
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
=head2 ffactor |
|
331
|
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
Usage : $index->ffactor(1000000) |
|
333
|
|
|
|
|
|
|
Function: Sets the dbm file fill factor. |
|
334
|
|
|
|
|
|
|
Needs to be set before the DBM file gets opened. |
|
335
|
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
Example : $index->ffactor(1000000) |
|
337
|
|
|
|
|
|
|
Returns : size of the curent cache |
|
338
|
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
=cut |
|
340
|
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
sub ffactor { |
|
342
|
0
|
|
|
0
|
1
|
0
|
my( $self, $size ) = @_; |
|
343
|
|
|
|
|
|
|
|
|
344
|
0
|
0
|
|
|
|
0
|
if(defined $size){ |
|
345
|
0
|
|
|
|
|
0
|
$self->{'_ffactor'} = $size; |
|
346
|
|
|
|
|
|
|
} |
|
347
|
0
|
|
|
|
|
0
|
return ( $self->{'_ffactor'} ); |
|
348
|
|
|
|
|
|
|
} |
|
349
|
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
=head2 open_dbm |
|
352
|
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
Usage : $index->open_dbm() |
|
354
|
|
|
|
|
|
|
Function: Opens the dbm file associated with the index |
|
355
|
|
|
|
|
|
|
object. Write access is only given if explicitly |
|
356
|
|
|
|
|
|
|
asked for by calling new(-write => 1) or having set |
|
357
|
|
|
|
|
|
|
the write_flag(1) on the index object. The type of |
|
358
|
|
|
|
|
|
|
dbm file opened is that returned by dbm_package(). |
|
359
|
|
|
|
|
|
|
The name of the file to be is opened is obtained by |
|
360
|
|
|
|
|
|
|
calling the filename() method. |
|
361
|
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
Example : $index->_open_dbm() |
|
363
|
|
|
|
|
|
|
Returns : 1 on success |
|
364
|
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
=cut |
|
366
|
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
sub open_dbm { |
|
368
|
4
|
|
|
4
|
1
|
5
|
my( $self ) = @_; |
|
369
|
|
|
|
|
|
|
|
|
370
|
4
|
50
|
|
|
|
10
|
my $filename = $self->filename() |
|
371
|
|
|
|
|
|
|
or $self->throw("filename() not set"); |
|
372
|
|
|
|
|
|
|
|
|
373
|
4
|
|
|
|
|
15
|
my $db = $self->db(); |
|
374
|
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
# Close the dbm file if already open (maybe we're getting |
|
376
|
|
|
|
|
|
|
# or dropping write access |
|
377
|
4
|
50
|
|
|
|
15
|
if (ref($db) ne 'HASH') { |
|
378
|
0
|
|
|
|
|
0
|
untie($db); |
|
379
|
|
|
|
|
|
|
} |
|
380
|
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
# What kind of DBM file are we going to open? |
|
382
|
4
|
|
|
|
|
14
|
my $dbm_type = $self->dbm_package; |
|
383
|
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
# Choose mode for opening dbm file (read/write+create or read-only). |
|
385
|
4
|
50
|
|
|
|
10
|
my $mode_flags = $self->write_flag ? O_RDWR|O_CREAT : O_RDONLY; |
|
386
|
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
# Open the dbm file |
|
388
|
4
|
50
|
|
|
|
11
|
if ($dbm_type eq 'DB_File') { |
|
389
|
0
|
|
|
|
|
0
|
my $hash_inf = DB_File::HASHINFO->new(); |
|
390
|
0
|
|
|
|
|
0
|
my $cache = $self->cachesize(); |
|
391
|
0
|
|
|
|
|
0
|
my $ffactor = $self->ffactor(); |
|
392
|
0
|
0
|
|
|
|
0
|
if ($cache){ |
|
393
|
0
|
|
|
|
|
0
|
$hash_inf->{'cachesize'} = $cache; |
|
394
|
|
|
|
|
|
|
} |
|
395
|
0
|
0
|
|
|
|
0
|
if ($ffactor){ |
|
396
|
0
|
|
|
|
|
0
|
$hash_inf->{'ffactor'} = $ffactor; |
|
397
|
|
|
|
|
|
|
} |
|
398
|
0
|
0
|
|
|
|
0
|
tie( %$db, $dbm_type, $filename, $mode_flags, 0644, $hash_inf ) |
|
399
|
|
|
|
|
|
|
or $self->throw("Can't open '$dbm_type' dbm file '$filename' : $!"); |
|
400
|
|
|
|
|
|
|
} else { |
|
401
|
4
|
50
|
|
|
|
554
|
tie( %$db, $dbm_type, $filename, $mode_flags, 0644 ) |
|
402
|
|
|
|
|
|
|
or $self->throw("Can't open '$dbm_type' dbm file '$filename' : $!"); |
|
403
|
|
|
|
|
|
|
} |
|
404
|
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
# The following methods access data in the dbm file: |
|
406
|
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
# Now, if we're a Bio::Index::Abstract caterpillar, then we |
|
408
|
|
|
|
|
|
|
# transform ourselves into a Bio::Index:: butterfly! |
|
409
|
4
|
50
|
|
|
|
16
|
if( ref($self) eq "Bio::Index::Abstract" ) { |
|
410
|
0
|
|
|
|
|
0
|
my $pkg = $self->_code_base(); |
|
411
|
0
|
|
|
|
|
0
|
bless $self, $pkg; |
|
412
|
|
|
|
|
|
|
} |
|
413
|
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
# Check or set this is the right kind and version of index |
|
415
|
4
|
|
|
|
|
22
|
$self->_type_and_version(); |
|
416
|
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
# Check files haven't changed size since they were indexed |
|
418
|
4
|
|
|
|
|
17
|
$self->_check_file_sizes(); |
|
419
|
|
|
|
|
|
|
|
|
420
|
4
|
|
|
|
|
6
|
return 1; |
|
421
|
|
|
|
|
|
|
} |
|
422
|
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
=head2 _version |
|
424
|
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
Title : _version |
|
426
|
|
|
|
|
|
|
Usage : $type = $index->_version() |
|
427
|
|
|
|
|
|
|
Function: Returns a string which identifes the version of an |
|
428
|
|
|
|
|
|
|
index module. Used to permanently identify an index |
|
429
|
|
|
|
|
|
|
file as having been created by a particular version |
|
430
|
|
|
|
|
|
|
of the index module. Must be provided by the sub class |
|
431
|
|
|
|
|
|
|
Example : |
|
432
|
|
|
|
|
|
|
Returns : |
|
433
|
|
|
|
|
|
|
Args : none |
|
434
|
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
=cut |
|
436
|
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
sub _version { |
|
438
|
0
|
|
|
0
|
|
0
|
my $self = shift; |
|
439
|
0
|
|
|
|
|
0
|
$self->throw("In Bio::Index::Abstract, no _version method in sub class"); |
|
440
|
|
|
|
|
|
|
} |
|
441
|
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
=head2 _code_base |
|
443
|
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
Title : _code_base |
|
445
|
|
|
|
|
|
|
Usage : $code = $db->_code_base(); |
|
446
|
|
|
|
|
|
|
Function: |
|
447
|
|
|
|
|
|
|
Example : |
|
448
|
|
|
|
|
|
|
Returns : Code package to be used with this |
|
449
|
|
|
|
|
|
|
Args : |
|
450
|
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
=cut |
|
453
|
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
sub _code_base { |
|
455
|
0
|
|
|
0
|
|
0
|
my ($self) = @_; |
|
456
|
0
|
|
|
|
|
0
|
my $code_key = '__TYPE_AND_VERSION'; |
|
457
|
0
|
|
|
|
|
0
|
my $record; |
|
458
|
|
|
|
|
|
|
|
|
459
|
0
|
|
|
|
|
0
|
$record = $self->db->{$code_key}; |
|
460
|
|
|
|
|
|
|
|
|
461
|
0
|
|
|
|
|
0
|
my($code,$version) = $self->unpack_record($record); |
|
462
|
0
|
0
|
|
|
|
0
|
if( wantarray ) { |
|
463
|
0
|
|
|
|
|
0
|
return ($code,$version); |
|
464
|
|
|
|
|
|
|
} else { |
|
465
|
0
|
|
|
|
|
0
|
return $code; |
|
466
|
|
|
|
|
|
|
} |
|
467
|
|
|
|
|
|
|
} |
|
468
|
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
=head2 _type_and_version |
|
471
|
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
Title : _type_and_version |
|
473
|
|
|
|
|
|
|
Usage : Called by _initalize |
|
474
|
|
|
|
|
|
|
Function: Checks that the index opened is made by the same index |
|
475
|
|
|
|
|
|
|
module and version of that module that made it. If the |
|
476
|
|
|
|
|
|
|
index is empty, then it adds the information to the |
|
477
|
|
|
|
|
|
|
database. |
|
478
|
|
|
|
|
|
|
Example : |
|
479
|
|
|
|
|
|
|
Returns : 1 or exception |
|
480
|
|
|
|
|
|
|
Args : none |
|
481
|
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
=cut |
|
483
|
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
sub _type_and_version { |
|
485
|
4
|
|
|
4
|
|
8
|
my $self = shift; |
|
486
|
4
|
|
|
|
|
6
|
my $key = '__TYPE_AND_VERSION'; |
|
487
|
4
|
|
|
|
|
17
|
my $version = $self->_version(); |
|
488
|
4
|
|
|
|
|
8
|
my $type = ref $self; |
|
489
|
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
# Run check or add type and version key if missing |
|
491
|
4
|
50
|
|
|
|
9
|
if (my $rec = $self->db->{ $key }) { |
|
492
|
0
|
|
|
|
|
0
|
my( $db_type, $db_version ) = $self->unpack_record($rec); |
|
493
|
0
|
0
|
|
|
|
0
|
$self->throw("This index file is from version [$db_version] - You need to rebuild it to use module version [$version]") |
|
494
|
|
|
|
|
|
|
unless $db_version == $version; |
|
495
|
0
|
0
|
|
|
|
0
|
$self->throw("This index file is type [$db_type] - Can't access it with module for [$type]") |
|
496
|
|
|
|
|
|
|
unless $db_type eq $type; |
|
497
|
|
|
|
|
|
|
} else { |
|
498
|
4
|
50
|
|
|
|
19
|
$self->add_record( $key, $type, $version ) |
|
499
|
|
|
|
|
|
|
or $self->throw("Can't add Type and Version record"); |
|
500
|
|
|
|
|
|
|
} |
|
501
|
4
|
|
|
|
|
9
|
return 1; |
|
502
|
|
|
|
|
|
|
} |
|
503
|
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
=head2 _check_file_sizes |
|
506
|
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
Title : _check_file_sizes |
|
508
|
|
|
|
|
|
|
Usage : $index->_check_file_sizes() |
|
509
|
|
|
|
|
|
|
Function: Verifies that the files listed in the database |
|
510
|
|
|
|
|
|
|
are the same size as when the database was built, |
|
511
|
|
|
|
|
|
|
or throws an exception. Called by the new() |
|
512
|
|
|
|
|
|
|
function. |
|
513
|
|
|
|
|
|
|
Example : |
|
514
|
|
|
|
|
|
|
Returns : 1 or exception |
|
515
|
|
|
|
|
|
|
Args : |
|
516
|
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
=cut |
|
518
|
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
sub _check_file_sizes { |
|
520
|
4
|
|
|
4
|
|
4
|
my $self = shift; |
|
521
|
4
|
|
50
|
|
|
16
|
my $num = $self->_file_count() || 0; |
|
522
|
|
|
|
|
|
|
|
|
523
|
4
|
|
|
|
|
18
|
for (my $i = 0; $i < $num; $i++) { |
|
524
|
0
|
|
|
|
|
0
|
my( $file, $stored_size ) = $self->unpack_record( $self->db->{"__FILE_$i"} ); |
|
525
|
0
|
|
|
|
|
0
|
my $size = -s $file; |
|
526
|
0
|
0
|
|
|
|
0
|
unless ($size == $stored_size) { |
|
527
|
0
|
|
|
|
|
0
|
$self->throw("file $i [ $file ] has changed size $stored_size -> $size. This probably means you need to rebuild the index."); |
|
528
|
|
|
|
|
|
|
} |
|
529
|
|
|
|
|
|
|
} |
|
530
|
4
|
|
|
|
|
5
|
return 1; |
|
531
|
|
|
|
|
|
|
} |
|
532
|
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
=head2 make_index |
|
535
|
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
Title : make_index |
|
537
|
|
|
|
|
|
|
Usage : $index->make_index( FILE_LIST ) |
|
538
|
|
|
|
|
|
|
Function: Takes a list of file names, checks that they are |
|
539
|
|
|
|
|
|
|
all fully qualified, and then calls _filename() on |
|
540
|
|
|
|
|
|
|
each. It supplies _filename() with the name of the |
|
541
|
|
|
|
|
|
|
file, and an integer which is stored with each record |
|
542
|
|
|
|
|
|
|
created by _filename(). Can be called multiple times, |
|
543
|
|
|
|
|
|
|
and can be used to add to an existing index file. |
|
544
|
|
|
|
|
|
|
Example : $index->make_index( '/home/seqs1', '/home/seqs2', '/nfs/pub/big_db' ); |
|
545
|
|
|
|
|
|
|
Returns : Number of files indexed |
|
546
|
|
|
|
|
|
|
Args : LIST OF FILES |
|
547
|
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
=cut |
|
549
|
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
sub make_index { |
|
551
|
4
|
|
|
4
|
1
|
8
|
my($self, @files) = @_; |
|
552
|
4
|
|
|
|
|
6
|
my $count = 0; |
|
553
|
4
|
|
|
|
|
5
|
my $recs = 0; |
|
554
|
|
|
|
|
|
|
# blow up if write flag is not set. EB fix |
|
555
|
|
|
|
|
|
|
|
|
556
|
4
|
50
|
|
|
|
11
|
if( !defined $self->write_flag ) { |
|
557
|
0
|
|
|
|
|
0
|
$self->throw("Attempting to make an index on a read-only database. What about a WRITE flag on opening the index?"); |
|
558
|
|
|
|
|
|
|
} |
|
559
|
|
|
|
|
|
|
|
|
560
|
|
|
|
|
|
|
# We're really fussy/lazy, expecting all file names to be fully qualified |
|
561
|
4
|
50
|
|
|
|
10
|
$self->throw("No files to index provided") unless @files; |
|
562
|
4
|
|
|
|
|
15
|
for(my $i=0;$i
|
|
563
|
4
|
50
|
33
|
|
|
41
|
if( $Bio::Root::IO::FILESPECLOADED && File::Spec->can('rel2abs') ) { |
|
564
|
4
|
50
|
33
|
|
|
41
|
if( ! File::Spec->file_name_is_absolute($files[$i]) |
|
565
|
|
|
|
|
|
|
&& $self->pathtype() ne 'relative') { |
|
566
|
4
|
|
|
|
|
79
|
$files[$i] = File::Spec->rel2abs($files[$i]); |
|
567
|
|
|
|
|
|
|
} |
|
568
|
|
|
|
|
|
|
} else { |
|
569
|
0
|
0
|
|
|
|
0
|
if( $^O =~ /MSWin/i ) { |
|
570
|
0
|
0
|
|
|
|
0
|
($files[$i] =~ m|^[A-Za-z]:/|) || |
|
571
|
|
|
|
|
|
|
$self->throw("Not an absolute file path '$files[$i]'"); |
|
572
|
|
|
|
|
|
|
} else { |
|
573
|
0
|
0
|
|
|
|
0
|
($files[$i] =~ m|^/|) || |
|
574
|
|
|
|
|
|
|
$self->throw("Not an absolute file path '$files[$i]'"); |
|
575
|
|
|
|
|
|
|
} |
|
576
|
|
|
|
|
|
|
} |
|
577
|
4
|
50
|
|
|
|
111
|
$self->throw("File does not exist '$files[$i]'") unless -e $files[$i]; |
|
578
|
|
|
|
|
|
|
} |
|
579
|
|
|
|
|
|
|
|
|
580
|
|
|
|
|
|
|
# Add each file to the index |
|
581
|
|
|
|
|
|
|
FILE : |
|
582
|
4
|
|
|
|
|
10
|
foreach my $file (@files) { |
|
583
|
|
|
|
|
|
|
|
|
584
|
4
|
|
|
|
|
4
|
my $i; # index for this file |
|
585
|
|
|
|
|
|
|
|
|
586
|
|
|
|
|
|
|
# Get new index for this file and increment file count |
|
587
|
4
|
50
|
|
|
|
8
|
if ( defined(my $count = $self->_file_count) ) { |
|
588
|
0
|
|
|
|
|
0
|
$i = $count; |
|
589
|
|
|
|
|
|
|
} else { |
|
590
|
4
|
|
|
|
|
5
|
$i = 0; $self->_file_count(0); |
|
|
4
|
|
|
|
|
19
|
|
|
591
|
|
|
|
|
|
|
} |
|
592
|
|
|
|
|
|
|
|
|
593
|
|
|
|
|
|
|
# see whether this file has been already indexed |
|
594
|
4
|
|
|
|
|
8
|
my ($record,$number,$size); |
|
595
|
|
|
|
|
|
|
|
|
596
|
4
|
50
|
|
|
|
8
|
if( ($record = $self->db->{"__FILENAME_$file"}) ) { |
|
597
|
0
|
|
|
|
|
0
|
($number,$size) = $self->unpack_record($record); |
|
598
|
|
|
|
|
|
|
|
|
599
|
|
|
|
|
|
|
# if it is the same size - fine. Otherwise die |
|
600
|
0
|
0
|
|
|
|
0
|
if( -s $file == $size ) { |
|
601
|
0
|
|
|
|
|
0
|
$self->warn("File $file already indexed. Skipping..."); |
|
602
|
0
|
|
|
|
|
0
|
next FILE; |
|
603
|
|
|
|
|
|
|
} else { |
|
604
|
0
|
|
|
|
|
0
|
$self->throw("In index, $file has changed size ($size). Indicates that the index is out of date"); |
|
605
|
|
|
|
|
|
|
} |
|
606
|
|
|
|
|
|
|
} |
|
607
|
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
# index this file |
|
609
|
4
|
|
|
|
|
18
|
$self->debug("Indexing file $file\n"); |
|
610
|
|
|
|
|
|
|
|
|
611
|
|
|
|
|
|
|
# this is supplied by the subclass and does the serious work |
|
612
|
4
|
|
|
|
|
23
|
$recs += $self->_index_file( $file, $i ); # Specific method for each type of index |
|
613
|
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
# Save file name and size for this index |
|
615
|
4
|
50
|
|
|
|
98
|
$self->add_record("__FILE_$i", $file, -s $file) |
|
616
|
|
|
|
|
|
|
or $self->throw("Can't add data to file: $file"); |
|
617
|
4
|
50
|
|
|
|
59
|
$self->add_record("__FILENAME_$file", $i, -s $file) |
|
618
|
|
|
|
|
|
|
or $self->throw("Can't add data to file: $file"); |
|
619
|
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
# increment file lines |
|
621
|
4
|
|
|
|
|
6
|
$i++; $self->_file_count($i); |
|
|
4
|
|
|
|
|
10
|
|
|
622
|
4
|
|
|
|
|
7
|
my $temp; |
|
623
|
4
|
|
|
|
|
9
|
$temp = $self->_file_count(); |
|
624
|
|
|
|
|
|
|
} |
|
625
|
4
|
|
|
|
|
11
|
return ($count, $recs); |
|
626
|
|
|
|
|
|
|
} |
|
627
|
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
=head2 pathtype |
|
629
|
|
|
|
|
|
|
|
|
630
|
|
|
|
|
|
|
Title : pathtype |
|
631
|
|
|
|
|
|
|
Usage : $index->pathtype($pathtype) |
|
632
|
|
|
|
|
|
|
Function: Set the type of the file path |
|
633
|
|
|
|
|
|
|
Only two values are supported, 'relative' or 'absolute'. |
|
634
|
|
|
|
|
|
|
If the user does not give any value, it is set to |
|
635
|
|
|
|
|
|
|
absolute by default. Thus it mimics the default |
|
636
|
|
|
|
|
|
|
behavior of Bio::Index::Abstract module. |
|
637
|
|
|
|
|
|
|
Example : my $index = Bio::Index::Abstract->(-pathtype => 'relative', |
|
638
|
|
|
|
|
|
|
-file => $file.inx, |
|
639
|
|
|
|
|
|
|
); |
|
640
|
|
|
|
|
|
|
or |
|
641
|
|
|
|
|
|
|
$index->pathtype('relative'); |
|
642
|
|
|
|
|
|
|
Returns : Type of the path. |
|
643
|
|
|
|
|
|
|
Args : String (relative|absolute) |
|
644
|
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
=cut |
|
646
|
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
sub pathtype { |
|
648
|
|
|
|
|
|
|
|
|
649
|
8
|
|
|
8
|
1
|
10
|
my($self, $type) = @_; |
|
650
|
|
|
|
|
|
|
|
|
651
|
8
|
100
|
|
|
|
17
|
if(defined($type)){ |
|
652
|
4
|
50
|
33
|
|
|
13
|
if($type ne 'absolute' && $type ne 'relative'){ |
|
653
|
0
|
|
|
|
|
0
|
$self->throw("Type of path can only be 'relative' or 'absolute', not [$type]."); |
|
654
|
|
|
|
|
|
|
} |
|
655
|
4
|
|
|
|
|
8
|
$self->{'_filepathtype'} = $type; |
|
656
|
|
|
|
|
|
|
} |
|
657
|
|
|
|
|
|
|
|
|
658
|
8
|
|
|
|
|
22
|
return $self->{'_filepathtype'}; |
|
659
|
|
|
|
|
|
|
} |
|
660
|
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
=head2 _filename |
|
663
|
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
Title : _filename |
|
665
|
|
|
|
|
|
|
Usage : $index->_filename( FILE INT ) |
|
666
|
|
|
|
|
|
|
Function: Indexes the file |
|
667
|
|
|
|
|
|
|
Example : |
|
668
|
|
|
|
|
|
|
Returns : |
|
669
|
|
|
|
|
|
|
Args : |
|
670
|
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
=cut |
|
672
|
|
|
|
|
|
|
|
|
673
|
|
|
|
|
|
|
sub _index_file { |
|
674
|
0
|
|
|
0
|
|
0
|
my $self = shift; |
|
675
|
|
|
|
|
|
|
|
|
676
|
0
|
|
|
|
|
0
|
my $pkg = ref($self); |
|
677
|
0
|
|
|
|
|
0
|
$self->throw("Error: '$pkg' does not provide the _index_file() method"); |
|
678
|
|
|
|
|
|
|
} |
|
679
|
|
|
|
|
|
|
|
|
680
|
|
|
|
|
|
|
|
|
681
|
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
=head2 _file_handle |
|
683
|
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
Title : _file_handle |
|
685
|
|
|
|
|
|
|
Usage : $fh = $index->_file_handle( INT ) |
|
686
|
|
|
|
|
|
|
Function: Returns an open filehandle for the file |
|
687
|
|
|
|
|
|
|
index INT. On opening a new filehandle it |
|
688
|
|
|
|
|
|
|
caches it in the @{$index->_filehandle} array. |
|
689
|
|
|
|
|
|
|
If the requested filehandle is already open, |
|
690
|
|
|
|
|
|
|
it simply returns it from the array. |
|
691
|
|
|
|
|
|
|
Example : $first_file_indexed = $index->_file_handle( 0 ); |
|
692
|
|
|
|
|
|
|
Returns : ref to a filehandle |
|
693
|
|
|
|
|
|
|
Args : INT |
|
694
|
|
|
|
|
|
|
|
|
695
|
|
|
|
|
|
|
=cut |
|
696
|
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
sub _file_handle { |
|
698
|
16
|
|
|
16
|
|
30
|
my( $self, $i ) = @_; |
|
699
|
|
|
|
|
|
|
|
|
700
|
16
|
100
|
|
|
|
57
|
unless ($self->{'_filehandle'}[$i]) { |
|
701
|
4
|
50
|
|
|
|
11
|
my @rec = $self->unpack_record($self->db->{"__FILE_$i"}) |
|
702
|
|
|
|
|
|
|
or $self->throw("Can't get filename for index : $i"); |
|
703
|
4
|
|
|
|
|
7
|
my $file = $rec[0]; |
|
704
|
4
|
50
|
|
|
|
133
|
open my $fh, '<', $file or $self->throw("Could not read file '$file': $!"); |
|
705
|
4
|
|
|
|
|
13
|
$self->{'_filehandle'}[$i] = $fh; # Cache filehandle |
|
706
|
|
|
|
|
|
|
} |
|
707
|
16
|
|
|
|
|
32
|
return $self->{'_filehandle'}[$i]; |
|
708
|
|
|
|
|
|
|
} |
|
709
|
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
|
|
711
|
|
|
|
|
|
|
=head2 _file_count |
|
712
|
|
|
|
|
|
|
|
|
713
|
|
|
|
|
|
|
Title : _file_count |
|
714
|
|
|
|
|
|
|
Usage : $index->_file_count( INT ) |
|
715
|
|
|
|
|
|
|
Function: Used by the index building sub in a sub class to |
|
716
|
|
|
|
|
|
|
track the number of files indexed. Sets or gets |
|
717
|
|
|
|
|
|
|
the number of files indexed when called with or |
|
718
|
|
|
|
|
|
|
without an argument. |
|
719
|
|
|
|
|
|
|
Example : |
|
720
|
|
|
|
|
|
|
Returns : INT |
|
721
|
|
|
|
|
|
|
Args : INT |
|
722
|
|
|
|
|
|
|
|
|
723
|
|
|
|
|
|
|
=cut |
|
724
|
|
|
|
|
|
|
|
|
725
|
|
|
|
|
|
|
sub _file_count { |
|
726
|
20
|
|
|
20
|
|
24
|
my $self = shift; |
|
727
|
20
|
100
|
|
|
|
36
|
if (@_) { |
|
728
|
8
|
|
|
|
|
14
|
$self->db->{'__FILE_COUNT'} = shift; |
|
729
|
|
|
|
|
|
|
} |
|
730
|
20
|
|
|
|
|
30
|
return $self->db->{'__FILE_COUNT'}; |
|
731
|
|
|
|
|
|
|
} |
|
732
|
|
|
|
|
|
|
|
|
733
|
|
|
|
|
|
|
|
|
734
|
|
|
|
|
|
|
=head2 add_record |
|
735
|
|
|
|
|
|
|
|
|
736
|
|
|
|
|
|
|
Title : add_record |
|
737
|
|
|
|
|
|
|
Usage : $index->add_record( $id, @stuff ); |
|
738
|
|
|
|
|
|
|
Function: Calls pack_record on @stuff, and adds the result |
|
739
|
|
|
|
|
|
|
of pack_record to the index database under key $id. |
|
740
|
|
|
|
|
|
|
If $id is a reference to an array, then a new entry |
|
741
|
|
|
|
|
|
|
is added under a key corresponding to each element |
|
742
|
|
|
|
|
|
|
of the array. |
|
743
|
|
|
|
|
|
|
Example : $index->add_record( $id, $fileNumber, $begin, $end ) |
|
744
|
|
|
|
|
|
|
Returns : TRUE on success or FALSE on failure |
|
745
|
|
|
|
|
|
|
Args : ID LIST |
|
746
|
|
|
|
|
|
|
|
|
747
|
|
|
|
|
|
|
=cut |
|
748
|
|
|
|
|
|
|
|
|
749
|
|
|
|
|
|
|
sub add_record { |
|
750
|
33
|
|
|
33
|
1
|
58
|
my( $self, $id, @rec ) = @_; |
|
751
|
33
|
|
|
|
|
73
|
$self->debug( "Adding key $id\n"); |
|
752
|
33
|
50
|
|
|
|
55
|
if( exists $self->db->{$id} ) { |
|
753
|
0
|
|
|
|
|
0
|
$self->warn("overwriting a current value stored for $id\n"); |
|
754
|
|
|
|
|
|
|
} |
|
755
|
33
|
|
|
|
|
69
|
$self->db->{$id} = $self->pack_record( @rec ); |
|
756
|
33
|
|
|
|
|
120
|
return 1; |
|
757
|
|
|
|
|
|
|
} |
|
758
|
|
|
|
|
|
|
|
|
759
|
|
|
|
|
|
|
|
|
760
|
|
|
|
|
|
|
=head2 pack_record |
|
761
|
|
|
|
|
|
|
|
|
762
|
|
|
|
|
|
|
Title : pack_record |
|
763
|
|
|
|
|
|
|
Usage : $packed_string = $index->pack_record( LIST ) |
|
764
|
|
|
|
|
|
|
Function: Packs an array of scalars into a single string |
|
765
|
|
|
|
|
|
|
joined by ASCII 034 (which is unlikely to be used |
|
766
|
|
|
|
|
|
|
in any of the strings), and returns it. |
|
767
|
|
|
|
|
|
|
Example : $packed_string = $index->pack_record( $fileNumber, $begin, $end ) |
|
768
|
|
|
|
|
|
|
Returns : STRING or undef |
|
769
|
|
|
|
|
|
|
Args : LIST |
|
770
|
|
|
|
|
|
|
|
|
771
|
|
|
|
|
|
|
=cut |
|
772
|
|
|
|
|
|
|
|
|
773
|
|
|
|
|
|
|
sub pack_record { |
|
774
|
33
|
|
|
33
|
1
|
43
|
my( $self, @args ) = @_; |
|
775
|
|
|
|
|
|
|
# Silence undefined warnings |
|
776
|
|
|
|
|
|
|
@args = map { |
|
777
|
33
|
50
|
|
|
|
88
|
$_ = (defined $_) ? $_ : ''; |
|
|
66
|
|
|
|
|
76
|
|
|
778
|
66
|
|
|
|
|
87
|
$_ ; |
|
779
|
|
|
|
|
|
|
} @args; |
|
780
|
33
|
|
|
|
|
137
|
return join "\034", @args; |
|
781
|
|
|
|
|
|
|
} |
|
782
|
|
|
|
|
|
|
|
|
783
|
|
|
|
|
|
|
=head2 unpack_record |
|
784
|
|
|
|
|
|
|
|
|
785
|
|
|
|
|
|
|
Title : unpack_record |
|
786
|
|
|
|
|
|
|
Usage : $index->unpack_record( STRING ) |
|
787
|
|
|
|
|
|
|
Function: Splits the sting provided into an array, |
|
788
|
|
|
|
|
|
|
splitting on ASCII 034. |
|
789
|
|
|
|
|
|
|
Example : ( $fileNumber, $begin, $end ) = $index->unpack_record( $self->db->{$id} ) |
|
790
|
|
|
|
|
|
|
Returns : A 3 element ARRAY |
|
791
|
|
|
|
|
|
|
Args : STRING containing ASCII 034 |
|
792
|
|
|
|
|
|
|
|
|
793
|
|
|
|
|
|
|
=cut |
|
794
|
|
|
|
|
|
|
|
|
795
|
|
|
|
|
|
|
sub unpack_record { |
|
796
|
20
|
|
|
20
|
1
|
41
|
my( $self, @args ) = @_; |
|
797
|
20
|
|
|
|
|
83
|
return split /\034/, $args[0]; |
|
798
|
|
|
|
|
|
|
} |
|
799
|
|
|
|
|
|
|
|
|
800
|
|
|
|
|
|
|
=head2 count_records |
|
801
|
|
|
|
|
|
|
|
|
802
|
|
|
|
|
|
|
Title : count_records |
|
803
|
|
|
|
|
|
|
Usage : $recs = $seqdb->count_records() |
|
804
|
|
|
|
|
|
|
Function: return count of all recs in the index |
|
805
|
|
|
|
|
|
|
Example : |
|
806
|
|
|
|
|
|
|
Returns : a scalar |
|
807
|
|
|
|
|
|
|
Args : none |
|
808
|
|
|
|
|
|
|
|
|
809
|
|
|
|
|
|
|
|
|
810
|
|
|
|
|
|
|
=cut |
|
811
|
|
|
|
|
|
|
|
|
812
|
|
|
|
|
|
|
sub count_records { |
|
813
|
0
|
|
|
0
|
1
|
0
|
my ($self,@args) = @_; |
|
814
|
0
|
|
|
|
|
0
|
my $db = $self->db; |
|
815
|
0
|
|
|
|
|
0
|
my $c = 0; |
|
816
|
0
|
|
|
|
|
0
|
while (my($id, $rec) = each %$db) { |
|
817
|
0
|
0
|
|
|
|
0
|
if( $id =~ /^__/ ) { |
|
818
|
|
|
|
|
|
|
# internal info |
|
819
|
0
|
|
|
|
|
0
|
next; |
|
820
|
|
|
|
|
|
|
} |
|
821
|
0
|
|
|
|
|
0
|
$c++; |
|
822
|
|
|
|
|
|
|
} |
|
823
|
0
|
|
|
|
|
0
|
return ($c); |
|
824
|
|
|
|
|
|
|
} |
|
825
|
|
|
|
|
|
|
|
|
826
|
|
|
|
|
|
|
|
|
827
|
|
|
|
|
|
|
=head2 DESTROY |
|
828
|
|
|
|
|
|
|
|
|
829
|
|
|
|
|
|
|
Title : DESTROY |
|
830
|
|
|
|
|
|
|
Usage : Called automatically when index goes out of scope |
|
831
|
|
|
|
|
|
|
Function: Closes connection to database and handles to |
|
832
|
|
|
|
|
|
|
sequence files |
|
833
|
|
|
|
|
|
|
Returns : NEVER |
|
834
|
|
|
|
|
|
|
Args : NONE |
|
835
|
|
|
|
|
|
|
|
|
836
|
|
|
|
|
|
|
|
|
837
|
|
|
|
|
|
|
=cut |
|
838
|
|
|
|
|
|
|
|
|
839
|
|
|
|
|
|
|
sub DESTROY { |
|
840
|
8
|
|
|
8
|
|
563
|
my $self = shift; |
|
841
|
8
|
|
|
|
|
14
|
untie($self->{'_DB'}); |
|
842
|
|
|
|
|
|
|
# An additional undef was the only way to force |
|
843
|
|
|
|
|
|
|
# the object to drop the open filehandles for ActivePerl |
|
844
|
8
|
|
|
|
|
322
|
undef $self->{'_DB'}; |
|
845
|
|
|
|
|
|
|
} |
|
846
|
|
|
|
|
|
|
|
|
847
|
|
|
|
|
|
|
1; |