line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Search::InvertedIndex::Simple::BerkeleyDB; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
# Name: |
4
|
|
|
|
|
|
|
# Search::InvertedIndex::Simple::BerkeleyDB. |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Documentation: |
7
|
|
|
|
|
|
|
# POD-style documentation is at the end. Extract it with pod2html.*. |
8
|
|
|
|
|
|
|
# |
9
|
|
|
|
|
|
|
# Reference: |
10
|
|
|
|
|
|
|
# Object Oriented Perl |
11
|
|
|
|
|
|
|
# Damian Conway |
12
|
|
|
|
|
|
|
# Manning |
13
|
|
|
|
|
|
|
# 1-884777-79-1 |
14
|
|
|
|
|
|
|
# P 114 |
15
|
|
|
|
|
|
|
# |
16
|
|
|
|
|
|
|
# Note: |
17
|
|
|
|
|
|
|
# o Tab = 4 spaces || die. |
18
|
|
|
|
|
|
|
# |
19
|
|
|
|
|
|
|
# Author: |
20
|
|
|
|
|
|
|
# Ron Savage |
21
|
|
|
|
|
|
|
# Home page: http://savage.net.au/index.html |
22
|
|
|
|
|
|
|
# |
23
|
|
|
|
|
|
|
# Licence: |
24
|
|
|
|
|
|
|
# Australian copyright (c) 1999-2002 Ron Savage. |
25
|
|
|
|
|
|
|
# |
26
|
|
|
|
|
|
|
# All Programs of mine are 'OSI Certified Open Source Software'; |
27
|
|
|
|
|
|
|
# you can redistribute them and/or modify them under the terms of |
28
|
|
|
|
|
|
|
# The Artistic License, a copy of which is available at: |
29
|
|
|
|
|
|
|
# http://www.opensource.org/licenses/index.html |
30
|
|
|
|
|
|
|
|
31
|
1
|
|
|
1
|
|
32887
|
use strict; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
39
|
|
32
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
28
|
|
33
|
1
|
|
|
1
|
|
5
|
no warnings 'redefine'; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
43
|
|
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
require 5.005_62; |
36
|
|
|
|
|
|
|
|
37
|
1
|
|
|
1
|
|
490
|
use BerkeleyDB; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
use Search::InvertedIndex::Simple; |
39
|
|
|
|
|
|
|
use Set::Array; |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
our @ISA = qw(Search::InvertedIndex::Simple); |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
# Items to export into callers namespace by default. Note: do not export |
44
|
|
|
|
|
|
|
# names by default without a very good reason. Use EXPORT_OK instead. |
45
|
|
|
|
|
|
|
# Do not simply export all your public functions/methods/constants. |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
# This allows declaration use Search::InvertedIndex::Simple::BerkeleyDB ':all'; |
48
|
|
|
|
|
|
|
# If you do not need this, moving things directly into @EXPORT or @EXPORT_OK |
49
|
|
|
|
|
|
|
# will save memory. |
50
|
|
|
|
|
|
|
our %EXPORT_TAGS = ( 'all' => [ qw( |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
) ] ); |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
our @EXPORT = qw( |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
); |
59
|
|
|
|
|
|
|
our $VERSION = '1.06'; |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
# ----------------------------------------------- |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
# Encapsulated class data. |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
{ |
66
|
|
|
|
|
|
|
my(%_attr_data) = |
67
|
|
|
|
|
|
|
( # Alphabetical order. |
68
|
|
|
|
|
|
|
_dataset => [], |
69
|
|
|
|
|
|
|
_keyset => [], |
70
|
|
|
|
|
|
|
_lower_case => 0, |
71
|
|
|
|
|
|
|
_separator => ',', |
72
|
|
|
|
|
|
|
); |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
sub _default_for |
75
|
|
|
|
|
|
|
{ |
76
|
|
|
|
|
|
|
my($self, $attr_name) = @_; |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
$_attr_data{$attr_name}; |
79
|
|
|
|
|
|
|
} |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
sub _standard_keys |
82
|
|
|
|
|
|
|
{ |
83
|
|
|
|
|
|
|
sort keys %_attr_data; |
84
|
|
|
|
|
|
|
} |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
} # End of Encapsulated class data. |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
# ----------------------------------------------- |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
sub db_get |
91
|
|
|
|
|
|
|
{ |
92
|
|
|
|
|
|
|
my($self, $key) = @_; |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
my(%value); |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
for (sort keys %$key) |
97
|
|
|
|
|
|
|
{ |
98
|
|
|
|
|
|
|
Carp::croak("Unknown key: $_") if (! $$self{'_key'}{$_}); |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
$$self{'_dbh'}{$_} -> db_get($$key{$_}, $value{$_}); |
101
|
|
|
|
|
|
|
} |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
return \%value; |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
} # End of db_get. |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
# ----------------------------------------------- |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
sub db_print |
110
|
|
|
|
|
|
|
{ |
111
|
|
|
|
|
|
|
my($self) = @_; |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
my($key, @log); |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
for $key (sort keys %{$$self{'_dbh'} }) |
116
|
|
|
|
|
|
|
{ |
117
|
|
|
|
|
|
|
my($count) = 0; |
118
|
|
|
|
|
|
|
my($cursor) = $$self{'_dbh'}{$key} -> db_cursor(); |
119
|
|
|
|
|
|
|
my($k, $v) = ('', ''); |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
push @log, $key; |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
while ($cursor -> c_get($k, $v, DB_NEXT) == 0) |
124
|
|
|
|
|
|
|
{ |
125
|
|
|
|
|
|
|
$count++; |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
push @log, "$count: $k => $v"; |
128
|
|
|
|
|
|
|
} |
129
|
|
|
|
|
|
|
} |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
return \@log; |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
} # End of db_print. |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
# ----------------------------------------------- |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
sub db_put |
138
|
|
|
|
|
|
|
{ |
139
|
|
|
|
|
|
|
my($self) = @_; |
140
|
|
|
|
|
|
|
my($env) = BerkeleyDB::Env -> new |
141
|
|
|
|
|
|
|
( |
142
|
|
|
|
|
|
|
Flags => DB_PRIVATE, # Use RAM rather than disk files for the database. |
143
|
|
|
|
|
|
|
); |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
for (@{$$self{'_keyset'} }) |
146
|
|
|
|
|
|
|
{ |
147
|
|
|
|
|
|
|
$$self{'_dbh'}{$_} = BerkeleyDB::Btree -> new |
148
|
|
|
|
|
|
|
( |
149
|
|
|
|
|
|
|
Env => $env, |
150
|
|
|
|
|
|
|
) || Carp::croak("Can't create BerkeleyDB::Btree for index $_: $!"); |
151
|
|
|
|
|
|
|
} |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
my($index) = $self -> build_index(); |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
my($primary_key, $secondary_key, $key); |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
for $primary_key (sort keys %$index) |
158
|
|
|
|
|
|
|
{ |
159
|
|
|
|
|
|
|
for $secondary_key (sort keys %{$$index{$primary_key} }) |
160
|
|
|
|
|
|
|
{ |
161
|
|
|
|
|
|
|
$key = $$self{'_lower_case'} == 0 ? $secondary_key : lc $secondary_key; |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
$$self{'_dbh'}{$primary_key} -> db_put($key, join(',', $$index{$primary_key}{$secondary_key} -> print() ) ); |
164
|
|
|
|
|
|
|
} |
165
|
|
|
|
|
|
|
} |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
} # End of db_put. |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
# ----------------------------------------------- |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
sub inflate |
172
|
|
|
|
|
|
|
{ |
173
|
|
|
|
|
|
|
my($self, $value) = @_; |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
my($set); |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
for (sort keys %$value) |
178
|
|
|
|
|
|
|
{ |
179
|
|
|
|
|
|
|
if (! $$value{$_}) |
180
|
|
|
|
|
|
|
{ |
181
|
|
|
|
|
|
|
$set = undef; |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
last; |
184
|
|
|
|
|
|
|
} |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
if (! $set) |
187
|
|
|
|
|
|
|
{ |
188
|
|
|
|
|
|
|
$set = Set::Array -> new(split(/$$self{'_separator'}/, $$value{$_}) ); |
189
|
|
|
|
|
|
|
} |
190
|
|
|
|
|
|
|
else |
191
|
|
|
|
|
|
|
{ |
192
|
|
|
|
|
|
|
$set = Set::Array -> new(join(',', $set -> intersection(Set::Array -> new(split(/$$self{'_separator'}/, $$value{$_}) ) ) ) ); |
193
|
|
|
|
|
|
|
} |
194
|
|
|
|
|
|
|
} |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
return $set; |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
} # End of inflate. |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
# ----------------------------------------------- |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
sub new |
203
|
|
|
|
|
|
|
{ |
204
|
|
|
|
|
|
|
my($class, %arg) = @_; |
205
|
|
|
|
|
|
|
my($self) = bless({}, $class); |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
for my $attr_name ($self -> _standard_keys() ) |
208
|
|
|
|
|
|
|
{ |
209
|
|
|
|
|
|
|
my($arg_name) = $attr_name =~ /^_(.*)/; |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
if (exists($arg{$arg_name}) ) |
212
|
|
|
|
|
|
|
{ |
213
|
|
|
|
|
|
|
$$self{$attr_name} = $arg{$arg_name}; |
214
|
|
|
|
|
|
|
} |
215
|
|
|
|
|
|
|
else |
216
|
|
|
|
|
|
|
{ |
217
|
|
|
|
|
|
|
$$self{$attr_name} = $self -> _default_for($attr_name); |
218
|
|
|
|
|
|
|
} |
219
|
|
|
|
|
|
|
} |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
# There will be 1 database handle per entry in @{$$self{'_keyset'} }. |
222
|
|
|
|
|
|
|
# Also, convert keyset into a hash for ease of testing existances. |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
$$self{'_dbh'} = {}; |
225
|
|
|
|
|
|
|
@{$$self{'_key'} }{@{$$self{'_keyset'} } } = (1) x @{$$self{'_keyset'} }; |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
return $self; |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
} # End of new. |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
# ----------------------------------------------- |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
1; |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
__END__ |