line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
## -*- Mode: CPerl -*- |
2
|
|
|
|
|
|
|
## File: DiaColloDB::EnumFile::MMap.pm |
3
|
|
|
|
|
|
|
## Author: Bryan Jurish <moocow@cpan.org> |
4
|
|
|
|
|
|
|
## Description: collocation db, symbol<->integer enum, using mmap |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
package DiaColloDB::EnumFile::MMap; |
7
|
1
|
|
|
1
|
|
7
|
use DiaColloDB::EnumFile; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
36
|
|
8
|
1
|
|
|
1
|
|
5
|
use DiaColloDB::Utils qw(:fcntl :file :json :regex); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
37
|
|
9
|
1
|
|
|
1
|
|
324
|
use File::Map qw(map_handle); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
6
|
|
10
|
1
|
|
|
1
|
|
150
|
use Fcntl qw(:DEFAULT :seek); |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
30
|
|
11
|
1
|
|
|
1
|
|
438
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
1313
|
|
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
##============================================================================== |
14
|
|
|
|
|
|
|
## Globals & Constants |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
our @ISA = qw(DiaColloDB::EnumFile); |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
##============================================================================== |
19
|
|
|
|
|
|
|
## Constructors etc. |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
## $cldb = CLASS_OR_OBJECT->new(%args) |
22
|
|
|
|
|
|
|
## + %args, object structure: |
23
|
|
|
|
|
|
|
## ( |
24
|
|
|
|
|
|
|
## ##-- EnumFile: basic options |
25
|
|
|
|
|
|
|
## base => $base, ##-- database basename; use files "${base}.es", "${base}.esx", "${base}.eix", "${base}.hdr" |
26
|
|
|
|
|
|
|
## perms => $perms, ##-- default: 0666 & ~umask |
27
|
|
|
|
|
|
|
## flags => $flags, ##-- default: 'r' |
28
|
|
|
|
|
|
|
## pack_i => $pack_i, ##-- integer pack template (default='N') |
29
|
|
|
|
|
|
|
## pack_o => $pack_o, ##-- file offset pack template (default='N') |
30
|
|
|
|
|
|
|
## pack_l => $pack_l, ##-- string-length pack template (default='n') |
31
|
|
|
|
|
|
|
## pack_s => $pack_s, ##-- string pack template (default=undef) for text i/o |
32
|
|
|
|
|
|
|
## size => $size, ##-- number of mapped symbols, like scalar(@i2s) |
33
|
|
|
|
|
|
|
## utf8 => $bool, ##-- true iff strings are stored as utf8 (default, used by re2i()) |
34
|
|
|
|
|
|
|
## ## |
35
|
|
|
|
|
|
|
## ##-- EnumFile: in-memory construction and caching |
36
|
|
|
|
|
|
|
## s2i => \%s2i, ##-- maps symbols to integers |
37
|
|
|
|
|
|
|
## i2s => \@i2s, ##-- maps integers to symbols |
38
|
|
|
|
|
|
|
## dirty => $bool, ##-- true if in-memory structures are not in-sync with file data |
39
|
|
|
|
|
|
|
## loaded => $bool, ##-- true if file data has been loaded to memory |
40
|
|
|
|
|
|
|
## shared => $bool, ##-- true to avoid closing filehandles on close() or DESTROY() (default=false) |
41
|
|
|
|
|
|
|
## ## |
42
|
|
|
|
|
|
|
## ##-- EnumFile: pack lengths (after open()) |
43
|
|
|
|
|
|
|
## len_i => $len_i, ##-- bytes::length(pack($pack_i,0)) |
44
|
|
|
|
|
|
|
## len_o => $len_o, ##-- bytes::length(pack($pack_o,0)) |
45
|
|
|
|
|
|
|
## len_l => $len_l, ##-- bytes::length(pack($pack_l,0)) |
46
|
|
|
|
|
|
|
## len_sx => $len_sx, ##-- $len_o + $len_i |
47
|
|
|
|
|
|
|
## ## |
48
|
|
|
|
|
|
|
## ##-- EnumFile: filehandles (after open()) |
49
|
|
|
|
|
|
|
## sfh => $sfh, ##-- $base.es : pack("(${pack_l}/A)*", @$i2s) |
50
|
|
|
|
|
|
|
## ixfh => $ixfh, ##-- $base.eix : [$i] => pack("${pack_o}", $offset_in_sfh_of_string_with_id_i) |
51
|
|
|
|
|
|
|
## sxfh => $sxfh, ##-- $base.esx : [$j] => pack("${pack_o}${pack_i}", $offset_in_sfh_of_string_with_sortindex_j_and_id_i, $i) |
52
|
|
|
|
|
|
|
## ## |
53
|
|
|
|
|
|
|
## ##-- EnumFile::MMap: buffers |
54
|
|
|
|
|
|
|
## sbufr => \$sbuf, ##-- mmap $base.es |
55
|
|
|
|
|
|
|
## ixbufr => \$ixbuf, ##-- mmap $base.eix |
56
|
|
|
|
|
|
|
## sxbufr => \$sxbuf, ##-- mmap $base.esx |
57
|
|
|
|
|
|
|
## ) |
58
|
|
|
|
|
|
|
sub new { |
59
|
0
|
|
|
0
|
1
|
|
my $that = shift; |
60
|
0
|
|
|
|
|
|
return $that->SUPER::new( |
61
|
|
|
|
|
|
|
#sbufr=>undef, |
62
|
|
|
|
|
|
|
#ixbufr=>undef, |
63
|
|
|
|
|
|
|
#sxbufr=>undef, |
64
|
|
|
|
|
|
|
@_, ##-- user arguments |
65
|
|
|
|
|
|
|
); |
66
|
|
|
|
|
|
|
} |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
##============================================================================== |
69
|
|
|
|
|
|
|
## I/O |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
##-------------------------------------------------------------- |
72
|
|
|
|
|
|
|
## I/O: open/close (file) |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
## $enum_or_undef = $enum->open($base,$flags) |
75
|
|
|
|
|
|
|
## $enum_or_undef = $enum->open($base) |
76
|
|
|
|
|
|
|
## $enum_or_undef = $enum->open() |
77
|
|
|
|
|
|
|
## + opens file(s), clears {loaded} flag |
78
|
|
|
|
|
|
|
sub open { |
79
|
0
|
|
|
0
|
1
|
|
my ($enum,$base,$flags) = @_; |
80
|
0
|
0
|
|
|
|
|
$enum->SUPER::open($base,$flags) or return undef; |
81
|
0
|
0
|
|
|
|
|
return $enum if (!$enum->isa(__PACKAGE__)); ##-- superclass open() promoted us to another class |
82
|
0
|
|
|
|
|
|
return $enum->remap(); |
83
|
|
|
|
|
|
|
} |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
## $enum_or_undef = $enum->remap() |
86
|
|
|
|
|
|
|
## + re-maps mmap buffers from enum handles |
87
|
|
|
|
|
|
|
sub remap { |
88
|
0
|
|
|
0
|
0
|
|
my $enum = shift; |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
##-- mmap handles |
91
|
0
|
|
|
|
|
|
my $mapmode = fcperl($enum->{flags}); |
92
|
0
|
|
|
|
|
|
map_handle(my $sbuf, $enum->{sfh}, $mapmode); |
93
|
0
|
|
|
|
|
|
map_handle(my $ixbuf, $enum->{ixfh}, $mapmode); |
94
|
0
|
|
|
|
|
|
map_handle(my $sxbuf, $enum->{sxfh}, $mapmode); |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
##-- buffers |
97
|
0
|
|
|
|
|
|
$enum->{sbufr} = \$sbuf; |
98
|
0
|
|
|
|
|
|
$enum->{ixbufr} = \$ixbuf; |
99
|
0
|
|
|
|
|
|
$enum->{sxbufr} = \$sxbuf; |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
##-- flags |
102
|
0
|
|
|
|
|
|
$enum->{loaded} = 0; |
103
|
|
|
|
|
|
|
|
104
|
0
|
|
|
|
|
|
return $enum; |
105
|
|
|
|
|
|
|
} |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
## $enum_or_undef = $enum->close() |
108
|
|
|
|
|
|
|
sub close { |
109
|
0
|
|
|
0
|
1
|
|
my $enum = shift; |
110
|
0
|
0
|
0
|
|
|
|
if ($enum->opened && fcwrite($enum->{flags})) { |
111
|
0
|
0
|
|
|
|
|
$enum->flush() or return undef; |
112
|
|
|
|
|
|
|
} |
113
|
0
|
|
|
|
|
|
delete @$enum{qw(sbufr ixbufr sxbufr)}; |
114
|
0
|
|
|
|
|
|
return $enum->SUPER::close(); |
115
|
|
|
|
|
|
|
} |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
## $bool = $enum->opened() |
118
|
|
|
|
|
|
|
sub opened { |
119
|
0
|
|
|
0
|
1
|
|
my $enum = shift; |
120
|
|
|
|
|
|
|
return |
121
|
|
|
|
|
|
|
( |
122
|
|
|
|
|
|
|
#defined($enum->{base}) && |
123
|
|
|
|
|
|
|
defined($enum->{sbufr}) |
124
|
|
|
|
|
|
|
&& defined($enum->{ixbufr}) |
125
|
|
|
|
|
|
|
&& defined($enum->{sxbufr}) |
126
|
0
|
|
0
|
|
|
|
); |
127
|
|
|
|
|
|
|
} |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
## $bool = $enum->reopen() |
130
|
|
|
|
|
|
|
## + re-opens datafiles |
131
|
|
|
|
|
|
|
## + override also remaps buffers |
132
|
|
|
|
|
|
|
sub reopen { |
133
|
0
|
|
|
0
|
0
|
|
my $enum = shift; |
134
|
0
|
|
0
|
|
|
|
return $enum->SUPER::reopen() && $enum->remap(); |
135
|
|
|
|
|
|
|
} |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
## $bool = $enum->dirty() |
139
|
|
|
|
|
|
|
## + returns true iff some in-memory structures haven't been flushed to disk |
140
|
|
|
|
|
|
|
## + INHERITED |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
## $bool = $enum->loaded() |
143
|
|
|
|
|
|
|
## + returns true iff in-memory structures have been populated from disk |
144
|
|
|
|
|
|
|
## + INHERITED |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
## $bool = $enum->flush() |
147
|
|
|
|
|
|
|
## $bool = $enum->flush($force) |
148
|
|
|
|
|
|
|
## + flush in-memory structures to disk |
149
|
|
|
|
|
|
|
## + no-op unless $force or $enum->dirty() is true |
150
|
|
|
|
|
|
|
## + clobbers any old disk-file contents with in-memory maps |
151
|
|
|
|
|
|
|
## + enum must be opened in write-mode |
152
|
|
|
|
|
|
|
## + INHERITED |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
##-------------------------------------------------------------- |
156
|
|
|
|
|
|
|
## I/O: memory <-> file |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
## \@i2s = $enum->toArray() |
159
|
|
|
|
|
|
|
sub toArray { |
160
|
0
|
|
|
0
|
1
|
|
my $enum = shift; |
161
|
0
|
0
|
0
|
|
|
|
return $enum->{i2s}//[] if ($enum->loaded || !$enum->opened); |
|
|
|
0
|
|
|
|
|
162
|
0
|
|
|
|
|
|
my @i2s = unpack("($enum->{pack_l}/A)*", ${$enum->{sbufr}}); |
|
0
|
|
|
|
|
|
|
163
|
0
|
0
|
0
|
|
|
|
push(@i2s, @{$enum->{i2s}}[scalar(@i2s)..$#{$enum->{i2s}}]) if ($enum->dirty && $enum->{i2s}); |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
164
|
0
|
|
|
|
|
|
return \@i2s; |
165
|
|
|
|
|
|
|
} |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
## $enum = $enum->fromArray(\@i2s) |
168
|
|
|
|
|
|
|
## + clobbers $enum contents, steals \@i2s |
169
|
|
|
|
|
|
|
## + INHERITED |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
## $enum = $enum->fromEnum($enum2) |
172
|
|
|
|
|
|
|
## + clobbers $enum contents, does NOT steal $enum2->{i2s} |
173
|
|
|
|
|
|
|
## + INHERITED |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
## $bool = $enum->load() |
176
|
|
|
|
|
|
|
## + loads files to memory; must be opened |
177
|
|
|
|
|
|
|
## + INHERITED |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
##-------------------------------------------------------------- |
180
|
|
|
|
|
|
|
## I/O: header |
181
|
|
|
|
|
|
|
## + INHERITED |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
##-------------------------------------------------------------- |
184
|
|
|
|
|
|
|
## I/O: text |
185
|
|
|
|
|
|
|
## + INHERITED |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
##============================================================================== |
188
|
|
|
|
|
|
|
## Methods: population (in-memory only) |
189
|
|
|
|
|
|
|
## + INHERITED |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
##============================================================================== |
192
|
|
|
|
|
|
|
## Methods: lookup |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
## $s_or_undef = $enum->i2s($i) |
195
|
|
|
|
|
|
|
## + in-memory cache overrides file contents |
196
|
|
|
|
|
|
|
sub i2s { |
197
|
0
|
|
|
0
|
1
|
|
my ($enum,$i) = @_; |
198
|
0
|
0
|
|
|
|
|
return undef if ($i >= $enum->{size}); |
199
|
|
|
|
|
|
|
|
200
|
0
|
|
|
|
|
|
my $buf; |
201
|
0
|
0
|
|
|
|
|
return $buf if (defined($buf=$enum->{i2s}[$i])); |
202
|
|
|
|
|
|
|
|
203
|
0
|
|
0
|
|
|
|
my $soff = unpack($enum->{pack_o}, substr(${$enum->{ixbufr}}, $i*$enum->{len_o}, $enum->{len_o})) // return undef; |
|
0
|
|
|
|
|
|
|
204
|
0
|
|
|
|
|
|
my $slen = unpack($enum->{pack_l}, substr(${$enum->{sbufr}}, $soff, $enum->{len_l})); |
|
0
|
|
|
|
|
|
|
205
|
0
|
|
|
|
|
|
$buf = substr(${$enum->{sbufr}}, $soff+$enum->{len_l}, $slen); |
|
0
|
|
|
|
|
|
|
206
|
0
|
0
|
|
|
|
|
utf8::decode($buf) if ($enum->{utf8}); |
207
|
0
|
|
|
|
|
|
return $buf; |
208
|
|
|
|
|
|
|
} |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
## $i_or_undef = $enum->s2i($s) |
211
|
|
|
|
|
|
|
## $i_or_undef = $enum->s2i($s, $ilo,$ihi) |
212
|
|
|
|
|
|
|
## + binary search; enum must be opened |
213
|
|
|
|
|
|
|
sub s2i { |
214
|
0
|
|
|
0
|
1
|
|
my ($enum,$key,$ilo,$ihi) = @_; |
215
|
|
|
|
|
|
|
|
216
|
0
|
|
|
|
|
|
my ($sxbufr,$sbufr,$len_sx,$pack_o,$len_o,$pack_l,$len_l) = @$enum{qw(sxbufr sbufr len_sx pack_o len_o pack_l len_l)}; |
217
|
0
|
|
0
|
|
|
|
$ilo //= 0; |
218
|
0
|
0
|
0
|
|
|
|
$ihi //= $enum->{dirty} ? (length($$sxbufr)/$len_sx) : $enum->{size}; |
219
|
|
|
|
|
|
|
|
220
|
0
|
|
|
|
|
|
my ($imid,$buf,$soff,$slen,$si); |
221
|
0
|
0
|
|
|
|
|
return $buf if (defined($buf=$enum->{s2i}{$key})); |
222
|
|
|
|
|
|
|
|
223
|
0
|
0
|
0
|
|
|
|
utf8::encode($key) if ($enum->{utf8} && utf8::is_utf8($key)); |
224
|
0
|
|
|
|
|
|
while ($ilo < $ihi) { |
225
|
0
|
|
|
|
|
|
$imid = ($ihi+$ilo) >> 1; |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
##-- get sx-record @ $imid |
228
|
0
|
|
|
|
|
|
$soff = unpack($pack_o, substr($$sxbufr, $imid*$len_sx, $len_o)); |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
##-- get string for sx-record |
231
|
0
|
|
|
|
|
|
$slen = unpack($pack_l, substr($$sbufr, $soff, $len_l)); |
232
|
0
|
|
|
|
|
|
$buf = substr($$sbufr, $soff+$len_l, $slen); |
233
|
|
|
|
|
|
|
|
234
|
0
|
0
|
|
|
|
|
if ($buf lt $key) { |
235
|
0
|
|
|
|
|
|
$ilo = $imid + 1; |
236
|
|
|
|
|
|
|
} else { |
237
|
0
|
|
|
|
|
|
$ihi = $imid; |
238
|
|
|
|
|
|
|
} |
239
|
|
|
|
|
|
|
} |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
##-- output |
242
|
0
|
0
|
|
|
|
|
if ($ilo==$ihi) { |
243
|
|
|
|
|
|
|
##-- get sx-record @ $ilo |
244
|
0
|
|
|
|
|
|
($soff,$si) = unpack($enum->{pack_o}.$enum->{pack_i}, substr($$sxbufr, $ilo*$len_sx, $len_sx)); |
245
|
0
|
0
|
|
|
|
|
return undef if (!defined($soff)); |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
##-- get string for sx-record |
248
|
0
|
|
|
|
|
|
$slen = unpack($pack_l, substr($$sbufr, $soff, $len_l)); |
249
|
0
|
|
|
|
|
|
$buf = substr($$sbufr, $soff+$len_l, $slen); |
250
|
|
|
|
|
|
|
|
251
|
0
|
0
|
|
|
|
|
return $si if ($buf eq $key); |
252
|
|
|
|
|
|
|
} |
253
|
|
|
|
|
|
|
|
254
|
0
|
|
|
|
|
|
return undef; |
255
|
|
|
|
|
|
|
} |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
## \@is = $enum->re2i($regex) |
258
|
|
|
|
|
|
|
## + gets indices for all strings matching $regex |
259
|
|
|
|
|
|
|
sub re2i { |
260
|
0
|
|
|
0
|
1
|
|
my ($enum,$re) = @_; |
261
|
0
|
|
|
|
|
|
my $utf8 = $enum->{utf8}; |
262
|
|
|
|
|
|
|
|
263
|
0
|
0
|
|
|
|
|
if (!ref($re)) { |
264
|
0
|
0
|
0
|
|
|
|
utf8::decode($re) if ($utf8 && !utf8::is_utf8($re)); |
265
|
0
|
|
|
|
|
|
$re = regex($re); |
266
|
|
|
|
|
|
|
} |
267
|
|
|
|
|
|
|
|
268
|
0
|
|
|
|
|
|
my $i2s = $enum->{i2s}; |
269
|
0
|
0
|
0
|
|
|
|
if ($enum->loaded || !$enum->opened) { |
270
|
|
|
|
|
|
|
##-- easy answer: loaded |
271
|
0
|
0
|
|
|
|
|
return [grep {utf8::decode($_) if ($utf8); $i2s->[$_] =~ $re} (0..$#$i2s)]; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
} |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
##-- iteration a la toArray() |
275
|
0
|
|
|
|
|
|
my $pack_l = $enum->{pack_l}; |
276
|
0
|
|
|
|
|
|
my $len_l = $enum->{len_l}; |
277
|
0
|
|
|
|
|
|
my $sbufr = $enum->{sbufr}; |
278
|
0
|
|
|
|
|
|
my $offmax = length($$sbufr); |
279
|
0
|
|
|
|
|
|
my @is = qw(); |
280
|
0
|
|
|
|
|
|
my ($off,$i,$len_s,$s); |
281
|
0
|
|
|
|
|
|
for ($i=$off=0; $off < $offmax; ++$i, $off += ($len_l+$len_s)) { |
282
|
0
|
|
|
|
|
|
$len_s = unpack($pack_l, substr($$sbufr, $off, $len_l)); |
283
|
0
|
|
|
|
|
|
$s = substr($$sbufr, $off+$len_l, $len_s); |
284
|
0
|
0
|
|
|
|
|
utf8::decode($s) if ($utf8); |
285
|
0
|
0
|
|
|
|
|
push(@is, $i) if ($s =~ $re); |
286
|
|
|
|
|
|
|
} |
287
|
|
|
|
|
|
|
|
288
|
0
|
0
|
|
|
|
|
push(@is, grep {utf8::decode($_) if ($utf8); $i2s->[$_] =~ $re} (((-s $enum->{ixfh})/$enum->{len_o})..$#{$enum->{i2s}})) if ($enum->dirty); |
|
0
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
289
|
0
|
|
|
|
|
|
return \@is; |
290
|
|
|
|
|
|
|
} |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
##============================================================================== |
293
|
|
|
|
|
|
|
## Footer |
294
|
|
|
|
|
|
|
1; |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
__END__ |