| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Data::CompactReadonly; |
|
2
|
|
|
|
|
|
|
|
|
3
|
9
|
|
|
9
|
|
991245
|
use warnings; |
|
|
9
|
|
|
|
|
101
|
|
|
|
9
|
|
|
|
|
315
|
|
|
4
|
9
|
|
|
9
|
|
57
|
use strict; |
|
|
9
|
|
|
|
|
17
|
|
|
|
9
|
|
|
|
|
264
|
|
|
5
|
|
|
|
|
|
|
|
|
6
|
9
|
|
|
9
|
|
4479
|
use Data::CompactReadonly::V0::Node; |
|
|
9
|
|
|
|
|
32
|
|
|
|
9
|
|
|
|
|
5835
|
|
|
7
|
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
# Yuck, semver. I give in, the stupid cult that doesn't understand |
|
9
|
|
|
|
|
|
|
# what the *number* bit of *version number* means has won. |
|
10
|
|
|
|
|
|
|
our $VERSION = '0.1.0'; |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
=head1 NAME |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
Data::CompactReadonly |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
A Compact Read Only Database that consumes very little memory. Once created a |
|
19
|
|
|
|
|
|
|
database can not be practically updated except by re-writing the whole thing. |
|
20
|
|
|
|
|
|
|
The aim is for random-access read performance to be on a par with L |
|
21
|
|
|
|
|
|
|
and for files to be much smaller. |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
=head1 VERSION 'NUMBERS' |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
This module uses semantic versioning. That means that the version 'number' isn't |
|
26
|
|
|
|
|
|
|
really a number but has three parts: C<MAJOR.MINOR.PATCH>. |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
The C<MAJOR> number will increase when the API changes incompatibly; |
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
The C<MINOR> number will increase when backward-compatible additions are made to the API; |
|
31
|
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
The C<PATCH> number will increase when bugs are fixed backward-compatibly. |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
=head1 FILE FORMAT VERSIONS |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
All versions so far support file format version 0 only. |
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
See L for details of what that means. |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
=head1 METHODS |
|
41
|
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
=head2 create |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
Takes two arguments, the name of file into which to write a database, and some |
|
45
|
|
|
|
|
|
|
data. The data can be undef, a number, some text, or a reference to an array |
|
46
|
|
|
|
|
|
|
or hash that in turn consists of undefs, numbers, text, references to arrays or |
|
47
|
|
|
|
|
|
|
hashes, and so on ad infinitum. |
|
48
|
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
This method may be very slow. It constructs a file by making lots |
|
50
|
|
|
|
|
|
|
of little writes and seek()ing all over the place. It doesn't do anything |
|
51
|
|
|
|
|
|
|
clever to figure out what pointer size to use, it just tries the shortest |
|
52
|
|
|
|
|
|
|
first, and then if that's not enough tries again, and again, bigger each time. |
|
53
|
|
|
|
|
|
|
See L for more on pointer sizes. It may also eat B of |
|
54
|
|
|
|
|
|
|
memory. It keeps a cache of everything it has seen while building your |
|
55
|
|
|
|
|
|
|
database, so that it can re-use data by just pointing at it instead of writing |
|
56
|
|
|
|
|
|
|
multiple copies of the same data into the file. |
|
57
|
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
It tries really hard to preserve data types. So for example, C<60000> is stored |
|
59
|
|
|
|
|
|
|
and read back as an integer, but C<"60000"> is stored and read back as a string. |
|
60
|
|
|
|
|
|
|
This means that you can correctly store and retrieve C<"007"> but that C<007> |
|
61
|
|
|
|
|
|
|
will have the leading zeroes removed before Data::CompactReadonly ever sees it |
|
62
|
|
|
|
|
|
|
and so will be treated as exactly equivalent to C<7>. The same applies to floating |
|
63
|
|
|
|
|
|
|
point values too. C<"7.10"> is stored as a four byte string, but C<7.10> is stored |
|
64
|
|
|
|
|
|
|
the same as C<7.1>, as an eight byte IEEE754 double precision float. Note that |
|
65
|
|
|
|
|
|
|
perl parses values like C<7.0> as floating point, and thus so does this module. |
|
66
|
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
Finally, while the file format permits numeric keys and Booleans in hashes, |
|
68
|
|
|
|
|
|
|
this method always coerces them to text. It does that to numbers because if you |
|
69
|
|
|
|
|
|
|
allow numeric keys, numbers that can't be represented in an C<int>, such as |
|
70
|
|
|
|
|
|
|
1e100 or 3.14 will be subject to floating point imprecision, and so it is |
|
71
|
|
|
|
|
|
|
unlikely that you will ever be able to retrieve them as no exact match is |
|
72
|
|
|
|
|
|
|
possible. And it does it to Booleans because when you un-serialise them on an |
|
73
|
|
|
|
|
|
|
older perl they may be confused with strings, leading to loss of data if those |
|
74
|
|
|
|
|
|
|
strings are also present as keys in the dictionary. |
|
75
|
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
=head2 read |
|
77
|
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
Takes a single compulsory argument, which is a filename or an already open file |
|
79
|
|
|
|
|
|
|
handle, and some options. |
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
If the first argument is a filehandle, the current file pointer should be at |
|
82
|
|
|
|
|
|
|
the start of the database (not necessarily at the start of the file; the |
|
83
|
|
|
|
|
|
|
database could be in a C<__DATA__> segment) and B<must> have been opened in |
|
84
|
|
|
|
|
|
|
"just the bytes ma'am" mode. |
|
85
|
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
It is a fatal error to pass in a filehandle which was not opened correctly or |
|
87
|
|
|
|
|
|
|
the name of a file that can't be opened or which doesn't contain a valid |
|
88
|
|
|
|
|
|
|
database. |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
The options are name/value pairs. Valid options are: |
|
91
|
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
=over |
|
93
|
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
=item tie |
|
95
|
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
If true return tied objects instead of normal objects. This means that you will |
|
97
|
|
|
|
|
|
|
be able to access data by de-referencing and pretending to access elements |
|
98
|
|
|
|
|
|
|
directly. Under the bonnet this wraps around the objects as documented below, |
|
99
|
|
|
|
|
|
|
so is just a layer of indirection. On modern hardware you probably won't notice |
|
100
|
|
|
|
|
|
|
the concomitant slow down but may appreciate the convenience. |
|
101
|
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
=item fast_collections |
|
103
|
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
If true Dictionary keys and values will be permanently cached in memory the |
|
105
|
|
|
|
|
|
|
first time they are seen, instead of being fetched from the file when needed. |
|
106
|
|
|
|
|
|
|
Yes, this means that objects will grow in memory, potentially very large. |
|
107
|
|
|
|
|
|
|
Only use this if it is an acceptable pay-off for much faster access. |
|
108
|
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
This is not yet implemented for Arrays. |
|
110
|
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
=back |
|
112
|
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
Returns the "root node" of the database. If that root node is a number, some |
|
114
|
|
|
|
|
|
|
piece of text, True, False, or Null, then it is decoded and the value returned. Otherwise an |
|
115
|
|
|
|
|
|
|
object (possibly a tied object) representing an Array or a Dictionary is returned. |
|
116
|
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
=head1 OBJECTS |
|
118
|
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
If you asked for normal objects to be returned instead of tied objects, then |
|
120
|
|
|
|
|
|
|
these are sub-classes of either C or |
|
121
|
|
|
|
|
|
|
C. Both implement the following methods: |
|
122
|
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
=head2 id |
|
124
|
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
Returns a unique id for this object within the database. Note that circular data |
|
126
|
|
|
|
|
|
|
structures are supported, and looking at the C<id> is the only way to detect them. |
|
127
|
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
This is not accessible when using tied objects. |
|
129
|
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=head2 count |
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
Returns the number of elements in the structure. |
|
133
|
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=head2 indices |
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
Returns a list of all the available indices in the structure. |
|
137
|
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
=head2 element |
|
139
|
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
Takes a single argument, which must match one of the values that would be returned |
|
141
|
|
|
|
|
|
|
by C<indices>, and returns the associated data. |
|
142
|
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
If the data is a number, Null, or text, the value will be returned directly. If the |
|
144
|
|
|
|
|
|
|
data is in turn another array or dictionary, an object will be returned. |
|
145
|
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
=head2 exists |
|
147
|
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
Takes a single argument and tells you whether an index exists for it. It will still |
|
149
|
|
|
|
|
|
|
die if you ask it for something stupid such as a floating point array index or |
|
150
|
|
|
|
|
|
|
a Null dictionary entry. |
|
151
|
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
=head1 UNSUPPORTED PERL TYPES |
|
153
|
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
Globs, Regexes, References (except to Arrays and Dictionaries). |
|
155
|
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
Booleans are only supported on perl version 5.35.7 or later. On earlier perls, a |
|
157
|
|
|
|
|
|
|
Boolean in the database will be decoded as a true or false I<value>, but its type |
|
158
|
|
|
|
|
|
|
will be numeric or string. And older perls will never write a True or False node |
|
159
|
|
|
|
|
|
|
to the database, they'll always write numbers or strings with true/false values, |
|
160
|
|
|
|
|
|
|
which other implementations will decode as numbers or strings. |
|
161
|
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
=head1 BUGS/FEEDBACK |
|
163
|
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
Please report bugs at L, including, if possible, a test case. |
|
165
|
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
167
|
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
L if you need updateable databases. |
|
169
|
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
=head1 SOURCE CODE REPOSITORY |
|
171
|
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
L |
|
173
|
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
=head1 AUTHOR, COPYRIGHT and LICENCE |
|
175
|
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
Copyright 2020 David Cantrell EFE |
|
177
|
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
This software is free-as-in-speech software, and may be used, |
|
179
|
|
|
|
|
|
|
distributed, and modified under the terms of either the GNU |
|
180
|
|
|
|
|
|
|
General Public Licence version 2 or the Artistic Licence. It's |
|
181
|
|
|
|
|
|
|
up to you which one you use. The full text of the licences can |
|
182
|
|
|
|
|
|
|
be found in the files GPL2.txt and ARTISTIC.txt, respectively. |
|
183
|
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
=head1 CONSPIRACY |
|
185
|
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
This module is also free-as-in-mason software. |
|
187
|
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
=cut |
|
189
|
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
# Write $data to $file as a database in file format version 0.
#
# Called as a class method with two arguments: the name of the file to write
# and the data to store.
#
# Each pointer size from 1 to 8 bytes is tried in turn. Every attempt re-opens
# (and so truncates) the file, writes the five byte header, and then hands over
# to the version's Node class to write the data. The first attempt that
# completes without error wins.
#
# Dies if the file can't be opened or the header can't be written, re-throws
# any error from the Node class that isn't a "pointer blown" error, and dies
# with the last "pointer blown" error if even 8 byte pointers aren't enough.
# Returns nothing useful.
sub create {
    my($class, $file, $data) = @_;

    my $version    = 0;
    my $node_class = "Data::CompactReadonly::V${version}::Node";

    my $error;
    PTR_SIZE: foreach my $ptr_size (1 .. 8) {
        # fifth header byte: version in the top five bits, pointer size
        # minus one in the bottom three
        my $byte5 = chr(($version << 3) + $ptr_size - 1);
        open(my $fh, '>:unix', $file) || die("Can't write $file: $! \n");
        print {$fh} "CROD$byte5" or die("Can't write $file: $! \n");
        eval {
            $node_class->_create(
                filename => $file,
                fh       => $fh,
                ptr_size => $ptr_size,
                data     => $data,
                globals  => { next_free_ptr => tell($fh), already_seen => {} }
            );
        };
        # take a copy immediately, so that nothing called below can clobber it
        $error = $@;
        last PTR_SIZE unless($error);

        # only a blown pointer is worth retrying with a bigger pointer size
        die($error) if(index($error, $node_class->_ptr_blown()) == -1);
    }

    # still set here only if the largest pointer size was blown as well, in
    # which case the file is incomplete and the caller needs to know
    die($error) if($error);
    return;
}
|
214
|
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
# Open a database and return whatever the version's Node class makes of it.
#
# Called as a class method. The first argument is either a file name or an
# already open file handle (anything for which ref() is true); it is followed
# by name/value option pairs. The options 'fast_collections' and 'tie' are
# passed on, as 1, only when given a true value.
#
# A file handle must have no layers other than unix, perlio and scalar, and
# the database is taken to start at the handle's current position. A named
# file is opened for reading and put into binary mode.
#
# Dies if the handle has any other layer, if the file can't be opened, if the
# first five bytes aren't "CROD" followed by one more byte, or if the version
# encoded in that fifth byte is 0b11111.
sub read {
    my($class, $file, %args) = @_;

    my $fh;
    if(ref($file)) {
        $fh = $file;
        my @layers = PerlIO::get_layers($fh);
        if(grep { $_ !~ /^(unix|perlio|scalar)$/ } @layers) {
            die(
                "$class: file handle has invalid encoding [".
                join(', ', @layers).
                "]\n"
            );
        }
    } else {
        open($fh, '<', $file) || die("$class couldn't open file $file: $!\n");
        binmode($fh);
    }

    # remembered so that the database can live part way through a file
    my $original_file_pointer = tell($fh);

    read($fh, my $header, 5);
    # /s matters: the fifth byte is binary data and can legitimately be 0x0A,
    # which a bare '.' would refuse to match
    my($byte5) = ($header =~ /^CROD(.)/s);
    die("$class: $file header invalid: doesn't match /CROD./\n") unless(defined($byte5));

    # fifth byte: version in the top five bits, pointer size minus one in the
    # bottom three
    my $version  = (ord($byte5) & 0b11111000) >> 3;
    my $ptr_size = (ord($byte5) & 0b00000111) + 1;
    die("$class: $file header invalid: bad version\n") if($version == 0b11111);

    # options are documented as taking effect "if true", so test their value
    # and not merely their presence
    my @enabled_options = map { $_ => 1 }
                          grep { $args{$_} }
                          qw(fast_collections tie);

    return "Data::CompactReadonly::V${version}::Node"->_init(
        ptr_size => $ptr_size,
        fh       => $fh,
        db_base  => $original_file_pointer,
        @enabled_options
    );
}
|
252
|
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
1; |