line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Data::CompactReadonly; |
2
|
|
|
|
|
|
|
|
3
|
9
|
|
|
9
|
|
991245
|
use warnings; |
|
9
|
|
|
|
|
101
|
|
|
9
|
|
|
|
|
315
|
|
4
|
9
|
|
|
9
|
|
57
|
use strict; |
|
9
|
|
|
|
|
17
|
|
|
9
|
|
|
|
|
264
|
|
5
|
|
|
|
|
|
|
|
6
|
9
|
|
|
9
|
|
4479
|
use Data::CompactReadonly::V0::Node; |
|
9
|
|
|
|
|
32
|
|
|
9
|
|
|
|
|
5835
|
|
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
# Yuck, semver. I give in, the stupid cult that doesn't understand |
9
|
|
|
|
|
|
|
# what the *number* bit of *version number* means has won. |
10
|
|
|
|
|
|
|
our $VERSION = '0.1.0'; |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
=head1 NAME |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
Data::CompactReadonly |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
=head1 DESCRIPTION |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
A Compact Read Only Database that consumes very little memory. Once created a |
19
|
|
|
|
|
|
|
database can not be practically updated except by re-writing the whole thing. |
20
|
|
|
|
|
|
|
The aim is for random-access read performance to be on a par with L |
21
|
|
|
|
|
|
|
and for files to be much smaller. |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
=head1 VERSION 'NUMBERS' |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
This module uses semantic versioning. That means that the version 'number' isn't |
26
|
|
|
|
|
|
|
really a number but has three parts: C<MAJOR.MINOR.PATCH>. |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
The C<MAJOR> number will increase when the API changes incompatibly; |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
The C<MINOR> number will increase when backward-compatible additions are made to the API; |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
The C<PATCH> number will increase when bugs are fixed backward-compatibly. |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
=head1 FILE FORMAT VERSIONS |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
All versions so far support file format version 0 only. |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
See L for details of what that means. |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
=head1 METHODS |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
=head2 create |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
Takes two arguments, the name of file into which to write a database, and some |
45
|
|
|
|
|
|
|
data. The data can be undef, a number, some text, or a reference to an array |
46
|
|
|
|
|
|
|
or hash that in turn consists of undefs, numbers, text, references to arrays or |
47
|
|
|
|
|
|
|
hashes, and so on ad infinitum. |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
This method may be very slow. It constructs a file by making lots |
50
|
|
|
|
|
|
|
of little writes and seek()ing all over the place. It doesn't do anything |
51
|
|
|
|
|
|
|
clever to figure out what pointer size to use, it just tries the shortest |
52
|
|
|
|
|
|
|
first, and then if that's not enough tries again, and again, bigger each time. |
53
|
|
|
|
|
|
|
See L for more on pointer sizes. It may also eat B of |
54
|
|
|
|
|
|
|
memory. It keeps a cache of everything it has seen while building your |
55
|
|
|
|
|
|
|
database, so that it can re-use data by just pointing at it instead of writing |
56
|
|
|
|
|
|
|
multiple copies of the same data into the file. |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
It tries really hard to preserve data types. So for example, C<60000> is stored |
59
|
|
|
|
|
|
|
and read back as an integer, but C<"60000"> is stored and read back as a string. |
60
|
|
|
|
|
|
|
This means that you can correctly store and retrieve C<"007"> but that C<007> |
61
|
|
|
|
|
|
|
will have the leading zeroes removed before Data::CompactReadonly ever sees it |
62
|
|
|
|
|
|
|
and so will be treated as exactly equivalent to C<7>. The same applies to floating |
63
|
|
|
|
|
|
|
point values too. C<"7.10"> is stored as a four byte string, but C<7.10> is stored |
64
|
|
|
|
|
|
|
the same as C<7.1>, as an eight byte IEEE754 double precision float. Note that |
65
|
|
|
|
|
|
|
perl parses values like C<7.0> as floating point, and thus so does this module. |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
Finally, while the file format permits numeric keys and Booleans in hashes, |
68
|
|
|
|
|
|
|
this method always coerces them to text. It does that to numbers because if you |
69
|
|
|
|
|
|
|
allow numeric keys, numbers that can't be represented in an C, such as |
70
|
|
|
|
|
|
|
1e100 or 3.14 will be subject to floating point imprecision, and so it is |
71
|
|
|
|
|
|
|
unlikely that you will ever be able to retrieve them as no exact match is |
72
|
|
|
|
|
|
|
possible. And it does it to Booleans because when you un-serialise them on an |
73
|
|
|
|
|
|
|
older perl they may be confused with strings, leading to loss of data if those |
74
|
|
|
|
|
|
|
strings are also present as keys in the dictionary. |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
=head2 read |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
Takes a single compulsory argument, which is a filename or an already open file |
79
|
|
|
|
|
|
|
handle, and some options. |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
If the first argument is a filehandle, the current file pointer should be at |
82
|
|
|
|
|
|
|
the start of the database (not necessarily at the start of the file; the |
83
|
|
|
|
|
|
|
database could be in a C<__DATA__> segment) and B<must> have been opened in |
84
|
|
|
|
|
|
|
"just the bytes ma'am" mode. |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
It is a fatal error to pass in a filehandle which was not opened correctly or |
87
|
|
|
|
|
|
|
the name of a file that can't be opened or which doesn't contain a valid |
88
|
|
|
|
|
|
|
database. |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
The options are name/value pairs. Valid options are: |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
=over |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
=item tie |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
If true return tied objects instead of normal objects. This means that you will |
97
|
|
|
|
|
|
|
be able to access data by de-referencing and pretending to access elements |
98
|
|
|
|
|
|
|
directly. Under the bonnet this wraps around the objects as documented below, |
99
|
|
|
|
|
|
|
so is just a layer of indirection. On modern hardware you probably won't notice |
100
|
|
|
|
|
|
|
the concomitant slow down but may appreciate the convenience. |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
=item fast_collections |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
If true Dictionary keys and values will be permanently cached in memory the |
105
|
|
|
|
|
|
|
first time they are seen, instead of being fetched from the file when needed. |
106
|
|
|
|
|
|
|
Yes, this means that objects will grow in memory, potentially very large. |
107
|
|
|
|
|
|
|
Only use this if it is an acceptable pay-off for much faster access. |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
This is not yet implemented for Arrays. |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
=back |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
Returns the "root node" of the database. If that root node is a number, some |
114
|
|
|
|
|
|
|
piece of text, True, False, or Null, then it is decoded and the value returned. Otherwise an |
115
|
|
|
|
|
|
|
object (possibly a tied object) representing an Array or a Dictionary is returned. |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
=head1 OBJECTS |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
If you asked for normal objects to be returned instead of tied objects, then |
120
|
|
|
|
|
|
|
these are sub-classes of either C or |
121
|
|
|
|
|
|
|
C. Both implement the following three methods: |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
=head2 id |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
Returns a unique id for this object within the database. Note that circular data |
126
|
|
|
|
|
|
|
structures are supported, and looking at the C<id> is the only way to detect them. |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
This is not accessible when using tied objects. |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=head2 count |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
Returns the number of elements in the structure. |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=head2 indices |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
Returns a list of all the available indices in the structure. |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
=head2 element |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
Takes a single argument, which must match one of the values that would be returned |
141
|
|
|
|
|
|
|
by C<indices>, and returns the associated data. |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
If the data is a number, Null, or text, the value will be returned directly. If the |
144
|
|
|
|
|
|
|
data is in turn another array or dictionary, an object will be returned. |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
=head2 exists |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
Takes a single argument and tells you whether an index exists for it. It will still |
149
|
|
|
|
|
|
|
die if you ask it for something stupid such as a floating point array index or |
150
|
|
|
|
|
|
|
a Null dictionary entry. |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
=head1 UNSUPPORTED PERL TYPES |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
Globs, Regexes, References (except to Arrays and Dictionaries). |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
Booleans are only supported on perl version 5.35.7 or later. On earlier perls, a |
157
|
|
|
|
|
|
|
Boolean in the database will be decoded as a true or false I<value>, but its type |
158
|
|
|
|
|
|
|
will be numeric or string. And older perls will never write a True or False node |
159
|
|
|
|
|
|
|
to the database, they'll always write numbers or strings with true/false values, |
160
|
|
|
|
|
|
|
which other implementations will decode as numbers or strings. |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
=head1 BUGS/FEEDBACK |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
Please report bugs by at L, including, if possible, a test case. |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
=head1 SEE ALSO |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
L if you need updateable databases. |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
=head1 SOURCE CODE REPOSITORY |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
L |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
=head1 AUTHOR, COPYRIGHT and LICENCE |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
Copyright 2020 David Cantrell EFE |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
This software is free-as-in-speech software, and may be used, |
179
|
|
|
|
|
|
|
distributed, and modified under the terms of either the GNU |
180
|
|
|
|
|
|
|
General Public Licence version 2 or the Artistic Licence. It's |
181
|
|
|
|
|
|
|
up to you which one you use. The full text of the licences can |
182
|
|
|
|
|
|
|
be found in the files GPL2.txt and ARTISTIC.txt, respectively. |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
=head1 CONSPIRACY |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
This module is also free-as-in-mason software. |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
=cut |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
# Class method: write $data to $file as a new database, truncating any
# existing file of that name.
#
# Arguments:
#   $file - name of the file to write
#   $data - the data to store; it is handed unchanged to the node writer
#
# Returns nothing useful. Dies if the file can't be opened, written or closed,
# if the node writer fails for any reason other than the pointer size being
# too small, or if even the largest pointer size is too small.
sub create {
    my($class, $file, $data) = @_;

    my $version    = 0;
    my $node_class = "Data::CompactReadonly::V${version}::Node";

    # Try the smallest pointer size first, re-writing the whole file with the
    # next size up each time the writer reports that pointers overflowed.
    PTR_SIZE: foreach my $ptr_size (1 .. 8) {
        # Fifth header byte: format version in the top five bits, pointer
        # size minus one in the bottom three.
        my $byte5 = chr(($version << 3) + $ptr_size - 1);
        open(my $fh, '>:unix', $file) || die("Can't write $file: $! \n");
        print {$fh} "CROD$byte5"
            or die("Can't write $file: $! \n");

        # Capture the error straight after the eval so that nothing which
        # runs later can clobber $@ before it is inspected.
        my $ok = 0;
        my $error;
        {
            local $@;
            $ok = eval {
                $node_class->_create(
                    filename => $file,
                    fh       => $fh,
                    ptr_size => $ptr_size,
                    data     => $data,
                    globals  => { next_free_ptr => tell($fh), already_seen => {} }
                );
                1;
            };
            $error = $@ unless($ok);
        }

        if($ok) {
            # Errors on a write handle can surface as late as close().
            close($fh) || die("Can't write $file: $! \n");
            return;
        }

        # Best effort only: the file is either about to be re-written with a
        # bigger pointer size or abandoned because of the error.
        close($fh);

        $error = "$class: unknown error writing $file\n"
            unless(defined($error) && length($error));
        die($error) if(index($error, $node_class->_ptr_blown()) == -1);
    }

    # Every pointer size overflowed, so whatever is in $file is incomplete.
    die("$class: can't write $file: data doesn't fit even with the largest pointer size\n");
}
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
# Class method: open a database and return its root node.
#
# Arguments:
#   $file - either a file name, or a reference (an already open file handle)
#           positioned at the start of the database
#   %args - options; 'tie' and 'fast_collections' are passed on to the node
#           class as flags when they are given a true value
#
# Returns whatever the node class's _init() returns for the root node.
#
# Dies if a file handle has any I/O layer other than unix, perlio or scalar,
# if a named file can't be opened, if the five byte header isn't "CROD"
# followed by one byte, or if the format version in that byte is not one this
# code can read.
sub read {
    my($class, $file, %args) = @_;

    my $fh;
    if(ref($file)) {
        $fh = $file;
        # Anything beyond these layers would transform the bytes as they are
        # read, so refuse such handles.
        my @layers = PerlIO::get_layers($fh);
        if(grep { $_ !~ /\A(?:unix|perlio|scalar)\z/ } @layers) {
            die(
                "$class: file handle has invalid encoding [".
                join(', ', @layers).
                "]\n"
            );
        }
    } else {
        open($fh, '<', $file) || die("$class couldn't open file $file: $!\n");
        binmode($fh);
    }

    # The database need not start at the beginning of the file, so remember
    # where it does and pass that on as db_base.
    my $original_file_pointer = tell($fh);

    # CORE:: is spelled out because this sub is itself called read. A failed
    # or short read is reported as an invalid header; /s lets the fifth byte
    # be any value, including 0x0A.
    my $bytes_read = CORE::read($fh, my $header, 5);
    my($byte5) = (defined($bytes_read) && $bytes_read == 5)
        ? ($header =~ /\ACROD(.)/s)
        : ();
    die("$class: $file header invalid: doesn't match /CROD./\n") unless(defined($byte5));

    # Top five bits are the format version, bottom three are the pointer size
    # minus one.
    my $version  = (ord($byte5) & 0b11111000) >> 3;
    my $ptr_size = (ord($byte5) & 0b00000111) + 1;
    die("$class: $file header invalid: bad version\n") if($version == 0b11111);

    # Fail with a header error, not a method-resolution error, when there is
    # no loaded node class for this format version.
    my $node_class = "Data::CompactReadonly::V${version}::Node";
    die("$class: $file header invalid: unsupported version $version\n")
        unless($node_class->can('_init'));

    return $node_class->_init(
        ptr_size => $ptr_size,
        fh       => $fh,
        db_base  => $original_file_pointer,
        # Options are documented as "if true", so test the value rather than
        # mere presence: tie => 0 must not turn tying on.
        map {
            $args{$_} ? ($_ => 1) : ()
        } qw(fast_collections tie)
    );
}
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
1; |