line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Data::CompactReadonly; |
2
|
|
|
|
|
|
|
|
3
|
9
|
|
|
9
|
|
961427
|
use warnings; |
|
9
|
|
|
|
|
102
|
|
|
9
|
|
|
|
|
323
|
|
4
|
9
|
|
|
9
|
|
50
|
use strict; |
|
9
|
|
|
|
|
20
|
|
|
9
|
|
|
|
|
188
|
|
5
|
|
|
|
|
|
|
|
6
|
9
|
|
|
9
|
|
4564
|
use Data::CompactReadonly::V0::Node; |
|
9
|
|
|
|
|
31
|
|
|
9
|
|
|
|
|
5289
|
|
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
# Yuck, semver. I give in, the stupid cult that doesn't understand |
9
|
|
|
|
|
|
|
# what the *number* bit of *version number* means has won. |
10
|
|
|
|
|
|
|
our $VERSION = '0.0.5'; |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
=head1 NAME |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
Data::CompactReadonly |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
=head1 DESCRIPTION |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
A Compact Read Only Database that consumes very little memory. Once created a |
19
|
|
|
|
|
|
|
database can not be practically updated except by re-writing the whole thing. |
20
|
|
|
|
|
|
|
The aim is for random-access read performance to be on a par with L<DBM::Deep> |
21
|
|
|
|
|
|
|
and for files to be much smaller. |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
=head1 VERSION 'NUMBERS' |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
This module uses semantic versioning. That means that the version 'number' isn't |
26
|
|
|
|
|
|
|
really a number but has three parts: C<MAJOR.MINOR.PATCH>. |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
The C<MAJOR> number will increase when the API changes incompatibly; |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
The C<MINOR> number will increase when backward-compatible additions are made to the API; |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
The C<PATCH> number will increase when bugs are fixed backward-compatibly. |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
=head1 FILE FORMAT VERSIONS |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
All versions so far support file format version 0 only. |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
See L<Data::CompactReadonly::V0::Format> for details of what that means. |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
=head1 METHODS |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
=head2 create |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
Takes two arguments, the name of a file into which to write a database, and some |
45
|
|
|
|
|
|
|
data. The data can be undef, a number, some text, or a reference to an array |
46
|
|
|
|
|
|
|
or hash that in turn consists of undefs, numbers, text, references to arrays or |
47
|
|
|
|
|
|
|
hashes, and so on ad infinitum. |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
This method may be very slow. It constructs a file by making lots |
50
|
|
|
|
|
|
|
of little writes and seek()ing all over the place. It doesn't do anything |
51
|
|
|
|
|
|
|
clever to figure out what pointer size to use, it just tries the shortest |
52
|
|
|
|
|
|
|
first, and then if that's not enough tries again, and again, bigger each time. |
53
|
|
|
|
|
|
|
See L<Data::CompactReadonly::V0::Format> for more on pointer sizes. It may also eat B<lots> of |
54
|
|
|
|
|
|
|
memory. It keeps a cache of everything it has seen while building your |
55
|
|
|
|
|
|
|
database, so that it can re-use data by just pointing at it instead of writing |
56
|
|
|
|
|
|
|
multiple copies of the same data into the file. |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
Note that it will carefully preserve things that look like numbers but have |
59
|
|
|
|
|
|
|
extraneous leading or trailing zeroes. "007", for instance, is text, not a number, |
60
|
|
|
|
|
|
|
the leading zeroes are important. And while 7.10 is a number, the extra zero has |
61
|
|
|
|
|
|
|
meaning - it tells you that the value is accurate to three significant figures. If |
62
|
|
|
|
|
|
|
it were stored as a number, it would be retrieved as merely 7.1, accurate to only |
63
|
|
|
|
|
|
|
two significant figures. We are happy to spend a little extra storage in the |
64
|
|
|
|
|
|
|
interests of correctly storing your data. If you then go on to just treat 7.10 |
65
|
|
|
|
|
|
|
as a number in perl, and so as equivalent to 7.1 that is of course up to you. |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
Finally, while the file format permits numeric keys in hashes, this method |
68
|
|
|
|
|
|
|
always coerces them to text. This is because if you allow numeric keys, |
69
|
|
|
|
|
|
|
numbers that can't be represented in an C<int>, such as 1e100 or 3.14 will |
70
|
|
|
|
|
|
|
be subject to floating point imprecision, and so it is unlikely that you |
71
|
|
|
|
|
|
|
will ever be able to retrieve them as no exact match is possible. |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=head2 read |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
Takes a single compulsory argument, which is a filename or an already open file |
76
|
|
|
|
|
|
|
handle, and some options. |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
If the first argument is a filehandle, the current file pointer should be at |
79
|
|
|
|
|
|
|
the start of the database (not necessarily at the start of the file; the |
80
|
|
|
|
|
|
|
database could be in a C<__DATA__> segment) and B<must> have been opened in |
81
|
|
|
|
|
|
|
"just the bytes ma'am" mode. |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
It is a fatal error to pass in a filehandle which was not opened correctly or |
84
|
|
|
|
|
|
|
the name of a file that can't be opened or which doesn't contain a valid |
85
|
|
|
|
|
|
|
database. |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
The options are name/value pairs. Valid options are: |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
=over |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
=item tie |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
If true return tied objects instead of normal objects. This means that you will |
94
|
|
|
|
|
|
|
be able to access data by de-referencing and pretending to access elements |
95
|
|
|
|
|
|
|
directly. Under the bonnet this wraps around the objects as documented below, |
96
|
|
|
|
|
|
|
so is just a layer of indirection. On modern hardware you probably won't notice |
97
|
|
|
|
|
|
|
the concomitant slow down but may appreciate the convenience. |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
=item fast_collections |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
If true Dictionary keys and values will be permanently cached in memory the |
102
|
|
|
|
|
|
|
first time they are seen, instead of being fetched from the file when needed. |
103
|
|
|
|
|
|
|
Yes, this means that objects will grow in memory, potentially very large. |
104
|
|
|
|
|
|
|
Only use this if it is an acceptable pay-off for much faster access. |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
This is not yet implemented for Arrays. |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
=back |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
Returns the "root node" of the database. If that root node is a number, some |
111
|
|
|
|
|
|
|
piece of text, or Null, then it is decoded and the value returned. Otherwise an |
112
|
|
|
|
|
|
|
object (possibly a tied object) representing an Array or a Dictionary is returned. |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=head1 OBJECTS |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
If you asked for normal objects to be returned instead of tied objects, then |
117
|
|
|
|
|
|
|
these are sub-classes of either C<Data::CompactReadonly::Array> or |
118
|
|
|
|
|
|
|
C<Data::CompactReadonly::Dictionary>. Both implement the following methods: |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
=head2 id |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
Returns a unique id for this object within the database. Note that circular data |
123
|
|
|
|
|
|
|
structures are supported, and looking at the C<id> is the only way to detect them. |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
This is not accessible when using tied objects. |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
=head2 count |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
Returns the number of elements in the structure. |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
=head2 indices |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
Returns a list of all the available indices in the structure. |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
=head2 element |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
Takes a single argument, which must match one of the values that would be returned |
138
|
|
|
|
|
|
|
by C<indices>, and returns the associated data. |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
If the data is a number, Null, or text, the value will be returned directly. If the |
141
|
|
|
|
|
|
|
data is in turn another array or dictionary, an object will be returned. |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
=head2 exists |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
Takes a single argument and tells you whether an index exists for it. It will still |
146
|
|
|
|
|
|
|
die if you ask it for something stupid such as a floating point array index or |
147
|
|
|
|
|
|
|
a Null dictionary entry. |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
=head1 UNSUPPORTED PERL TYPES |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
Globs, Regexes, References (except to Arrays and Dictionaries) |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
=head1 BUGS/FEEDBACK |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
Please report bugs by at L, including, if possible, a test case. |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=head1 SEE ALSO |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
L<DBM::Deep> if you need updateable databases. |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
=head1 SOURCE CODE REPOSITORY |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
L |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
=head1 AUTHOR, COPYRIGHT and LICENCE |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
Copyright 2020 David Cantrell EFE |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
This software is free-as-in-speech software, and may be used, |
170
|
|
|
|
|
|
|
distributed, and modified under the terms of either the GNU |
171
|
|
|
|
|
|
|
General Public Licence version 2 or the Artistic Licence. It's |
172
|
|
|
|
|
|
|
up to you which one you use. The full text of the licences can |
173
|
|
|
|
|
|
|
be found in the files GPL2.txt and ARTISTIC.txt, respectively. |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
=head1 CONSPIRACY |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
This module is also free-as-in-mason software. |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
=cut |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
# Write $data to $file as a database in file format version 0.
#
# Called as a class method with two arguments:
#   $file - name of the file to write. It is re-opened for writing (and so
#           truncated) at the start of every attempt.
#   $data - the data to store, handed unchanged to the version's Node class.
#
# Pointer sizes 1 to 8 are tried in ascending order. An attempt is abandoned
# and the next size tried only when the error raised by the Node class
# contains the text returned by its _ptr_blown() method; any other error is
# re-thrown as-is. Dies if the file can't be opened, if the header can't be
# written, or if the finished file can't be closed.
sub create {
    my($class, $file, $data) = @_;

    my $version    = 0;
    my $node_class = "Data::CompactReadonly::V${version}::Node";

    PTR_SIZE: foreach my $ptr_size (1 .. 8) {
        # Fifth header byte: version shifted left three bits, with the
        # pointer size minus one in the low three bits.
        my $byte5 = chr(($version << 3) + $ptr_size - 1);

        open(my $fh, '>:unix', $file) || die("Can't write $file: $! \n");
        print {$fh} "CROD$byte5"
            or die("Can't write header to $file: $!\n");

        eval {
            $node_class->_create(
                filename => $file,
                fh       => $fh,
                ptr_size => $ptr_size,
                data     => $data,
                # the first free byte is whatever follows the header
                globals  => { next_free_ptr => tell($fh), already_seen => {} }
            );
        };
        # Copy $@ immediately: it is a global, and the _ptr_blown() call
        # below could reset it before we have finished looking at it.
        my $error = $@;

        if($error && index($error, $node_class->_ptr_blown()) != -1) {
            # $fh goes out of scope here; the next attempt truncates the file
            next PTR_SIZE;
        } elsif($error) {
            die($error);
        }

        # Make sure a failure to finish writing is reported rather than
        # silently leaving a broken database behind.
        close($fh) || die("Can't close $file: $!\n");
        last PTR_SIZE;
    }
}
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
# Open a database and return its root node.
#
# Called as a class method:
#   $file - either a filename, or a reference (treated as an already open
#           file handle positioned at the start of the database).
#   %args - name/value options. 'tie' and 'fast_collections' are passed on
#           to the Node class (as name => 1) when their value is true; any
#           other option names are ignored here.
#
# Dies if:
#   - a supplied file handle has any PerlIO layer other than unix, perlio
#     or scalar;
#   - a named file can't be opened;
#   - the first five bytes aren't "CROD" followed by one more byte;
#   - the version encoded in that fifth byte is 0b11111.
#
# Returns whatever the version-specific Node class's _init() returns.
sub read {
    my($class, $file, %args) = @_;
    my $fh;
    if(ref($file)) {
        $fh = $file;
        my @layers = PerlIO::get_layers($fh);
        if(grep { $_ !~ /^(unix|perlio|scalar)$/ } @layers) {
            die(
                "$class: file handle has invalid encoding [".
                join(', ', @layers).
                "]\n"
            );
        }
    } else {
        open($fh, '<', $file) || die("$class couldn't open file $file: $!\n");
        binmode($fh);
    }

    # Remember where the database starts; a supplied file handle need not
    # be positioned at the start of its file.
    my $original_file_pointer = tell($fh);

    # CORE:: makes it explicit that this is the builtin, not this method.
    CORE::read($fh, my $header, 5);
    # /s so that the fifth byte may be any byte at all, including 0x0A,
    # which '.' would otherwise refuse to match.
    (my $byte5) = ($header =~ /^CROD(.)/s);
    die("$class: $file header invalid: doesn't match /CROD./\n") unless(defined($byte5));

    # Top five bits are the format version, bottom three are the pointer
    # size minus one.
    my $version  = (ord($byte5) & 0b11111000) >> 3;
    my $ptr_size = (ord($byte5) & 0b00000111) + 1;
    die("$class: $file header invalid: bad version\n") if($version == 0b11111);

    return "Data::CompactReadonly::V${version}::Node"->_init(
        ptr_size => $ptr_size,
        fh       => $fh,
        db_base  => $original_file_pointer,
        # Options are documented as taking effect "if true", so test the
        # value and not merely whether the key was supplied.
        map {
            $args{$_} ? ($_ => 1 ) : ()
        } qw(fast_collections tie)
    );
}
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
1; |