File Coverage

c/tree.c
Criterion Covered Total %
statement 776 869 89.3
branch 431 600 71.8
condition n/a
subroutine n/a
pod n/a
total 1207 1469 82.1


line stmt bran cond sub pod time code
1             #include "tree.h"
2              
3             #ifndef WIN32
4             #include <sys/mman.h>
5             #else
6             #include "windows_mman.h"
7             #endif
8              
9             #include <sys/socket.h>
10             #include <sys/stat.h>
11             #include <sys/types.h>
12              
13             #include <errno.h>
14             #include <fcntl.h>
15             #include <inttypes.h>
16             #include <netdb.h>
17             #include <stdbool.h>
18             #include <stdio.h>
19              
20             #ifdef __GNUC__
21             #define UNUSED(x) UNUSED_##x __attribute__((__unused__))
22             #else
23             #define UNUSED(x) UNUSED_##x
24             #endif
25              
26             /* This is also defined in MaxMind::DB::Common but we don't want to have to
27             * fetch it every time we need it. */
28             #define DATA_SECTION_SEPARATOR_SIZE (16)
29              
30             #define SHA1_KEY_LENGTH (27)
31              
32             #define MERGE_KEY_SIZE (57)
33              
34             typedef struct freeze_args_s {
35             FILE *file;
36             char *filename;
37             HV *data_hash;
38             } freeze_args_s;
39              
40             typedef struct thawed_network_s {
41             MMDBW_network_s *network;
42             MMDBW_record_s *record;
43             } thawed_network_s;
44              
45             typedef struct encode_args_s {
46             PerlIO *output_io;
47             SV *root_data_type;
48             SV *serializer;
49             HV *data_pointer_cache;
50             } encode_args_s;
51              
52             struct network {
53             const char *const ipstr;
54             const uint8_t prefix_length;
55             };
56              
57             static void verify_ip(MMDBW_tree_s *tree, const char *ipstr);
58             static int128_t ip_string_to_integer(const char *ipstr, int family);
59             static int128_t ip_bytes_to_integer(uint8_t *bytes, int family);
60             static void
61             integer_to_ip_bytes(int tree_ip_version, uint128_t ip, uint8_t *bytes);
62             static void integer_to_ip_string(int tree_ip_version,
63             uint128_t ip,
64             char *dst,
65             int dst_length);
66             static int prefix_length_for_largest_subnet(uint128_t start_ip,
67             uint128_t end_ip,
68             int family,
69             uint128_t *reverse_mask);
70             static const char *
71             store_data_in_tree(MMDBW_tree_s *tree, const char *const key, SV *data_sv);
72             static const char *increment_data_reference_count(MMDBW_tree_s *tree,
73             const char *const key);
74             static void
75             set_stored_data_in_tree(MMDBW_tree_s *tree, const char *const key, SV *data_sv);
76             static void decrement_data_reference_count(MMDBW_tree_s *tree,
77             const char *const key);
78             static MMDBW_network_s resolve_network(MMDBW_tree_s *tree,
79             const char *const ipstr,
80             uint8_t prefix_length);
81             static MMDBW_status
82             resolve_ip(int tree_ip_version, const char *const ipstr, uint8_t *bytes);
83             static void free_network(MMDBW_network_s *network);
84             static void alias_ipv4_networks(MMDBW_tree_s *tree);
85             static MMDBW_status insert_reserved_networks_as_fixed_empty(MMDBW_tree_s *tree);
86             static MMDBW_status
87             insert_networks_as_fixed_empty(MMDBW_tree_s *tree,
88             struct network const *const networks,
89             const size_t num_networks);
90             static MMDBW_status
91             insert_record_for_network(MMDBW_tree_s *tree,
92             MMDBW_network_s *network,
93             MMDBW_record_s *new_record,
94             MMDBW_merge_strategy merge_strategy,
95             bool is_internal_insert);
96             static MMDBW_status
97             insert_record_into_next_node(MMDBW_tree_s *tree,
98             MMDBW_record_s *current_record,
99             MMDBW_network_s *network,
100             int current_bit,
101             MMDBW_record_s *new_record,
102             MMDBW_merge_strategy merge_strategy,
103             bool is_internal_insert);
104             static MMDBW_status
105             insert_record_into_current_record(MMDBW_tree_s *tree,
106             MMDBW_record_s *current_record,
107             MMDBW_network_s *network,
108             MMDBW_record_s *new_record,
109             MMDBW_merge_strategy merge_strategy);
110             static const char *maybe_merge_records(MMDBW_tree_s *tree,
111             MMDBW_network_s *network,
112             MMDBW_record_s *new_record,
113             MMDBW_record_s *record_to_set,
114             MMDBW_merge_strategy merge_strategy);
115             static int network_bit_value(MMDBW_network_s *network, uint8_t current_bit);
116             static int tree_depth0(MMDBW_tree_s *tree);
117             static SV *merge_hashes(MMDBW_tree_s *tree,
118             SV *from,
119             SV *into,
120             MMDBW_merge_strategy merge_strategy);
121             static void merge_new_from_hash_into_hash(MMDBW_tree_s *tree,
122             HV *from,
123             HV *to,
124             MMDBW_merge_strategy merge_strategy);
125             static SV *merge_values(MMDBW_tree_s *tree,
126             SV *from,
127             SV *into,
128             MMDBW_merge_strategy merge_strategy);
129             static SV *merge_arrays(MMDBW_tree_s *tree,
130             SV *from,
131             SV *into,
132             MMDBW_merge_strategy merge_strategy);
133             static MMDBW_status find_record_for_network(MMDBW_tree_s *tree,
134             MMDBW_network_s *network,
135             MMDBW_record_s **record);
136             static MMDBW_node_s *new_node_from_record(MMDBW_tree_s *tree,
137             MMDBW_record_s *record);
138             static MMDBW_status free_node_and_subnodes(MMDBW_tree_s *tree,
139             MMDBW_node_s *node,
140             bool remove_alias_and_fixed_nodes);
141             static MMDBW_status free_record_value(MMDBW_tree_s *tree,
142             MMDBW_record_s *record,
143             bool remove_alias_and_fixed_nodes);
144             static void assign_node_number(MMDBW_tree_s *tree,
145             MMDBW_node_s *node,
146             uint128_t UNUSED(network),
147             uint8_t UNUSED(depth),
148             void *UNUSED(args));
149             static void freeze_search_tree(MMDBW_tree_s *tree, freeze_args_s *args);
150             static void freeze_node(MMDBW_tree_s *tree,
151             MMDBW_node_s *node,
152             uint128_t network,
153             uint8_t depth,
154             void *void_args);
155             static void freeze_data_record(MMDBW_tree_s *UNUSED(tree),
156             uint128_t network,
157             uint8_t depth,
158             const char *key,
159             freeze_args_s *args);
160             static void freeze_to_file(freeze_args_s *args, void *data, size_t size);
161             static void freeze_data_to_file(freeze_args_s *args, MMDBW_tree_s *tree);
162             static SV *freeze_hash(HV *hash);
163             static uint8_t thaw_uint8(uint8_t **buffer);
164             static thawed_network_s *thaw_network(MMDBW_tree_s *tree, uint8_t **buffer);
165             static uint8_t *thaw_bytes(uint8_t **buffer, size_t size);
166             static uint128_t thaw_uint128(uint8_t **buffer);
167             static STRLEN thaw_strlen(uint8_t **buffer);
168             static const char *thaw_data_key(uint8_t **buffer);
169             static HV *thaw_data_hash(SV *data_to_decode);
170             static void encode_node(MMDBW_tree_s *tree,
171             MMDBW_node_s *node,
172             uint128_t UNUSED(network),
173             uint8_t UNUSED(depth),
174             void *void_args);
175             static void
176             check_record_sanity(MMDBW_node_s *node, MMDBW_record_s *record, char *side);
177             static uint32_t record_value_as_number(MMDBW_tree_s *tree,
178             MMDBW_record_s *record,
179             encode_args_s *args);
180             static void iterate_tree(MMDBW_tree_s *tree,
181             MMDBW_record_s *record,
182             uint128_t network,
183             const uint8_t depth,
184             bool depth_first,
185             void *args,
186             MMDBW_iterator_callback callback);
187             static SV *key_for_data(SV *data);
188             static const char *merge_cache_lookup(MMDBW_tree_s *tree,
189             char *merge_cache_key);
190             static void store_in_merge_cache(MMDBW_tree_s *tree,
191             char *merge_cache_key,
192             const char *const new_key);
193             static void *checked_malloc(size_t size);
194             static void
195             checked_fwrite(FILE *file, char *filename, void *buffer, size_t count);
196             static void check_perlio_result(SSize_t result, SSize_t expected, char *op);
197             static char *status_error_message(MMDBW_status status);
198             static const char *record_type_name(MMDBW_record_type type);
199              
200             // Create a new tree.
201             //
202             // For a description of `alias_ipv6' and `remove_reserved_networks', refer to
203             // the MaxMind::DB::Writer::Tree documentation about these options.
204 203           MMDBW_tree_s *new_tree(const uint8_t ip_version,
205             uint8_t record_size,
206             MMDBW_merge_strategy merge_strategy,
207             const bool alias_ipv6,
208             const bool remove_reserved_networks) {
209 203 50         if (merge_strategy == MMDBW_MERGE_STRATEGY_UNKNOWN) {
210 0           croak("Unknown merge_strategy encountered");
211             }
212 203 50         if (ip_version != 4 && ip_version != 6) {
213 0           croak("Unexpected IP version of %u", ip_version);
214             }
215 203 100         if (record_size != 24 && record_size != 28 && record_size != 32) {
    50          
216 0           croak("Only record sizes of 24, 28, and 32 are supported. Received %u.",
217             record_size);
218             }
219              
220 203           MMDBW_tree_s *tree = checked_malloc(sizeof(MMDBW_tree_s));
221 203           tree->ip_version = ip_version;
222              
223 203           tree->record_size = record_size;
224 203           tree->merge_strategy = merge_strategy;
225 203           tree->merge_cache = NULL;
226 203           tree->data_table = NULL;
227 203           tree->root_record = (MMDBW_record_s){
228             .type = MMDBW_RECORD_TYPE_EMPTY,
229             };
230 203           tree->node_count = 0;
231              
232 203 100         if (alias_ipv6) {
233 23           alias_ipv4_networks(tree);
234             }
235              
236 203 100         if (remove_reserved_networks) {
237 151           MMDBW_status const status =
238 151           insert_reserved_networks_as_fixed_empty(tree);
239 151 50         if (status != MMDBW_SUCCESS) {
240 0           free_tree(tree);
241 0           croak("Error inserting reserved networks: %s",
242             status_error_message(status));
243             }
244             }
245              
246 203           return tree;
247             }
248              
249 105268           void insert_network(MMDBW_tree_s *tree,
250             const char *ipstr,
251             const uint8_t prefix_length,
252             SV *key_sv,
253             SV *data,
254             MMDBW_merge_strategy merge_strategy) {
255 105268           verify_ip(tree, ipstr);
256              
257 105267           MMDBW_network_s network = resolve_network(tree, ipstr, prefix_length);
258              
259 105266           const char *const key =
260 105266           store_data_in_tree(tree, SvPVbyte_nolen(key_sv), data);
261 105266           MMDBW_record_s new_record = {.type = MMDBW_RECORD_TYPE_DATA,
262             .value = {.key = key}};
263              
264 105266           MMDBW_status status = insert_record_for_network(
265             tree, &network, &new_record, merge_strategy, false);
266              
267             // The data's ref count gets incremented by the insert each time it is
268             // inserted. As such, we need to decrement it here.
269 105262           decrement_data_reference_count(tree, key);
270 105262           free_network(&network);
271              
272 105262           if (MMDBW_SUCCESS != status) {
273 8           croak("%s (when inserting %s/%" PRIu8 ")",
274             status_error_message(status),
275             ipstr,
276             prefix_length);
277             }
278 105254           }
279              
280 106967           static void verify_ip(MMDBW_tree_s *tree, const char *ipstr) {
281 106967 100         if (tree->ip_version == 4 && strchr(ipstr, ':')) {
    100          
282 2           croak("You cannot insert an IPv6 address (%s) into an IPv4 tree.",
283             ipstr);
284             }
285 106965           }
286              
287 849           void insert_range(MMDBW_tree_s *tree,
288             const char *start_ipstr,
289             const char *end_ipstr,
290             SV *key_sv,
291             SV *data_sv,
292 849           MMDBW_merge_strategy merge_strategy) {
293 849           verify_ip(tree, start_ipstr);
294 848           verify_ip(tree, end_ipstr);
295              
296 848           uint128_t start_ip = ip_string_to_integer(start_ipstr, tree->ip_version);
297 847           uint128_t end_ip = ip_string_to_integer(end_ipstr, tree->ip_version);
298              
299 847 100         if (end_ip < start_ip) {
300 1           croak("First IP (%s) in range comes before last IP (%s)",
301             start_ipstr,
302             end_ipstr);
303             }
304              
305 846           const char *const key =
306 846           store_data_in_tree(tree, SvPVbyte_nolen(key_sv), data_sv);
307              
308 846 100         uint8_t bytes[tree->ip_version == 6 ? 16 : 4];
309              
310 846           MMDBW_status status = MMDBW_SUCCESS;
311              
312             // Eventually we could change the code to walk the tree and break up the
313             // range at the same time, saving some unnecessary computation. However,
314             // that would require more significant refactoring of the insertion and
315             // merging code.
316 1734 100         while (start_ip <= end_ip) {
317 890           uint128_t reverse_mask;
318 1780           int prefix_length = prefix_length_for_largest_subnet(
319 890           start_ip, end_ip, tree->ip_version, &reverse_mask);
320              
321 1780           integer_to_ip_bytes(tree->ip_version, start_ip, bytes);
322              
323 890           MMDBW_network_s network = {
324             .bytes = bytes,
325             .prefix_length = prefix_length,
326             };
327              
328 890           MMDBW_record_s new_record = {.type = MMDBW_RECORD_TYPE_DATA,
329             .value = {.key = key}};
330              
331 890           status = insert_record_for_network(
332             tree, &network, &new_record, merge_strategy, false);
333              
334 890 50         if (MMDBW_SUCCESS != status) {
335             break;
336             }
337              
338 890           start_ip = (start_ip | reverse_mask) + 1;
339              
340             // The +1 caused an overflow and we are done.
341 890 100         if (start_ip == 0) {
342             break;
343             }
344             }
345             // store_data_in_tree starts at a reference count of 1, so we need to
346             // decrement in order to account for that.
347 846           decrement_data_reference_count(tree, key);
348              
349 846 50         if (MMDBW_SUCCESS != status) {
350 0           croak("%s (when inserting %s - %s)",
351             status_error_message(status),
352             start_ipstr,
353             end_ipstr);
354             }
355 846           }
356              
357 1695           static int128_t ip_string_to_integer(const char *ipstr, int family) {
358 1695 100         uint8_t bytes[family == 6 ? 16 : 4];
359 1695 100         if (resolve_ip(family, ipstr, bytes) != MMDBW_SUCCESS) {
360 1           croak("Invalid IP address: %s", ipstr);
361             }
362 1694           return ip_bytes_to_integer(bytes, family);
363             }
364              
365             static int128_t ip_bytes_to_integer(uint8_t *bytes, int family) {
366             int length = family == 6 ? 16 : 4;
367              
368             int128_t ipint = 0;
369 10918 100         for (int i = 0; i < length; i++) {
370 9224           ipint = (ipint << 8) | bytes[i];
371             }
372 1694           return ipint;
373             }
374              
375             static void
376 890           integer_to_ip_bytes(int tree_ip_version, uint128_t ip, uint8_t *bytes) {
377 890 100         int bytes_length = tree_ip_version == 6 ? 16 : 4;
378 6046 0         for (int i = 1; i <= bytes_length; i++) {
    100          
379 5156           bytes[bytes_length - i] = 0xFF & ip;
380 5156           ip >>= 8;
381             }
382             }
383              
384 0           static void integer_to_ip_string(int tree_ip_version,
385             uint128_t ip,
386             char *dst,
387 0           int dst_length) {
388 0 0         uint8_t bytes[tree_ip_version == 6 ? 16 : 4];
389 0           integer_to_ip_bytes(tree_ip_version, ip, bytes);
390              
391 0 0         if (NULL == inet_ntop(tree_ip_version == 6 ? AF_INET6 : AF_INET,
    0          
392             bytes,
393             dst,
394             dst_length)) {
395 0           croak("Error converting IP integer to string");
396             }
397 0           }
398              
399 890           static int prefix_length_for_largest_subnet(uint128_t start_ip,
400             uint128_t end_ip,
401             int family,
402             uint128_t *reverse_mask) {
403              
404 890 50         if (start_ip > end_ip) {
405 0           croak("Start IP of the range must be less than or equal to end IP");
406             }
407              
408 890 100         int prefix_length = family == 6 ? 128 : 32;
409 890           *reverse_mask = 1;
410              
411 890           while (
412             // First IP of the subnet must be the start IP
413 15467           (start_ip & ~*reverse_mask) == start_ip
414             // the last IP of the subnet must be <= the end IP
415 15004 100         && (start_ip | *reverse_mask) <= end_ip
416             // stop if we have all IPs (shouldn't be required, but safety measure)
417 30046 100         && prefix_length > 0) {
    100          
418 14577           prefix_length--;
419 14577           *reverse_mask = (*reverse_mask << 1) | 1;
420             }
421              
422             // We overshoot by one shift
423 890           *reverse_mask >>= 1;
424              
425 890           return prefix_length;
426             }
427              
428 2           void remove_network(MMDBW_tree_s *tree,
429             const char *ipstr,
430             const uint8_t prefix_length) {
431 2           verify_ip(tree, ipstr);
432              
433 2           MMDBW_network_s network = resolve_network(tree, ipstr, prefix_length);
434              
435 2           MMDBW_record_s new_record = {.type = MMDBW_RECORD_TYPE_EMPTY};
436              
437 4           MMDBW_status status = insert_record_for_network(
438             tree, &network, &new_record, MMDBW_MERGE_STRATEGY_NONE, false);
439              
440 2           free_network(&network);
441 2           if (status != MMDBW_SUCCESS) {
442 0           croak("Unable to remove network: %s", status_error_message(status));
443             }
444 2           }
445              
446             static const char *
447 106210           store_data_in_tree(MMDBW_tree_s *tree, const char *const key, SV *data_sv) {
448 106210           const char *const new_key = increment_data_reference_count(tree, key);
449 106210           set_stored_data_in_tree(tree, key, data_sv);
450              
451 106210           return new_key;
452             }
453              
454 342281           static const char *increment_data_reference_count(MMDBW_tree_s *tree,
455             const char *const key) {
456 342281           MMDBW_data_hash_s *data = NULL;
457 1496345 100         HASH_FIND(hh, tree->data_table, key, SHA1_KEY_LENGTH, data);
    100          
    100          
    50          
    100          
    100          
    100          
458              
459             /* We allow this possibility as we need to create the record separately
460             from updating the data when thawing */
461 342079 100         if (NULL == data) {
462 3437           data = checked_malloc(sizeof(MMDBW_data_hash_s));
463 3437           data->reference_count = 0;
464              
465 3437           data->data_sv = NULL;
466              
467 3437           data->key = checked_malloc(SHA1_KEY_LENGTH + 1);
468 3437 100         strcpy((char *)data->key, key);
469              
470 12048 100         HASH_ADD_KEYPTR(hh, tree->data_table, data->key, SHA1_KEY_LENGTH, data);
    50          
    50          
    100          
    100          
    100          
    50          
    50          
    50          
    100          
    100          
    100          
    100          
    50          
    50          
471             }
472 342281           data->reference_count++;
473              
474 342281           return data->key;
475             }
476              
477 107874           static void set_stored_data_in_tree(MMDBW_tree_s *tree,
478             const char *const key,
479             SV *data_sv) {
480 107874           MMDBW_data_hash_s *data = NULL;
481 475949 50         HASH_FIND(hh, tree->data_table, key, SHA1_KEY_LENGTH, data);
    100          
    50          
    50          
    100          
    50          
    50          
482              
483 107874 50         if (NULL == data) {
484 0           croak("Attempt to set unknown data record in tree");
485             }
486              
487 107874 100         if (NULL != data->data_sv) {
488             return;
489             }
490              
491 3437           SvREFCNT_inc_simple_void_NN(data_sv);
492 3437           data->data_sv = data_sv;
493             }
494              
495 342281           static void decrement_data_reference_count(MMDBW_tree_s *tree,
496             const char *const key) {
497 342281           MMDBW_data_hash_s *data = NULL;
498 1496831 50         HASH_FIND(hh, tree->data_table, key, SHA1_KEY_LENGTH, data);
    100          
    50          
    50          
    100          
    50          
    50          
499              
500 342281 50         if (NULL == data) {
501 0           croak("Attempt to remove data that does not exist from tree");
502             }
503              
504 342281           data->reference_count--;
505 342281 100         if (0 == data->reference_count) {
506 3437 100         HASH_DEL(tree->data_table, data);
    100          
    100          
    100          
    100          
    100          
    100          
    100          
507 3437           SvREFCNT_dec(data->data_sv);
508 3437           free((char *)data->key);
509 3437           free(data);
510             }
511 342281           }
512              
513 123419           static MMDBW_network_s resolve_network(MMDBW_tree_s *tree,
514             const char *const ipstr,
515             uint8_t prefix_length) {
516 138304 100         uint8_t *bytes = checked_malloc(tree->ip_version == 6 ? 16 : 4);
517              
518 123419 100         if (resolve_ip(tree->ip_version, ipstr, bytes) != MMDBW_SUCCESS) {
519 1           free(bytes);
520 1           croak("Invalid IP address: %s", ipstr);
521             }
522              
523 123418 100         if (NULL == strchr(ipstr, ':')) {
524             // IPv4
525 16214 50         if (prefix_length > 32) {
526 0           free(bytes);
527 0           croak("Prefix length greater than 32 on an IPv4 network (%s/%d)",
528             ipstr,
529             prefix_length);
530             }
531 16214 100         if (tree->ip_version == 6) {
532             // Inserting IPv4 network into an IPv6 tree
533 1329           prefix_length += 96;
534             }
535 107204 50         } else if (prefix_length > 128) {
536 0           free(bytes);
537 0           croak("Prefix length greater than 128 on an IPv6 network (%s/%d)",
538             ipstr,
539             prefix_length);
540             }
541              
542 123418           MMDBW_network_s network = {
543             .bytes = bytes,
544             .prefix_length = prefix_length,
545             };
546              
547 123418           return network;
548             }
549              
550             static MMDBW_status
551 125114           resolve_ip(int tree_ip_version, const char *const ipstr, uint8_t *bytes) {
552 125114           bool is_ipv4_address = NULL == strchr(ipstr, ':');
553 125114 100         int family = is_ipv4_address ? AF_INET : AF_INET6;
554 125114 100         if (tree_ip_version == 6 && is_ipv4_address) {
555             // We are inserting/looking up an IPv4 address in an IPv6 tree.
556             // The canonical location for this in our database is ::a.b.c.d.
557             // To get this address, we zero out the first 12 bytes of bytes
558             // and then put the IPv4 address in the remaining 4. The reason to
559             // not use getaddrinfo with AI_V4MAPPED is that it gives us
560             // ::FFFF:a.b.c.d and AI_V4MAPPED doesn't work on all platforms.
561             // See GitHub #7 and #51.
562 1339           memset(bytes, 0, 12);
563 1339           bytes += 12;
564             }
565 125114 100         if (!inet_pton(family, ipstr, bytes)) {
566 2           return MMDBW_RESOLVING_IP_ERROR;
567             }
568             return MMDBW_SUCCESS;
569             }
570              
571 229231           static void free_network(MMDBW_network_s *network) {
572 105264           free((char *)network->bytes);
573             }
574              
575             static struct network ipv4_aliases[] = {
576             {
577             .ipstr = "::ffff:0:0",
578             .prefix_length = 96,
579             },
580             {.ipstr = "2001::", .prefix_length = 32},
581             {.ipstr = "2002::", .prefix_length = 16}};
582              
583 23           static void alias_ipv4_networks(MMDBW_tree_s *tree) {
584 23 100         if (tree->ip_version == 4) {
585 4           return;
586             }
587              
588 19           MMDBW_network_s ipv4_root_network = resolve_network(tree, "::0.0.0.0", 96);
589 19           MMDBW_node_s *ipv4_root_node = new_node();
590 19           MMDBW_record_s ipv4_root_record = {
591             .type = MMDBW_RECORD_TYPE_FIXED_NODE,
592             .value.node = ipv4_root_node,
593             };
594              
595 38           MMDBW_status status = insert_record_for_network(tree,
596             &ipv4_root_network,
597             &ipv4_root_record,
598             MMDBW_MERGE_STRATEGY_NONE,
599             true);
600              
601 19           free_network(&ipv4_root_network);
602              
603 19 50         if (status != MMDBW_SUCCESS) {
604 0           croak("Unable to create IPv4 root node when setting up aliases: %s",
605             status_error_message(status));
606             }
607              
608 76 100         for (size_t i = 0; i < sizeof(ipv4_aliases) / sizeof(struct network); i++) {
609 57           MMDBW_network_s alias_network = resolve_network(
610 57           tree, ipv4_aliases[i].ipstr, ipv4_aliases[i].prefix_length);
611              
612 57           MMDBW_record_s record_for_alias = {
613             .type = MMDBW_RECORD_TYPE_ALIAS,
614             .value.node = ipv4_root_node,
615             };
616              
617 57           MMDBW_status status =
618 57           insert_record_for_network(tree,
619             &alias_network,
620             &record_for_alias,
621             MMDBW_MERGE_STRATEGY_NONE,
622             true);
623              
624 57           free_network(&alias_network);
625              
626 57 50         if (MMDBW_SUCCESS != status) {
627 0           croak("Unexpected error when searching for last node for alias: %s",
628             status_error_message(status));
629             }
630             }
631             }
632              
633             // https://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
634             static struct network reserved_networks_ipv4[] = {
635             {.ipstr = "0.0.0.0", .prefix_length = 8},
636             {.ipstr = "10.0.0.0", .prefix_length = 8},
637             {.ipstr = "100.64.0.0", .prefix_length = 10},
638             {.ipstr = "127.0.0.0", .prefix_length = 8},
639             {.ipstr = "169.254.0.0", .prefix_length = 16},
640             {.ipstr = "172.16.0.0", .prefix_length = 12},
641             // This is an odd case. 192.0.0.0/24 is reserved, but there is a note that
642             // says "Not useable unless by virtue of a more specific reservation". As
643             // such, since 192.0.0.0/29 was more recently reserved, it's possible the
644             // intention is that the rest is not reserved any longer. I'm not too clear
645             // on this, but I believe that is the rationale, so I choose to leave it.
646             {.ipstr = "192.0.0.0", .prefix_length = 29},
647             // TODO(wstorey@maxmind.com): 192.168.0.8/32
648             // TODO(wstorey@maxmind.com): 192.168.0.9/32
649             // TODO(wstorey@maxmind.com): 192.168.0.10/32
650             // TODO(wstorey@maxmind.com): 192.168.0.170/32
651             // TODO(wstorey@maxmind.com): 192.168.0.171/32
652             {.ipstr = "192.0.2.0", .prefix_length = 24},
653             // 192.31.196.0/24 is routable I believe
654             // TODO(wstorey@maxmnd.com): 192.52.193.0/24
655             // TODO(wstorey@maxmind.com): Looks like 192.88.99.0/24 may no longer be
656             // reserved?
657             {.ipstr = "192.88.99.0", .prefix_length = 24},
658             {.ipstr = "192.168.0.0", .prefix_length = 16},
659             // 192.175.48.0/24 is routable I believe
660             {.ipstr = "198.18.0.0", .prefix_length = 15},
661             {.ipstr = "198.51.100.0", .prefix_length = 24},
662             {.ipstr = "203.0.113.0", .prefix_length = 24},
663             // The above IANA page doesn't list 224.0.0.0/4, but at least some parts
664             // are listed in https://tools.ietf.org/html/rfc5771
665             {.ipstr = "224.0.0.0", .prefix_length = 4},
666             {.ipstr = "240.0.0.0", .prefix_length = 4},
667             // 255.255.255.255/32 gets brought in by 240.0.0.0/4.
668             };
669              
670             // https://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
671             static struct network reserved_networks_ipv6[] = {
672             // ::/128 and ::1/128 are reserved under IPv6 but these are already
673             // covered under 0.0.0.0/8.
674             //
675             // ::ffff:0:0/96 - IPv4 mapped addresses. We treat it specially with the
676             // `alias_ipv6_to_ipv4' option.
677             //
678             // 64:ff9b::/96 - well known prefix mapping, covered by alias_ipv6_to_ipv4
679             //
680             // TODO(wstorey@maxmind.com): 64:ff9b:1::/48 should be in
681             // alias_ipv6_to_ipv4?
682              
683             {.ipstr = "100::", .prefix_length = 64},
684              
685             // 2001::/23 is reserved. We include all of it here other than 2001::/32
686             // as it is Teredo which is globally routable.
687             {.ipstr = "2001:1::", .prefix_length = 32},
688             {.ipstr = "2001:2::", .prefix_length = 31},
689             {.ipstr = "2001:4::", .prefix_length = 30},
690             {.ipstr = "2001:8::", .prefix_length = 29},
691             {.ipstr = "2001:10::", .prefix_length = 28},
692             {.ipstr = "2001:20::", .prefix_length = 27},
693             {.ipstr = "2001:40::", .prefix_length = 26},
694             {.ipstr = "2001:80::", .prefix_length = 25},
695             {.ipstr = "2001:100::", .prefix_length = 24},
696              
697             {.ipstr = "2001:db8::", .prefix_length = 32},
698             // 2002::/16 - 6to4, part of alias_ipv6_to_ipv4
699             // 2620:4f:8000::/48 is routable I believe
700             {.ipstr = "fc00::", .prefix_length = 7},
701             {.ipstr = "fe80::", .prefix_length = 10},
702             // Multicast
703             {.ipstr = "ff00::", .prefix_length = 8},
704             };
705              
706             // Insert a FIXED_EMPTY record for all private and reserved networks.
707             //
708             // This is to avoid the case where we might otherwise accidentally add
709             // information about such networks.
710             static MMDBW_status
711 151           insert_reserved_networks_as_fixed_empty(MMDBW_tree_s *tree) {
712 151           MMDBW_status const status = insert_networks_as_fixed_empty(
713             tree,
714             reserved_networks_ipv4,
715             sizeof(reserved_networks_ipv4) / sizeof(struct network));
716 151 50         if (status != MMDBW_SUCCESS) {
717             return status;
718             }
719              
720 151 100         if (tree->ip_version == 6) {
721 80           MMDBW_status const status = insert_networks_as_fixed_empty(
722             tree,
723             reserved_networks_ipv6,
724             sizeof(reserved_networks_ipv6) / sizeof(struct network));
725 80 50         if (status != MMDBW_SUCCESS) {
726             return status;
727             }
728             }
729              
730             return MMDBW_SUCCESS;
731             }
732              
733             // Insert a FIXED_EMPTY record for each network.
734             static MMDBW_status
735 231           insert_networks_as_fixed_empty(MMDBW_tree_s *tree,
736             struct network const *const networks,
737             const size_t num_networks) {
738 3616 100         for (size_t i = 0; i < num_networks; i++) {
739 3385           MMDBW_network_s resolved_network =
740 3385           resolve_network(tree, networks[i].ipstr, networks[i].prefix_length);
741              
742 3385           MMDBW_record_s record = {
743             .type = MMDBW_RECORD_TYPE_FIXED_EMPTY,
744             };
745              
746 6770           MMDBW_status const status = insert_record_for_network(
747             tree, &resolved_network, &record, MMDBW_MERGE_STRATEGY_NONE, true);
748              
749 3385           free_network(&resolved_network);
750              
751 3385 50         if (status != MMDBW_SUCCESS) {
752 0           return status;
753             }
754             }
755              
756             return MMDBW_SUCCESS;
757             }
758              
759             static MMDBW_status
760 215436           insert_record_for_network(MMDBW_tree_s *tree,
761             MMDBW_network_s *network,
762             MMDBW_record_s *new_record,
763             MMDBW_merge_strategy merge_strategy,
764             bool is_internal_insert) {
765 106156 100         if (merge_strategy == MMDBW_MERGE_STRATEGY_UNKNOWN) {
766 106135           merge_strategy = tree->merge_strategy;
767             }
768              
769 109619           return insert_record_into_next_node(tree,
770             &(tree->root_record),
771             network,
772             0,
773             new_record,
774             merge_strategy,
775             is_internal_insert);
776             }
777              
778             static MMDBW_status
779 26585411           insert_record_into_next_node(MMDBW_tree_s *tree,
780             MMDBW_record_s *current_record,
781             MMDBW_network_s *network,
782             int current_bit,
783             MMDBW_record_s *new_record,
784             MMDBW_merge_strategy merge_strategy,
785             bool is_internal_insert) {
786             // We've reached the record where the network belongs. Depending on the
787             // type of record it is, we insert right here.
788 26585411 100         if (current_bit >= network->prefix_length &&
789 249460 100         (current_record->type == MMDBW_RECORD_TYPE_EMPTY ||
790 18433 100         current_record->type == MMDBW_RECORD_TYPE_DATA ||
791 229 100         (current_record->type == MMDBW_RECORD_TYPE_NODE &&
792             merge_strategy == MMDBW_MERGE_STRATEGY_NONE))) {
793 231038           return insert_record_into_current_record(
794             tree, current_record, network, new_record, merge_strategy);
795             }
796              
797 26354373 100         if (current_bit == network->prefix_length &&
798 93 100         current_record->type == MMDBW_RECORD_TYPE_FIXED_NODE &&
799 63 50         new_record->type == MMDBW_RECORD_TYPE_FIXED_NODE) {
800             // We could potentially make this work, but it's tricky. One of the
801             // purposes of fixed nodes is for alias nodes to point at them.
802             // Returning success here without doing anything is not what we want to
803             // do: We should be setting the node value to what is in the new record
804             // since it may be setting up alias nodes pointing at it. Changing the
805             // current node value is not an option either: Aliases may already be
806             // pointing to it.
807             //
808             // If we don't have this case, we'll continue to descend the tree,
809             // inserting the new record's fixed node in potentially multiple
810             // locations. If that happens, we end up freeing the node multiple
811             // times when destroying the tree.
812             //
813             // Consider the case where we previously inserted fixed nodes to a
814             // greater depth than we are right now. Because we'd continue to
815             // descend the tree, we'd insert this new fixed node record at
816             // potentially multiple spots in the tree, at each point we encounter
817             // an EMPTY, DATA, or NODE record (the conditions where we can enter
818             // insert_record_into_current_record()). The stopping condition would
819             // not be correct.
820             return MMDBW_FIXED_NODE_OVERWRITE_ATTEMPT_ERROR;
821             }
822              
823             // Figure out the next node.
824 26354373           MMDBW_node_s *next_node = NULL;
825 26354373           switch (current_record->type) {
826 258719           case MMDBW_RECORD_TYPE_EMPTY:
827             case MMDBW_RECORD_TYPE_DATA: {
828             // In this case we create a new node to point to. We make the new
829             // nodes left and right identical to us.
830 258719           next_node = new_node_from_record(tree, current_record);
831 258719           current_record->value.node = next_node;
832 258719           current_record->type = MMDBW_RECORD_TYPE_NODE;
833 258719           break;
834             }
835             case MMDBW_RECORD_TYPE_FIXED_EMPTY:
836             // We're a) trying to overwrite a fixed empty network, b) trying to
837             // insert a network containing a fixed empty network, or c) trying
838             // to insert inside a fixed empty network. (You can see these 3
839             // cases handled separately in the ALIAS case.) We ignore the insert
840             // with the assumption that it was not intended. e.g., you probably
841             // didn't mean to insert data about a reserved network. Doing so
842             // makes working with dirty data easier. Note this may change to be
843             // more strict and return an error in the future.
844             return MMDBW_SUCCESS;
845 23           break;
846 23           case MMDBW_RECORD_TYPE_ALIAS: {
847             // The insert is trying to overwrite an aliased network. We do not
848             // allow this.
849 23 100         if (current_bit == network->prefix_length) {
850             return MMDBW_ALIAS_OVERWRITE_ATTEMPT_ERROR;
851             }
852              
853             // We don't follow aliases when inserting a network that contains
854             // an aliased network within it.
855 18 100         if (current_bit > network->prefix_length) {
856             // We return success to silently ignore when we try to insert
857             // here. If we raise an error, it makes working with dirty data
858             // more difficult. We assume such inserts were not intended and
859             // can be safely skipped.
860             return MMDBW_SUCCESS;
861             }
862             // current_bit < network->prefix_length. This means we contain the
863             // new network already as ALIAS. Inserting the network is not valid
864             // because of that.
865             return MMDBW_INSERT_INTO_ALIAS_NODE_ERROR;
866 26094242           break;
867             }
868 26094242           case MMDBW_RECORD_TYPE_FIXED_NODE:
869             case MMDBW_RECORD_TYPE_NODE: {
870             // We're a node already.
871 26094242           next_node = current_record->value.node;
872 26094242           break;
873             }
874             }
875              
876             // If we are inserting an alias, a fixed node, or a fixed empty record, we
877             // make all of the nodes down to that record fixed nodes. This makes it
878             // easier to not accidentally delete or modify them.
879 26352961           if (new_record->type == MMDBW_RECORD_TYPE_ALIAS ||
880 26352961 100         new_record->type == MMDBW_RECORD_TYPE_FIXED_NODE ||
881             new_record->type == MMDBW_RECORD_TYPE_FIXED_EMPTY) {
882 183726           current_record->type = MMDBW_RECORD_TYPE_FIXED_NODE;
883             }
884              
885 26352961           bool next_is_right = network_bit_value(network, current_bit);
886              
887             // if we are beyond the prefix length, both records are within the
888             // network we are inserting.
889 26352961           bool insert_into_both = current_bit >= network->prefix_length;
890              
891 26352961           if (next_is_right || insert_into_both) {
892 1600913           MMDBW_status status =
893 1600913           insert_record_into_next_node(tree,
894             &(next_node->right_record),
895             network,
896             current_bit + 1,
897             new_record,
898             merge_strategy,
899             is_internal_insert);
900 1600909 100         if (status != MMDBW_SUCCESS) {
901             return status;
902             }
903             }
904              
905 26352913 100         if (!next_is_right || insert_into_both) {
906 24769062           MMDBW_status status =
907 24769062           insert_record_into_next_node(tree,
908             &(next_node->left_record),
909             network,
910             current_bit + 1,
911             new_record,
912             merge_strategy,
913             is_internal_insert);
914 24768954 100         if (status != MMDBW_SUCCESS) {
915             return status;
916             }
917             }
918              
919             // We inserted the new record into the right and/or left record of the next
920             // node. We now need to trim the tree upwards by merging identical records.
921             // Basically what we do here is take care of the case where the record
922             // we're at points at another node, and the records in that node are both
923             // the same. In that case, we delete the node we point at and take its
924             // value on ourselves.
925              
926 26352529 100         if (next_node->left_record.type == next_node->right_record.type &&
927             // We don't allow merging into aliases or fixed nodes
928 1641486 100         current_record->type == MMDBW_RECORD_TYPE_NODE) {
929 1571926           switch (next_node->left_record.type) {
930 0           case MMDBW_RECORD_TYPE_EMPTY: {
931 0           MMDBW_status status =
932 0           free_node_and_subnodes(tree, next_node, false);
933 0 0         if (status != MMDBW_SUCCESS) {
934             return MMDBW_SUCCESS;
935             }
936 0           current_record->type = MMDBW_RECORD_TYPE_EMPTY;
937 0           break;
938             }
939 103680           case MMDBW_RECORD_TYPE_DATA: {
940             // If the two keys are the same, the records can be merged.
941             // Otherwise, break.
942 103680 100         if (strcmp(next_node->left_record.value.key,
943             next_node->right_record.value.key)) {
944             break;
945             }
946 3997           const char *key = increment_data_reference_count(
947             tree, next_node->left_record.value.key);
948 3997           MMDBW_status status =
949 3997           free_node_and_subnodes(tree, next_node, false);
950 3997 50         if (status != MMDBW_SUCCESS) {
951             return MMDBW_SUCCESS;
952             }
953 3997           current_record->type = MMDBW_RECORD_TYPE_DATA;
954 3997           current_record->value.key = key;
955 3997           break;
956             }
957             case MMDBW_RECORD_TYPE_ALIAS:
958             case MMDBW_RECORD_TYPE_FIXED_NODE:
959             case MMDBW_RECORD_TYPE_FIXED_EMPTY:
960             case MMDBW_RECORD_TYPE_NODE: {
961             // Do nothing in these cases. We don't trim immutable nodes.
962             break;
963             }
964             }
965             }
966             return MMDBW_SUCCESS;
967             }
968              
969             static MMDBW_status
970 231038           insert_record_into_current_record(MMDBW_tree_s *tree,
971             MMDBW_record_s *current_record,
972             MMDBW_network_s *network,
973             MMDBW_record_s *new_record,
974             MMDBW_merge_strategy merge_strategy) {
975             // We only get called when we have a current_record with these record
976             // types. There was previously logic for other types, but that was
977             // confusing.
978 231038 100         if (current_record->type != MMDBW_RECORD_TYPE_EMPTY &&
979 11 50         current_record->type != MMDBW_RECORD_TYPE_DATA &&
980             current_record->type != MMDBW_RECORD_TYPE_NODE) {
981             return MMDBW_INSERT_INVALID_RECORD_TYPE_ERROR;
982             }
983              
984 231038 100         if (current_record->type == MMDBW_RECORD_TYPE_EMPTY &&
    100          
985             merge_strategy == MMDBW_MERGE_STRATEGY_ADD_ONLY_IF_PARENT_EXISTS) {
986             // We do not create a new record when using "only if parent exists"
987             return MMDBW_SUCCESS;
988             }
989              
990 230381           const char *merged_key = maybe_merge_records(
991             tree, network, new_record, current_record, merge_strategy);
992              
993 230377           MMDBW_status status = free_record_value(tree, current_record, false);
994 230377 50         if (status != MMDBW_SUCCESS) {
995             return status;
996             }
997              
998             // Update the record to match the new one. Replace what's there.
999 230377           current_record->type = new_record->type;
1000 230377 100         if (new_record->type == MMDBW_RECORD_TYPE_DATA) {
1001 226594 100         const char *const key = increment_data_reference_count(
1002             tree, merged_key == NULL ? new_record->value.key : merged_key);
1003 226594           current_record->value.key = key;
1004 3783           } else if (new_record->type == MMDBW_RECORD_TYPE_FIXED_NODE ||
1005 3783 100         new_record->type == MMDBW_RECORD_TYPE_NODE ||
1006             new_record->type == MMDBW_RECORD_TYPE_ALIAS) {
1007 76           current_record->value.node = new_record->value.node;
1008 3707 50         } else if (new_record->type == MMDBW_RECORD_TYPE_EMPTY ||
1009             new_record->type == MMDBW_RECORD_TYPE_FIXED_EMPTY) {
1010 3707           current_record->value.key = NULL;
1011 3707           current_record->value.node = NULL;
1012             } else {
1013             return MMDBW_INSERT_INVALID_RECORD_TYPE_ERROR;
1014             }
1015              
1016 230377 100         if (merged_key) {
1017 5415           decrement_data_reference_count(tree, merged_key);
1018             }
1019              
1020             return status;
1021             }
1022              
1023 230381           static const char *maybe_merge_records(MMDBW_tree_s *tree,
1024             MMDBW_network_s *network,
1025             MMDBW_record_s *new_record,
1026             MMDBW_record_s *record_to_set,
1027             MMDBW_merge_strategy merge_strategy) {
1028 230381 100         if (MMDBW_RECORD_TYPE_DATA != new_record->type ||
    100          
1029             merge_strategy == MMDBW_MERGE_STRATEGY_NONE) {
1030             return NULL;
1031             }
1032              
1033             /* This must come before the node pruning code in
1034             insert_record_for_network, as we only want to prune nodes where the
1035             merged record matches. */
1036 107565 100         if (MMDBW_RECORD_TYPE_DATA != record_to_set->type
1037             // If the two keys are equal, there is no point in trying to merge
1038             // the contents.
1039 5427 100         || strcmp(new_record->value.key, record_to_set->value.key) == 0) {
1040             return NULL;
1041             }
1042              
1043 5419           char merge_cache_key[MERGE_KEY_SIZE + 1];
1044 5419           snprintf(merge_cache_key,
1045             MERGE_KEY_SIZE + 1,
1046             "%d-%s-%s",
1047             merge_strategy,
1048             new_record->value.key,
1049             record_to_set->value.key);
1050              
1051 5419           const char *cached_key = merge_cache_lookup(tree, merge_cache_key);
1052 5419 100         if (cached_key != NULL) {
1053 5317           const char *const new_key =
1054 5317           increment_data_reference_count(tree, cached_key);
1055 5317           return new_key;
1056             }
1057              
1058 102           SV *merged = merge_hashes_for_keys(tree,
1059             new_record->value.key,
1060             record_to_set->value.key,
1061             network,
1062             merge_strategy);
1063              
1064 98           SV *key_sv = key_for_data(merged);
1065 98           const char *const new_key =
1066 98           store_data_in_tree(tree, SvPVbyte_nolen(key_sv), merged);
1067 98           SvREFCNT_dec(key_sv);
1068              
1069             /* The ref count was incremented in store_data_in_tree */
1070 98           SvREFCNT_dec(merged);
1071              
1072 98           store_in_merge_cache(tree, merge_cache_key, new_key);
1073              
1074 98           return new_key;
1075             }
1076              
1077 26954692           static int network_bit_value(MMDBW_network_s *network, uint8_t current_bit) {
1078 26352961 100         return network->bytes[current_bit >> 3] & (1U << (~current_bit & 7));
1079             }
1080              
1081 2417063           static int tree_depth0(MMDBW_tree_s *tree) {
1082 2417063           return tree->ip_version == 6 ? 127 : 31;
1083             }
1084              
1085 102           SV *merge_hashes_for_keys(MMDBW_tree_s *tree,
1086             const char *const key_from,
1087             const char *const key_into,
1088             MMDBW_network_s *network,
1089             MMDBW_merge_strategy merge_strategy) {
1090              
1091 102           SV *data_from = data_for_key(tree, key_from);
1092 102           SV *data_into = data_for_key(tree, key_into);
1093              
1094 102 100         if (!(SvROK(data_from) && SvROK(data_into) &&
    50          
1095 100 100         SvTYPE(SvRV(data_from)) == SVt_PVHV &&
1096 99 100         SvTYPE(SvRV(data_into)) == SVt_PVHV)) {
1097             /* We added key_into earlier during insert_record_for_network, so we
1098             have to make sure here that it's removed again after we decide to
1099             not actually store this network. It might be nicer to not insert
1100             anything into the tree until we're sure we really want to. */
1101 4           decrement_data_reference_count(tree, key_from);
1102              
1103 4           bool is_ipv6 = tree->ip_version == 6;
1104 4 50         char address_string[is_ipv6 ? INET6_ADDRSTRLEN : INET_ADDRSTRLEN];
1105 4           inet_ntop(is_ipv6 ? AF_INET6 : AF_INET,
1106 4 50         network->bytes,
1107             address_string,
1108             sizeof(address_string));
1109              
1110 4           croak("Cannot merge data records unless both records are hashes - "
1111             "inserting %s/%" PRIu8,
1112             address_string,
1113             network->prefix_length);
1114             }
1115              
1116 98           return merge_hashes(tree, data_from, data_into, merge_strategy);
1117             }
1118              
1119 116           static SV *merge_hashes(MMDBW_tree_s *tree,
1120             SV *from,
1121             SV *into,
1122             MMDBW_merge_strategy merge_strategy) {
1123 116           HV *hash_from = (HV *)SvRV(from);
1124 116           HV *hash_into = (HV *)SvRV(into);
1125 116           HV *hash_new = newHV();
1126              
1127 116           merge_new_from_hash_into_hash(tree, hash_into, hash_new, false);
1128 116           merge_new_from_hash_into_hash(tree, hash_from, hash_new, merge_strategy);
1129              
1130 116           return newRV_noinc((SV *)hash_new);
1131             }
1132              
1133             // Note: unlike the other merge functions, this does _not_ replace existing
1134             // values.
1135 232           static void merge_new_from_hash_into_hash(MMDBW_tree_s *tree,
1136             HV *from,
1137             HV *to,
1138             MMDBW_merge_strategy merge_strategy) {
1139 232           (void)hv_iterinit(from);
1140 232           HE *he;
1141 551 100         while (NULL != (he = hv_iternext(from))) {
1142 319           STRLEN key_length;
1143 319 50         const char *const key = HePV(he, key_length);
1144 319           U32 hash = 0;
1145 319           SV *value = HeVAL(he);
1146 319 100         if (hv_exists(to, key, key_length)) {
1147 46 100         if (merge_strategy == MMDBW_MERGE_STRATEGY_RECURSE ||
1148             merge_strategy ==
1149             MMDBW_MERGE_STRATEGY_ADD_ONLY_IF_PARENT_EXISTS) {
1150 32           SV **existing_value = hv_fetch(to, key, key_length, 0);
1151 32 50         if (existing_value == NULL) {
1152             // This should never happen as we just did an hv_exists
1153 0           croak("Received an unexpected NULL from hv_fetch");
1154             }
1155              
1156 32           value =
1157 32           merge_values(tree, value, *existing_value, merge_strategy);
1158             } else {
1159             // We are replacing the current value
1160 14           hash = HeHASH(he);
1161 14           SvREFCNT_inc_simple_void_NN(value);
1162             }
1163 273 100         } else if (merge_strategy ==
1164 18           MMDBW_MERGE_STRATEGY_ADD_ONLY_IF_PARENT_EXISTS &&
1165 18 100         SvROK(value)) {
1166 10           continue;
1167             } else {
1168 263           hash = HeHASH(he);
1169 263           SvREFCNT_inc_simple_void_NN(value);
1170             }
1171              
1172 309           (void)hv_store(to, key, key_length, value, hash);
1173             }
1174              
1175 232           return;
1176             }
1177              
1178 38           static SV *merge_values(MMDBW_tree_s *tree,
1179             SV *from,
1180             SV *into,
1181             MMDBW_merge_strategy merge_strategy) {
1182 38 50         if (SvROK(from) != SvROK(into)) {
1183 0           croak("Attempt to merge a reference value and non-refrence value");
1184             }
1185              
1186 38 100         if (!SvROK(from)) {
1187             // If the two values are scalars, we prefer the one in the hash being
1188             // inserted.
1189 10           SvREFCNT_inc_simple_void_NN(from);
1190 10           return from;
1191             }
1192              
1193 28 100         if (SvTYPE(SvRV(from)) == SVt_PVHV && SvTYPE(SvRV(into)) == SVt_PVHV) {
    50          
1194 18           return merge_hashes(tree, from, into, merge_strategy);
1195             }
1196              
1197 10 50         if (SvTYPE(SvRV(from)) == SVt_PVAV && SvTYPE(SvRV(into)) == SVt_PVAV) {
    50          
1198 10           return merge_arrays(tree, from, into, merge_strategy);
1199             }
1200              
1201 0           croak("Only arrayrefs, hashrefs, and scalars can be merged.");
1202             }
1203              
1204 10           static SV *merge_arrays(MMDBW_tree_s *tree,
1205             SV *from,
1206             SV *into,
1207             MMDBW_merge_strategy merge_strategy) {
1208 10           AV *from_array = (AV *)SvRV(from);
1209 10           AV *into_array = (AV *)SvRV(into);
1210              
1211             // Note that av_len() is really the index of the last element. In newer
1212             // Perl versions, it is also called av_top_index() or av_tindex()
1213 10           SSize_t from_top_index = av_len(from_array);
1214 10           SSize_t into_top_index = av_len(into_array);
1215              
1216 10           SSize_t new_top_index =
1217             from_top_index > into_top_index ? from_top_index : into_top_index;
1218              
1219 10           AV *new_array = newAV();
1220 22 100         for (SSize_t i = 0; i <= new_top_index; i++) {
1221 14           SV *new_value = NULL;
1222 14           SV **from_value = av_fetch(from_array, i, 0);
1223 14           SV **into_value = av_fetch(into_array, i, 0);
1224 14 100         if (from_value != NULL && into_value != NULL) {
1225 6           new_value =
1226 6           merge_values(tree, *from_value, *into_value, merge_strategy);
1227 8 100         } else if (from_value != NULL) {
1228 6 100         if (merge_strategy ==
1229 4           MMDBW_MERGE_STRATEGY_ADD_ONLY_IF_PARENT_EXISTS &&
1230 4 100         SvROK(*from_value)) {
1231             break;
1232             }
1233 4           new_value = *from_value;
1234 4           SvREFCNT_inc_simple_void_NN(new_value);
1235 2 50         } else if (into_value != NULL) {
1236 2           new_value = *into_value;
1237 2           SvREFCNT_inc_simple_void_NN(new_value);
1238             } else {
1239 0           croak("Received unexpected NULLs when merging arrays");
1240             }
1241              
1242 12           av_push(new_array, new_value);
1243             }
1244 10           return newRV_noinc((SV *)new_array);
1245             }
1246              
1247 14749           SV *lookup_ip_address(MMDBW_tree_s *tree, const char *const ipstr) {
1248 14749           bool is_ipv6_address = NULL != strchr(ipstr, ':');
1249 14749 100         if (tree->ip_version == 4 && is_ipv6_address) {
    100          
1250             return &PL_sv_undef;
1251             }
1252 2421           MMDBW_network_s network =
1253 14807 100         resolve_network(tree, ipstr, is_ipv6_address ? 128 : 32);
1254              
1255 14689           MMDBW_record_s *record_for_address;
1256 14689           MMDBW_status status =
1257 14689           find_record_for_network(tree, &network, &record_for_address);
1258              
1259 14689           free_network(&network);
1260              
1261 14689 50         if (MMDBW_SUCCESS != status) {
1262 0           croak("Received an unexpected NULL when looking up %s: %s",
1263             ipstr,
1264             status_error_message(status));
1265             }
1266              
1267 14689           if (record_for_address->type == MMDBW_RECORD_TYPE_NODE ||
1268 14689 50         record_for_address->type == MMDBW_RECORD_TYPE_FIXED_NODE ||
1269             record_for_address->type == MMDBW_RECORD_TYPE_ALIAS) {
1270 0           croak("WTF - found a node or alias record for an address lookup - "
1271             "%s" PRIu8,
1272             ipstr);
1273             return &PL_sv_undef;
1274             }
1275              
1276 14689 100         if (record_for_address->type == MMDBW_RECORD_TYPE_EMPTY ||
1277             record_for_address->type == MMDBW_RECORD_TYPE_FIXED_EMPTY) {
1278             return &PL_sv_undef;
1279             }
1280              
1281 14406           return newSVsv(data_for_key(tree, record_for_address->value.key));
1282             }
1283              
1284 14689           static MMDBW_status find_record_for_network(MMDBW_tree_s *tree,
1285             MMDBW_network_s *network,
1286             MMDBW_record_s **record) {
1287 14689           *record = &(tree->root_record);
1288              
1289 616420 100         for (int current_bit = 0; current_bit < network->prefix_length;
1290 601731           current_bit++) {
1291              
1292 616004           MMDBW_node_s *node;
1293 616004           if ((*record)->type == MMDBW_RECORD_TYPE_NODE ||
1294 616004 100         (*record)->type == MMDBW_RECORD_TYPE_FIXED_NODE ||
1295             (*record)->type == MMDBW_RECORD_TYPE_ALIAS) {
1296 601731           node = (*record)->value.node;
1297             } else {
1298             break;
1299             }
1300              
1301 601731 100         if (network_bit_value(network, current_bit)) {
1302 47571           *record = &(node->right_record);
1303             } else {
1304 554160           *record = &(node->left_record);
1305             }
1306             }
1307              
1308 14689           return MMDBW_SUCCESS;
1309             }
1310              
1311 258719           static MMDBW_node_s *new_node_from_record(MMDBW_tree_s *tree,
1312             MMDBW_record_s *record) {
1313 258719           MMDBW_node_s *node = new_node();
1314 258719 100         if (record->type == MMDBW_RECORD_TYPE_DATA) {
1315             /* We only need to increment the reference count once as we are
1316             replacing the parent record */
1317 163           increment_data_reference_count(tree, record->value.key);
1318              
1319 163           node->left_record.type = MMDBW_RECORD_TYPE_DATA;
1320 163           node->left_record.value.key = record->value.key;
1321              
1322 163           node->right_record.type = MMDBW_RECORD_TYPE_DATA;
1323 163           node->right_record.value.key = record->value.key;
1324             }
1325              
1326 258719           return node;
1327             }
1328              
1329 258738           MMDBW_node_s *new_node() {
1330 258738           MMDBW_node_s *node = checked_malloc(sizeof(MMDBW_node_s));
1331              
1332 258738           node->number = 0;
1333 258738           node->left_record.type = node->right_record.type = MMDBW_RECORD_TYPE_EMPTY;
1334              
1335 258738           return node;
1336             }
1337              
1338 258738           static MMDBW_status free_node_and_subnodes(MMDBW_tree_s *tree,
1339             MMDBW_node_s *node,
1340             bool remove_alias_and_fixed_nodes) {
1341 258738           MMDBW_status status = free_record_value(
1342             tree, &(node->left_record), remove_alias_and_fixed_nodes);
1343 258738 50         if (status != MMDBW_SUCCESS) {
1344             return status;
1345             }
1346              
1347 258738           status = free_record_value(
1348             tree, &(node->right_record), remove_alias_and_fixed_nodes);
1349 258738 50         if (status != MMDBW_SUCCESS) {
1350             return status;
1351             }
1352              
1353 258738           free(node);
1354 258738           return MMDBW_SUCCESS;
1355             }
1356              
1357 748056           static MMDBW_status free_record_value(MMDBW_tree_s *tree,
1358             MMDBW_record_s *record,
1359             bool remove_alias_and_fixed_nodes) {
1360 748056 100         if (record->type == MMDBW_RECORD_TYPE_FIXED_NODE &&
    50          
1361             !remove_alias_and_fixed_nodes) {
1362             return MMDBW_FREED_FIXED_NODE_ERROR;
1363             }
1364              
1365 748056 100         if (record->type == MMDBW_RECORD_TYPE_FIXED_EMPTY &&
    50          
1366             !remove_alias_and_fixed_nodes) {
1367             return MMDBW_FREED_FIXED_EMPTY_ERROR;
1368             }
1369              
1370 748056 100         if (record->type == MMDBW_RECORD_TYPE_NODE ||
1371             record->type == MMDBW_RECORD_TYPE_FIXED_NODE) {
1372 254741           return free_node_and_subnodes(
1373 254741           tree, record->value.node, remove_alias_and_fixed_nodes);
1374             }
1375              
1376 493315 100         if (record->type == MMDBW_RECORD_TYPE_DATA) {
1377 230754           decrement_data_reference_count(tree, record->value.key);
1378             }
1379              
1380             /* Alias nodes should only be removed explicitly. We can't just croak
1381             as it will leave the tree in an inconsistent state causing a segfault
1382             during unwinding. */
1383 493315 100         if (record->type == MMDBW_RECORD_TYPE_ALIAS &&
    50          
1384             !remove_alias_and_fixed_nodes) {
1385 0           return MMDBW_FREED_ALIAS_NODE_ERROR;
1386             }
1387             return MMDBW_SUCCESS;
1388             }
1389              
1390 109           void assign_node_numbers(MMDBW_tree_s *tree) {
1391 109           tree->node_count = 0;
1392 109           start_iteration(tree, false, (void *)NULL, &assign_node_number);
1393 107           }
1394              
1395 449793           static void assign_node_number(MMDBW_tree_s *tree,
1396             MMDBW_node_s *node,
1397             uint128_t UNUSED(network),
1398             uint8_t UNUSED(depth),
1399             void *UNUSED(args)) {
1400 449793           node->number = tree->node_count++;
1401 449793           return;
1402             }
1403              
1404             /* 16 bytes for an IP address, 1 byte for the prefix length */
1405             #define FROZEN_RECORD_MAX_SIZE (16 + 1 + SHA1_KEY_LENGTH)
1406             #define FROZEN_NODE_MAX_SIZE (FROZEN_RECORD_MAX_SIZE * 2)
1407              
1408             /* 17 bytes of NULLs followed by something that cannot be an SHA1 key are a
1409             clear indicator that there are no more frozen networks in the buffer. */
1410             #define SEVENTEEN_NULLS "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
1411             #define FREEZE_SEPARATOR "not an SHA1 key"
1412             /* We subtract 1 as we treat this as a sequence of bytes rather than a null
1413             terminated string. */
1414             #define FREEZE_SEPARATOR_LENGTH (sizeof(FREEZE_SEPARATOR) - 1)
1415              
1416 30           void freeze_tree(MMDBW_tree_s *tree,
1417             char *filename,
1418             char *frozen_params,
1419             size_t frozen_params_size) {
1420 30           FILE *file = fopen(filename, "wb");
1421 30 50         if (!file) {
1422 0           croak("Could not open file %s: %s", filename, strerror(errno));
1423             }
1424              
1425 30           freeze_args_s args = {
1426             .file = file,
1427             .filename = filename,
1428             };
1429              
1430 60           freeze_to_file(&args, &frozen_params_size, 4);
1431 60           freeze_to_file(&args, frozen_params, frozen_params_size);
1432              
1433 30           freeze_search_tree(tree, &args);
1434              
1435 60           freeze_to_file(&args, SEVENTEEN_NULLS, 17);
1436 60           freeze_to_file(&args, FREEZE_SEPARATOR, FREEZE_SEPARATOR_LENGTH);
1437              
1438 30           freeze_data_to_file(&args, tree);
1439              
1440 30 50         if (fclose(file) != 0) {
1441 0           croak("Could not close file %s: %s", filename, strerror(errno));
1442             }
1443              
1444             /* When the hash is _freed_, Perl decrements the ref count for each value
1445             * so we don't need to mess with them. */
1446 30           SvREFCNT_dec((SV *)args.data_hash);
1447 30           }
1448              
1449 30           static void freeze_search_tree(MMDBW_tree_s *tree, freeze_args_s *args) {
1450 30 50         if (tree->root_record.type == MMDBW_RECORD_TYPE_DATA) {
1451 0           croak("A tree that only contains a data record for /0 cannot be "
1452             "frozen");
1453             }
1454              
1455 30 50         if (tree->root_record.type == MMDBW_RECORD_TYPE_NODE ||
1456             tree->root_record.type == MMDBW_RECORD_TYPE_FIXED_NODE) {
1457 30           start_iteration(tree, false, (void *)args, &freeze_node);
1458 30           return;
1459             }
1460              
1461 0           croak("Unexected root record type when freezing tree: %s",
1462             record_type_name(tree->root_record.type));
1463             }
1464              
1465 110567           static void freeze_node(MMDBW_tree_s *tree,
1466             MMDBW_node_s *node,
1467             uint128_t network,
1468             uint8_t depth,
1469             void *void_args) {
1470 110567           freeze_args_s *args = (freeze_args_s *)void_args;
1471              
1472 110567           const uint8_t next_depth = depth + 1;
1473              
1474 110567 100         if (node->left_record.type == MMDBW_RECORD_TYPE_DATA) {
1475 51741           freeze_data_record(
1476             tree, network, next_depth, node->left_record.value.key, args);
1477             }
1478              
1479 110567 100         if (node->right_record.type == MMDBW_RECORD_TYPE_DATA) {
1480 54077           uint128_t right_network = flip_network_bit(tree, network, depth);
1481 54077           freeze_data_record(tree,
1482             right_network,
1483             next_depth,
1484             node->right_record.value.key,
1485             args);
1486             }
1487 110567           }
1488              
1489 105818           static void freeze_data_record(MMDBW_tree_s *UNUSED(tree),
1490             uint128_t network,
1491             uint8_t depth,
1492             const char *key,
1493             freeze_args_s *args) {
1494             /* It'd save some space to shrink this to 4 bytes for IPv4-only trees, but
1495             * that would also complicated thawing quite a bit. */
1496 211636           freeze_to_file(args, &network, 16);
1497 211636           freeze_to_file(args, &(depth), 1);
1498 211636           freeze_to_file(args, (char *)key, SHA1_KEY_LENGTH);
1499 105818           }
1500              
1501 317574           static void freeze_to_file(freeze_args_s *args, void *data, size_t size) {
1502 105878           checked_fwrite(args->file, args->filename, data, size);
1503             }
1504              
1505 30           static void freeze_data_to_file(freeze_args_s *args, MMDBW_tree_s *tree) {
1506 30           HV *data_hash = newHV();
1507              
1508 30           MMDBW_data_hash_s *item, *tmp;
1509 3390 50         HASH_ITER(hh, tree->data_table, item, tmp) {
    100          
    100          
1510 1665           SvREFCNT_inc_simple_void_NN(item->data_sv);
1511 1665           (void)hv_store(data_hash, item->key, SHA1_KEY_LENGTH, item->data_sv, 0);
1512             }
1513              
1514 30           SV *frozen_data = freeze_hash(data_hash);
1515 30           STRLEN frozen_data_size;
1516 30           char *frozen_data_chars = SvPV(frozen_data, frozen_data_size);
1517              
1518 30           checked_fwrite(
1519             args->file, args->filename, &frozen_data_size, sizeof(STRLEN));
1520 30           checked_fwrite(
1521             args->file, args->filename, frozen_data_chars, frozen_data_size);
1522              
1523 30           SvREFCNT_dec(frozen_data);
1524 30           SvREFCNT_dec((SV *)data_hash);
1525 30           }
1526              
1527 30           static SV *freeze_hash(HV *hash) {
1528 30           dSP;
1529 30           ENTER;
1530 30           SAVETMPS;
1531              
1532 30           SV *hashref = sv_2mortal(newRV_inc((SV *)hash));
1533              
1534 30 50         PUSHMARK(SP);
1535 30 50         EXTEND(SP, 1);
1536 30           PUSHs(hashref);
1537 30           PUTBACK;
1538              
1539 30           int count = call_pv("Sereal::Encoder::encode_sereal", G_SCALAR);
1540              
1541 30           SPAGAIN;
1542              
1543 30 50         if (count != 1) {
1544 0           croak("Expected 1 item back from Sereal::Encoder::encode_sereal call");
1545             }
1546              
1547 30           SV *frozen = POPs;
1548 30 50         if (!SvPOK(frozen)) {
1549 0           croak("The Sereal::Encoder::encode_sereal sub returned an SV which is "
1550             "not SvPOK!");
1551             }
1552              
1553             /* The SV will be mortal so it's about to lose a ref with the FREETMPS
1554             call below. */
1555 30           SvREFCNT_inc_simple_void_NN(frozen);
1556              
1557 30           PUTBACK;
1558 30 50         FREETMPS;
1559 30           LEAVE;
1560              
1561 30           return frozen;
1562             }
1563              
1564 29           MMDBW_tree_s *thaw_tree(char *filename,
1565             uint32_t initial_offset,
1566             uint8_t ip_version,
1567             uint8_t record_size,
1568             MMDBW_merge_strategy merge_strategy,
1569             const bool alias_ipv6,
1570             const bool remove_reserved_networks) {
1571             #ifdef WIN32
1572             int fd = open(filename, O_RDONLY);
1573             #else
1574 29           int fd = open(filename, O_RDONLY, 0);
1575             #endif
1576 29 50         if (fd == -1) {
1577 0           croak("Could not open file %s: %s", filename, strerror(errno));
1578             }
1579              
1580 29           struct stat fileinfo;
1581 29 50         if (fstat(fd, &fileinfo) == -1) {
1582 0           close(fd);
1583 0           croak("Could not stat file: %s: %s", filename, strerror(errno));
1584             }
1585              
1586 29           uint8_t *buffer =
1587 29           (uint8_t *)mmap(NULL, fileinfo.st_size, PROT_READ, MAP_SHARED, fd, 0);
1588 29           close(fd);
1589              
1590 29           buffer += initial_offset;
1591              
1592 29           MMDBW_tree_s *tree = new_tree(ip_version,
1593             record_size,
1594             merge_strategy,
1595             alias_ipv6,
1596             remove_reserved_networks);
1597              
1598 29           thawed_network_s *thawed;
1599 105846 100         while (NULL != (thawed = thaw_network(tree, &buffer))) {
1600             // We should never need to merge when thawing a tree.
1601 105817           MMDBW_status status =
1602 105817           insert_record_for_network(tree,
1603             thawed->network,
1604             thawed->record,
1605             MMDBW_MERGE_STRATEGY_NONE,
1606             true);
1607 105817           free_network(thawed->network);
1608 105817           free(thawed->network);
1609 105817 50         if (thawed->record->type == MMDBW_RECORD_TYPE_DATA) {
1610 105817           free((char *)thawed->record->value.key);
1611             }
1612 105817           free(thawed->record);
1613 105817           free(thawed);
1614 105817 50         if (status != MMDBW_SUCCESS) {
1615 0           croak("Could not thaw tree: %s", status_error_message(status));
1616             }
1617             }
1618              
1619 29           STRLEN frozen_data_size = thaw_strlen(&buffer);
1620              
1621             /* per perlapi newSVpvn copies the string */
1622 29           SV *data_to_decode = sv_2mortal(newSVpvn((char *)buffer, frozen_data_size));
1623 29           HV *data_hash = thaw_data_hash(data_to_decode);
1624              
1625 29           hv_iterinit(data_hash);
1626 29           char *key;
1627 29           I32 keylen;
1628 29           SV *value;
1629 1693 100         while (NULL != (value = hv_iternextsv(data_hash, &key, &keylen))) {
1630 1664           set_stored_data_in_tree(tree, key, value);
1631             }
1632              
1633 29           SvREFCNT_dec((SV *)data_hash);
1634              
1635 29           return tree;
1636             }
1637              
1638 105846           static uint8_t thaw_uint8(uint8_t **buffer) {
1639 105846           uint8_t value;
1640 105846           memcpy(&value, *buffer, 1);
1641 105846           *buffer += 1;
1642 105846           return value;
1643             }
1644              
1645 105846           static thawed_network_s *thaw_network(MMDBW_tree_s *tree, uint8_t **buffer) {
1646 105846           uint128_t start_ip = thaw_uint128(buffer);
1647 105846           uint8_t prefix_length = thaw_uint8(buffer);
1648              
1649 105846           if (start_ip == 0 && prefix_length == 0) {
1650 29           uint8_t *maybe_separator = thaw_bytes(buffer, FREEZE_SEPARATOR_LENGTH);
1651 29 50         if (memcmp(maybe_separator,
1652             FREEZE_SEPARATOR,
1653             FREEZE_SEPARATOR_LENGTH) == 0) {
1654              
1655 29           free(maybe_separator);
1656 29           return NULL;
1657             }
1658              
1659 0           croak("Found a ::0/0 network but that should never happen!");
1660             }
1661              
1662             uint8_t *start_ip_bytes = (uint8_t *)&start_ip;
1663             uint8_t temp;
1664 952353 100         for (int i = 0; i < 8; i++) {
1665 846536           temp = start_ip_bytes[i];
1666 846536           start_ip_bytes[i] = start_ip_bytes[15 - i];
1667 846536           start_ip_bytes[15 - i] = temp;
1668             }
1669              
1670 105817           thawed_network_s *thawed = checked_malloc(sizeof(thawed_network_s));
1671              
1672 105817           uint8_t *bytes;
1673 105817 100         if (tree->ip_version == 4) {
1674 1538           bytes = checked_malloc(4);
1675 1538           memcpy(bytes, start_ip_bytes + 12, 4);
1676             } else {
1677 104279           bytes = checked_malloc(16);
1678 104279           memcpy(bytes, &start_ip, 16);
1679             }
1680              
1681 105817           MMDBW_network_s network = {
1682             .bytes = bytes,
1683             .prefix_length = prefix_length,
1684             };
1685              
1686 105817           thawed->network = checked_malloc(sizeof(MMDBW_network_s));
1687 105817           memcpy(thawed->network, &network, sizeof(MMDBW_network_s));
1688              
1689 105817           MMDBW_record_s *record = checked_malloc(sizeof(MMDBW_record_s));
1690 105817           record->type = MMDBW_RECORD_TYPE_DATA;
1691              
1692 105817           record->value.key = thaw_data_key(buffer);
1693              
1694 105817           thawed->record = record;
1695              
1696 105817           return thawed;
1697             }
1698              
1699 29           static uint8_t *thaw_bytes(uint8_t **buffer, size_t size) {
1700 29           uint8_t *value = checked_malloc(size);
1701 29           memcpy(value, *buffer, size);
1702 29           *buffer += size;
1703 29           return value;
1704             }
1705              
1706 105846           static uint128_t thaw_uint128(uint8_t **buffer) {
1707 105846           uint128_t value;
1708 105846 100         memcpy(&value, *buffer, 16);
1709 105846           *buffer += 16;
1710 105846 100         return value;
1711             }
1712              
1713 29           static STRLEN thaw_strlen(uint8_t **buffer) {
1714 29           STRLEN value;
1715 29           memcpy(&value, *buffer, sizeof(STRLEN));
1716 29           *buffer += sizeof(STRLEN);
1717 29           return value;
1718             }
1719              
1720 105817           static const char *thaw_data_key(uint8_t **buffer) {
1721 105817           char *value = checked_malloc(SHA1_KEY_LENGTH + 1);
1722 105817           memcpy(value, *buffer, SHA1_KEY_LENGTH);
1723 105817           *buffer += SHA1_KEY_LENGTH;
1724 105817           value[SHA1_KEY_LENGTH] = '\0';
1725 105817           return (const char *)value;
1726             }
1727              
1728 29           static HV *thaw_data_hash(SV *data_to_decode) {
1729 29           dSP;
1730 29           ENTER;
1731 29           SAVETMPS;
1732              
1733 29 50         PUSHMARK(SP);
1734 29 50         EXTEND(SP, 1);
1735 29           PUSHs(data_to_decode);
1736 29           PUTBACK;
1737              
1738 29           int count = call_pv("Sereal::Decoder::decode_sereal", G_SCALAR);
1739              
1740 29           SPAGAIN;
1741              
1742 29 50         if (count != 1) {
1743 0           croak("Expected 1 item back from Sereal::Decoder::decode_sereal call");
1744             }
1745              
1746 29           SV *thawed = POPs;
1747 29 50         if (!SvROK(thawed)) {
1748 0           croak("The Sereal::Decoder::decode_sereal sub returned an SV which is "
1749             "not SvROK!");
1750             }
1751              
1752 29           SV *data_hash = SvREFCNT_inc_simple_NN(SvRV(thawed));
1753              
1754 29           PUTBACK;
1755 29 50         FREETMPS;
1756 29           LEAVE;
1757              
1758 29           return (HV *)data_hash;
1759             }
1760              
1761 37           void write_search_tree(MMDBW_tree_s *tree,
1762             SV *output,
1763             SV *root_data_type,
1764             SV *serializer) {
1765 37           assign_node_numbers(tree);
1766              
1767             /* This is a gross way to get around the fact that with C function
1768             * pointers we can't easily pass different params to different
1769             * callbacks. */
1770 37           encode_args_s args = {.output_io = IoOFP(sv_2io(output)),
1771             .root_data_type = root_data_type,
1772             .serializer = serializer,
1773 37           .data_pointer_cache = newHV()};
1774              
1775 37           start_iteration(tree, false, (void *)&args, &encode_node);
1776              
1777             /* When the hash is _freed_, Perl decrements the ref count for each value
1778             * so we don't need to mess with them. */
1779 37           SvREFCNT_dec((SV *)args.data_pointer_cache);
1780              
1781 37           return;
1782             }
1783              
1784 219170           static void encode_node(MMDBW_tree_s *tree,
1785             MMDBW_node_s *node,
1786             uint128_t UNUSED(network),
1787             uint8_t UNUSED(depth),
1788             void *void_args) {
1789 219170           encode_args_s *args = (encode_args_s *)void_args;
1790              
1791 219170           check_record_sanity(node, &(node->left_record), "left");
1792 219170           check_record_sanity(node, &(node->right_record), "right");
1793              
1794 438340           uint32_t left =
1795 219170           htonl(record_value_as_number(tree, &(node->left_record), args));
1796 438340           uint32_t right =
1797 219170 100         htonl(record_value_as_number(tree, &(node->right_record), args));
1798              
1799 219170           uint8_t *left_bytes = (uint8_t *)&left;
1800 219170           uint8_t *right_bytes = (uint8_t *)&right;
1801              
1802 219170 100         if (tree->record_size == 24) {
1803 217471           check_perlio_result(PerlIO_printf(args->output_io,
1804             "%c%c%c%c%c%c",
1805 217471           left_bytes[1],
1806 217471           left_bytes[2],
1807 217471           left_bytes[3],
1808 217471           right_bytes[1],
1809 217471           right_bytes[2],
1810 217471           right_bytes[3]),
1811             6,
1812             "PerlIO_printf");
1813 1699 100         } else if (tree->record_size == 28) {
1814 774           check_perlio_result(
1815 774           PerlIO_printf(args->output_io,
1816             "%c%c%c%c%c%c%c",
1817 774           left_bytes[1],
1818 774           left_bytes[2],
1819 774           left_bytes[3],
1820 774           (left_bytes[0] << 4) | (right_bytes[0] & 15),
1821 774           right_bytes[1],
1822 774           right_bytes[2],
1823 774           right_bytes[3]),
1824             7,
1825             "PerlIO_printf");
1826             } else {
1827 925           check_perlio_result(PerlIO_printf(args->output_io,
1828             "%c%c%c%c%c%c%c%c",
1829 925           left_bytes[0],
1830 925           left_bytes[1],
1831 925           left_bytes[2],
1832 925           left_bytes[3],
1833 925           right_bytes[0],
1834 925           right_bytes[1],
1835 925           right_bytes[2],
1836 925           right_bytes[3]),
1837             8,
1838             "PerlIO_printf");
1839             }
1840 219170           }
1841              
1842             /* Note that for data records, we will ensure that the key they contain does
1843             * match a data record in the record_value_as_number() subroutine. */
1844             static void
1845 438340           check_record_sanity(MMDBW_node_s *node, MMDBW_record_s *record, char *side) {
1846 438340 100         if (record->type == MMDBW_RECORD_TYPE_NODE ||
1847             record->type == MMDBW_RECORD_TYPE_FIXED_NODE) {
1848 219133 50         if (record->value.node->number == node->number) {
1849 0           croak("%s record of node %" PRIu32 " points to the same node",
1850             side,
1851             node->number);
1852             }
1853              
1854 219133 50         if (record->value.node->number < node->number) {
1855 0           croak("%s record of node %" PRIu32
1856             " points to a node number (%" PRIu32 ")",
1857             side,
1858             node->number,
1859             record->value.node->number);
1860             }
1861             }
1862              
1863             // This is a simple check that we aren't pointing at the tree root.
1864 438340 100         if (record->type == MMDBW_RECORD_TYPE_ALIAS) {
1865 30 50         if (record->value.node->number == 0) {
1866 0           croak("%s record of node %" PRIu32 " is an alias to node 0",
1867             side,
1868             node->number);
1869             }
1870             }
1871 438340           }
1872              
1873 438340           static uint32_t record_value_as_number(MMDBW_tree_s *tree,
1874             MMDBW_record_s *record,
1875             encode_args_s *args) {
1876 438340           uint32_t record_value = 0;
1877              
1878 438340           switch (record->type) {
1879 15230           case MMDBW_RECORD_TYPE_FIXED_EMPTY:
1880             case MMDBW_RECORD_TYPE_EMPTY: {
1881             // Say that the IP isn't here.
1882 15230           record_value = tree->node_count;
1883 15230           break;
1884             }
1885 219163           case MMDBW_RECORD_TYPE_NODE:
1886             case MMDBW_RECORD_TYPE_ALIAS:
1887             case MMDBW_RECORD_TYPE_FIXED_NODE: {
1888 219163           record_value = record->value.node->number;
1889 219163           break;
1890             }
1891 203947           case MMDBW_RECORD_TYPE_DATA: {
1892 203947           SV **cache_record = hv_fetch(args->data_pointer_cache,
1893             record->value.key,
1894             SHA1_KEY_LENGTH,
1895             0);
1896 203947 100         if (cache_record) {
1897             /* It is ok to return this without the size check below as it
1898             would have already croaked when it was inserted if it was too
1899             big. */
1900 202020           return SvIV(*cache_record);
1901             }
1902              
1903 1927           SV *data = newSVsv(data_for_key(tree, record->value.key));
1904 1927 50         if (!SvOK(data)) {
1905 0           croak("No data associated with key - %s", record->value.key);
1906             }
1907              
1908 1927           dSP;
1909 1927           ENTER;
1910 1927           SAVETMPS;
1911              
1912 1927 50         PUSHMARK(SP);
1913 1927 50         EXTEND(SP, 5);
1914 1927           PUSHs(args->serializer);
1915 1927           PUSHs(args->root_data_type);
1916 1927           mPUSHs(data);
1917 1927           PUSHs(&PL_sv_undef);
1918 1927           mPUSHp(record->value.key, strlen(record->value.key));
1919 1927           PUTBACK;
1920              
1921 1927           int count = call_method("store_data", G_SCALAR);
1922              
1923 1927           SPAGAIN;
1924              
1925 1927 50         if (count != 1) {
1926 0           croak("Expected 1 item back from ->store_data() call");
1927             }
1928              
1929 1927           SV *rval = POPs;
1930 1927 50         if (!(SvIOK(rval) || SvUOK(rval))) {
1931 0           croak("The serializer's store_data() method returned an SV "
1932             "which is not SvIOK or SvUOK!");
1933             }
1934 1927           uint32_t position = (uint32_t)SvUV(rval);
1935              
1936 1927           PUTBACK;
1937 1927 50         FREETMPS;
1938 1927           LEAVE;
1939              
1940 1927           record_value =
1941 1927           position + tree->node_count + DATA_SECTION_SEPARATOR_SIZE;
1942              
1943 1927           SV *value = newSViv(record_value);
1944 1927           (void)hv_store(args->data_pointer_cache,
1945             record->value.key,
1946             SHA1_KEY_LENGTH,
1947             value,
1948             0);
1949 1927           break;
1950             }
1951             }
1952              
1953 236320 50         if (record_value > max_record_value(tree)) {
1954 0           croak("Node value of %" PRIu32 " exceeds the record size of %" PRIu8
1955             " bits",
1956             record_value,
1957             tree->record_size);
1958             }
1959              
1960             return record_value;
1961             }
1962              
1963 236377           uint32_t max_record_value(MMDBW_tree_s *tree) {
1964 236377           uint8_t record_size = tree->record_size;
1965 236377 100         return record_size == 32 ? UINT32_MAX : (uint32_t)(1 << record_size) - 1;
1966             }
1967              
1968 186           void start_iteration(MMDBW_tree_s *tree,
1969             bool depth_first,
1970             void *args,
1971             MMDBW_iterator_callback callback) {
1972 186           uint128_t network = 0;
1973 186           uint8_t depth = 0;
1974              
1975             // We disallow this as the callback is based on nodes rather than records,
1976             // and changing that is a rabbit hole that I don't want to go down
1977             // currently. (I stuck my head in and regretted it.)
1978 186 100         if (MMDBW_RECORD_TYPE_NODE != tree->root_record.type &&
1979             MMDBW_RECORD_TYPE_FIXED_NODE != tree->root_record.type) {
1980 2           croak("Iteration is not currently allowed in trees with no nodes. "
1981             "Record type: %s",
1982             record_type_name(tree->root_record.type));
1983             }
1984              
1985 184           iterate_tree(
1986             tree, &tree->root_record, network, depth, depth_first, args, callback);
1987              
1988 184           return;
1989             }
1990              
1991 1571408           static void iterate_tree(MMDBW_tree_s *tree,
1992             MMDBW_record_s *record,
1993             uint128_t network,
1994             const uint8_t depth,
1995             bool depth_first,
1996             void *args,
1997             MMDBW_iterator_callback callback) {
1998 1592810 100         if (depth > tree_depth0(tree) + 1) {
    50          
1999 0           char ip[INET6_ADDRSTRLEN];
2000 0           integer_to_ip_string(tree->ip_version, network, ip, sizeof(ip));
2001 0           croak("Depth during iteration is greater than 127 (depth: %u, "
2002             "start IP: %s)! The tree is wonky.\n",
2003             depth,
2004             ip);
2005             }
2006              
2007 1571408 100         if (record->type == MMDBW_RECORD_TYPE_NODE ||
2008             record->type == MMDBW_RECORD_TYPE_FIXED_NODE) {
2009 785612           MMDBW_node_s *node = record->value.node;
2010              
2011 785612 100         if (!depth_first) {
2012 779530           callback(tree, node, network, depth, args);
2013             }
2014              
2015 785612           iterate_tree(tree,
2016             &node->left_record,
2017             network,
2018 785612           depth + 1,
2019             depth_first,
2020             args,
2021             callback);
2022              
2023 785612 100         if (depth_first) {
2024 6082           callback(tree, node, network, depth, args);
2025             }
2026              
2027 785612           iterate_tree(tree,
2028             &node->right_record,
2029             flip_network_bit(tree, network, depth),
2030             depth + 1,
2031             depth_first,
2032             args,
2033             callback);
2034             }
2035 1571408           }
2036              
2037             uint128_t
2038 845655           flip_network_bit(MMDBW_tree_s *tree, uint128_t network, uint8_t depth) {
2039 845655 100         return network | ((uint128_t)1 << (tree_depth0(tree) - depth));
2040             }
2041              
2042 98           static SV *key_for_data(SV *data) {
2043 98           dSP;
2044 98           ENTER;
2045 98           SAVETMPS;
2046              
2047 98 50         PUSHMARK(SP);
2048 98 50         EXTEND(SP, 1);
2049 98           PUSHs(data);
2050 98           PUTBACK;
2051              
2052 98           const char *const sub = "MaxMind::DB::Writer::Util::key_for_data";
2053 98           int count = call_pv(sub, G_SCALAR);
2054              
2055 98           SPAGAIN;
2056              
2057 98 50         if (count != 1) {
2058 0           croak("Expected 1 item back from %s() call", sub);
2059             }
2060              
2061 98           SV *key = POPs;
2062 98           SvREFCNT_inc_simple_void_NN(key);
2063              
2064 98           PUTBACK;
2065 98 50         FREETMPS;
2066 98           LEAVE;
2067              
2068 98           return key;
2069             }
2070              
2071 17666           SV *data_for_key(MMDBW_tree_s *tree, const char *const key) {
2072 17666           MMDBW_data_hash_s *data = NULL;
2073 81428 50         HASH_FIND(hh, tree->data_table, key, strlen(key), data);
    100          
    50          
    50          
    100          
    50          
    50          
2074              
2075 17666 50         if (NULL != data) {
2076 17666           return data->data_sv;
2077             } else {
2078             return &PL_sv_undef;
2079             }
2080             }
2081              
2082 5419           static const char *merge_cache_lookup(MMDBW_tree_s *tree,
2083             char *merge_cache_key) {
2084 5419           MMDBW_merge_cache_s *cache = NULL;
2085 32214 100         HASH_FIND(hh, tree->merge_cache, merge_cache_key, MERGE_KEY_SIZE, cache);
    100          
    100          
    50          
    50          
    0          
    100          
2086              
2087 5359 100         if (!cache) {
2088             return NULL;
2089             }
2090              
2091             // We have to check that the value has not been removed from the data
2092             // table
2093 5317           MMDBW_data_hash_s *data = NULL;
2094 21270 50         HASH_FIND(hh, tree->data_table, cache->value, SHA1_KEY_LENGTH, data);
    100          
    50          
    50          
    100          
    50          
    50          
2095 5317 50         if (data != NULL) {
2096             return cache->value;
2097             }
2098              
2099             // Item has been removed from data table. Remove the cached merge too.
2100 0 0         HASH_DEL(tree->merge_cache, cache);
    0          
    0          
    0          
    0          
    0          
    0          
    0          
2101             return NULL;
2102             }
2103              
2104 98           static void store_in_merge_cache(MMDBW_tree_s *tree,
2105             char *merge_cache_key,
2106             const char *const new_key) {
2107 98           MMDBW_merge_cache_s *data = checked_malloc(sizeof(MMDBW_merge_cache_s));
2108 98           data->value = checked_malloc(SHA1_KEY_LENGTH + 1);
2109 98           strncpy((char *)data->value, new_key, SHA1_KEY_LENGTH + 1);
2110              
2111 98           data->key = checked_malloc(MERGE_KEY_SIZE + 1);
2112 98 100         strncpy((char *)data->key, merge_cache_key, MERGE_KEY_SIZE + 1);
2113              
2114 490 100         HASH_ADD_KEYPTR(hh, tree->merge_cache, data->key, MERGE_KEY_SIZE, data);
    50          
    50          
    100          
    50          
    50          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
2115 98           }
2116              
2117 203           void free_tree(MMDBW_tree_s *tree) {
2118 203           free_record_value(tree, &tree->root_record, true);
2119 203           free_merge_cache(tree);
2120              
2121 203 50         int hash_count = HASH_COUNT(tree->data_table);
2122 0 0         if (0 != hash_count) {
2123 0           croak("%d elements left in data table after freeing all nodes!",
2124             hash_count);
2125             }
2126              
2127 203           free(tree);
2128 203           }
2129              
2130 203           void free_merge_cache(MMDBW_tree_s *tree) {
2131 203           MMDBW_merge_cache_s *cache, *tmp = NULL;
2132 546 100         HASH_ITER(hh, tree->merge_cache, cache, tmp) {
    100          
2133 98 50         HASH_DEL(tree->merge_cache, cache);
    100          
    50          
    50          
    50          
    50          
    50          
    50          
2134 98           free((void *)cache->key);
2135 98           free((void *)cache->value);
2136 98 100         free(cache);
2137             }
2138 203           }
2139              
2140 918642           static void *checked_malloc(size_t size) {
2141 918642           void *ptr = malloc(size);
2142 918642 50         if (!ptr) {
2143 0           abort();
2144             }
2145              
2146 918642           return ptr;
2147             }
2148              
2149             static void
2150 317634           checked_fwrite(FILE *file, char *filename, void *buffer, size_t count) {
2151 317634           size_t result = fwrite(buffer, 1, count, file);
2152 317634 50         if (result != count) {
2153 0           fclose(file);
2154 0           croak("Write to %s did not write the expected amount of data (wrote "
2155             "%zu instead of %zu): %s",
2156             filename,
2157             result,
2158             count,
2159             strerror(errno));
2160             }
2161 317634           }
2162              
2163 219170           static void check_perlio_result(SSize_t result, SSize_t expected, char *op) {
2164 219170 50         if (result < 0) {
2165 0           croak("%s operation failed: %s\n", op, strerror(result));
2166 219170 50         } else if (result != expected) {
2167 0           croak("%s operation wrote %zd bytes when we expected to write %zd",
2168             op,
2169             result,
2170             expected);
2171             }
2172 219170           }
2173              
2174 8           static char *status_error_message(MMDBW_status status) {
2175 8           switch (status) {
2176             case MMDBW_SUCCESS:
2177             return "Success";
2178 3           case MMDBW_INSERT_INTO_ALIAS_NODE_ERROR:
2179 3           return "Attempted to insert into an aliased network.";
2180 0           case MMDBW_INSERT_INVALID_RECORD_TYPE_ERROR:
2181 0           return "Invalid record type given to insert.";
2182 0           case MMDBW_FREED_ALIAS_NODE_ERROR:
2183 0           return "Attempted to free an IPv4 alias node. Did you try to "
2184             "overwrite an alias network?";
2185 0           case MMDBW_FREED_FIXED_EMPTY_ERROR:
2186 0           return "Attempted to free a fixed empty node. This should never "
2187             "happen.";
2188 0           case MMDBW_FREED_FIXED_NODE_ERROR:
2189 0           return "Attempted to free a fixed node. This should never happen.";
2190 5           case MMDBW_ALIAS_OVERWRITE_ATTEMPT_ERROR:
2191 5           return "Attempted to overwrite an aliased network.";
2192 0           case MMDBW_FIXED_NODE_OVERWRITE_ATTEMPT_ERROR:
2193 0           return "Attempted to overwrite a fixed node.";
2194 0           case MMDBW_RESOLVING_IP_ERROR:
2195 0           return "Failed to resolve IP address.";
2196             }
2197             // We should get a compile time warning if an enum is missing
2198 0           return "Unknown error";
2199             }
2200              
2201 2           static const char *record_type_name(MMDBW_record_type type) {
2202 2           switch (type) {
2203             case MMDBW_RECORD_TYPE_EMPTY:
2204             return "empty";
2205 0           case MMDBW_RECORD_TYPE_FIXED_EMPTY:
2206 0           return "fixed_empty";
2207 0           case MMDBW_RECORD_TYPE_DATA:
2208 0           return "data";
2209 0           case MMDBW_RECORD_TYPE_NODE:
2210 0           return "node";
2211 0           case MMDBW_RECORD_TYPE_FIXED_NODE:
2212 0           return "fixed_node";
2213 0           case MMDBW_RECORD_TYPE_ALIAS:
2214 0           return "alias";
2215             }
2216 0           return "unknown type";
2217             }