File Coverage

blib/lib/Tie/ShadowHash.pm
Criterion Covered Total %
statement 88 90 97.7
branch 32 34 94.1
condition 3 6 50.0
subroutine 14 14 100.0
pod 1 1 100.0
total 138 145 95.1


line stmt bran cond sub pod time code
1             # Tie::ShadowHash -- Merge multiple data sources into a hash.
2             #
3             # This module combines multiple sources of data into a single tied hash, so
4             # that they can all be queried simultaneously, the source of any given
5             # key-value pair irrelevant to the client script. Data sources are searched
6             # in the order that they're added to the shadow hash. Changes to the hashed
7             # data aren't propagated back to the actual data files; instead, they're saved
8             # within the tied hash and override any data obtained from the data sources.
9             #
10             # SPDX-License-Identifier: GPL-1.0-or-later OR Artistic-1.0-Perl
11              
12             ##############################################################################
13             # Modules and declarations
14             ##############################################################################
15              
16             package Tie::ShadowHash 2.01;
17              
18 1     1   708 use 5.024;
  1         3  
19 1     1   5 use autodie;
  1         2  
  1         7  
20 1     1   4385 use warnings;
  1         2  
  1         33  
21              
22 1     1   4 use Carp qw(croak);
  1         2  
  1         1064  
23              
24             ##############################################################################
25             # Regular methods
26             ##############################################################################
27              
28             # Given a file name and optionally a split regex, builds a hash out of the
29             # contents of the file.
30             #
31             # If the split sub exists, use it to split each line into an array; if the
32             # array has two elements, those are taken as the key and value. If there are
33             # more, the value is an anonymous array containing everything but the first.
34             #
35             # If there's no split sub, take the entire line modulo the line terminator as
36             # the key and the value the number of times it occurs in the file.
37             #
38             # $file - File containing the data
39             # $split - Optional anonymous sub that splits a line into key and value
40             #
41             # Returns: Hash created by loading the file
42             sub _text_source {
43 4     4   8 my ($self, $file, $split) = @_;
44 4         4 my %hash;
45 4         11 open(my $fh, '<', $file);
46 4         1853 while (defined(my $line = <$fh>)) {
47 48         59 chomp($line);
48 48 100       56 if (defined($split)) {
49 6         12 my ($key, @rest) = $split->($line);
50 6 100       60 $hash{$key} = (@rest == 1) ? $rest[0] : [@rest];
51             } else {
52 42         129 $hash{$line}++;
53             }
54             }
55 4         14 close($fh);
56 4         951 return \%hash;
57             }
58              
59             # Add data sources to the shadow hash.
60             #
61             # Each data source is one of the following:
62             #
63             # - An anonymous array, in which case the first element is the type of source
64             # and the rest are arguments. Currently, "text" is the only supported type.
65             #
66             # - A file name, which is taken to be a text file with each line as a key and
67             # a value equal to the number of times that line appears.
68             #
69             # - A hash reference, possibly to a tied hash.
70             #
71             # @sources - Data sources to add
72             #
73             # Returns: True
74             sub add {
75 8     8 1 1064 my ($self, @sources) = @_;
76 8         13 for my $source (@sources) {
77 9 100       25 if (ref($source) eq 'ARRAY') {
    100          
78 2         4 my ($type, @args) = $source->@*;
79 2 50       4 if ($type eq 'text') {
80 2         4 $source = $self->_text_source(@args);
81             } else {
82 0         0 croak("invalid source type $type");
83             }
84             } elsif (!ref($source)) {
85 2         4 $source = $self->_text_source($source);
86             }
87 9         28 push($self->{SOURCES}->@*, $source);
88             }
89 8         27 return 1;
90             }
91              
92             ##############################################################################
93             # Tie methods
94             ##############################################################################
95              
96             # Create a new tied hash.
97             #
98             # @sources - Sources to add to the new hash
99             #
100             # Returns: Newly created tied hash
101             sub TIEHASH {
102 2     2   4970 my ($class, @sources) = @_;
103 2   33     11 $class = ref($class) || $class;
104             #<<<
105 2         6 my $self = {
106             DELETED => {}, # All keys that have been deleted
107             EACH => -1, # Index of source currently being traversed
108             OVERRIDE => {}, # Values set directly by the user
109             SOURCES => [], # Array of all of the underlying hashes
110             };
111             #>>>
112 2         4 bless($self, $class);
113 2         5 $self->add(@sources);
114 2         6 return $self;
115             }
116              
117             # Retrieve a value.
118             #
119             # This doesn't work quite right in the case of keys with undefined values, but
120             # we can't make it work right since that would require using exists and a lot
121             # of common data sources (such as NDBM_File tied hashes) don't implement
122             # exists.
123             #
124             # $key - Key to look up
125             #
126             # Returns: Value for that key, undef if it is not present
127             sub FETCH {
128 23     23   3945 my ($self, $key) = @_;
129 23 100       58 if ($self->{DELETED}{$key}) {
    100          
130 1         3 return;
131             } elsif (exists($self->{OVERRIDE}{$key})) {
132 6         20 return $self->{OVERRIDE}{$key};
133             } else {
134 16         26 for my $source ($self->{SOURCES}->@*) {
135 18 100       77 if (defined($source->{$key})) {
136 13         43 return $source->{$key};
137             }
138             }
139 3         9 return;
140             }
141             }
142              
143             # Store a value. This goes into the override hash, which is checked before
144             # any of the underlying data sources.
145             #
146             # $key - Key to store a value for
147             # $value - Value to store
148             sub STORE {
149 8     8   1599 my ($self, $key, $value) = @_;
150 8         13 delete $self->{DELETED}{$key};
151 8         17 $self->{OVERRIDE}{$key} = $value;
152 8         16 return;
153             }
154              
155             # Delete a key. The key is flagged in the deleted hash, which ensures that
156             # undef will be returned for any future retrieval. Dropping the override
157             # value isn't required for currect future FETCH behavior, but it drops the
158             # reference so that memory can be released.
159             #
160             # $key - Key to delete
161             sub DELETE {
162 5     5   1170 my ($self, $key) = @_;
163 5         7 delete $self->{OVERRIDE}{$key};
164 5         9 $self->{DELETED}{$key} = 1;
165 5         10 return;
166             }
167              
168             # Clear the hash. Removes all sources and all overrides and resets any
169             # iteration.
170             sub CLEAR {
171 5     5   1565 my ($self) = @_;
172 5         10 $self->{DELETED} = {};
173 5         9 $self->{OVERRIDE} = {};
174 5         13 $self->{SOURCES} = [];
175 5         7 $self->{EACH} = -1;
176 5         10 return;
177             }
178              
179             # Return whether a key exists.
180             #
181             # This could throw an exception if any underlying source doesn't support
182             # exists (like NDBM_File).
183             #
184             # $key - Key to query for existence
185             #
186             # Returns: True if the key exists, false otherwise
187             sub EXISTS {
188 5     5   516 my ($self, $key) = @_;
189 5 100       14 return if exists($self->{DELETED}{$key});
190 4         5 for my $source ($self->{OVERRIDE}, $self->{SOURCES}->@*) {
191 8 100       19 return 1 if exists($source->{$key});
192             }
193 1         5 return;
194             }
195              
196             # Start an iteration.
197             #
198             # We have to reset the each counter on all hashes. For tied hashes, we call
199             # FIRSTKEY directly because it's potentially more efficient than calling keys
200             # on the hash.
201             sub FIRSTKEY {
202 12     12   245 my ($self) = @_;
203 12         22 keys($self->{OVERRIDE}->%*);
204 12         21 for my $source ($self->{SOURCES}->@*) {
205 21         25 my $tie = tied($source);
206 21 50       28 if ($tie) {
207 0         0 $tie->FIRSTKEY();
208             } else {
209 21         29 keys($source->%*);
210             }
211             }
212 12         17 $self->{EACH} = -1;
213 12         20 return $self->NEXTKEY();
214             }
215              
216             # Iterate through the hashes.
217             #
218             # Walk the sources by calling each on each one in turn, skipping deleted keys
219             # and keys shadowed by earlier hashes and using $self->{EACH} to store the
220             # number of source we're at.
221             #
222             # Returns: Next key in iteration, or undef if sources are exhausted
223             sub NEXTKEY {
224 278     278   402 my ($self) = @_;
225              
226             # EACH is the numeric index in the SOURCES list for the source we're
227             # currently calling each on, or -1 if we're just starting and thus
228             # operating on the OVERRIDE hash.
229             #
230             # We have to loop until we find the next value, which may take several
231             # iterations since keys could have been overridden by an earlier hash or
232             # deleted.
233             SOURCE:
234 278         431 while ($self->{EACH} < $self->{SOURCES}->@*) {
235 312         314 my $key;
236              
237             ## no critic (Each)
238 312 100       419 if ($self->{EACH} == -1) {
239 18         29 $key = each($self->{OVERRIDE}->%*);
240             } else {
241 294         715 $key = each($self->{SOURCES}[$self->{EACH}]->%*);
242             }
243             ## use critic
244              
245             # If we got a valid result, we have to check against DELETED,
246             # OVERRIDE, and all earlier sources before returning it.
247 312 100       463 if (defined($key)) {
248 280 100 66     677 if ($self->{DELETED}{$key}) {
    100          
    100          
249 4         9 next;
250             } elsif ($self->{EACH} >= 0 && exists($self->{OVERRIDE}{$key})) {
251 7         14 next;
252             } elsif ($self->{EACH} > 0) {
253 126         180 for my $index (reverse(0 .. $self->{EACH} - 1)) {
254 126 100       508 if (defined($self->{SOURCES}[$index]{$key})) {
255 2         6 next SOURCE;
256             }
257             }
258             }
259 267         609 return $key;
260             }
261 32         57 $self->{EACH}++;
262             }
263              
264             # We have exhausted all of the sources.
265 11         75 return;
266             }
267              
268             ##############################################################################
269             # Module return value and documentation
270             ##############################################################################
271              
272             # Make sure the module returns true.
273             1;
274              
275             __DATA__