File Coverage

blib/lib/Data/TableData/Lookup.pm
Criterion Covered Total %
statement 58 69 84.0
branch 16 26 61.5
condition 5 9 55.5
subroutine 5 5 100.0
pod 1 1 100.0
total 85 110 77.2


line stmt bran cond sub pod time code
1             package Data::TableData::Lookup;
2              
3             our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY
4             our $DATE = '2021-04-11'; # DATE
5             our $DIST = 'Data-TableData-Lookup'; # DIST
6             our $VERSION = '0.002'; # VERSION
7              
8 1     1   61086 use 5.010001;
  1         11  
9 1     1   6 use strict;
  1         2  
  1         21  
10 1     1   5 use warnings;
  1         1  
  1         43  
11              
12 1     1   6 use Exporter 'import';
  1         2  
  1         516  
13             our @EXPORT_OK = qw(table_vlookup);
14              
15             our %SPEC;
16              
17             $SPEC{table_vlookup} = {
18             v => 1.1,
19             summary => 'Look up value in a table row by row',
20             description => <<'_',
21              
22             This routine looks up value in a table row by row. It is similar to the
23             spreadsheet function VLOOKUP, hence the same name being used. It is basically a
24             glorified map()+grep() that returns a single value (or you can also say it's a
25             glorified map+L::first()).
26              
27             Given a table, which is either an array-of-arrayrefs (aoa) or array-of-hashrefs
28             (aoh), this routine will run through it row by row until it finds the value that
29             you want. Once found, the value will be returned. Otherwise, undef is returned.
30              
31             **Exact matching**
32              
33             The table is expected to be sorted in ascending order by the lookup field. You
34             specify a lookup value, which will be looked up in the lookup field. Once the
35             value is found, the result field of the correspending row is returned and lookup
36             is completed. When the lookup field already exceeds the lookup value, the
37             routine also concludes that the value is not found, and the lookup is completed.
38              
39             Example:
40              
41             table => [
42             {min_income=> 0, tax_rate=>0.13},
43             {min_income=> 8_000, tax_rate=>0.18},
44             {min_income=> 15_000, tax_rate=>0.22},
45             {min_income=> 35_000, tax_rate=>0.30},
46             {min_income=> 85_000, tax_rate=>0.39},
47             {min_income=>140_000, tax_rate=>0.45},
48             ],
49             lookup_field => 'min_income',
50             lookup_value => 35_000,
51             result_field => 'tax_rate',
52              
53             will result in:
54              
55             0.30
56              
57             while if the lookup_value is 40_000, undef will be returned since it is not
58             found in any row of the table.
59              
60             **Approximate matching**
61              
62             If `approx` option is set to true, once the lookup field in a row exceeds the
63             lookup value, the result field of the previous row will be returned (if any).
64             For example, if lookup value is 40_000 then 0.30 will be returned (the row where
65             `min_income` is 35_000) since the next row has `min_income` of 85_000 which
66             already exceeds 40_000.
67              
68             **Interpolation of result**
69              
70             If, additionally, `interpolate` option is also set to true in addition to
71             `approx` option being set to true, a linear interpolation will be done when an
72             exact match fails. In the previous example, when lookup value is 40_000, 0.309
73             will be returned, which is calculated with:
74              
75             0.3 + (40_000 - 35_000)/(85_000 - 35_000)*(0.39 - 0.30)
76              
77             In the case of there is no next row after `min_income` of 35_000, 0.30 will
78             still be returned.
79              
80              
81             _
82             args => {
83             table => {
84             schema => 'array*',
85             req => 1,
86             },
87             lookup_value => {
88             summary => 'The value that you want to look up in the lookup field',
89             description => <<'_',
90              
91             Instead of `lookup_value` and `lookup_field`, you can also specify `lookup_code`
92             instead.
93              
94             _
95             schema => 'any*',
96             },
97              
98             lookup_field => {
99             summary => 'Where to look up the lookup value in',
100             description => <<'_',
101              
102             Either an integer array index (for aoa table) or a string hash key (for aoh
103             table).
104              
105             Instead of `lookup_value` and `lookup_field`, you can also specify `lookup_code`
106             instead.
107              
108             _
109             schema => 'str*',
110             },
111              
112             lookup_code => {
113             summary => 'Supply code to match a row',
114             description => <<'_',
115              
116             Unless what you want to match is custom, you usually specify `lookup_value` and
117             `lookup_field` instead.
118              
119             The code will be passed the row (which is an arrayref or a hashref) and
120             optionally the lookup value too as the second argument if the lookup value is
121             specified. It is expected to return either -1, 0, 1 like the Perl's `cmp` or
122             `<=>` operator. -1 means the lookup field is less than the lookup value, 0 means
123             equal, and 1 means greater than.
124              
125             With `approx` option not set to true, lookup will succeed once 0 is returned.
126             With `approx` set to true, lookup will succeed once 0 or 1 is returned.
127              
128             _
129             schema => 'code*',
130             },
131              
132             result_field => {
133             summary => 'Where to get the result from',
134             schema => 'str*',
135             description => <<'_',
136              
137             Either an integer array index (for aoa table) or a string hash key (for aoh
138             table).
139              
140             _
141             req => 1,
142             },
143              
144             # XXX result_code (instead of result_field)
145              
146             approx => {
147             summary => 'Whether to do an approximate instead of an exact match',
148             schema => 'bool*',
149             description => <<'_',
150              
151             See example in the function description.
152              
153             _
154             },
155             interpolate => {
156             summary => 'Do a linear interpolation',
157             schema => 'bool*',
158             description => <<'_',
159              
160             When this option is set to true, will do a linear interpolation of result when
161             an exact match is not found. This will only be performed if `approx` is also set
162             to true.
163              
164             See example in the function description.
165              
166             Currently, you cannot use `interpolate` with `lookup_code`.
167              
168             _
169             },
170             },
171             args_rels => [
172             'choose_all&' => [
173             [qw/lookup_field lookup_value/],
174             ],
175             'req_one&' => [
176             [qw/lookup_field lookup_code/],
177             ],
178             'dep_any&' => [
179             ['interpolate' => ['approx']],
180             ],
181             ],
182             result_naked => 1,
183             };
184             sub table_vlookup {
185 13     13 1 147 my %args = @_;
186              
187 13         16 my $table = $args{table};
188 13         25 my $approx = $args{approx};
189 13         50 my $interpolate = $args{interpolate};
190 13         19 my $lookup_value = $args{lookup_value};
191 13         20 my $lookup_field = $args{lookup_field};
192 13         15 my $lookup_code = $args{lookup_code};
193 13         16 my $lookup_value_specified = exists $args{lookup_code};
194 13         19 my $result_field = $args{result_field};
195              
196 13         20 my $ref_row;
197 13         26 my ($matching_row, $prev_row);
198 13         0 my $result;
199             ROW:
200 13         20 for my $row (@$table) {
201 24         32 $ref_row = ref $row;
202              
203 24         27 my $cmp;
204 24 50       40 if ($lookup_code) {
205 0         0 my @lcargs = ($row);
206 0 0       0 push @lcargs, $lookup_value if $lookup_value_specified;
207 0         0 $cmp = $lookup_code->(@lcargs);
208             } else {
209 24 50       38 if ($ref_row eq 'ARRAY') {
210 0         0 $cmp = $row->[$lookup_field] <=> $lookup_value;
211             } else {
212 24         41 $cmp = $row->{$lookup_field} <=> $lookup_value;
213             }
214             }
215 24 100       56 if ($cmp == 0) {
    100          
    50          
216             # an exact match
217 5         6 $matching_row = $row;
218 5         31 goto GET_EXACT_RESULT;
219             } elsif ($cmp == 1) {
220             # lookup field has exceeded lookup value
221 6 100 100     21 if ($approx && $prev_row) {
222 3 100       8 if ($interpolate) {
223 1         2 $matching_row = $row;
224 1         17 goto GET_INTERPOLATED_RESULT;
225             } else {
226 2         2 $matching_row = $prev_row;
227 2         13 goto GET_EXACT_RESULT;
228             }
229             } else {
230             # no exact match, not found
231 3         28 goto RETURN_RESULT;
232             }
233             } elsif ($cmp == -1) {
234             # lookup value has not exceeded lookup value, continue to the next
235             # row
236             } else {
237 0         0 die "Something's wrong, cmp is not -1|0|1 ($cmp)";
238             }
239 13         19 $prev_row = $row;
240             }
241              
242 2 50 33     10 if ($approx && $prev_row) {
243 2         3 $matching_row = $prev_row;
244 2         6 goto GET_EXACT_RESULT;
245             } else {
246             # not found
247 0         0 goto RETURN_RESULT;
248             }
249              
250             GET_EXACT_RESULT: {
251 9 50       11 last unless $matching_row; # sanity check
  9         18  
252 9 50       17 if ($ref_row eq 'ARRAY') {
253 0         0 $result = $matching_row->[$result_field];
254             } else {
255 9         13 $result = $matching_row->{$result_field};
256             }
257 9         24 goto RETURN_RESULT;
258             }
259              
260             GET_INTERPOLATED_RESULT: {
261 1 50 33     3 last unless $matching_row && $prev_row; # sanity check
  1         5  
262 1         3 my ($x1, $x2, $y1, $y2);
263 1 50       3 if ($ref_row eq 'ARRAY') {
264 0         0 $x1 = $prev_row ->[$lookup_field];
265 0         0 $x2 = $matching_row->[$lookup_field];
266 0         0 $y1 = $prev_row ->[$result_field];
267 0         0 $y2 = $matching_row->[$result_field];
268             } else {
269 1         2 $x1 = $prev_row ->{$lookup_field};
270 1         2 $x2 = $matching_row->{$lookup_field};
271 1         1 $y1 = $prev_row ->{$result_field};
272 1         2 $y2 = $matching_row->{$result_field};
273             }
274 1         5 $result = $y1 + ($lookup_value - $x1)/($x2-$x1)*($y2-$y1);
275             }
276              
277             RETURN_RESULT:
278 13         69 $result;
279             }
280              
281             1;
282             # ABSTRACT: Lookup value in a table data structure
283              
284             __END__