File Coverage

blib/lib/Algorithm/Points/MinimumDistance.pm
Criterion Covered Total %
statement 73 73 100.0
branch 6 6 100.0
condition 11 13 84.6
subroutine 12 12 100.0
pod 4 6 66.6
total 106 110 96.3


line stmt bran cond sub pod time code
1             package Algorithm::Points::MinimumDistance;
2 1     1   1037 use strict;
  1         2  
  1         40  
3 1     1   8 use vars qw( $VERSION );
  1         3  
  1         1129  
4             $VERSION = '0.01';
5              
6             =head1 NAME
7              
8             Algorithm::Points::MinimumDistance - Works out the distance from each point to its nearest neighbour. Kinda.
9              
10             =head1 DESCRIPTION
11              
12             Given a set of points in N-dimensional Euclidean space, works out for
13             each point the distance to its nearest neighbour (unless its nearest
14             neighbour isn't very close). The distance metric is a method;
15             subclass and override it for non-Euclidean space.
16              
17             =head1 SYNOPSIS
18              
19             use Algorithm::Points::MinimumDistance;
20              
21             my @points = ( [1, 4], [3, 1], [5, 7] );
22             my $dists = Algorithm::Points::MinimumDistance->new( points => \@points );
23              
24             foreach my $point (@points) {
25             print "($point->[0], $point->[1]: Nearest neighbour distance is "
26             . $dists->distance( point => $point ) . "\n";
27             }
28              
29             print "Smallest distance between any two points is "
30             . $dists->min_distance . "\n";
31              
32             =head1 METHODS
33              
34             =over 4
35              
36             =item B
37              
38             my @points = ( [1, 4], [3, 1], [5, 7] );
39             my $dists = Algorithm::Points::MinimumDistance->new( points => \@points,
40             boxsize => 2 );
41              
42             C should be an arrayref containing every point in your set.
43             The representation of a point is as an N-element arrayref where N is
44             the number of dimensions in which we are working. There is no check
45             that all of your points have the right dimension. Whatever dimension
46             the first point has is assumed to be the dimension of the space. So
47             get it right.
48              
49             C defaults to 20.
50              
51             =cut
52              
53             sub new {
54 4     4 1 1670 my ($class, %args) = @_;
55 4         4 my @points = @{ $args{points} };
  4         12  
56 4         4 my $dim = scalar @{ $points[0] };
  4         7  
57 4   50     85 my $boxsize = $args{boxsize} || 20;
58              
59             # Precomputation for working out all boxes adjacent to a given box
60             # (a point will be in all regions centred on its box or the
61             # adjacent ones).
62             # To find an adjacent box, vector-add one of these entries to it,
63             # eg [1, 1] + [2, 0] - with a boxsize of 2, [3, 1] is adjacent to [1, 1].
64 4         14 my @offsets = ( [ -$boxsize ], [ 0 ] , [ $boxsize ] );
65 4         10 foreach (2..$dim) {
66 4         5 @offsets = map { [ @$_, -$boxsize ], [ @$_, 0 ], [ @$_, $boxsize] }
  12         57  
67             @offsets;
68             }
69              
70 4         21 my $self = { dimensions => $dim,
71             points => \@points,
72             boxsize => $boxsize,
73             offsets => \@offsets,
74             regions => { },
75             distances => { }
76             };
77 4         11 bless $self, $class;
78 4         9 $self->_work_out_distances;
79              
80 4         17 return $self;
81             }
82              
83             =item B
84              
85             my @box = $nn->box( [1, 2] );
86              
87             Returns the identifier of the box that the point lives in.
88             A box is labelled by its "bottom left-hand" corner point.
89              
90             =cut
91              
92             sub box {
93 36     36 1 3006 my ($self, $point) = @_;
94 36         57 my @box = map { $_ - ($_ % $self->{boxsize}) } @$point;
  72         157  
95 36         90 return @box;
96             }
97              
98             sub _offsets {
99 16     16   21 my $self = shift;
100 16         17 return @{ $self->{offsets} };
  16         43  
101             }
102              
103             # Accessor for the region centred on the box $args{centre}. Returns a ref to
104             # an array of the points that are in that region.
105             sub region {
106 161     161 0 410 my ($self, %args) = @_;
107 161         166 my @centre = @{$args{centre}};
  161         302  
108 161         355 my $key = join(",", @centre);
109 161         275 my $regions = $self->{regions};
110 161   100     658 $regions->{$key} ||= [];
111 161         523 return $regions->{$key};
112             }
113              
114             # Shevek says: "This is where the CPU time goes, but, gentle reader,
115             # please note that the complexity is LINEAR in the number of
116             # points. This is shit. It's also trivial, so do it in XS."
117             # Kake says: "I don't speak XS yet."
118              
119             sub _hash {
120 16     16   22 my ($self, $point) = @_;
121              
122             # Compute the box in which $point lives.
123 16         30 my @home_box = $self->box($point);
124              
125             # $point lives in the region centred on this box, plus all surrounding
126             # regions. Push it into each of these regions. A region is
127             # identified by the box at its centre.
128 16         28 foreach my $delta ( $self->_offsets ) {
129 144         230 my @centre = map { $delta->[$_] + $home_box[$_] } (0..$#home_box);
  288         544  
130 144         304 my $region = $self->region( centre => \@centre );
131 144         354 push @$region, $point;
132             }
133             }
134              
135             sub _work_out_distances {
136 4     4   6 my $self = shift;
137 4         11 my $points = $self->{points};
138              
139             # Work out which points live in which regions.
140 4         12 $self->_hash($_) foreach (@$points);
141              
142             # For each point, check its distance from every other point inside
143             # the region centred on its home box. All points outside this region
144             # are at least a distance 'boxsize' away.
145 4         7 foreach my $point (@$points) {
146 16         36 my @box = $self->box($point);
147 16         18 my $min;
148 16         32 my $region = $self->region( centre => \@box );
149 16         24 foreach my $neighbour (@$region) {
150 48 100       114 next if $neighbour == $point; # Reference equality
151 32         60 my $d = $self->d($point, $neighbour);
152 32 100 100     136 $min = $d if (!defined $min or $d < $min);
153             }
154 16   66     38 $min ||= $self->{boxsize};
155 16         35 $self->_store_distance( point => $point, distance => $min );
156             }
157             }
158              
159             sub _store_distance {
160 16     16   38 my ($self, %args) = @_;
161 16         25 my ($point, $distance) = @args{ qw( point distance ) };
162 16         29 my $key = join(",", @$point);
163 16         63 $self->{distances}{$key} = $distance;
164             }
165              
166             # Override this for a different metric.
167             sub d {
168 32     32 0 43 my ($self, $point1, $point2) = @_;
169 32         31 my $t = 0;
170 32         59 foreach (0..$#$point1) {
171 64         127 $t += ($point1->[$_] - $point2->[$_]) ** 2;
172             }
173 32         60 return sqrt($t);
174             }
175              
176             =item B
177              
178             my $nn = Algorithm::Points::MinimumDistance->new( ... );
179             my $nn_dist = $nn->distance( point => [1, 4] );
180              
181             Returns the distance between the specified point and its nearest
182             neighbour. The point should be one of your original set. There is no
183             check that this is the case. Note that if a point has no particularly
184             close neighbours, then C will be returned instead.
185              
186             =cut
187              
188             sub distance {
189 6     6 1 1786 my ($self, %args) = @_;
190 6         9 my $point = $args{point};
191 6         17 my $key = join(",", @$point);
192 6         33 return $self->{distances}{$key};
193             }
194              
195             =item B
196              
197             my $nn = Algorithm::Points::MinimumDistance->new( ... );
198             my $nn_dist = $nn->min_distance;
199              
200             Returns the minimum nearest-neighbour distance for all points in the set.
201             Or C if none of the points are close to each other.
202              
203             =cut
204              
205             sub min_distance {
206 3     3 1 7 my $self = shift;
207 3         5 my $dists = $self->{distances};
208 3         5 my $min;
209 3         10 foreach my $dist ( values %$dists ) {
210 12 100 100     179 $min = $dist if (!defined $min or $dist < $min);
211             }
212 3         13 return $min;
213             }
214              
215             =back
216              
217             =head1 ALGORITHM
218              
219             We use the hash as an approximate conservative metric to basically do
220             clipping of space. A box is one cell of the space defined by the grid
221             size. A region is a box and all the neighbouring boxes in all directions,
222             i.e. all the boxes b such that
223             d(b, c) <= 1 in the d-infinity metric
224             Noting that d(b, c) is always an integer in this case.
225              
226             +-+-+-+-+-+
227             | | | | | |
228             +-+-+-+-+-+
229             | |x|x|x| |
230             +-+-+-+-+-+
231             | |x|b|x| |
232             +-+-+-+-+-+
233             | |x|x|x| |
234             +-+-+-+-+-+
235             | | | | | |
236             +-+-+-+-+-+
237              
238             Now all points outside the region defined by the box b and the
239             neighbours x can not be within maximum radius $C of any point in box b.
240              
241             So we reverse the stunt and shove any point in box b into the hash
242             lists for all boxes b and x so that when testing a point in any box,
243             we have a list of all points in that box and any neighbouring boxes
244             (the region).
245              
246             =head1 AUTHOR
247              
248             Algorithm by Shevek, modularisation by Kake Pugh (kake@earth.li).
249              
250             =head1 COPYRIGHT
251              
252             Copyright (C) 2003 Kake Pugh. All Rights Reserved.
253              
254             This module is free software; you can redistribute it and/or modify it
255             under the same terms as Perl itself.
256              
257             =head1 CREDITS
258              
259             Shevek is utterly fab for telling me how to solve this problem. Greg
260             McCarroll is also fab for telling me what to call the module.
261              
262             =cut
263              
264             1;