File Coverage

blib/lib/Algorithm/DistanceMatrix.pm
Criterion Covered Total %
statement 2 4 50.0
branch n/a
condition n/a
subroutine 2 2 100.0
pod n/a
total 4 6 66.6


line stmt bran cond sub pod time code
1             #!/usr/bin/env perl
2             # ABSTRACT: Compute distance matrix for any distance metric
3              
4             package Algorithm::DistanceMatrix;
5             BEGIN {
6 1     1   47086 $Algorithm::DistanceMatrix::VERSION = '0.04';
7             }
8 1     1   1811 use Moose;
  0            
  0            
9              
10             has 'mode' =>(
11             is => 'rw',
12             isa => 'Str',
13             default => 'lower',
14             );
15              
16            
17             has 'metric' => (
18             is=>'rw',
19             isa=>'CodeRef',
20             default=>sub{abs($_[0]-$_[1])},
21             );
22              
23              
24             has 'objects' => (
25             is => 'rw',
26             isa => 'ArrayRef',
27             );
28            
29            
30             sub distancematrix {
31             my ($self, ) = @_;
32             # Callback function
33             my $metric = $self->metric;
34             my $objects = $self->objects;
35             my $n = @$objects;
36             my $distances = [];
37             for (my $i = 0; $i < $n; $i++) {
38             # This initialization is required to prevent 'undef' at [0,0],
39             $distances->[$i] ||= [];
40             # Diagonal or full matrix?
41             my $start = $self->mode =~ /full/i ? 0 : $i+1;
42             for (my $j = $start; $j < $n; $j++) {
43             # Use a pointer, then determine if it's row-major or col-major order
44             # Swap i and j if lower diagonal (default)
45             my $ref = $self->mode =~ /lower/i ?
46             \$distances->[$j][$i] : \$distances->[$i][$j];
47             # Callback function provides the distance
48             $$ref = $metric->($objects->[$i], $objects->[$j]);
49             }
50             }
51             # Last diagonal element is undef, unless explicitly computed
52             $distances->[$n-1] = [(undef)x$n] if $self->mode =~ /upper/i;
53             return $distances;
54             }
55              
56              
57             __PACKAGE__->meta->make_immutable;
58             no Moose;
59             1;
60             __END__
61             =pod
62              
63             =head1 NAME
64              
65             Algorithm::DistanceMatrix - Compute distance matrix for any distance metric
66              
67             =head1 VERSION
68              
69             version 0.04
70              
71             =head1 SYNOPSIS
72              
73             use Algorithm::DistanceMatrix;
74             my $m = Algorithm::DistanceMatrix->new(
75             metric=>\&mydistance,objects=\@myarray);
76             my $distmatrix = $m->distancematrix;
77            
78             use Algorithm::Cluster qw/treecluster/;
79             # method=>
80             # s: single-linkage clustering
81             # http://en.wikipedia.org/wiki/Single-linkage_clustering
82             # m: maximum- (or complete-) linkage clustering
83             # http://en.wikipedia.org/wiki/Complete_linkage_clustering
84             # a: average-linkage clustering (UPGMA)
85             # http://en.wikipedia.org/wiki/UPGMA
86            
87             my $tree = treecluster(data=>$distmat, method=>'a');
88            
89             # Get your objects and the cluster IDs they belong to, assuming 5 clusters
90             my $cluster_ids = $tree->cut(5);
91             # Index corresponds to that of the original objects
92             print $objects->[2], ' belongs to cluster ', $cluster_ids->[2], "\n";
93              
94             =head1 DESCRIPTION
95              
96             This is a small helper package for L<Algorithm::Cluster>. That module provides
97             many facilities for clustering data. It also provides a C<distancematrix> function,
98             but assumes tabular data, which is the standard for gene expression data.
99              
100             If your data is tabular, you should first have a look at C<distancematrix> in
101             L<Algorithm::Cluster>
102              
103             http://cpansearch.perl.org/src/MDEHOON/Algorithm-Cluster-1.48/doc/cluster.pdf
104              
105             Otherwise, this package provides a simple distance matrix, given an arbitrary
106             distance function. It does not assume anything about your data. You simply
107             provide a callback function for measuring the distance between any two objects.
108             It produces a lower diagonal (by default) distance matrix that is fit to be used
109             by the clustering algorithms of L<Algorithm::Cluster>.
110              
111             =head1 NAME
112              
113             Algorithm::DistanceMatrix - Compute distance matrix for any distance metric
114              
115             =head1 VERSION
116              
117             version 0.04
118              
119             =head1 METHODS
120              
121             =head2 mode
122              
123             One of C<qw/lower upper full/> for a lower diagonal, upper diagonal, or full
124             distance matrix.
125              
126             =head2 metric
127              
128             Callback for computing the distance, similarity, or whatever measure you like.
129              
130             $matrix->metric(\@mydistance);
131              
132             Where C<mydistance> receives two objects as it's first two arguments.
133              
134             If you need to pass special parameters to your method:
135              
136             $matrix->metric(sub{my($x,$y)=@_;mydistance(first=>$x,second=>$y,mode=>'fast')};
137              
138             You may use any metric, and may return any number or object. Note that if you
139             plan to use this with L<Algorithm::Cluster> this needs to be a distance metric.
140             So, if you're measure how similar two things are, on a scale of 1-10, then you
141             should return C<10-$similarity> to get a distance.
142              
143             Default is the absolute values of the scalar difference (i.e. C<abs(X-Y)>)
144              
145             =head2 objects
146              
147             Array reference. Doesn't matter what kind of objects are in the array, as long
148             as your C<metric> can process them.
149              
150             =head2 distancematrix
151              
152             2D array of distances (or similarities, or whatever) between your objects.
153              
154             (An ArrayRef of ArrayRefs.)
155              
156             =head1 AUTHOR
157              
158             Chad A. Davis <chad.a.davis@gmail.com>
159              
160             =head1 COPYRIGHT AND LICENSE
161              
162             This software is copyright (c) 2011 by Chad A. Davis.
163              
164             This is free software; you can redistribute it and/or modify it under
165             the same terms as the Perl 5 programming language system itself.
166              
167             =cut
168