File Coverage

blib/lib/Algorithm/KernelKMeans.pm
Criterion Covered Total %
statement 10 12 83.3
branch n/a
condition n/a
subroutine 4 4 100.0
pod n/a
total 14 16 87.5


line stmt bran cond sub pod time code
1             package Algorithm::KernelKMeans;
2              
3 2     2   897 use 5.010;
  2         5  
  2         68  
4 2     2   3140 use namespace::autoclean;
  2         101450  
  2         12  
5 2     2   2108 use version;
  2         4567  
  2         13  
6 2     2   5028 use Moose;
  0            
  0            
7             use UNIVERSAL::require;
8              
9             our $VERSION = '0.03_02';
10              
11             our $IMPLEMENTATION;
12             for my $impl (qw/XS PP/) {
13             my $impl_class = __PACKAGE__ . '::' . $impl;
14             if ($impl_class->require) {
15             next if $impl eq 'XS'
16             and version->parse($impl_class->VERSION) < version->parse('0.02_02');
17             $IMPLEMENTATION = $impl_class;
18             extends $IMPLEMENTATION;
19             last;
20             }
21             }
22              
23             __PACKAGE__->meta->make_immutable;
24              
25             __END__
26              
27             =head1 NAME
28              
29             Algorithm::KernelKMeans - Weighted kernel k-means clusterer
30              
31             =head1 SYNOPSIS
32              
33             use Algorithm::KernelKMeans;
34             use Algorithm::KernelKMeans::Util qw/$KERNEL_POLYNOMINAL/;
35             use List::MoreUtils qw/zip/;
36             use Try::Tiny;
37            
38             my @vectors = map {
39             my @values = split /\s/;
40             my @keys = 0 .. $#values;
41             +{ zip @keys, @values };
42             } (<>);
43             my $wkkm = Algorithm::KernelKMeans->new( # default weights are 1
44             vectors => \@vectors,
45             kernel => [$KERNEL_POLYNOMINAL => (1, 2)] # K(x1, x2) = (1 + x1x2)^2
46             );
47            
48             try {
49             my $clusters = $wkkm->run(k => 6);
50             for my $cluster (@$clusters) {
51             ...
52             }
53             } catch {
54             # during iteration, number of clusters became less than k_min
55             if (/number of clusters/i) { ... }
56             }
57              
58             =head1 DESCRIPTION
59              
60             C<Algorithm::KernelKMeans> provides weighted kernel k-means vector clusterer.
61              
62             Note that this is a very early release. All APIs may be changed incompatibly.
63              
64             =head2 IMPLEMENTATION
65              
66             This class is just a placeholder. Implementation code is in other class and this class just inherits it.
67              
68             Currently there are 2 implementations: L<Algorithm::KernelKMeans::PP> and L<Algorithm::KernelKMeans::XS>.
69              
70             C<$Algorithm::KernelKMeans::IMPLEMENTATION> indicates which implementation is loaded.
71              
72             Both of these implements same interface (documented below) and C<Algorithm::KernelKMeans> uses faster (XS) implementation if it's available.
73             So it's not necessary usually to use the classes directly tough, you can do it if you want.
74              
75             =head1 METHODS
76              
77             =head2 new(%opts)
78              
79             Constructor. you can specify options below:
80              
81             =head3 vectors
82              
83             Required. Array of vectors.
84             Each vector is represented as an hash of positive real numbers.
85              
86             e.g.:
87              
88             my $wkkm = Algorithm::KernelKMeans->new(
89             vectors => [ +{ prop1 => 229, prop2 => 151, prop3 => 42 },
90             +{ prop1 => 61, prop2 => 151, prop4 => 251 },
91             +{ prop2 => 11, prop3 => 120, prop4 => 55 } ],
92             kernel => [$KERNEL_POLYNOMINAL => (1, 2)]
93             );
94              
95             =head3 weights
96              
97             Array of positive real numbers. Defaults to list of 1s.
98              
99             =head3 kernel
100              
101             Function projects 2 vectors into higher dimentional space and computes inner product.
102              
103             Kernel function can be specified as a tuple or a code reference.
104              
105             Tuple is formed with descriptor and parameter(s). For example:
106              
107             [$KERNEL_POLYNOMINAL => (1, 2)]
108              
109             C<$KERNEL_POLYNOMINAL> is a descriptor. And rest of the elements are parameters.
110              
111             L<Algorithm::KernelKMeans::Util> has some descriptors for some popular kernel functions.
112              
113             =head3 kernel_matrix
114              
115             2D array of kernel values.
116              
117             A matrix whose element at (i, j) is K(xi, xj) where i >= j.
118             This is derived automatically from C<kernel> by default, however you can specify it manually if you already have it.
119              
120             Note that the clusterer only uses lower triangle part of the matrix.
121             So it is not necessary for the matrix to have element at (i, j) where i < j.
122              
123             Note that C<kernel> and C<kernel_matrix> are exclusive and either of these is required.
124              
125             =head2 run(%opts)
126              
127             Executes clustering. Return value is an array ref of clusters.
128              
129             =head3 k
130              
131             Required. (maximum) number of clusters.
132              
133             =head3 k_min
134              
135             Some clusters may be empty during clustering.
136             In the case, the clusterer just removes the empty clusters and checks number of rest clusters. If it is less than C<k_min>, the clusterer throws an error.
137              
138             Default is same as C<k>.
139              
140             =head3 initializer
141              
142             Specifies cluster initializing method.
143             By default, the clusterer initializes clusters using KKZ, which is known as a good initializing procedure.
144              
145             You can C<import> some initializer descriptors from C<Algorithm::KernelKMeans::Util>.
146              
147             =head3 converged
148              
149             Function predicates that clustering is converged.
150             Iteration is broken off and returns result when the predicate returns true.
151              
152             For each iteration, 2 values will be specified:
153             objective function value of current clusters and new clusters' one.
154             As clusters converges, the value decreases.
155              
156             Default predicate just checks if these 2 values are equal.
157              
158             =head2 cluster_indices(%opts)
159              
160             This method is similar to C<run>, but returns clusters contain indices instead of vectors.
161              
162             =head1 AUTHOR
163              
164             Koichi SATOH E<lt>sekia@cpan.orgE<gt>
165              
166             =head1 SEE ALSO
167              
168             L<Algorithm::KernelKMeans::PP> - Default implementation
169              
170             L<Algorithm::KernelKMeans::XS> - Yet another implementation. Fast!
171              
172             =head1 LICENSE
173              
174             The MIT License
175              
176             Copyright (C) 2010 by Koichi SATOH
177              
178             Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
179              
180             The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
181              
182             THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
183              
184             =cut