line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Algorithm::KernelKMeans::Util; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
940
|
use 5.010; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
40
|
|
4
|
1
|
|
|
1
|
|
5
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
33
|
|
5
|
1
|
|
|
1
|
|
4
|
use warnings; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
34
|
|
6
|
|
|
|
|
|
|
|
7
|
1
|
|
|
1
|
|
942
|
use Attribute::Constant; |
|
1
|
|
|
|
|
21387
|
|
|
1
|
|
|
|
|
34
|
|
8
|
1
|
|
|
1
|
|
900
|
use Exporter::Lite; |
|
1
|
|
|
|
|
713
|
|
|
1
|
|
|
|
|
6
|
|
9
|
1
|
|
|
1
|
|
54
|
use List::Util qw/sum/; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
162
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
our @EXPORT_OK = qw/centroid |
12
|
|
|
|
|
|
|
diff_vector |
13
|
|
|
|
|
|
|
inner_product |
14
|
|
|
|
|
|
|
euclidean_distance |
15
|
|
|
|
|
|
|
$KERNEL_POLYNOMINAL $KERNEL_GAUSSIAN $KERNEL_SIGMOID |
16
|
|
|
|
|
|
|
$INITIALIZE_SIMPLE $INITIALIZE_SHUFFLE $INITIALIZE_KKZ/; |
17
|
|
|
|
|
|
|
|
18
|
1
|
|
|
1
|
|
8
|
our $KERNEL_POLYNOMINAL : Constant(0); |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
6
|
|
19
|
1
|
|
|
1
|
|
197
|
our $KERNEL_GAUSSIAN : Constant(1); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
5
|
|
20
|
1
|
|
|
1
|
|
161
|
our $KERNEL_SIGMOID : Constant(2); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
4
|
|
21
|
|
|
|
|
|
|
|
22
|
1
|
|
|
1
|
|
178
|
our $INITIALIZE_SIMPLE : Constant(0); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
5
|
|
23
|
1
|
|
|
1
|
|
159
|
our $INITIALIZE_SHUFFLE : Constant(1); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
4
|
|
24
|
1
|
|
|
1
|
|
309
|
our $INITIALIZE_KKZ : Constant(2); |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
6
|
|
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
sub centroid { |
27
|
2
|
|
|
2
|
1
|
47932
|
my $cluster = shift; |
28
|
2
|
|
|
|
|
6
|
my %centroid; |
29
|
2
|
|
|
|
|
6
|
for my $vector (@$cluster) { |
30
|
6
|
|
|
|
|
27
|
while (my ($key, $val) = each %$vector) { |
31
|
18
|
|
100
|
|
|
56
|
$centroid{$key} //= 0; |
32
|
18
|
|
|
|
|
54
|
$centroid{$key} += $val; |
33
|
|
|
|
|
|
|
} |
34
|
|
|
|
|
|
|
} |
35
|
2
|
|
|
|
|
7
|
for my $key (keys %centroid) { $centroid{$key} /= @$cluster } |
|
10
|
|
|
|
|
18
|
|
36
|
2
|
|
|
|
|
11
|
return \%centroid; |
37
|
|
|
|
|
|
|
} |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
sub inner_product { |
40
|
1
|
|
|
1
|
1
|
2
|
my ($x1, $x2) = @_; |
41
|
1
|
|
|
|
|
4
|
my @common_keys = grep { exists $x2->{$_} } keys %$x1; |
|
2
|
|
|
|
|
7
|
|
42
|
1
|
50
|
|
|
|
5
|
return 0 if @common_keys == 0; |
43
|
1
|
|
|
|
|
3
|
sum map { $x1->{$_} * $x2->{$_} } @common_keys; |
|
2
|
|
|
|
|
29
|
|
44
|
|
|
|
|
|
|
} |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
sub diff_vector { |
47
|
2
|
|
|
2
|
0
|
14
|
my ($v, $u) = @_; |
48
|
2
|
|
|
|
|
2
|
my %tmp; @tmp{keys %$v, keys %$u} = (); |
|
2
|
|
|
|
|
7
|
|
49
|
5
|
|
100
|
|
|
25
|
my %diff = map { |
|
|
|
100
|
|
|
|
|
50
|
2
|
|
|
|
|
5
|
my ($e1, $e2) = (($v->{$_} // 0), ($u->{$_} // 0)); |
51
|
5
|
|
|
|
|
16
|
($_ => $e1 - $e2); |
52
|
|
|
|
|
|
|
} keys %tmp; |
53
|
2
|
|
|
|
|
7
|
\%diff; |
54
|
|
|
|
|
|
|
} |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
sub euclidean_distance { |
57
|
1
|
|
|
1
|
1
|
10
|
my ($v, $u) = @_; |
58
|
1
|
|
|
|
|
4
|
my $sub = diff_vector($v, $u); |
59
|
1
|
|
|
|
|
4
|
sqrt inner_product($sub, $sub); |
60
|
|
|
|
|
|
|
} |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
1; |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
__END__ |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=head1 NAME |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
Algorithm::KernelKMeans::Util |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
=head1 DESCRIPTION |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
This module provides some constants and functions suitable to use with C<Algorithm::KernelKMeans>. |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
=head1 CONSTANTS |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
Constants listed below represent kernels/initializers, which some methods require. |
77
|
|
|
|
|
|
|
It's recommended to use these constants instead of code reference, because clusterer implementations might have its own kernel/initializer code. |
78
|
|
|
|
|
|
|
So code references should be used iff there's no equivalent constants. |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
All constants are C<import()>able. |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=head2 $KERNEL_POLYNOMINAL |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
Polynominal kernel. Takes 2 parameters ($l, $p) then will be formed like C<K(x1, x2) = ($l + x1 . x2)^$p, where "x1 . x2" represents inner product>. |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
=head2 $KERNEL_GAUSSIAN |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
Gaussian kernel. Takes 1 parameter ($sigma). |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
C<K(x1, x2) = exp(-||x1 - x2||^2 / (2 * $sigma)^2)> |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
=head2 $KERNEL_SIGMOID |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
Sigmoid kernel. Takes 2 parameters ($s, $theta). |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
C<K(x1, x2) = tanh($s * (x1 . x2) + $theta)> |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
=head2 $INITIALIZE_SIMPLE |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
=head2 $INITIALIZE_SHUFFLE |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
=head2 $INITIALIZE_KKZ |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
=head1 FUNCTIONS |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
This module exports nothing by default. You can C<import> functions below: |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
=head2 centroid($cluster) |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
Takes array ref of vectors and returns centroid vector of the cluster. |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
=head2 inner_product($v, $u) |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
Calculates inner product of C<$v> and C<$u>. |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
=head2 euclidean_distance($v, $u) |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
Computes euclidean distance between C<$v> and C<$u>. |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
=head1 AUTHOR |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
Koichi SATOH E<lt>r.sekia@gmail.comE<gt> |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=cut |