| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# |
|
2
|
|
|
|
|
|
|
# GENERATED WITH PDL::PP from Cluster.pd! Don't modify! |
|
3
|
|
|
|
|
|
|
# |
|
4
|
|
|
|
|
|
|
package PDL::Cluster; |
|
5
|
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
our @EXPORT_OK = qw(cmean cmedian calculate_weights clusterdistance distancematrix getclustercentroids getclustermean getclustermedian getclustermedoids kcluster kmedoids treecluster treeclusterd cuttree somcluster pca rowdistances clusterdistances clustersizes clusterelements clusterelementmask clusterdistancematrix clusterenc clusterdec clusteroffsets clusterdistancematrixenc clusterdistancesenc getclusterwsum attachtonearest attachtonearestd checkprototypes checkpartitions randomprototypes randompartition ); |
|
7
|
|
|
|
|
|
|
our %EXPORT_TAGS = (Func=>\@EXPORT_OK); |
|
8
|
|
|
|
|
|
|
|
|
9
|
9
|
|
|
9
|
|
2947935
|
use PDL::Core; |
|
|
9
|
|
|
|
|
25
|
|
|
|
9
|
|
|
|
|
70
|
|
|
10
|
9
|
|
|
9
|
|
3786
|
use PDL::Exporter; |
|
|
9
|
|
|
|
|
21
|
|
|
|
9
|
|
|
|
|
101
|
|
|
11
|
9
|
|
|
9
|
|
433
|
use DynaLoader; |
|
|
9
|
|
|
|
|
19
|
|
|
|
9
|
|
|
|
|
15340
|
|
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
our $VERSION = '1.54.004'; |
|
15
|
|
|
|
|
|
|
our @ISA = ( 'PDL::Exporter','DynaLoader' ); |
|
16
|
|
|
|
|
|
|
push @PDL::Core::PP, __PACKAGE__; |
|
17
|
|
|
|
|
|
|
bootstrap PDL::Cluster $VERSION; |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
#line 14 "Cluster.pd" |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
#--------------------------------------------------------------------------- |
|
29
|
|
|
|
|
|
|
# File: PDL::Cluster.pm |
|
30
|
|
|
|
|
|
|
# Author: Bryan Jurish |
|
31
|
|
|
|
|
|
|
# Description: PDL wrappers for the C Clustering library. |
|
32
|
|
|
|
|
|
|
# |
|
33
|
|
|
|
|
|
|
# Copyright (c) 2005-2021 Bryan Jurish. All rights reserved. |
|
34
|
|
|
|
|
|
|
# This program is free software. You may modify and/or |
|
35
|
|
|
|
|
|
|
# distribute it under the same terms as Perl itself. |
|
36
|
|
|
|
|
|
|
# |
|
37
|
|
|
|
|
|
|
#--------------------------------------------------------------------------- |
|
38
|
|
|
|
|
|
|
# Based on the C clustering library for cDNA microarray data, |
|
39
|
|
|
|
|
|
|
# Copyright (C) 2002-2005 Michiel Jan Laurens de Hoon. |
|
40
|
|
|
|
|
|
|
# |
|
41
|
|
|
|
|
|
|
# The C clustering library was written at the Laboratory of DNA Information |
|
42
|
|
|
|
|
|
|
# Analysis, Human Genome Center, Institute of Medical Science, University of |
|
43
|
|
|
|
|
|
|
# Tokyo, 4-6-1 Shirokanedai, Minato-ku, Tokyo 108-8639, Japan. |
|
44
|
|
|
|
|
|
|
# Contact: michiel.dehoon 'AT' riken.jp |
|
45
|
|
|
|
|
|
|
# |
|
46
|
|
|
|
|
|
|
# See the files "cluster.c" and "cluster.h" in the PDL::Cluster distribution |
|
47
|
|
|
|
|
|
|
# for details. |
|
48
|
|
|
|
|
|
|
#--------------------------------------------------------------------------- |
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
=pod |
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
=head1 NAME |
|
53
|
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
PDL::Cluster - PDL interface to the C Clustering Library |
|
55
|
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
57
|
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
use PDL::Cluster; |
|
59
|
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
##----------------------------------------------------- |
|
61
|
|
|
|
|
|
|
## Data Format |
|
62
|
|
|
|
|
|
|
$d = 42; ##-- number of features |
|
63
|
|
|
|
|
|
|
$n = 1024; ##-- number of data elements |
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
$data = random($d,$n); ##-- data matrix |
|
66
|
|
|
|
|
|
|
$elt = $data->slice(",($i)"); ##-- element data vector |
|
67
|
|
|
|
|
|
|
$ftr = $data->slice("($j),"); ##-- feature vector over all elements |
|
68
|
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
$wts = ones($d)/$d; ##-- feature weights |
|
70
|
|
|
|
|
|
|
$msk = ones($d,$n); ##-- missing-datum mask (1=ok) |
|
71
|
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
##----------------------------------------------------- |
|
73
|
|
|
|
|
|
|
## Library Utilities |
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
$mean = $ftr->cmean(); |
|
76
|
|
|
|
|
|
|
$median = $ftr->cmedian(); |
|
77
|
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
calculate_weights($data,$msk,$wts, $cutoff,$expnt, |
|
79
|
|
|
|
|
|
|
$weights); |
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
##----------------------------------------------------- |
|
82
|
|
|
|
|
|
|
## Distance Functions |
|
83
|
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
clusterdistance($data,$msk,$wts, $n1,$n2,$idx1,$idx2, |
|
85
|
|
|
|
|
|
|
$dist, |
|
86
|
|
|
|
|
|
|
$distFlag, $methodFlag2); |
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
distancematrix($data,$msk,$wts, $distmat, $distFlag); |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
##----------------------------------------------------- |
|
91
|
|
|
|
|
|
|
## Partitioning Algorithms |
|
92
|
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
getclustermean($data,$msk,$clusterids, |
|
94
|
|
|
|
|
|
|
$ctrdata, $ctrmask); |
|
95
|
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
getclustermedian($data,$msk,$clusterids, |
|
97
|
|
|
|
|
|
|
$ctrdata, $ctrmask); |
|
98
|
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
getclustermedoids($distmat,$clusterids,$centroids, |
|
100
|
|
|
|
|
|
|
$errorsums); |
|
101
|
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
kcluster($k, $data,$msk,$wts, $npass, |
|
103
|
|
|
|
|
|
|
$clusterids, $error, $nfound, |
|
104
|
|
|
|
|
|
|
$distFlag, $methodFlag); |
|
105
|
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
kmedoids($k, $distmat,$npass, |
|
107
|
|
|
|
|
|
|
$clusterids, $error, $nfound); |
|
108
|
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
##----------------------------------------------------- |
|
110
|
|
|
|
|
|
|
## Hierarchical Algorithms |
|
111
|
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
treecluster($data,$msk,$wts, |
|
113
|
|
|
|
|
|
|
$tree, $lnkdist, |
|
114
|
|
|
|
|
|
|
$distFlag, $methodFlag); |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
treeclusterd($data,$msk,$wts, $distmat, |
|
117
|
|
|
|
|
|
|
$tree, $lnkdist, |
|
118
|
|
|
|
|
|
|
$distFlag, $methodFlag); |
|
119
|
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
cuttree($tree, $nclusters, |
|
121
|
|
|
|
|
|
|
$clusterids); |
|
122
|
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
##----------------------------------------------------- |
|
124
|
|
|
|
|
|
|
## Self-Organizing Maps |
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
somcluster($data,$msk,$wts, $nx,$ny,$tau,$niter, |
|
127
|
|
|
|
|
|
|
$clusterids, |
|
128
|
|
|
|
|
|
|
$distFlag); |
|
129
|
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
##----------------------------------------------------- |
|
131
|
|
|
|
|
|
|
## Principal Component Analysis |
|
132
|
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
pca($U, $S, $V); |
|
134
|
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
##----------------------------------------------------- |
|
136
|
|
|
|
|
|
|
## Extensions |
|
137
|
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
rowdistances($data,$msk,$wts, $rowids1,$rowids2, $distvec, $distFlag); |
|
139
|
|
|
|
|
|
|
clusterdistances($data,$msk,$wts, $rowids, $index2, |
|
140
|
|
|
|
|
|
|
$dist, |
|
141
|
|
|
|
|
|
|
$distFlag, $methodFlag); |
|
142
|
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
clustersizes($clusterids, $clustersizes); |
|
144
|
|
|
|
|
|
|
clusterelements($clusterids, $clustersizes, $eltids); |
|
145
|
|
|
|
|
|
|
clusterelementmask($clusterids, $eltmask); |
|
146
|
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
clusterdistancematrix($data,$msk,$wts, |
|
148
|
|
|
|
|
|
|
$rowids, $clustersizes, $eltids, |
|
149
|
|
|
|
|
|
|
$dist, |
|
150
|
|
|
|
|
|
|
$distFlag, $methodFlag); |
|
151
|
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
clusterenc($clusterids, $clens,$cvals,$crowids, $k); |
|
153
|
|
|
|
|
|
|
clusterdec($clens,$cvals,$crowids, $clusterids, $k); |
|
154
|
|
|
|
|
|
|
clusteroffsets($clusterids, $coffsets,$cvals,$crowids, $k); |
|
155
|
|
|
|
|
|
|
clusterdistancematrixenc($data,$msk,$wts, |
|
156
|
|
|
|
|
|
|
$clens1,$crowids1, $clens2,$crowids2, |
|
157
|
|
|
|
|
|
|
$dist, |
|
158
|
|
|
|
|
|
|
$distFlag, $methodFlag); |
|
159
|
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
=cut |
|
161
|
|
|
|
|
|
|
#line 162 "Cluster.pm" |
|
162
|
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
=head1 FUNCTIONS |
|
165
|
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
=cut |
|
167
|
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
=head2 cmean |
|
174
|
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
=for sig |
|
176
|
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
Signature: (double a(n); double [o]b()) |
|
178
|
|
|
|
|
|
|
Types: (double) |
|
179
|
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
=for usage |
|
181
|
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
$b = cmean($a); |
|
183
|
|
|
|
|
|
|
cmean($a, $b); # all arguments given |
|
184
|
|
|
|
|
|
|
$b = $a->cmean; # method call |
|
185
|
|
|
|
|
|
|
$a->cmean($b); |
|
186
|
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
=for ref |
|
188
|
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
Computes arithmetic mean of the vector $a(). See also PDL::Primitive::avg(). |
|
190
|
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
=pod |
|
192
|
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
Broadcasts over its inputs. |
|
194
|
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
=for bad |
|
196
|
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
C<cmean> does not process bad values. |
|
198
|
|
|
|
|
|
|
It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays. |
|
199
|
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
=cut |
|
201
|
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
*cmean = \&PDL::cmean; |
|
206
|
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
=head2 cmedian |
|
213
|
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
=for sig |
|
215
|
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
Signature: (double a(n); double [o]b()) |
|
217
|
|
|
|
|
|
|
Types: (double) |
|
218
|
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
=for usage |
|
220
|
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
$b = cmedian($a); |
|
222
|
|
|
|
|
|
|
cmedian($a, $b); # all arguments given |
|
223
|
|
|
|
|
|
|
$b = $a->cmedian; # method call |
|
224
|
|
|
|
|
|
|
$a->cmedian($b); |
|
225
|
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
=for ref |
|
227
|
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
Computes median of the vector $a(). See also PDL::Primitive::median(). |
|
229
|
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
=pod |
|
231
|
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
Broadcasts over its inputs. |
|
233
|
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
=for bad |
|
235
|
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
C<cmedian> does not process bad values. |
|
237
|
|
|
|
|
|
|
It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays. |
|
238
|
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
=cut |
|
240
|
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
*cmedian = \&PDL::cmedian; |
|
245
|
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
=head2 calculate_weights |
|
252
|
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
=for sig |
|
254
|
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
Signature: ( |
|
256
|
|
|
|
|
|
|
double data(d,n); |
|
257
|
|
|
|
|
|
|
int mask(d,n); |
|
258
|
|
|
|
|
|
|
double weight(d); |
|
259
|
|
|
|
|
|
|
double cutoff(); |
|
260
|
|
|
|
|
|
|
double exponent(); |
|
261
|
|
|
|
|
|
|
double [o]oweights(d); |
|
262
|
|
|
|
|
|
|
; char *distFlag; |
|
263
|
|
|
|
|
|
|
) |
|
264
|
|
|
|
|
|
|
Types: (sbyte byte short ushort long ulong indx ulonglong longlong |
|
265
|
|
|
|
|
|
|
float double ldouble) |
|
266
|
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
=for usage |
|
268
|
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
$oweights = calculate_weights($data, $mask, $weight, $cutoff, $exponent, $distFlag); |
|
270
|
|
|
|
|
|
|
calculate_weights($data, $mask, $weight, $cutoff, $exponent, $oweights, $distFlag); # all arguments given |
|
271
|
|
|
|
|
|
|
$oweights = $data->calculate_weights($mask, $weight, $cutoff, $exponent, $distFlag); # method call |
|
272
|
|
|
|
|
|
|
$data->calculate_weights($mask, $weight, $cutoff, $exponent, $oweights, $distFlag); |
|
273
|
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
This function calculates weights for the features using the weighting scheme |
|
275
|
|
|
|
|
|
|
proposed by Michael Eisen: |
|
276
|
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
w[i] = 1.0 / sum_{j where dist(i,j) < cutoff} (1 - dist(i,j)/cutoff)^exponent
|
|
278
|
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
where the cutoff and the exponent are specified by the user. |
|
280
|
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
=pod |
|
282
|
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
Broadcasts over its inputs. |
|
284
|
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
=for bad |
|
286
|
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
C<calculate_weights> does not process bad values. |
|
288
|
|
|
|
|
|
|
It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays. |
|
289
|
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
=cut |
|
291
|
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
*calculate_weights = \&PDL::calculate_weights; |
|
296
|
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
=head2 clusterdistance |
|
303
|
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
=for sig |
|
305
|
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
Signature: ( |
|
307
|
|
|
|
|
|
|
double data(d,n); |
|
308
|
|
|
|
|
|
|
int mask(d,n); |
|
309
|
|
|
|
|
|
|
double weight(d); |
|
310
|
|
|
|
|
|
|
int n1(); |
|
311
|
|
|
|
|
|
|
int n2(); |
|
312
|
|
|
|
|
|
|
int index1(n1); |
|
313
|
|
|
|
|
|
|
int index2(n2); |
|
314
|
|
|
|
|
|
|
double [o]dist(); |
|
315
|
|
|
|
|
|
|
; |
|
316
|
|
|
|
|
|
|
char *distFlag; |
|
317
|
|
|
|
|
|
|
char *methodFlag; |
|
318
|
|
|
|
|
|
|
) |
|
319
|
|
|
|
|
|
|
Types: (sbyte byte short ushort long ulong indx ulonglong longlong |
|
320
|
|
|
|
|
|
|
float double ldouble) |
|
321
|
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
=for usage |
|
323
|
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
$dist = clusterdistance($data, $mask, $weight, $n1, $n2, $index1, $index2, $distFlag, $methodFlag); |
|
325
|
|
|
|
|
|
|
clusterdistance($data, $mask, $weight, $n1, $n2, $index1, $index2, $dist, $distFlag, $methodFlag); # all arguments given |
|
326
|
|
|
|
|
|
|
$dist = $data->clusterdistance($mask, $weight, $n1, $n2, $index1, $index2, $distFlag, $methodFlag); # method call |
|
327
|
|
|
|
|
|
|
$data->clusterdistance($mask, $weight, $n1, $n2, $index1, $index2, $dist, $distFlag, $methodFlag); |
|
328
|
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
Computes distance between two clusters $index1() and $index2(). |
|
330
|
|
|
|
|
|
|
Each of the $index() vectors represents a single cluster whose values |
|
331
|
|
|
|
|
|
|
are the row-indices in the $data() matrix of the elements assigned |
|
332
|
|
|
|
|
|
|
to the respective cluster. $n1() and $n2() are the number of elements |
|
333
|
|
|
|
|
|
|
in $index1() and $index2(), respectively. Each $index$i() must have |
|
334
|
|
|
|
|
|
|
at least $n$i() elements allocated. |
|
335
|
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
B<CAVEAT:> the $methodFlag argument is interpreted differently than |
|
337
|
|
|
|
|
|
|
by the treecluster() method, namely: |
|
338
|
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
=over 4 |
|
340
|
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
=item a |
|
342
|
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
Distance between the arithmetic means of the two clusters, |
|
344
|
|
|
|
|
|
|
as for treecluster() "f". |
|
345
|
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
=item m |
|
347
|
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
Distance between the medians of the two clusters, |
|
349
|
|
|
|
|
|
|
as for treecluster() "c". |
|
350
|
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
=item s |
|
352
|
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
Minimum pairwise distance between members of the two clusters, |
|
354
|
|
|
|
|
|
|
as for treecluster() "s". |
|
355
|
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
=item x |
|
357
|
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
Maximum pairwise distance between members of the two clusters |
|
359
|
|
|
|
|
|
|
as for treecluster() "m". |
|
360
|
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
=item v |
|
362
|
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
Average of the pairwise distances between members of the two clusters, |
|
364
|
|
|
|
|
|
|
as for treecluster() "a". |
|
365
|
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
=back |
|
367
|
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
=pod |
|
369
|
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
Broadcasts over its inputs. |
|
371
|
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
=for bad |
|
373
|
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
C<clusterdistance> does not process bad values. |
|
375
|
|
|
|
|
|
|
It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays. |
|
376
|
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
=cut |
|
378
|
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
*clusterdistance = \&PDL::clusterdistance; |
|
383
|
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
=head2 distancematrix |
|
390
|
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
=for sig |
|
392
|
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
Signature: ( |
|
394
|
|
|
|
|
|
|
double data(d,n); |
|
395
|
|
|
|
|
|
|
int mask(d,n); |
|
396
|
|
|
|
|
|
|
double weight(d); |
|
397
|
|
|
|
|
|
|
double [o]dists(n,n); |
|
398
|
|
|
|
|
|
|
; char *distFlag; |
|
399
|
|
|
|
|
|
|
) |
|
400
|
|
|
|
|
|
|
Types: (sbyte byte short ushort long ulong indx ulonglong longlong |
|
401
|
|
|
|
|
|
|
float double ldouble) |
|
402
|
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
=for usage |
|
404
|
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
$dists = distancematrix($data, $mask, $weight, $distFlag); |
|
406
|
|
|
|
|
|
|
distancematrix($data, $mask, $weight, $dists, $distFlag); # all arguments given |
|
407
|
|
|
|
|
|
|
$dists = $data->distancematrix($mask, $weight, $distFlag); # method call |
|
408
|
|
|
|
|
|
|
$data->distancematrix($mask, $weight, $dists, $distFlag); |
|
409
|
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
=for ref |
|
411
|
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
Compute triangular distance matrix over all data points. |
|
413
|
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
=pod |
|
415
|
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
Broadcasts over its inputs. |
|
417
|
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
=for bad |
|
419
|
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
C<distancematrix> does not process bad values. |
|
421
|
|
|
|
|
|
|
It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays. |
|
422
|
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
=cut |
|
424
|
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
*distancematrix = \&PDL::distancematrix; |
|
429
|
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
=head2 getclustercentroids |
|
436
|
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
=for sig |
|
438
|
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
Signature: ( |
|
440
|
|
|
|
|
|
|
double data(d,n); |
|
441
|
|
|
|
|
|
|
int mask(d,n); |
|
442
|
|
|
|
|
|
|
int clusterids(n); |
|
443
|
|
|
|
|
|
|
double [o]cdata(d,k); |
|
444
|
|
|
|
|
|
|
int [o]cmask(d,k); |
|
445
|
|
|
|
|
|
|
; char *ctrMethodFlag; |
|
446
|
|
|
|
|
|
|
) |
|
447
|
|
|
|
|
|
|
Types: (sbyte byte short ushort long ulong indx ulonglong longlong |
|
448
|
|
|
|
|
|
|
float double ldouble) |
|
449
|
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
=for usage |
|
451
|
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
($cdata, $cmask) = getclustercentroids($data, $mask, $clusterids, $ctrMethodFlag); |
|
453
|
|
|
|
|
|
|
getclustercentroids($data, $mask, $clusterids, $cdata, $cmask, $ctrMethodFlag); # all arguments given |
|
454
|
|
|
|
|
|
|
($cdata, $cmask) = $data->getclustercentroids($mask, $clusterids, $ctrMethodFlag); # method call |
|
455
|
|
|
|
|
|
|
$data->getclustercentroids($mask, $clusterids, $cdata, $cmask, $ctrMethodFlag); |
|
456
|
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
=for ref |
|
458
|
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
Find cluster centroids by arithmetic mean (C<ctrMethodFlag="a">) or median over each dimension (C<ctrMethodFlag="m">). |
|
460
|
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
=pod |
|
462
|
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
Broadcasts over its inputs. |
|
464
|
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
=for bad |
|
466
|
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
C<getclustercentroids> does not process bad values. |
|
468
|
|
|
|
|
|
|
It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays. |
|
469
|
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
=cut |
|
471
|
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
*getclustercentroids = \&PDL::getclustercentroids; |
|
476
|
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
#line 589 "Cluster.pd" |
|
482
|
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
=pod |
|
484
|
|
|
|
|
|
|
|
|
485
|
|
|
|
|
|
|
=head2 getclustermean |
|
486
|
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
=for sig |
|
488
|
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
Signature: ( |
|
490
|
|
|
|
|
|
|
double data(d,n); |
|
491
|
|
|
|
|
|
|
int mask(d,n); |
|
492
|
|
|
|
|
|
|
int clusterids(n); |
|
493
|
|
|
|
|
|
|
double [o]cdata(d,k); |
|
494
|
|
|
|
|
|
|
int [o]cmask(d,k); |
|
495
|
|
|
|
|
|
|
) |
|
496
|
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
Really just a wrapper for getclustercentroids(...,"a"). |
|
498
|
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
=cut |
|
500
|
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
# getclustermean($data, $mask, $cids, $cdata, $cmask)
#
# Convenience wrapper: forwards its five arguments to getclustercentroids()
# with the centroid-method flag 'a' and returns whatever that call returns.
#
# NOTE: this file is generated from Cluster.pd; the same fix belongs there.
sub getclustermean {
  my ($data,$mask,$cids,$cdata,$cmask) = @_;
  # Forward $data itself: the previous code passed the never-assigned
  # variable $dat, so the data matrix reached getclustercentroids() as undef.
  return getclustercentroids($data,$mask,$cids,$cdata,$cmask,'a');
}
|
505
|
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
#line 620 "Cluster.pd" |
|
507
|
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
=pod |
|
509
|
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
=head2 getclustermedian |
|
511
|
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
=for sig |
|
513
|
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
Signature: ( |
|
515
|
|
|
|
|
|
|
double data(d,n); |
|
516
|
|
|
|
|
|
|
int mask(d,n); |
|
517
|
|
|
|
|
|
|
int clusterids(n); |
|
518
|
|
|
|
|
|
|
double [o]cdata(d,k); |
|
519
|
|
|
|
|
|
|
int [o]cmask(d,k); |
|
520
|
|
|
|
|
|
|
) |
|
521
|
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
Really just a wrapper for getclustercentroids(...,"m"). |
|
523
|
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
=cut |
|
525
|
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
# getclustermedian($data, $mask, $cids, $cdata, $cmask)
#
# Convenience wrapper: forwards its five arguments to getclustercentroids()
# with the centroid-method flag 'm' and returns whatever that call returns.
#
# NOTE: this file is generated from Cluster.pd; the same fix belongs there.
sub getclustermedian {
  my ($data,$mask,$cids,$cdata,$cmask) = @_;
  # Forward $data itself: the previous code passed the never-assigned
  # variable $dat, so the data matrix reached getclustercentroids() as undef.
  return getclustercentroids($data,$mask,$cids,$cdata,$cmask,'m');
}
|
530
|
|
|
|
|
|
|
#line 531 "Cluster.pm" |
|
531
|
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
=head2 getclustermedoids |
|
534
|
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
=for sig |
|
536
|
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
Signature: ( |
|
538
|
|
|
|
|
|
|
double distance(n,n); |
|
539
|
|
|
|
|
|
|
int clusterids(n); |
|
540
|
|
|
|
|
|
|
int [o]centroids(k); |
|
541
|
|
|
|
|
|
|
double [o]errors(k); |
|
542
|
|
|
|
|
|
|
) |
|
543
|
|
|
|
|
|
|
Types: (sbyte byte short ushort long ulong indx ulonglong longlong |
|
544
|
|
|
|
|
|
|
float double ldouble) |
|
545
|
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
=for usage |
|
547
|
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
($centroids, $errors) = getclustermedoids($distance, $clusterids); |
|
549
|
|
|
|
|
|
|
getclustermedoids($distance, $clusterids, $centroids, $errors); # all arguments given |
|
550
|
|
|
|
|
|
|
($centroids, $errors) = $distance->getclustermedoids($clusterids); # method call |
|
551
|
|
|
|
|
|
|
$distance->getclustermedoids($clusterids, $centroids, $errors); |
|
552
|
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
The getclustermedoids routine calculates the cluster centroids, given to which |
|
554
|
|
|
|
|
|
|
cluster each element belongs. The centroid is defined as the element with the |
|
555
|
|
|
|
|
|
|
smallest sum of distances to the other elements. |
|
556
|
|
|
|
|
|
|
|
|
557
|
|
|
|
|
|
|
=pod |
|
558
|
|
|
|
|
|
|
|
|
559
|
|
|
|
|
|
|
Broadcasts over its inputs. |
|
560
|
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
=for bad |
|
562
|
|
|
|
|
|
|
|
|
563
|
|
|
|
|
|
|
C<getclustermedoids> does not process bad values. |
|
564
|
|
|
|
|
|
|
It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays. |
|
565
|
|
|
|
|
|
|
|
|
566
|
|
|
|
|
|
|
=cut |
|
567
|
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
|
|
569
|
|
|
|
|
|
|
|
|
570
|
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
*getclustermedoids = \&PDL::getclustermedoids; |
|
572
|
|
|
|
|
|
|
|
|
573
|
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
|
|
576
|
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
|
|
578
|
|
|
|
|
|
|
=head2 kcluster |
|
579
|
|
|
|
|
|
|
|
|
580
|
|
|
|
|
|
|
=for sig |
|
581
|
|
|
|
|
|
|
|
|
582
|
|
|
|
|
|
|
Signature: ( |
|
583
|
|
|
|
|
|
|
int nclusters(); |
|
584
|
|
|
|
|
|
|
double data(d,n); |
|
585
|
|
|
|
|
|
|
int mask(d,n); |
|
586
|
|
|
|
|
|
|
double weight(d); |
|
587
|
|
|
|
|
|
|
int npass(); |
|
588
|
|
|
|
|
|
|
int [o]clusterids(n); |
|
589
|
|
|
|
|
|
|
double [o]error(); |
|
590
|
|
|
|
|
|
|
int [o]nfound(); |
|
591
|
|
|
|
|
|
|
; |
|
592
|
|
|
|
|
|
|
char *distFlag; |
|
593
|
|
|
|
|
|
|
char *ctrMethodFlag; |
|
594
|
|
|
|
|
|
|
) |
|
595
|
|
|
|
|
|
|
Types: (sbyte byte short ushort long ulong indx ulonglong longlong |
|
596
|
|
|
|
|
|
|
float double ldouble) |
|
597
|
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
=for usage |
|
599
|
|
|
|
|
|
|
|
|
600
|
|
|
|
|
|
|
($clusterids, $error, $nfound) = kcluster($nclusters, $data, $mask, $weight, $npass, $distFlag, $ctrMethodFlag); |
|
601
|
|
|
|
|
|
|
kcluster($nclusters, $data, $mask, $weight, $npass, $clusterids, $error, $nfound, $distFlag, $ctrMethodFlag); # all arguments given |
|
602
|
|
|
|
|
|
|
($clusterids, $error, $nfound) = $nclusters->kcluster($data, $mask, $weight, $npass, $distFlag, $ctrMethodFlag); # method call |
|
603
|
|
|
|
|
|
|
$nclusters->kcluster($data, $mask, $weight, $npass, $clusterids, $error, $nfound, $distFlag, $ctrMethodFlag); |
|
604
|
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
K-Means clustering algorithm. The "ctrMethodFlag" determines how |
|
606
|
|
|
|
|
|
|
clusters centroids are to be computed; see getclustercentroids() for details. |
|
607
|
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
Because the C library code reads from the C<clusterids> if and only if |
|
609
|
|
|
|
|
|
|
C<npass> is 0, before writing to it, it would be inconvenient to |
|
610
|
|
|
|
|
|
|
set it to C<[io]>. However for efficiency reasons, as of 2.096, PDL |
|
611
|
|
|
|
|
|
|
will not convert it (force a read-back on the conversion) for you |
|
612
|
|
|
|
|
|
|
if you pass in the wrongly-typed data. This means that you should |
|
613
|
|
|
|
|
|
|
be careful to pass in C<long> data of the right size if you set C<npass> |
|
614
|
|
|
|
|
|
|
to 0. |
|
615
|
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
See also: kmedoids(). |
|
617
|
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
=pod |
|
619
|
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
Broadcasts over its inputs. |
|
621
|
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
=for bad |
|
623
|
|
|
|
|
|
|
|
|
624
|
|
|
|
|
|
|
C<kcluster> does not process bad values. |
|
625
|
|
|
|
|
|
|
It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays. |
|
626
|
|
|
|
|
|
|
|
|
627
|
|
|
|
|
|
|
=cut |
|
628
|
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
|
|
630
|
|
|
|
|
|
|
|
|
631
|
|
|
|
|
|
|
|
|
632
|
|
|
|
|
|
|
*kcluster = \&PDL::kcluster; |
|
633
|
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
|
|
638
|
|
|
|
|
|
|
|
|
639
|
|
|
|
|
|
|
=head2 kmedoids |
|
640
|
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
=for sig |
|
642
|
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
Signature: ( |
|
644
|
|
|
|
|
|
|
int nclusters(); |
|
645
|
|
|
|
|
|
|
double distance(n,n); |
|
646
|
|
|
|
|
|
|
int npass(); |
|
647
|
|
|
|
|
|
|
int [o]clusterids(n); |
|
648
|
|
|
|
|
|
|
double [o]error(); |
|
649
|
|
|
|
|
|
|
int [o]nfound(); |
|
650
|
|
|
|
|
|
|
) |
|
651
|
|
|
|
|
|
|
Types: (sbyte byte short ushort long ulong indx ulonglong longlong |
|
652
|
|
|
|
|
|
|
float double ldouble) |
|
653
|
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
=for usage |
|
655
|
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
($clusterids, $error, $nfound) = kmedoids($nclusters, $distance, $npass); |
|
657
|
|
|
|
|
|
|
kmedoids($nclusters, $distance, $npass, $clusterids, $error, $nfound); # all arguments given |
|
658
|
|
|
|
|
|
|
($clusterids, $error, $nfound) = $nclusters->kmedoids($distance, $npass); # method call |
|
659
|
|
|
|
|
|
|
$nclusters->kmedoids($distance, $npass, $clusterids, $error, $nfound); |
|
660
|
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
K-Medoids clustering algorithm (uses distance matrix). |
|
662
|
|
|
|
|
|
|
|
|
663
|
|
|
|
|
|
|
See also: kcluster(). |
|
664
|
|
|
|
|
|
|
|
|
665
|
|
|
|
|
|
|
=pod |
|
666
|
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
Broadcasts over its inputs. |
|
668
|
|
|
|
|
|
|
|
|
669
|
|
|
|
|
|
|
=for bad |
|
670
|
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
C<kmedoids> does not process bad values. |
|
672
|
|
|
|
|
|
|
It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays. |
|
673
|
|
|
|
|
|
|
|
|
674
|
|
|
|
|
|
|
=cut |
|
675
|
|
|
|
|
|
|
|
|
676
|
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
*kmedoids = \&PDL::kmedoids; |
|
680
|
|
|
|
|
|
|
|
|
681
|
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
|
|
683
|
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
|
|
686
|
|
|
|
|
|
|
=head2 treecluster |
|
687
|
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
=for sig |
|
689
|
|
|
|
|
|
|
|
|
690
|
|
|
|
|
|
|
Signature: ( |
|
691
|
|
|
|
|
|
|
double data(d,n); |
|
692
|
|
|
|
|
|
|
int mask(d,n); |
|
693
|
|
|
|
|
|
|
double weight(d); |
|
694
|
|
|
|
|
|
|
int [o]tree(2,n); |
|
695
|
|
|
|
|
|
|
double [o]lnkdist(n); |
|
696
|
|
|
|
|
|
|
; |
|
697
|
|
|
|
|
|
|
char *distFlag; |
|
698
|
|
|
|
|
|
|
char *methodFlag; |
|
699
|
|
|
|
|
|
|
) |
|
700
|
|
|
|
|
|
|
Types: (sbyte byte short ushort long ulong indx ulonglong longlong |
|
701
|
|
|
|
|
|
|
float double ldouble) |
|
702
|
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
=for usage |
|
704
|
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
($tree, $lnkdist) = treecluster($data, $mask, $weight, $distFlag, $methodFlag); |
|
706
|
|
|
|
|
|
|
treecluster($data, $mask, $weight, $tree, $lnkdist, $distFlag, $methodFlag); # all arguments given |
|
707
|
|
|
|
|
|
|
($tree, $lnkdist) = $data->treecluster($mask, $weight, $distFlag, $methodFlag); # method call |
|
708
|
|
|
|
|
|
|
$data->treecluster($mask, $weight, $tree, $lnkdist, $distFlag, $methodFlag); |
|
709
|
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
Hierarchical agglomerative clustering. |
|
711
|
|
|
|
|
|
|
|
|
712
|
|
|
|
|
|
|
$tree(2,n) represents the clustering solution. |
|
713
|
|
|
|
|
|
|
Each row in the matrix describes one linking event, |
|
714
|
|
|
|
|
|
|
with the two columns containing the name of the nodes that were joined. |
|
715
|
|
|
|
|
|
|
The original genes are numbered 0..(n-1), nodes are numbered |
|
716
|
|
|
|
|
|
|
-1..-(n-1). |
|
717
|
|
|
|
|
|
|
$tree(2,n) thus actually uses only (2,n-1) cells. |
|
718
|
|
|
|
|
|
|
|
|
719
|
|
|
|
|
|
|
$lnkdist(n) represents the distance between the two subnodes that were joined. |
|
720
|
|
|
|
|
|
|
As for $tree(), $lnkdist() uses only (n-1) cells. |
|
721
|
|
|
|
|
|
|
|
|
722
|
|
|
|
|
|
|
=pod |
|
723
|
|
|
|
|
|
|
|
|
724
|
|
|
|
|
|
|
Broadcasts over its inputs. |
|
725
|
|
|
|
|
|
|
|
|
726
|
|
|
|
|
|
|
=for bad |
|
727
|
|
|
|
|
|
|
|
|
728
|
|
|
|
|
|
|
C<treecluster> does not process bad values. |
|
729
|
|
|
|
|
|
|
It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays. |
|
730
|
|
|
|
|
|
|
|
|
731
|
|
|
|
|
|
|
=cut |
|
732
|
|
|
|
|
|
|
|
|
733
|
|
|
|
|
|
|
|
|
734
|
|
|
|
|
|
|
|
|
735
|
|
|
|
|
|
|
|
|
736
|
|
|
|
|
|
|
*treecluster = \&PDL::treecluster; |
|
737
|
|
|
|
|
|
|
|
|
738
|
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
|
|
740
|
|
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
|
|
742
|
|
|
|
|
|
|
|
|
743
|
|
|
|
|
|
|
=head2 treeclusterd |
|
744
|
|
|
|
|
|
|
|
|
745
|
|
|
|
|
|
|
=for sig |
|
746
|
|
|
|
|
|
|
|
|
747
|
|
|
|
|
|
|
Signature: ( |
|
748
|
|
|
|
|
|
|
double data(d,n); |
|
749
|
|
|
|
|
|
|
int mask(d,n); |
|
750
|
|
|
|
|
|
|
double weight(d); |
|
751
|
|
|
|
|
|
|
double distances(n,n); |
|
752
|
|
|
|
|
|
|
int [o]tree(2,n); |
|
753
|
|
|
|
|
|
|
double [o]lnkdist(n); |
|
754
|
|
|
|
|
|
|
; |
|
755
|
|
|
|
|
|
|
char *distFlag; |
|
756
|
|
|
|
|
|
|
char *methodFlag; |
|
757
|
|
|
|
|
|
|
) |
|
758
|
|
|
|
|
|
|
Types: (sbyte byte short ushort long ulong indx ulonglong longlong |
|
759
|
|
|
|
|
|
|
float double ldouble) |
|
760
|
|
|
|
|
|
|
|
|
761
|
|
|
|
|
|
|
=for usage |
|
762
|
|
|
|
|
|
|
|
|
763
|
|
|
|
|
|
|
($tree, $lnkdist) = treeclusterd($data, $mask, $weight, $distances, $distFlag, $methodFlag); |
|
764
|
|
|
|
|
|
|
treeclusterd($data, $mask, $weight, $distances, $tree, $lnkdist, $distFlag, $methodFlag); # all arguments given |
|
765
|
|
|
|
|
|
|
($tree, $lnkdist) = $data->treeclusterd($mask, $weight, $distances, $distFlag, $methodFlag); # method call |
|
766
|
|
|
|
|
|
|
$data->treeclusterd($mask, $weight, $distances, $tree, $lnkdist, $distFlag, $methodFlag); |
|
767
|
|
|
|
|
|
|
|
|
768
|
|
|
|
|
|
|
Hierarchical agglomerative clustering using given distance matrix. |
|
769
|
|
|
|
|
|
|
|
|
770
|
|
|
|
|
|
|
See distancematrix() and treecluster(), above. |
|
771
|
|
|
|
|
|
|
|
|
772
|
|
|
|
|
|
|
=pod |
|
773
|
|
|
|
|
|
|
|
|
774
|
|
|
|
|
|
|
Broadcasts over its inputs. |
|
775
|
|
|
|
|
|
|
|
|
776
|
|
|
|
|
|
|
=for bad |
|
777
|
|
|
|
|
|
|
|
|
778
|
|
|
|
|
|
|
treeclusterd() does not process bad values. |
|
779
|
|
|
|
|
|
|
It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays. |
|
780
|
|
|
|
|
|
|
|
|
781
|
|
|
|
|
|
|
=cut |
|
782
|
|
|
|
|
|
|
|
|
783
|
|
|
|
|
|
|
|
|
784
|
|
|
|
|
|
|
|
|
785
|
|
|
|
|
|
|
|
|
786
|
|
|
|
|
|
|
# Alias: bind this package's treeclusterd symbol to PDL::treeclusterd (glob assignment of a code ref).
*treeclusterd = \&PDL::treeclusterd; |
|
787
|
|
|
|
|
|
|
|
|
788
|
|
|
|
|
|
|
|
|
789
|
|
|
|
|
|
|
|
|
790
|
|
|
|
|
|
|
|
|
791
|
|
|
|
|
|
|
|
|
792
|
|
|
|
|
|
|
|
|
793
|
|
|
|
|
|
|
=head2 cuttree |
|
794
|
|
|
|
|
|
|
|
|
795
|
|
|
|
|
|
|
=for sig |
|
796
|
|
|
|
|
|
|
|
|
797
|
|
|
|
|
|
|
Signature: ( |
|
798
|
|
|
|
|
|
|
int tree(2,n); |
|
799
|
|
|
|
|
|
|
int nclusters(); |
|
800
|
|
|
|
|
|
|
int [o]clusterids(n); |
|
801
|
|
|
|
|
|
|
) |
|
802
|
|
|
|
|
|
|
Types: (sbyte byte short ushort long ulong indx ulonglong longlong |
|
803
|
|
|
|
|
|
|
float double ldouble) |
|
804
|
|
|
|
|
|
|
|
|
805
|
|
|
|
|
|
|
=for usage |
|
806
|
|
|
|
|
|
|
|
|
807
|
|
|
|
|
|
|
$clusterids = cuttree($tree, $nclusters); |
|
808
|
|
|
|
|
|
|
cuttree($tree, $nclusters, $clusterids); # all arguments given |
|
809
|
|
|
|
|
|
|
$clusterids = $tree->cuttree($nclusters); # method call |
|
810
|
|
|
|
|
|
|
$tree->cuttree($nclusters, $clusterids); |
|
811
|
|
|
|
|
|
|
|
|
812
|
|
|
|
|
|
|
Cluster selection for hierarchical clustering trees. |
|
813
|
|
|
|
|
|
|
|
|
814
|
|
|
|
|
|
|
=pod |
|
815
|
|
|
|
|
|
|
|
|
816
|
|
|
|
|
|
|
Broadcasts over its inputs. |
|
817
|
|
|
|
|
|
|
|
|
818
|
|
|
|
|
|
|
=for bad |
|
819
|
|
|
|
|
|
|
|
|
820
|
|
|
|
|
|
|
cuttree() does not process bad values. |
|
821
|
|
|
|
|
|
|
It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays. |
|
822
|
|
|
|
|
|
|
|
|
823
|
|
|
|
|
|
|
=cut |
|
824
|
|
|
|
|
|
|
|
|
825
|
|
|
|
|
|
|
|
|
826
|
|
|
|
|
|
|
|
|
827
|
|
|
|
|
|
|
|
|
828
|
|
|
|
|
|
|
# Alias: bind this package's cuttree symbol to PDL::cuttree (glob assignment of a code ref).
*cuttree = \&PDL::cuttree; |
|
829
|
|
|
|
|
|
|
|
|
830
|
|
|
|
|
|
|
|
|
831
|
|
|
|
|
|
|
|
|
832
|
|
|
|
|
|
|
|
|
833
|
|
|
|
|
|
|
|
|
834
|
|
|
|
|
|
|
|
|
835
|
|
|
|
|
|
|
=head2 somcluster |
|
836
|
|
|
|
|
|
|
|
|
837
|
|
|
|
|
|
|
=for sig |
|
838
|
|
|
|
|
|
|
|
|
839
|
|
|
|
|
|
|
Signature: ( |
|
840
|
|
|
|
|
|
|
double data(d,n); |
|
841
|
|
|
|
|
|
|
int mask(d,n); |
|
842
|
|
|
|
|
|
|
double weight(d); |
|
843
|
|
|
|
|
|
|
int nxnodes(); |
|
844
|
|
|
|
|
|
|
int nynodes(); |
|
845
|
|
|
|
|
|
|
double inittau(); |
|
846
|
|
|
|
|
|
|
int niter(); |
|
847
|
|
|
|
|
|
|
int [o]clusterids(2,n); |
|
848
|
|
|
|
|
|
|
; char *distFlag; |
|
849
|
|
|
|
|
|
|
) |
|
850
|
|
|
|
|
|
|
Types: (sbyte byte short ushort long ulong indx ulonglong longlong |
|
851
|
|
|
|
|
|
|
float double ldouble) |
|
852
|
|
|
|
|
|
|
|
|
853
|
|
|
|
|
|
|
=for usage |
|
854
|
|
|
|
|
|
|
|
|
855
|
|
|
|
|
|
|
$clusterids = somcluster($data, $mask, $weight, $nxnodes, $nynodes, $inittau, $niter, $distFlag); |
|
856
|
|
|
|
|
|
|
somcluster($data, $mask, $weight, $nxnodes, $nynodes, $inittau, $niter, $clusterids, $distFlag); # all arguments given |
|
857
|
|
|
|
|
|
|
$clusterids = $data->somcluster($mask, $weight, $nxnodes, $nynodes, $inittau, $niter, $distFlag); # method call |
|
858
|
|
|
|
|
|
|
$data->somcluster($mask, $weight, $nxnodes, $nynodes, $inittau, $niter, $clusterids, $distFlag); |
|
859
|
|
|
|
|
|
|
|
|
860
|
|
|
|
|
|
|
=for ref |
|
861
|
|
|
|
|
|
|
|
|
862
|
|
|
|
|
|
|
Self-Organizing Map clustering, does not return centroid data. |
|
863
|
|
|
|
|
|
|
|
|
864
|
|
|
|
|
|
|
=pod |
|
865
|
|
|
|
|
|
|
|
|
866
|
|
|
|
|
|
|
Broadcasts over its inputs. |
|
867
|
|
|
|
|
|
|
|
|
868
|
|
|
|
|
|
|
=for bad |
|
869
|
|
|
|
|
|
|
|
|
870
|
|
|
|
|
|
|
somcluster() does not process bad values. |
|
871
|
|
|
|
|
|
|
It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays. |
|
872
|
|
|
|
|
|
|
|
|
873
|
|
|
|
|
|
|
=cut |
|
874
|
|
|
|
|
|
|
|
|
875
|
|
|
|
|
|
|
|
|
876
|
|
|
|
|
|
|
|
|
877
|
|
|
|
|
|
|
|
|
878
|
|
|
|
|
|
|
# Alias: bind this package's somcluster symbol to PDL::somcluster (glob assignment of a code ref).
*somcluster = \&PDL::somcluster; |
|
879
|
|
|
|
|
|
|
|
|
880
|
|
|
|
|
|
|
|
|
881
|
|
|
|
|
|
|
|
|
882
|
|
|
|
|
|
|
|
|
883
|
|
|
|
|
|
|
|
|
884
|
|
|
|
|
|
|
|
|
885
|
|
|
|
|
|
|
=head2 pca |
|
886
|
|
|
|
|
|
|
|
|
887
|
|
|
|
|
|
|
=for sig |
|
888
|
|
|
|
|
|
|
|
|
889
|
|
|
|
|
|
|
Signature: ( |
|
890
|
|
|
|
|
|
|
double [o]U(d,n); |
|
891
|
|
|
|
|
|
|
double [o]S(d); |
|
892
|
|
|
|
|
|
|
double [o]V(d,d); |
|
893
|
|
|
|
|
|
|
) |
|
894
|
|
|
|
|
|
|
Types: (sbyte byte short ushort long ulong indx ulonglong longlong |
|
895
|
|
|
|
|
|
|
float double ldouble) |
|
896
|
|
|
|
|
|
|
|
|
897
|
|
|
|
|
|
|
=for usage |
|
898
|
|
|
|
|
|
|
|
|
899
|
|
|
|
|
|
|
($U, $S, $V) = pca(); |
|
900
|
|
|
|
|
|
|
pca($U, $S, $V); # all arguments given |
|
901
|
|
|
|
|
|
|
$U->pca($S, $V); |
|
902
|
|
|
|
|
|
|
|
|
903
|
|
|
|
|
|
|
Principal Component Analysis (SVD), operates in-place on $U() and requires ($SIZE(n) >= $SIZE(d)). |
|
904
|
|
|
|
|
|
|
|
|
905
|
|
|
|
|
|
|
=pod |
|
906
|
|
|
|
|
|
|
|
|
907
|
|
|
|
|
|
|
Broadcasts over its inputs. |
|
908
|
|
|
|
|
|
|
|
|
909
|
|
|
|
|
|
|
=for bad |
|
910
|
|
|
|
|
|
|
|
|
911
|
|
|
|
|
|
|
pca() does not process bad values. |
|
912
|
|
|
|
|
|
|
It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays. |
|
913
|
|
|
|
|
|
|
|
|
914
|
|
|
|
|
|
|
=cut |
|
915
|
|
|
|
|
|
|
|
|
916
|
|
|
|
|
|
|
|
|
917
|
|
|
|
|
|
|
|
|
918
|
|
|
|
|
|
|
|
|
919
|
|
|
|
|
|
|
# Alias: bind this package's pca symbol to PDL::pca (glob assignment of a code ref).
*pca = \&PDL::pca; |
|
920
|
|
|
|
|
|
|
|
|
921
|
|
|
|
|
|
|
|
|
922
|
|
|
|
|
|
|
|
|
923
|
|
|
|
|
|
|
|
|
924
|
|
|
|
|
|
|
|
|
925
|
|
|
|
|
|
|
|
|
926
|
|
|
|
|
|
|
=head2 rowdistances |
|
927
|
|
|
|
|
|
|
|
|
928
|
|
|
|
|
|
|
=for sig |
|
929
|
|
|
|
|
|
|
|
|
930
|
|
|
|
|
|
|
Signature: ( |
|
931
|
|
|
|
|
|
|
double data(d,n); |
|
932
|
|
|
|
|
|
|
int mask(d,n); |
|
933
|
|
|
|
|
|
|
double weight(d); |
|
934
|
|
|
|
|
|
|
int rowids1(ncmps); |
|
935
|
|
|
|
|
|
|
int rowids2(ncmps); |
|
936
|
|
|
|
|
|
|
double [o]dist(ncmps); |
|
937
|
|
|
|
|
|
|
; char *distFlag; |
|
938
|
|
|
|
|
|
|
) |
|
939
|
|
|
|
|
|
|
Types: (sbyte byte short ushort long ulong indx ulonglong longlong |
|
940
|
|
|
|
|
|
|
float double ldouble) |
|
941
|
|
|
|
|
|
|
|
|
942
|
|
|
|
|
|
|
=for usage |
|
943
|
|
|
|
|
|
|
|
|
944
|
|
|
|
|
|
|
$dist = rowdistances($data, $mask, $weight, $rowids1, $rowids2, $distFlag); |
|
945
|
|
|
|
|
|
|
rowdistances($data, $mask, $weight, $rowids1, $rowids2, $dist, $distFlag); # all arguments given |
|
946
|
|
|
|
|
|
|
$dist = $data->rowdistances($mask, $weight, $rowids1, $rowids2, $distFlag); # method call |
|
947
|
|
|
|
|
|
|
$data->rowdistances($mask, $weight, $rowids1, $rowids2, $dist, $distFlag); |
|
948
|
|
|
|
|
|
|
|
|
949
|
|
|
|
|
|
|
Computes pairwise distances between rows of $data(). |
|
950
|
|
|
|
|
|
|
$rowids1() contains the row-indices of the left (first) comparison operand, |
|
951
|
|
|
|
|
|
|
and $rowids2() the row-indices of the right (second) comparison operand. Since each |
|
952
|
|
|
|
|
|
|
of these are assumed to be indices into the first dimension $data(), it should be the case that: |
|
953
|
|
|
|
|
|
|
|
|
954
|
|
|
|
|
|
|
0 <= $rowids1(i),$rowids2(i) < $SIZE(n) for 0 <= i < $SIZE(ncmps) |
|
955
|
|
|
|
|
|
|
|
|
956
|
|
|
|
|
|
|
See also clusterdistance(), clusterdistances(), clusterdistancematrixenc(), distancematrix(). |
|
957
|
|
|
|
|
|
|
|
|
958
|
|
|
|
|
|
|
=pod |
|
959
|
|
|
|
|
|
|
|
|
960
|
|
|
|
|
|
|
Broadcasts over its inputs. |
|
961
|
|
|
|
|
|
|
|
|
962
|
|
|
|
|
|
|
=for bad |
|
963
|
|
|
|
|
|
|
|
|
964
|
|
|
|
|
|
|
rowdistances() does not process bad values. |
|
965
|
|
|
|
|
|
|
It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays. |
|
966
|
|
|
|
|
|
|
|
|
967
|
|
|
|
|
|
|
=cut |
|
968
|
|
|
|
|
|
|
|
|
969
|
|
|
|
|
|
|
|
|
970
|
|
|
|
|
|
|
|
|
971
|
|
|
|
|
|
|
|
|
972
|
|
|
|
|
|
|
# Alias: bind this package's rowdistances symbol to PDL::rowdistances (glob assignment of a code ref).
*rowdistances = \&PDL::rowdistances; |
|
973
|
|
|
|
|
|
|
|
|
974
|
|
|
|
|
|
|
|
|
975
|
|
|
|
|
|
|
|
|
976
|
|
|
|
|
|
|
|
|
977
|
|
|
|
|
|
|
|
|
978
|
|
|
|
|
|
|
|
|
979
|
|
|
|
|
|
|
=head2 clusterdistances |
|
980
|
|
|
|
|
|
|
|
|
981
|
|
|
|
|
|
|
=for sig |
|
982
|
|
|
|
|
|
|
|
|
983
|
|
|
|
|
|
|
Signature: ( |
|
984
|
|
|
|
|
|
|
double data(d,n); |
|
985
|
|
|
|
|
|
|
int mask(d,n); |
|
986
|
|
|
|
|
|
|
double weight(d); |
|
987
|
|
|
|
|
|
|
int rowids(nr); |
|
988
|
|
|
|
|
|
|
int index2(n2); |
|
989
|
|
|
|
|
|
|
double [o]dist(nr); |
|
990
|
|
|
|
|
|
|
; |
|
991
|
|
|
|
|
|
|
char *distFlag; |
|
992
|
|
|
|
|
|
|
char *methodFlag; |
|
993
|
|
|
|
|
|
|
) |
|
994
|
|
|
|
|
|
|
Types: (sbyte byte short ushort long ulong indx ulonglong longlong |
|
995
|
|
|
|
|
|
|
float double ldouble) |
|
996
|
|
|
|
|
|
|
|
|
997
|
|
|
|
|
|
|
=for usage |
|
998
|
|
|
|
|
|
|
|
|
999
|
|
|
|
|
|
|
$dist = clusterdistances($data, $mask, $weight, $rowids, $index2, $distFlag, $methodFlag); |
|
1000
|
|
|
|
|
|
|
clusterdistances($data, $mask, $weight, $rowids, $index2, $dist, $distFlag, $methodFlag); # all arguments given |
|
1001
|
|
|
|
|
|
|
$dist = $data->clusterdistances($mask, $weight, $rowids, $index2, $distFlag, $methodFlag); # method call |
|
1002
|
|
|
|
|
|
|
$data->clusterdistances($mask, $weight, $rowids, $index2, $dist, $distFlag, $methodFlag); |
|
1003
|
|
|
|
|
|
|
|
|
1004
|
|
|
|
|
|
|
Computes pairwise distance(s) from each of $rowids() as a singleton cluster |
|
1005
|
|
|
|
|
|
|
with the cluster represented by $index2(), which should be an index |
|
1006
|
|
|
|
|
|
|
vector as for clusterdistance(). See also clusterdistancematrixenc(). |
|
1007
|
|
|
|
|
|
|
|
|
1008
|
|
|
|
|
|
|
=pod |
|
1009
|
|
|
|
|
|
|
|
|
1010
|
|
|
|
|
|
|
Broadcasts over its inputs. |
|
1011
|
|
|
|
|
|
|
|
|
1012
|
|
|
|
|
|
|
=for bad |
|
1013
|
|
|
|
|
|
|
|
|
1014
|
|
|
|
|
|
|
clusterdistances() does not process bad values. |
|
1015
|
|
|
|
|
|
|
It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays. |
|
1016
|
|
|
|
|
|
|
|
|
1017
|
|
|
|
|
|
|
=cut |
|
1018
|
|
|
|
|
|
|
|
|
1019
|
|
|
|
|
|
|
|
|
1020
|
|
|
|
|
|
|
|
|
1021
|
|
|
|
|
|
|
|
|
1022
|
|
|
|
|
|
|
# Alias: bind this package's clusterdistances symbol to PDL::clusterdistances (glob assignment of a code ref).
*clusterdistances = \&PDL::clusterdistances; |
|
1023
|
|
|
|
|
|
|
|
|
1024
|
|
|
|
|
|
|
|
|
1025
|
|
|
|
|
|
|
|
|
1026
|
|
|
|
|
|
|
|
|
1027
|
|
|
|
|
|
|
|
|
1028
|
|
|
|
|
|
|
|
|
1029
|
|
|
|
|
|
|
=head2 clustersizes |
|
1030
|
|
|
|
|
|
|
|
|
1031
|
|
|
|
|
|
|
=for sig |
|
1032
|
|
|
|
|
|
|
|
|
1033
|
|
|
|
|
|
|
Signature: (int clusterids(n); int [o]clustersizes(k)) |
|
1034
|
|
|
|
|
|
|
Types: (sbyte byte short ushort long ulong indx ulonglong longlong |
|
1035
|
|
|
|
|
|
|
float double ldouble) |
|
1036
|
|
|
|
|
|
|
|
|
1037
|
|
|
|
|
|
|
=for usage |
|
1038
|
|
|
|
|
|
|
|
|
1039
|
|
|
|
|
|
|
$clustersizes = clustersizes($clusterids); |
|
1040
|
|
|
|
|
|
|
clustersizes($clusterids, $clustersizes); # all arguments given |
|
1041
|
|
|
|
|
|
|
$clustersizes = $clusterids->clustersizes; # method call |
|
1042
|
|
|
|
|
|
|
$clusterids->clustersizes($clustersizes); |
|
1043
|
|
|
|
|
|
|
|
|
1044
|
|
|
|
|
|
|
Computes the size (number of elements) of each cluster in $clusterids(). |
|
1045
|
|
|
|
|
|
|
Useful for allocating less than maximal space for $clusterelements(). |
|
1046
|
|
|
|
|
|
|
|
|
1047
|
|
|
|
|
|
|
=pod |
|
1048
|
|
|
|
|
|
|
|
|
1049
|
|
|
|
|
|
|
Broadcasts over its inputs. |
|
1050
|
|
|
|
|
|
|
|
|
1051
|
|
|
|
|
|
|
=for bad |
|
1052
|
|
|
|
|
|
|
|
|
1053
|
|
|
|
|
|
|
The output piddle should never be marked BAD. |
|
1054
|
|
|
|
|
|
|
|
|
1055
|
|
|
|
|
|
|
=cut |
|
1056
|
|
|
|
|
|
|
|
|
1057
|
|
|
|
|
|
|
|
|
1058
|
|
|
|
|
|
|
|
|
1059
|
|
|
|
|
|
|
|
|
1060
|
|
|
|
|
|
|
# Alias: bind this package's clustersizes symbol to PDL::clustersizes (glob assignment of a code ref).
*clustersizes = \&PDL::clustersizes; |
|
1061
|
|
|
|
|
|
|
|
|
1062
|
|
|
|
|
|
|
|
|
1063
|
|
|
|
|
|
|
|
|
1064
|
|
|
|
|
|
|
|
|
1065
|
|
|
|
|
|
|
|
|
1066
|
|
|
|
|
|
|
|
|
1067
|
|
|
|
|
|
|
=head2 clusterelements |
|
1068
|
|
|
|
|
|
|
|
|
1069
|
|
|
|
|
|
|
=for sig |
|
1070
|
|
|
|
|
|
|
|
|
1071
|
|
|
|
|
|
|
Signature: (int clusterids(n); int [o]clustersizes(k); int [o]eltids(mcsize,k)) |
|
1072
|
|
|
|
|
|
|
Types: (sbyte byte short ushort long ulong indx ulonglong longlong |
|
1073
|
|
|
|
|
|
|
float double ldouble) |
|
1074
|
|
|
|
|
|
|
|
|
1075
|
|
|
|
|
|
|
=for usage |
|
1076
|
|
|
|
|
|
|
|
|
1077
|
|
|
|
|
|
|
($clustersizes, $eltids) = clusterelements($clusterids); |
|
1078
|
|
|
|
|
|
|
clusterelements($clusterids, $clustersizes, $eltids); # all arguments given |
|
1079
|
|
|
|
|
|
|
($clustersizes, $eltids) = $clusterids->clusterelements; # method call |
|
1080
|
|
|
|
|
|
|
$clusterids->clusterelements($clustersizes, $eltids); |
|
1081
|
|
|
|
|
|
|
|
|
1082
|
|
|
|
|
|
|
Converts the vector $clusterids() to a matrix $eltids() of element (row) indices |
|
1083
|
|
|
|
|
|
|
indexed by cluster-id. $mcsize() is the maximum number of elements per cluster, |
|
1084
|
|
|
|
|
|
|
at most $n. The output PDLs $clustersizes() and $eltids() can be passed to |
|
1085
|
|
|
|
|
|
|
clusterdistancematrix(). |
|
1086
|
|
|
|
|
|
|
|
|
1087
|
|
|
|
|
|
|
=pod |
|
1088
|
|
|
|
|
|
|
|
|
1089
|
|
|
|
|
|
|
Broadcasts over its inputs. |
|
1090
|
|
|
|
|
|
|
|
|
1091
|
|
|
|
|
|
|
=for bad |
|
1092
|
|
|
|
|
|
|
|
|
1093
|
|
|
|
|
|
|
clusterelements() does not process bad values. |
|
1094
|
|
|
|
|
|
|
It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays. |
|
1095
|
|
|
|
|
|
|
|
|
1096
|
|
|
|
|
|
|
=cut |
|
1097
|
|
|
|
|
|
|
|
|
1098
|
|
|
|
|
|
|
|
|
1099
|
|
|
|
|
|
|
|
|
1100
|
|
|
|
|
|
|
|
|
1101
|
|
|
|
|
|
|
# Alias: bind this package's clusterelements symbol to PDL::clusterelements (glob assignment of a code ref).
*clusterelements = \&PDL::clusterelements; |
|
1102
|
|
|
|
|
|
|
|
|
1103
|
|
|
|
|
|
|
|
|
1104
|
|
|
|
|
|
|
|
|
1105
|
|
|
|
|
|
|
|
|
1106
|
|
|
|
|
|
|
|
|
1107
|
|
|
|
|
|
|
|
|
1108
|
|
|
|
|
|
|
=head2 clusterelementmask |
|
1109
|
|
|
|
|
|
|
|
|
1110
|
|
|
|
|
|
|
=for sig |
|
1111
|
|
|
|
|
|
|
|
|
1112
|
|
|
|
|
|
|
Signature: (int clusterids(n); byte [o]eltmask(k,n)) |
|
1113
|
|
|
|
|
|
|
Types: (sbyte byte short ushort long ulong indx ulonglong longlong |
|
1114
|
|
|
|
|
|
|
float double ldouble) |
|
1115
|
|
|
|
|
|
|
|
|
1116
|
|
|
|
|
|
|
=for usage |
|
1117
|
|
|
|
|
|
|
|
|
1118
|
|
|
|
|
|
|
$eltmask = clusterelementmask($clusterids); |
|
1119
|
|
|
|
|
|
|
clusterelementmask($clusterids, $eltmask); # all arguments given |
|
1120
|
|
|
|
|
|
|
$eltmask = $clusterids->clusterelementmask; # method call |
|
1121
|
|
|
|
|
|
|
$clusterids->clusterelementmask($eltmask); |
|
1122
|
|
|
|
|
|
|
|
|
1123
|
|
|
|
|
|
|
Get boolean membership mask $eltmask() based on cluster assignment in $clusterids(). |
|
1124
|
|
|
|
|
|
|
No value in $clusterids() may be greater than or equal to $k. |
|
1125
|
|
|
|
|
|
|
On completion, $eltmask(k,n) is a true value iff $clusterids(n)=$k. |
|
1126
|
|
|
|
|
|
|
|
|
1127
|
|
|
|
|
|
|
=pod |
|
1128
|
|
|
|
|
|
|
|
|
1129
|
|
|
|
|
|
|
Broadcasts over its inputs. |
|
1130
|
|
|
|
|
|
|
|
|
1131
|
|
|
|
|
|
|
=for bad |
|
1132
|
|
|
|
|
|
|
|
|
1133
|
|
|
|
|
|
|
clusterelementmask() does not process bad values. |
|
1134
|
|
|
|
|
|
|
It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays. |
|
1135
|
|
|
|
|
|
|
|
|
1136
|
|
|
|
|
|
|
=cut |
|
1137
|
|
|
|
|
|
|
|
|
1138
|
|
|
|
|
|
|
|
|
1139
|
|
|
|
|
|
|
|
|
1140
|
|
|
|
|
|
|
|
|
1141
|
|
|
|
|
|
|
# Alias: bind this package's clusterelementmask symbol to PDL::clusterelementmask (glob assignment of a code ref).
*clusterelementmask = \&PDL::clusterelementmask; |
|
1142
|
|
|
|
|
|
|
|
|
1143
|
|
|
|
|
|
|
|
|
1144
|
|
|
|
|
|
|
|
|
1145
|
|
|
|
|
|
|
|
|
1146
|
|
|
|
|
|
|
|
|
1147
|
|
|
|
|
|
|
|
|
1148
|
|
|
|
|
|
|
=head2 clusterdistancematrix |
|
1149
|
|
|
|
|
|
|
|
|
1150
|
|
|
|
|
|
|
=for sig |
|
1151
|
|
|
|
|
|
|
|
|
1152
|
|
|
|
|
|
|
Signature: ( |
|
1153
|
|
|
|
|
|
|
double data(d,n); |
|
1154
|
|
|
|
|
|
|
int mask(d,n); |
|
1155
|
|
|
|
|
|
|
double weight(d); |
|
1156
|
|
|
|
|
|
|
int rowids(nr); |
|
1157
|
|
|
|
|
|
|
int clustersizes(k); |
|
1158
|
|
|
|
|
|
|
int eltids(mcsize,k); |
|
1159
|
|
|
|
|
|
|
double [o]dist(k,nr); |
|
1160
|
|
|
|
|
|
|
; |
|
1161
|
|
|
|
|
|
|
char *distFlag; |
|
1162
|
|
|
|
|
|
|
char *methodFlag; |
|
1163
|
|
|
|
|
|
|
) |
|
1164
|
|
|
|
|
|
|
Types: (sbyte byte short ushort long ulong indx ulonglong longlong |
|
1165
|
|
|
|
|
|
|
float double ldouble) |
|
1166
|
|
|
|
|
|
|
|
|
1167
|
|
|
|
|
|
|
=for usage |
|
1168
|
|
|
|
|
|
|
|
|
1169
|
|
|
|
|
|
|
$dist = clusterdistancematrix($data, $mask, $weight, $rowids, $clustersizes, $eltids, $distFlag, $methodFlag); |
|
1170
|
|
|
|
|
|
|
clusterdistancematrix($data, $mask, $weight, $rowids, $clustersizes, $eltids, $dist, $distFlag, $methodFlag); # all arguments given |
|
1171
|
|
|
|
|
|
|
$dist = $data->clusterdistancematrix($mask, $weight, $rowids, $clustersizes, $eltids, $distFlag, $methodFlag); # method call |
|
1172
|
|
|
|
|
|
|
$data->clusterdistancematrix($mask, $weight, $rowids, $clustersizes, $eltids, $dist, $distFlag, $methodFlag); |
|
1173
|
|
|
|
|
|
|
|
|
1174
|
|
|
|
|
|
|
Deprecated in favor of clusterdistancematrixenc(). |
|
1175
|
|
|
|
|
|
|
In the future, this method is expected to become a wrapper for clusterdistancematrixenc(). |
|
1176
|
|
|
|
|
|
|
|
|
1177
|
|
|
|
|
|
|
Computes distance between each row index in $rowids() |
|
1178
|
|
|
|
|
|
|
considered as a singleton cluster |
|
1179
|
|
|
|
|
|
|
and each of the $k clusters whose elements are given by a single row of $eltids(). |
|
1180
|
|
|
|
|
|
|
$clustersizes() and $eltids() are as output by the clusterelements() method. |
|
1181
|
|
|
|
|
|
|
|
|
1182
|
|
|
|
|
|
|
See also clusterdistance(), clusterdistances(), clustersizes(), clusterelements(), clusterdistancematrixenc(). |
|
1183
|
|
|
|
|
|
|
|
|
1184
|
|
|
|
|
|
|
=pod |
|
1185
|
|
|
|
|
|
|
|
|
1186
|
|
|
|
|
|
|
Broadcasts over its inputs. |
|
1187
|
|
|
|
|
|
|
|
|
1188
|
|
|
|
|
|
|
=for bad |
|
1189
|
|
|
|
|
|
|
|
|
1190
|
|
|
|
|
|
|
clusterdistancematrix() does not process bad values. |
|
1191
|
|
|
|
|
|
|
It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays. |
|
1192
|
|
|
|
|
|
|
|
|
1193
|
|
|
|
|
|
|
=cut |
|
1194
|
|
|
|
|
|
|
|
|
1195
|
|
|
|
|
|
|
|
|
1196
|
|
|
|
|
|
|
|
|
1197
|
|
|
|
|
|
|
|
|
1198
|
|
|
|
|
|
|
# Alias: bind this package's clusterdistancematrix symbol to PDL::clusterdistancematrix (glob assignment of a code ref).
*clusterdistancematrix = \&PDL::clusterdistancematrix; |
|
1199
|
|
|
|
|
|
|
|
|
1200
|
|
|
|
|
|
|
|
|
1201
|
|
|
|
|
|
|
|
|
1202
|
|
|
|
|
|
|
|
|
1203
|
|
|
|
|
|
|
|
|
1204
|
|
|
|
|
|
|
#line 1196 "Cluster.pd" |
|
1205
|
|
|
|
|
|
|
|
|
1206
|
|
|
|
|
|
|
=pod |
|
1207
|
|
|
|
|
|
|
|
|
1208
|
|
|
|
|
|
|
=head2 clusterenc |
|
1209
|
|
|
|
|
|
|
|
|
1210
|
|
|
|
|
|
|
=for sig |
|
1211
|
|
|
|
|
|
|
|
|
1212
|
|
|
|
|
|
|
Signature: ( |
|
1213
|
|
|
|
|
|
|
int clusterids(n); |
|
1214
|
|
|
|
|
|
|
int [o]clusterlens(k1); |
|
1215
|
|
|
|
|
|
|
int [o]clustervals(k1); |
|
1216
|
|
|
|
|
|
|
int [o]clusterrows(n); |
|
1217
|
|
|
|
|
|
|
; |
|
1218
|
|
|
|
|
|
|
int k1; |
|
1219
|
|
|
|
|
|
|
) |
|
1220
|
|
|
|
|
|
|
|
|
1221
|
|
|
|
|
|
|
Encodes datum-to-cluster vector $clusterids() for efficiently mapping |
|
1222
|
|
|
|
|
|
|
clusters-to-data. Returned PDL $clusterlens() holds the lengths of each |
|
1223
|
|
|
|
|
|
|
cluster containing at least one element. $clustervals() holds the IDs |
|
1224
|
|
|
|
|
|
|
of such clusters as they appear as values in $clusterids(). $clusterrows() |
|
1225
|
|
|
|
|
|
|
is such that: |
|
1226
|
|
|
|
|
|
|
|
|
1227
|
|
|
|
|
|
|
all( rld($clusterlens, $clustervals) == $clusterids->index($clusterrows) ) |
|
1228
|
|
|
|
|
|
|
|
|
1229
|
|
|
|
|
|
|
... if all available cluster-ids are in use. |
|
1230
|
|
|
|
|
|
|
|
|
1231
|
|
|
|
|
|
|
If specified, $k1 is a perl scalar |
|
1232
|
|
|
|
|
|
|
holding the number of clusters (maximum cluster index + 1); an |
|
1233
|
|
|
|
|
|
|
appropriate value will be guessed from $clusterids() otherwise. |
|
1234
|
|
|
|
|
|
|
|
|
1235
|
|
|
|
|
|
|
Really just a wrapper for some lower-level PDL and PDL::Cluster calls. |
|
1236
|
|
|
|
|
|
|
|
|
1237
|
|
|
|
|
|
|
=cut |
|
1238
|
|
|
|
|
|
|
|
|
1239
|
|
|
|
|
|
|
sub clusterenc {
  ## ($clens,$cvals,$crows) = clusterenc($cids, $clens,$cvals,$crows, $kmax)
  ##  + $cids  : datum-to-cluster vector (required)
  ##  + $clens : optional output: per-cluster sizes, filled by clustersizes()
  ##  + $cvals : optional output: cluster-id values, always the flat sequence 0..($kmax-1)
  ##  + $crows : optional output: result of qsorti() on $cids
  ##  + $kmax  : optional number of clusters; defaults to max($cids)+1
  ##  + outputs the caller passes in are assigned in place (.=); missing ones are allocated
  my ($cids, $clens, $cvals, $crows, $kmax) = @_;

  ##-- number of clusters: guess from the largest cluster-id if not given
  unless (defined($kmax)) {
    $kmax = $cids->max + 1;
  }

  ##-- cluster sizes: allocate the counter vector if needed, then let clustersizes() fill it
  unless (defined($clens)) {
    $clens = zeroes(long, $kmax);
  }
  clustersizes($cids, $clens);

  ##-- cluster-id values: one id per cluster, in ascending order
  my $idseq = PDL->sequence(long, $kmax);
  if (defined($cvals)) {
    $cvals .= $idseq;
  }
  else {
    $cvals = $idseq;
  }

  ##-- cluster-row values: BAD and negative ids are NOT filtered out here,
  ##   they are sorted along with everything else
  my $roworder = $cids->qsorti;
  if (defined($crows)) {
    $crows .= $roworder;
  }
  else {
    $crows = $roworder;
  }

  return ($clens, $cvals, $crows);
}
|
1261
|
|
|
|
|
|
|
|
|
1262
|
|
|
|
|
|
|
#line 1262 "Cluster.pd" |
|
1263
|
|
|
|
|
|
|
|
|
1264
|
|
|
|
|
|
|
=pod |
|
1265
|
|
|
|
|
|
|
|
|
1266
|
|
|
|
|
|
|
=head2 clusterdec |
|
1267
|
|
|
|
|
|
|
|
|
1268
|
|
|
|
|
|
|
=for sig |
|
1269
|
|
|
|
|
|
|
|
|
1270
|
|
|
|
|
|
|
Signature: ( |
|
1271
|
|
|
|
|
|
|
int clusterlens(k1); |
|
1272
|
|
|
|
|
|
|
int clustervals(k1); |
|
1273
|
|
|
|
|
|
|
int clusterrows(n); |
|
1274
|
|
|
|
|
|
|
int [o]clusterids(n); |
|
1275
|
|
|
|
|
|
|
) |
|
1276
|
|
|
|
|
|
|
|
|
1277
|
|
|
|
|
|
|
Decodes cluster-to-datum vectors ($clusterlens,$clustervals,$clusterrows) |
|
1278
|
|
|
|
|
|
|
into a single datum-to-cluster vector $clusterids(). |
|
1279
|
|
|
|
|
|
|
($clusterlens,$clustervals,$clusterrows) are as returned by the clusterenc() method. |
|
1280
|
|
|
|
|
|
|
|
|
1281
|
|
|
|
|
|
|
Un-addressed row-index values in $clusterrows() will be assigned the pseudo-cluster (-1) |
|
1282
|
|
|
|
|
|
|
in $clusterids(). |
|
1283
|
|
|
|
|
|
|
|
|
1284
|
|
|
|
|
|
|
Really just a wrapper for some lower-level PDL calls. |
|
1285
|
|
|
|
|
|
|
|
|
1286
|
|
|
|
|
|
|
=cut |
|
1287
|
|
|
|
|
|
|
|
|
1288
|
|
|
|
|
|
|
sub clusterdec {
  ## $cids = clusterdec($clens,$cvals,$crows, $cids2)
  ##  + $clens, $cvals, $crows : encoded clustering, as returned by clusterenc()
  ##  + $cids2 : optional output PDL; allocated with the type of $cvals and
  ##             the dimensions of $crows if not supplied
  ##  + every cell of the output is first set to -1; only cells addressed by
  ##    the trailing sum($clens) entries of $crows are then overwritten
  my ($clens, $cvals, $crows, $cids2) = @_;

  ##-- output vector: allocate on demand, then pre-fill with the pseudo-cluster -1
  unless (defined($cids2)) {
    $cids2 = zeroes($cvals->type, $crows->dims);
  }
  $cids2 .= -1;

  ##-- trim $crows: keep only the last sum($clens) entries
  ##   (assumes any bad indices sit at the BEGINNING of $crows, e.g. -1)
  my $first      = -$clens->sum;
  my $crows_good = $crows->slice($first . ":-1");

  ##-- decode: run-length expand ($clens,$cvals) into the addressed cells of $cids2
  my $targets = $cids2->index($crows_good);
  $clens->rld($cvals, $targets);

  return $cids2;
}
|
1304
|
|
|
|
|
|
|
|
|
1305
|
|
|
|
|
|
|
#line 1312 "Cluster.pd" |
|
1306
|
|
|
|
|
|
|
|
|
1307
|
|
|
|
|
|
|
=pod |
|
1308
|
|
|
|
|
|
|
|
|
1309
|
|
|
|
|
|
|
=head2 clusteroffsets |
|
1310
|
|
|
|
|
|
|
|
|
1311
|
|
|
|
|
|
|
=for sig |
|
1312
|
|
|
|
|
|
|
|
|
1313
|
|
|
|
|
|
|
Signature: ( |
|
1314
|
|
|
|
|
|
|
int clusterids(n); |
|
1315
|
|
|
|
|
|
|
int [o]clusteroffsets(k1+1); |
|
1316
|
|
|
|
|
|
|
int [o]clustervals(k1); |
|
1317
|
|
|
|
|
|
|
int [o]clusterrows(n); |
|
1318
|
|
|
|
|
|
|
; |
|
1319
|
|
|
|
|
|
|
int k1; |
|
1320
|
|
|
|
|
|
|
) |
|
1321
|
|
|
|
|
|
|
|
|
1322
|
|
|
|
|
|
|
Encodes datum-to-cluster vector $clusterids() for efficiently mapping |
|
1323
|
|
|
|
|
|
|
clusters-to-data. Like clusterenc(), but returns cumulative offsets |
|
1324
|
|
|
|
|
|
|
instead of lengths. |
|
1325
|
|
|
|
|
|
|
|
|
1326
|
|
|
|
|
|
|
Really just a wrapper for clusterenc(), cumusumover(), and append(). |
|
1327
|
|
|
|
|
|
|
|
|
1328
|
|
|
|
|
|
|
=cut |
|
1329
|
|
|
|
|
|
|
|
|
1330
|
|
|
|
|
|
|
sub clusteroffsets {
  ## ($coffsets,$cvals,$crows) = clusteroffsets($cids, $coffsets,$cvals,$crows, $kmax)
  ##  + like clusterenc(), but the first return value holds cumulative offsets
  ##    (0, len[0], len[0]+len[1], ..., sum(len)) instead of cluster lengths
  ##  + $coffsets : optional output PDL of size ($kmax+1); if supplied it is
  ##    filled in place, in the same way clusterenc() treats its optional outputs
  ##  + $cvals, $crows, $kmax : passed through to clusterenc()
  my ($cids, $coffsets, $cvals, $crows, $kmax) = @_;

  ##-- encode; cluster lengths are always freshly allocated (undef)
  my $clens;
  ($clens, $cvals, $crows) = clusterenc($cids, undef, $cvals, $crows, $kmax);

  ##-- lengths -> offsets: append a 0, rotate it to the front, take the running sum
  my $offsets = $clens->append(0)->rotate(1)->cumusumover;

  ##-- honor a caller-supplied output PDL instead of silently discarding it
  if (defined($coffsets)) {
    $coffsets .= $offsets;
  }
  else {
    $coffsets = $offsets;
  }

  return ($coffsets, $cvals, $crows);
}
|
1338
|
|
|
|
|
|
|
#line 1339 "Cluster.pm" |
|
1339
|
|
|
|
|
|
|
|
|
1340
|
|
|
|
|
|
|
|
|
1341
|
|
|
|
|
|
|
=head2 clusterdistancematrixenc |
|
1342
|
|
|
|
|
|
|
|
|
1343
|
|
|
|
|
|
|
=for sig |
|
1344
|
|
|
|
|
|
|
|
|
1345
|
|
|
|
|
|
|
Signature: ( |
|
1346
|
|
|
|
|
|
|
double data(d,n); |
|
1347
|
|
|
|
|
|
|
int mask(d,n); |
|
1348
|
|
|
|
|
|
|
double weight(d); |
|
1349
|
|
|
|
|
|
|
int clens1(k1); |
|
1350
|
|
|
|
|
|
|
int crowids1(nc1); |
|
1351
|
|
|
|
|
|
|
int clens2(k2); |
|
1352
|
|
|
|
|
|
|
int crowids2(nc2); |
|
1353
|
|
|
|
|
|
|
double [o]dist(k1,k2); |
|
1354
|
|
|
|
|
|
|
; |
|
1355
|
|
|
|
|
|
|
char *distFlag; |
|
1356
|
|
|
|
|
|
|
char *methodFlag; |
|
1357
|
|
|
|
|
|
|
) |
|
1358
|
|
|
|
|
|
|
Types: (sbyte byte short ushort long ulong indx ulonglong longlong |
|
1359
|
|
|
|
|
|
|
float double ldouble) |
|
1360
|
|
|
|
|
|
|
|
|
1361
|
|
|
|
|
|
|
=for usage |
|
1362
|
|
|
|
|
|
|
|
|
1363
|
|
|
|
|
|
|
$dist = clusterdistancematrixenc($data, $mask, $weight, $clens1, $crowids1, $clens2, $crowids2, $distFlag, $methodFlag); |
|
1364
|
|
|
|
|
|
|
clusterdistancematrixenc($data, $mask, $weight, $clens1, $crowids1, $clens2, $crowids2, $dist, $distFlag, $methodFlag); # all arguments given |
|
1365
|
|
|
|
|
|
|
$dist = $data->clusterdistancematrixenc($mask, $weight, $clens1, $crowids1, $clens2, $crowids2, $distFlag, $methodFlag); # method call |
|
1366
|
|
|
|
|
|
|
$data->clusterdistancematrixenc($mask, $weight, $clens1, $crowids1, $clens2, $crowids2, $dist, $distFlag, $methodFlag); |
|
1367
|
|
|
|
|
|
|
|
|
1368
|
|
|
|
|
|
|
Computes cluster-distance between each pair of clusters in (sequence($k1) x sequence($k2)), where 'x' |
|
1369
|
|
|
|
|
|
|
is the Cartesian product. Cluster contents are passed as pairs ($clens(),$crowids()) as returned |
|
1370
|
|
|
|
|
|
|
by the clusterenc() function (assuming that the $cvals() vector returned by clusterenc() is a flat sequence). |
|
1371
|
|
|
|
|
|
|
|
|
1372
|
|
|
|
|
|
|
The deprecated method clusterdistancematrix() can be simulated by this function in the following |
|
1373
|
|
|
|
|
|
|
manner: if a clusterdistancematrix() call was: |
|
1374
|
|
|
|
|
|
|
|
|
1375
|
|
|
|
|
|
|
clustersizes ($cids, $csizes=zeroes(long,$k)); |
|
1376
|
|
|
|
|
|
|
clusterelements($cids, $celts =zeroes(long,$csizes->max)-1); |
|
1377
|
|
|
|
|
|
|
clusterdistancematrix($data,$msk,$wt, $rowids, $csizes,$celts, |
|
1378
|
|
|
|
|
|
|
$cdmat=zeroes(double,$k,$rowids->dim(0)), |
|
1379
|
|
|
|
|
|
|
$distFlag, $methodFlag |
|
1380
|
|
|
|
|
|
|
); |
|
1381
|
|
|
|
|
|
|
|
|
1382
|
|
|
|
|
|
|
Then the corresponding use of clusterdistancematrixenc() would be: |
|
1383
|
|
|
|
|
|
|
|
|
1384
|
|
|
|
|
|
|
($clens,$cvals,$crows) = clusterenc($cids); |
|
1385
|
|
|
|
|
|
|
clusterdistancematrixenc($data,$msk,$wt, |
|
1386
|
|
|
|
|
|
|
$clens, $crows, ##-- "real" clusters in output dim 0 |
|
1387
|
|
|
|
|
|
|
$rowids->ones, $rowids, ##-- $rowids as singleton clusters in output dim 1 |
|
1388
|
|
|
|
|
|
|
$cdmat=zeroes(double,$clens->dim(0),$rowids->dim(0)), |
|
1389
|
|
|
|
|
|
|
$distFlag, $methodFlag); |
|
1390
|
|
|
|
|
|
|
|
|
1391
|
|
|
|
|
|
|
If your $cvals() are not a flat sequence, you will probably need to do some index-twiddling |
|
1392
|
|
|
|
|
|
|
to get things into the proper shape: |
|
1393
|
|
|
|
|
|
|
|
|
1394
|
|
|
|
|
|
|
if ( !all($cvals==$cvals->sequence) || $cvals->dim(0) != $k ) |
|
1395
|
|
|
|
|
|
|
{ |
|
1396
|
|
|
|
|
|
|
my $cdmat0 = $cdmat; |
|
1397
|
|
|
|
|
|
|
my $nr = $rowids->dim(0); |
|
1398
|
|
|
|
|
|
|
$cdmat = pdl(double,"inf")->slice("*$k,*$nr")->make_physical(); ##-- "missing" distances are infinite |
|
1399
|
|
|
|
|
|
|
$cdmat->dice_axis(0,$cvals) .= $cdmat0; |
|
1400
|
|
|
|
|
|
|
} |
|
1401
|
|
|
|
|
|
|
|
|
1402
|
|
|
|
|
|
|
$distFlag and $methodFlag are interpreted as for clusterdistance(). |
|
1403
|
|
|
|
|
|
|
|
|
1404
|
|
|
|
|
|
|
See also clusterenc(), clusterdistancematrix(). |
|
1405
|
|
|
|
|
|
|
|
|
1406
|
|
|
|
|
|
|
=pod |
|
1407
|
|
|
|
|
|
|
|
|
1408
|
|
|
|
|
|
|
Broadcasts over its inputs. |
|
1409
|
|
|
|
|
|
|
|
|
1410
|
|
|
|
|
|
|
=for bad |
|
1411
|
|
|
|
|
|
|
|
|
1412
|
|
|
|
|
|
|
clusterdistancematrixenc() does not process bad values. |
|
1413
|
|
|
|
|
|
|
It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays. |
|
1414
|
|
|
|
|
|
|
|
|
1415
|
|
|
|
|
|
|
=cut |
|
1416
|
|
|
|
|
|
|
|
|
1417
|
|
|
|
|
|
|
|
|
1418
|
|
|
|
|
|
|
|
|
1419
|
|
|
|
|
|
|
|
|
1420
|
|
|
|
|
|
|
# Alias: bind this package's clusterdistancematrixenc symbol to PDL::clusterdistancematrixenc (glob assignment of a code ref).
*clusterdistancematrixenc = \&PDL::clusterdistancematrixenc; |
|
1421
|
|
|
|
|
|
|
|
|
1422
|
|
|
|
|
|
|
|
|
1423
|
|
|
|
|
|
|
|
|
1424
|
|
|
|
|
|
|
|
|
1425
|
|
|
|
|
|
|
|
|
1426
|
|
|
|
|
|
|
|
|
1427
|
|
|
|
|
|
|
=head2 clusterdistancesenc |
|
1428
|
|
|
|
|
|
|
|
|
1429
|
|
|
|
|
|
|
=for sig |
|
1430
|
|
|
|
|
|
|
|
|
1431
|
|
|
|
|
|
|
Signature: ( |
|
1432
|
|
|
|
|
|
|
double data(d,n); |
|
1433
|
|
|
|
|
|
|
int mask(d,n); |
|
1434
|
|
|
|
|
|
|
double weight(d); |
|
1435
|
|
|
|
|
|
|
int coffsets1(k1); |
|
1436
|
|
|
|
|
|
|
int crowids1(nc1); |
|
1437
|
|
|
|
|
|
|
int cwhich1(ncmps); |
|
1438
|
|
|
|
|
|
|
int coffsets2(k2); |
|
1439
|
|
|
|
|
|
|
int crowids2(nc2); |
|
1440
|
|
|
|
|
|
|
int cwhich2(ncmps); |
|
1441
|
|
|
|
|
|
|
double [o]dists(ncmps); |
|
1442
|
|
|
|
|
|
|
; |
|
1443
|
|
|
|
|
|
|
char *distFlag; |
|
1444
|
|
|
|
|
|
|
char *methodFlag; |
|
1445
|
|
|
|
|
|
|
) |
|
1446
|
|
|
|
|
|
|
Types: (sbyte byte short ushort long ulong indx ulonglong longlong |
|
1447
|
|
|
|
|
|
|
float double ldouble) |
|
1448
|
|
|
|
|
|
|
|
|
1449
|
|
|
|
|
|
|
=for usage |
|
1450
|
|
|
|
|
|
|
|
|
1451
|
|
|
|
|
|
|
$dists = clusterdistancesenc($data, $mask, $weight, $coffsets1, $crowids1, $cwhich1, $coffsets2, $crowids2, $cwhich2, $distFlag, $methodFlag); |
|
1452
|
|
|
|
|
|
|
clusterdistancesenc($data, $mask, $weight, $coffsets1, $crowids1, $cwhich1, $coffsets2, $crowids2, $cwhich2, $dists, $distFlag, $methodFlag); # all arguments given |
|
1453
|
|
|
|
|
|
|
$dists = $data->clusterdistancesenc($mask, $weight, $coffsets1, $crowids1, $cwhich1, $coffsets2, $crowids2, $cwhich2, $distFlag, $methodFlag); # method call |
|
1454
|
|
|
|
|
|
|
$data->clusterdistancesenc($mask, $weight, $coffsets1, $crowids1, $cwhich1, $coffsets2, $crowids2, $cwhich2, $dists, $distFlag, $methodFlag); |
|
1455
|
|
|
|
|
|
|
|
|
1456
|
|
|
|
|
|
|
Computes cluster-distance between selected pairs of co-indexed clusters in ($cwhich1,$cwhich2). |
|
1457
|
|
|
|
|
|
|
Cluster contents are passed as pairs ($coffsetsX(),$crowidsX()) as returned |
|
1458
|
|
|
|
|
|
|
by the clusteroffsets() function. |
|
1459
|
|
|
|
|
|
|
|
|
1460
|
|
|
|
|
|
|
$distFlag and $methodFlag are interpreted as for clusterdistance(). |
|
1461
|
|
|
|
|
|
|
|
|
1462
|
|
|
|
|
|
|
See also clusterenc(), clusterdistancematrixenc(). |
|
1463
|
|
|
|
|
|
|
|
|
1464
|
|
|
|
|
|
|
=pod |
|
1465
|
|
|
|
|
|
|
|
|
1466
|
|
|
|
|
|
|
Broadcasts over its inputs. |
|
1467
|
|
|
|
|
|
|
|
|
1468
|
|
|
|
|
|
|
=for bad |
|
1469
|
|
|
|
|
|
|
|
|
1470
|
|
|
|
|
|
|
C<clusterdistancesenc> does not process bad values. |
|
1471
|
|
|
|
|
|
|
It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays. |
|
1472
|
|
|
|
|
|
|
|
|
1473
|
|
|
|
|
|
|
=cut |
|
1474
|
|
|
|
|
|
|
|
|
1475
|
|
|
|
|
|
|
|
|
1476
|
|
|
|
|
|
|
|
|
1477
|
|
|
|
|
|
|
|
|
1478
|
|
|
|
|
|
|
# Alias the implementation installed as PDL::clusterdistancesenc into this package (PDL::Cluster).
*clusterdistancesenc = \&PDL::clusterdistancesenc; |
|
1479
|
|
|
|
|
|
|
|
|
1480
|
|
|
|
|
|
|
|
|
1481
|
|
|
|
|
|
|
|
|
1482
|
|
|
|
|
|
|
|
|
1483
|
|
|
|
|
|
|
|
|
1484
|
|
|
|
|
|
|
|
|
1485
|
|
|
|
|
|
|
=head2 getclusterwsum |
|
1486
|
|
|
|
|
|
|
|
|
1487
|
|
|
|
|
|
|
=for sig |
|
1488
|
|
|
|
|
|
|
|
|
1489
|
|
|
|
|
|
|
Signature: ( |
|
1490
|
|
|
|
|
|
|
double data(d,n); |
|
1491
|
|
|
|
|
|
|
int mask(d,n); |
|
1492
|
|
|
|
|
|
|
double clusterwts(k,n); |
|
1493
|
|
|
|
|
|
|
double [o]cdata(d,k); |
|
1494
|
|
|
|
|
|
|
int [o]cmask(d,k); |
|
1495
|
|
|
|
|
|
|
) |
|
1496
|
|
|
|
|
|
|
Types: (sbyte byte short ushort long ulong indx ulonglong longlong |
|
1497
|
|
|
|
|
|
|
float double ldouble) |
|
1498
|
|
|
|
|
|
|
|
|
1499
|
|
|
|
|
|
|
=for usage |
|
1500
|
|
|
|
|
|
|
|
|
1501
|
|
|
|
|
|
|
($cdata, $cmask) = getclusterwsum($data, $mask, $clusterwts); |
|
1502
|
|
|
|
|
|
|
getclusterwsum($data, $mask, $clusterwts, $cdata, $cmask); # all arguments given |
|
1503
|
|
|
|
|
|
|
($cdata, $cmask) = $data->getclusterwsum($mask, $clusterwts); # method call |
|
1504
|
|
|
|
|
|
|
$data->getclusterwsum($mask, $clusterwts, $cdata, $cmask); |
|
1505
|
|
|
|
|
|
|
|
|
1506
|
|
|
|
|
|
|
Find cluster centroids by weighted sum. This can be considered an |
|
1507
|
|
|
|
|
|
|
expensive generalization of the getclustermean() and getclustermedian() |
|
1508
|
|
|
|
|
|
|
functions. Here, the input PDLs $data() and $mask(), as well as the |
|
1509
|
|
|
|
|
|
|
output PDL $cdata() are as for getclustermean(). The matrix $clusterwts() |
|
1510
|
|
|
|
|
|
|
determines the relative weight of each data row in determining the |
|
1511
|
|
|
|
|
|
|
centroid of each cluster, potentially useful for "fuzzy" clustering. |
|
1512
|
|
|
|
|
|
|
The equation used to compute cluster means is: |
|
1513
|
|
|
|
|
|
|
|
|
1514
|
|
|
|
|
|
|
$cdata(d,k) = sum_{n} $clusterwts(k,n) * $data(d,n) * $mask(d,n) |
|
1515
|
|
|
|
|
|
|
|
|
1516
|
|
|
|
|
|
|
For centroids in the same range as data elements, $clusterwts() |
|
1517
|
|
|
|
|
|
|
should sum to 1 over each column (k): |
|
1518
|
|
|
|
|
|
|
|
|
1519
|
|
|
|
|
|
|
all($clusterwts->xchg(0,1)->sumover == 1) |
|
1520
|
|
|
|
|
|
|
|
|
1521
|
|
|
|
|
|
|
getclustermean() can be simulated by instantiating $clusterwts() with |
|
1522
|
|
|
|
|
|
|
a uniform distribution over cluster elements: |
|
1523
|
|
|
|
|
|
|
|
|
1524
|
|
|
|
|
|
|
$clusterwts = zeroes($k,$n); |
|
1525
|
|
|
|
|
|
|
$clusterwts->indexND(cat($clusterids, xvals($clusterids))->xchg(0,1)) .= 1; |
|
1526
|
|
|
|
|
|
|
$clusterwts /= $clusterwts->xchg(0,1)->sumover; |
|
1527
|
|
|
|
|
|
|
getclusterwsum($data,$mask, $clusterwts, $cdata=zeroes($d,$k)); |
|
1528
|
|
|
|
|
|
|
|
|
1529
|
|
|
|
|
|
|
Similarly, getclustermedian() can be simulated by setting $clusterwts() to |
|
1530
|
|
|
|
|
|
|
1 for cluster medians and otherwise to 0. More sophisticated centroid |
|
1531
|
|
|
|
|
|
|
discovery methods can be computed by this function by setting |
|
1532
|
|
|
|
|
|
|
$clusterwts(k,n) to some estimate of the conditional probability |
|
1533
|
|
|
|
|
|
|
of the datum at row $n given the cluster with index $k: |
|
1534
|
|
|
|
|
|
|
p(Elt==n|Cluster==k). One |
|
1535
|
|
|
|
|
|
|
way to achieve such an estimate is to use (normalized inverses of) the |
|
1536
|
|
|
|
|
|
|
singleton-row-to-cluster distances as output by clusterdistancematrix(). |
|
1537
|
|
|
|
|
|
|
|
|
1538
|
|
|
|
|
|
|
=pod |
|
1539
|
|
|
|
|
|
|
|
|
1540
|
|
|
|
|
|
|
Broadcasts over its inputs. |
|
1541
|
|
|
|
|
|
|
|
|
1542
|
|
|
|
|
|
|
=for bad |
|
1543
|
|
|
|
|
|
|
|
|
1544
|
|
|
|
|
|
|
C<getclusterwsum> does not process bad values. |
|
1545
|
|
|
|
|
|
|
It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays. |
|
1546
|
|
|
|
|
|
|
|
|
1547
|
|
|
|
|
|
|
=cut |
|
1548
|
|
|
|
|
|
|
|
|
1549
|
|
|
|
|
|
|
|
|
1550
|
|
|
|
|
|
|
|
|
1551
|
|
|
|
|
|
|
|
|
1552
|
|
|
|
|
|
|
# Alias the implementation installed as PDL::getclusterwsum into this package (PDL::Cluster).
*getclusterwsum = \&PDL::getclusterwsum; |
|
1553
|
|
|
|
|
|
|
|
|
1554
|
|
|
|
|
|
|
|
|
1555
|
|
|
|
|
|
|
|
|
1556
|
|
|
|
|
|
|
|
|
1557
|
|
|
|
|
|
|
|
|
1558
|
|
|
|
|
|
|
|
|
1559
|
|
|
|
|
|
|
=head2 attachtonearest |
|
1560
|
|
|
|
|
|
|
|
|
1561
|
|
|
|
|
|
|
=for sig |
|
1562
|
|
|
|
|
|
|
|
|
1563
|
|
|
|
|
|
|
Signature: ( |
|
1564
|
|
|
|
|
|
|
double data(d,n); |
|
1565
|
|
|
|
|
|
|
int mask(d,n); |
|
1566
|
|
|
|
|
|
|
double weight(d); |
|
1567
|
|
|
|
|
|
|
int rowids(nr); |
|
1568
|
|
|
|
|
|
|
double cdata(d,k); |
|
1569
|
|
|
|
|
|
|
int cmask(d,k); |
|
1570
|
|
|
|
|
|
|
int [o]clusterids(nr); |
|
1571
|
|
|
|
|
|
|
double [o]cdist(nr); |
|
1572
|
|
|
|
|
|
|
; |
|
1573
|
|
|
|
|
|
|
char *distFlag; |
|
1574
|
|
|
|
|
|
|
char *methodFlag; |
|
1575
|
|
|
|
|
|
|
) |
|
1576
|
|
|
|
|
|
|
Types: (sbyte byte short ushort long ulong indx ulonglong longlong |
|
1577
|
|
|
|
|
|
|
float double ldouble) |
|
1578
|
|
|
|
|
|
|
|
|
1579
|
|
|
|
|
|
|
=for usage |
|
1580
|
|
|
|
|
|
|
|
|
1581
|
|
|
|
|
|
|
($clusterids, $cdist) = attachtonearest($data, $mask, $weight, $rowids, $cdata, $cmask, $distFlag, $methodFlag); |
|
1582
|
|
|
|
|
|
|
attachtonearest($data, $mask, $weight, $rowids, $cdata, $cmask, $clusterids, $cdist, $distFlag, $methodFlag); # all arguments given |
|
1583
|
|
|
|
|
|
|
($clusterids, $cdist) = $data->attachtonearest($mask, $weight, $rowids, $cdata, $cmask, $distFlag, $methodFlag); # method call |
|
1584
|
|
|
|
|
|
|
$data->attachtonearest($mask, $weight, $rowids, $cdata, $cmask, $clusterids, $cdist, $distFlag, $methodFlag); |
|
1585
|
|
|
|
|
|
|
|
|
1586
|
|
|
|
|
|
|
Assigns each specified data row to the nearest cluster centroid. |
|
1587
|
|
|
|
|
|
|
Data elements are given by $data() and $mask(), feature weights are |
|
1588
|
|
|
|
|
|
|
given by $weight(), as usual. Cluster centroids are defined by |
|
1589
|
|
|
|
|
|
|
$cdata() and $cmask(), and the indices of rows to be attached |
|
1590
|
|
|
|
|
|
|
are given in the vector $rowids(). The output vector $clusterids() |
|
1591
|
|
|
|
|
|
|
contains for each specified row index the identifier of the nearest |
|
1592
|
|
|
|
|
|
|
cluster centroid. The vector $cdist() contains the distance to |
|
1593
|
|
|
|
|
|
|
the best clusters. |
|
1594
|
|
|
|
|
|
|
|
|
1595
|
|
|
|
|
|
|
See also: clusterdistancematrix(), attachtonearestd(). |
|
1596
|
|
|
|
|
|
|
|
|
1597
|
|
|
|
|
|
|
=pod |
|
1598
|
|
|
|
|
|
|
|
|
1599
|
|
|
|
|
|
|
Broadcasts over its inputs. |
|
1600
|
|
|
|
|
|
|
|
|
1601
|
|
|
|
|
|
|
=for bad |
|
1602
|
|
|
|
|
|
|
|
|
1603
|
|
|
|
|
|
|
C<attachtonearest> does not process bad values. |
|
1604
|
|
|
|
|
|
|
It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays. |
|
1605
|
|
|
|
|
|
|
|
|
1606
|
|
|
|
|
|
|
=cut |
|
1607
|
|
|
|
|
|
|
|
|
1608
|
|
|
|
|
|
|
|
|
1609
|
|
|
|
|
|
|
|
|
1610
|
|
|
|
|
|
|
|
|
1611
|
|
|
|
|
|
|
# Alias the implementation installed as PDL::attachtonearest into this package (PDL::Cluster).
*attachtonearest = \&PDL::attachtonearest; |
|
1612
|
|
|
|
|
|
|
|
|
1613
|
|
|
|
|
|
|
|
|
1614
|
|
|
|
|
|
|
|
|
1615
|
|
|
|
|
|
|
|
|
1616
|
|
|
|
|
|
|
|
|
1617
|
|
|
|
|
|
|
#line 1659 "Cluster.pd" |
|
1618
|
|
|
|
|
|
|
|
|
1619
|
|
|
|
|
|
|
=pod |
|
1620
|
|
|
|
|
|
|
|
|
1621
|
|
|
|
|
|
|
=head2 attachtonearestd |
|
1622
|
|
|
|
|
|
|
|
|
1623
|
|
|
|
|
|
|
=for sig |
|
1624
|
|
|
|
|
|
|
|
|
1625
|
|
|
|
|
|
|
Signature: ( |
|
1626
|
|
|
|
|
|
|
double cdistmat(k,n); |
|
1627
|
|
|
|
|
|
|
int rowids(nr); |
|
1628
|
|
|
|
|
|
|
int [o]clusterids(nr); |
|
1629
|
|
|
|
|
|
|
double [o]dists(nr); |
|
1630
|
|
|
|
|
|
|
) |
|
1631
|
|
|
|
|
|
|
|
|
1632
|
|
|
|
|
|
|
Assigns each specified data row to the nearest cluster centroid, |
|
1633
|
|
|
|
|
|
|
as for attachtonearest(), given the datum-to-cluster distance |
|
1634
|
|
|
|
|
|
|
matrix $cdistmat(). Currently just a wrapper for a few PDL calls. |
|
1635
|
|
|
|
|
|
|
In scalar context returns $clusterids(), in list context returns |
|
1636
|
|
|
|
|
|
|
the list ($clusterids(),$dists()). |
|
1637
|
|
|
|
|
|
|
|
|
1638
|
|
|
|
|
|
|
=cut |
|
1639
|
|
|
|
|
|
|
|
|
1640
|
|
|
|
|
|
|
sub attachtonearestd {
  ##-- args: distance matrix (k,n), row indices (nr), and two optional
  ##   caller-supplied output ndarrays (cluster ids, distances)
  my ($distmat, $rowids, $nearest, $nearest_dist) = @_;

  ##-- create whichever output ndarrays the caller left out:
  ##   one long / one double element per entry of $rowids
  unless (defined($nearest)) {
    $nearest = zeroes(long, $rowids->dim(0));
  }
  unless (defined($nearest_dist)) {
    $nearest_dist = zeroes(double, $rowids->dim(0));
  }

  ##-- restrict the matrix to the entries of dim 1 selected by $rowids
  my $wanted = $distmat->dice_axis(1, $rowids);

  ##-- minimum_ind() stores the index of the smallest distance in $nearest
  $wanted->minimum_ind($nearest);

  ##-- ... and the distance found at that index is copied into the output
  $nearest_dist .= $wanted->index($nearest);

  ##-- scalar (and void) context: ids only; list context: (ids, distances)
  return $nearest unless wantarray;
  return ($nearest, $nearest_dist);
}
|
1654
|
|
|
|
|
|
|
#line 1655 "Cluster.pm" |
|
1655
|
|
|
|
|
|
|
|
|
1656
|
|
|
|
|
|
|
|
|
1657
|
|
|
|
|
|
|
=head2 checkprototypes |
|
1658
|
|
|
|
|
|
|
|
|
1659
|
|
|
|
|
|
|
=for sig |
|
1660
|
|
|
|
|
|
|
|
|
1661
|
|
|
|
|
|
|
Signature: ( |
|
1662
|
|
|
|
|
|
|
protos(k); |
|
1663
|
|
|
|
|
|
|
[o]cprotos(k); |
|
1664
|
|
|
|
|
|
|
byte [t]otmp(n); |
|
1665
|
|
|
|
|
|
|
; int nsize => n) |
|
1666
|
|
|
|
|
|
|
Types: (byte short ushort long) |
|
1667
|
|
|
|
|
|
|
|
|
1668
|
|
|
|
|
|
|
=for usage |
|
1669
|
|
|
|
|
|
|
|
|
1670
|
|
|
|
|
|
|
$cprotos = checkprototypes($protos, $nsize); |
|
1671
|
|
|
|
|
|
|
checkprototypes($protos, $cprotos, $nsize); # all arguments given |
|
1672
|
|
|
|
|
|
|
$cprotos = $protos->checkprototypes($nsize); # method call |
|
1673
|
|
|
|
|
|
|
$protos->checkprototypes($cprotos, $nsize); |
|
1674
|
|
|
|
|
|
|
$protos->inplace->checkprototypes($nsize); # can be used inplace |
|
1675
|
|
|
|
|
|
|
checkprototypes($protos->inplace, $nsize); |
|
1676
|
|
|
|
|
|
|
|
|
1677
|
|
|
|
|
|
|
(Deterministic) |
|
1678
|
|
|
|
|
|
|
|
|
1679
|
|
|
|
|
|
|
Ensure that the assignment $protos() from $k objects to |
|
1680
|
|
|
|
|
|
|
integer "prototype" indices in the range [0,$n-1] contains no repetitions of any |
|
1681
|
|
|
|
|
|
|
of the $n possible prototype values. One use for this function is |
|
1682
|
|
|
|
|
|
|
the restriction of (randomly generated) potential clustering solutions |
|
1683
|
|
|
|
|
|
|
for $k clusters in which each cluster is represented by a |
|
1684
|
|
|
|
|
|
|
"prototypical" element from a data sample of size $n. |
|
1685
|
|
|
|
|
|
|
|
|
1686
|
|
|
|
|
|
|
Requires: $n >= $k. |
|
1687
|
|
|
|
|
|
|
|
|
1688
|
|
|
|
|
|
|
=pod |
|
1689
|
|
|
|
|
|
|
|
|
1690
|
|
|
|
|
|
|
Broadcasts over its inputs. |
|
1691
|
|
|
|
|
|
|
|
|
1692
|
|
|
|
|
|
|
=for bad |
|
1693
|
|
|
|
|
|
|
|
|
1694
|
|
|
|
|
|
|
C<checkprototypes> does not process bad values. |
|
1695
|
|
|
|
|
|
|
It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays. |
|
1696
|
|
|
|
|
|
|
|
|
1697
|
|
|
|
|
|
|
=cut |
|
1698
|
|
|
|
|
|
|
|
|
1699
|
|
|
|
|
|
|
|
|
1700
|
|
|
|
|
|
|
|
|
1701
|
|
|
|
|
|
|
|
|
1702
|
|
|
|
|
|
|
# Alias the implementation installed as PDL::checkprototypes into this package (PDL::Cluster).
*checkprototypes = \&PDL::checkprototypes; |
|
1703
|
|
|
|
|
|
|
|
|
1704
|
|
|
|
|
|
|
|
|
1705
|
|
|
|
|
|
|
|
|
1706
|
|
|
|
|
|
|
|
|
1707
|
|
|
|
|
|
|
|
|
1708
|
|
|
|
|
|
|
|
|
1709
|
|
|
|
|
|
|
=head2 checkpartitions |
|
1710
|
|
|
|
|
|
|
|
|
1711
|
|
|
|
|
|
|
=for sig |
|
1712
|
|
|
|
|
|
|
|
|
1713
|
|
|
|
|
|
|
Signature: ( |
|
1714
|
|
|
|
|
|
|
part(n); |
|
1715
|
|
|
|
|
|
|
[o]cpart(n); |
|
1716
|
|
|
|
|
|
|
[t]ptmp(k); |
|
1717
|
|
|
|
|
|
|
; int ksize => k) |
|
1718
|
|
|
|
|
|
|
Types: (byte short ushort long) |
|
1719
|
|
|
|
|
|
|
|
|
1720
|
|
|
|
|
|
|
=for usage |
|
1721
|
|
|
|
|
|
|
|
|
1722
|
|
|
|
|
|
|
$cpart = checkpartitions($part, $ksize); |
|
1723
|
|
|
|
|
|
|
checkpartitions($part, $cpart, $ksize); # all arguments given |
|
1724
|
|
|
|
|
|
|
$cpart = $part->checkpartitions($ksize); # method call |
|
1725
|
|
|
|
|
|
|
$part->checkpartitions($cpart, $ksize); |
|
1726
|
|
|
|
|
|
|
$part->inplace->checkpartitions($ksize); # can be used inplace |
|
1727
|
|
|
|
|
|
|
checkpartitions($part->inplace, $ksize); |
|
1728
|
|
|
|
|
|
|
|
|
1729
|
|
|
|
|
|
|
(Deterministic) |
|
1730
|
|
|
|
|
|
|
|
|
1731
|
|
|
|
|
|
|
Ensure that the partitioning $part() of $n objects into $k bins |
|
1732
|
|
|
|
|
|
|
(identified by integer values in the range [0,$k-1]) |
|
1733
|
|
|
|
|
|
|
contains at least one instance of each of the |
|
1734
|
|
|
|
|
|
|
$k possible values. One use for this function is |
|
1735
|
|
|
|
|
|
|
the restriction of (randomly generated) potential clustering solutions |
|
1736
|
|
|
|
|
|
|
for $n elements into $k clusters to those which assign at least one |
|
1737
|
|
|
|
|
|
|
element to each cluster. |
|
1738
|
|
|
|
|
|
|
|
|
1739
|
|
|
|
|
|
|
Requires: $n >= $k. |
|
1740
|
|
|
|
|
|
|
|
|
1741
|
|
|
|
|
|
|
=pod |
|
1742
|
|
|
|
|
|
|
|
|
1743
|
|
|
|
|
|
|
Broadcasts over its inputs. |
|
1744
|
|
|
|
|
|
|
|
|
1745
|
|
|
|
|
|
|
=for bad |
|
1746
|
|
|
|
|
|
|
|
|
1747
|
|
|
|
|
|
|
C<checkpartitions> does not process bad values. |
|
1748
|
|
|
|
|
|
|
It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays. |
|
1749
|
|
|
|
|
|
|
|
|
1750
|
|
|
|
|
|
|
=cut |
|
1751
|
|
|
|
|
|
|
|
|
1752
|
|
|
|
|
|
|
|
|
1753
|
|
|
|
|
|
|
|
|
1754
|
|
|
|
|
|
|
|
|
1755
|
|
|
|
|
|
|
# Alias the implementation installed as PDL::checkpartitions into this package (PDL::Cluster).
*checkpartitions = \&PDL::checkpartitions; |
|
1756
|
|
|
|
|
|
|
|
|
1757
|
|
|
|
|
|
|
|
|
1758
|
|
|
|
|
|
|
|
|
1759
|
|
|
|
|
|
|
|
|
1760
|
|
|
|
|
|
|
|
|
1761
|
|
|
|
|
|
|
#line 1813 "Cluster.pd" |
|
1762
|
|
|
|
|
|
|
|
|
1763
|
|
|
|
|
|
|
=pod |
|
1764
|
|
|
|
|
|
|
|
|
1765
|
|
|
|
|
|
|
=head2 randomprototypes |
|
1766
|
|
|
|
|
|
|
|
|
1767
|
|
|
|
|
|
|
=for sig |
|
1768
|
|
|
|
|
|
|
|
|
1769
|
|
|
|
|
|
|
Signature: (int k; int n; [o]prototypes(k)) |
|
1770
|
|
|
|
|
|
|
|
|
1771
|
|
|
|
|
|
|
Generate a random set of $k prototype indices drawn from $n objects, |
|
1772
|
|
|
|
|
|
|
ensuring that no object is used more than once. Calls checkprototypes(). |
|
1773
|
|
|
|
|
|
|
|
|
1774
|
|
|
|
|
|
|
See also: checkprototypes(), randomassign(), checkpartitions(), randompartition(). |
|
1775
|
|
|
|
|
|
|
|
|
1776
|
|
|
|
|
|
|
=cut |
|
1777
|
|
|
|
|
|
|
|
|
1778
|
|
|
|
|
|
|
sub randomprototypes {
  ##-- args: number of prototypes, number of objects, optional output ndarray
  my ($nprotos, $nobjects, $prototypes) = @_;

  ##-- allocate a long-typed output of $nprotos elements unless one was passed in
  unless (defined($prototypes)) {
    $prototypes = zeroes(long, $nprotos);
  }

  ##-- random draw scaled by $nobjects, assigned into the output ndarray
  my $draw = PDL->random($nprotos) * $nobjects;
  $prototypes .= $draw;

  ##-- in-place checkprototypes() pass (documented as ensuring no repeated indices)
  checkprototypes($prototypes->inplace, $nobjects);

  return $prototypes;
}
|
1785
|
|
|
|
|
|
|
|
|
1786
|
|
|
|
|
|
|
#line 1845 "Cluster.pd" |
|
1787
|
|
|
|
|
|
|
|
|
1788
|
|
|
|
|
|
|
=pod |
|
1789
|
|
|
|
|
|
|
|
|
1790
|
|
|
|
|
|
|
=head2 randompartition |
|
1791
|
|
|
|
|
|
|
|
|
1792
|
|
|
|
|
|
|
=for sig |
|
1793
|
|
|
|
|
|
|
|
|
1794
|
|
|
|
|
|
|
Signature: (int k; int n; [o]partition(n)) |
|
1795
|
|
|
|
|
|
|
|
|
1796
|
|
|
|
|
|
|
Generate a partitioning of $n objects into $k clusters, |
|
1797
|
|
|
|
|
|
|
ensuring that every cluster contains at least one object. |
|
1798
|
|
|
|
|
|
|
Calls checkpartitions(). |
|
1799
|
|
|
|
|
|
|
This method is identical in functionality to randomassign(), |
|
1800
|
|
|
|
|
|
|
but may be faster if $k is significantly smaller than $n. |
|
1801
|
|
|
|
|
|
|
|
|
1802
|
|
|
|
|
|
|
See also: randomassign(), checkpartitions(), checkprototypes(), randomprototypes(). |
|
1803
|
|
|
|
|
|
|
|
|
1804
|
|
|
|
|
|
|
=cut |
|
1805
|
|
|
|
|
|
|
|
|
1806
|
|
|
|
|
|
|
sub randompartition {
  ##-- args: number of clusters, number of objects, optional output ndarray
  my ($nclusters, $nobjects, $partition) = @_;

  ##-- allocate a long-typed output of $nobjects elements unless one was passed in
  unless (defined($partition)) {
    $partition = zeroes(long, $nobjects);
  }

  ##-- random draw scaled by $nclusters, assigned into the output ndarray
  my $draw = PDL->random($nobjects) * $nclusters;
  $partition .= $draw;

  ##-- in-place checkpartitions() pass (documented as ensuring no cluster is left empty)
  checkpartitions($partition->inplace, $nclusters);

  return $partition;
}
|
1813
|
|
|
|
|
|
|
|
|
1814
|
|
|
|
|
|
|
#line 1884 "Cluster.pd" |
|
1815
|
|
|
|
|
|
|
##--------------------------------------------------------------------- |
|
1816
|
|
|
|
|
|
|
=pod |
|
1817
|
|
|
|
|
|
|
|
|
1818
|
|
|
|
|
|
|
=head1 COMMON ARGUMENTS |
|
1819
|
|
|
|
|
|
|
|
|
1820
|
|
|
|
|
|
|
Many of the functions described above require one or |
|
1821
|
|
|
|
|
|
|
more of the following parameters: |
|
1822
|
|
|
|
|
|
|
|
|
1823
|
|
|
|
|
|
|
=over 4 |
|
1824
|
|
|
|
|
|
|
|
|
1825
|
|
|
|
|
|
|
=item d |
|
1826
|
|
|
|
|
|
|
|
|
1827
|
|
|
|
|
|
|
The number of features defined for each data element. |
|
1828
|
|
|
|
|
|
|
|
|
1829
|
|
|
|
|
|
|
=item n |
|
1830
|
|
|
|
|
|
|
|
|
1831
|
|
|
|
|
|
|
The number of data elements to be clustered. |
|
1832
|
|
|
|
|
|
|
|
|
1833
|
|
|
|
|
|
|
=item k |
|
1834
|
|
|
|
|
|
|
|
|
1835
|
|
|
|
|
|
|
=item nclusters |
|
1836
|
|
|
|
|
|
|
|
|
1837
|
|
|
|
|
|
|
The number of desired clusters. |
|
1838
|
|
|
|
|
|
|
|
|
1839
|
|
|
|
|
|
|
=item data(d,n) |
|
1840
|
|
|
|
|
|
|
|
|
1841
|
|
|
|
|
|
|
A matrix representing the data to be clustered, double-valued. |
|
1842
|
|
|
|
|
|
|
|
|
1843
|
|
|
|
|
|
|
=item mask(d,n) |
|
1844
|
|
|
|
|
|
|
|
|
1845
|
|
|
|
|
|
|
A matrix indicating which data values are missing. If |
|
1846
|
|
|
|
|
|
|
mask(i,j) == 0, then data(i,j) is treated as missing. |
|
1847
|
|
|
|
|
|
|
|
|
1848
|
|
|
|
|
|
|
=item weights(d) |
|
1849
|
|
|
|
|
|
|
|
|
1850
|
|
|
|
|
|
|
The (feature-) weights that are used to calculate the distance. |
|
1851
|
|
|
|
|
|
|
|
|
1852
|
|
|
|
|
|
|
B<Note:> Not all distance metrics make use of weights; |
|
1853
|
|
|
|
|
|
|
you must provide some nonetheless. |
|
1854
|
|
|
|
|
|
|
|
|
1855
|
|
|
|
|
|
|
=item clusterids(n) |
|
1856
|
|
|
|
|
|
|
|
|
1857
|
|
|
|
|
|
|
A clustering solution. $clusterids() maps data elements |
|
1858
|
|
|
|
|
|
|
(row indices in $data()) to values in the range [0,$k-1]. |
|
1859
|
|
|
|
|
|
|
|
|
1860
|
|
|
|
|
|
|
=back |
|
1861
|
|
|
|
|
|
|
|
|
1862
|
|
|
|
|
|
|
=cut |
|
1863
|
|
|
|
|
|
|
|
|
1864
|
|
|
|
|
|
|
##--------------------------------------------------------------------- |
|
1865
|
|
|
|
|
|
|
=pod |
|
1866
|
|
|
|
|
|
|
|
|
1867
|
|
|
|
|
|
|
=head2 Distance Metrics |
|
1868
|
|
|
|
|
|
|
|
|
1869
|
|
|
|
|
|
|
Distances between data elements (and cluster centroids, where applicable) |
|
1870
|
|
|
|
|
|
|
are computed using one of a number of built-in metrics. Which metric |
|
1871
|
|
|
|
|
|
|
is to be used for a given computation is indicated by a character |
|
1872
|
|
|
|
|
|
|
flag denoted above with $distFlag(). In the following, w[i] represents |
|
1873
|
|
|
|
|
|
|
a weighting factor in the $weights() matrix, and $W represents the total |
|
1874
|
|
|
|
|
|
|
of all weights. |
|
1875
|
|
|
|
|
|
|
|
|
1876
|
|
|
|
|
|
|
Currently implemented distance |
|
1877
|
|
|
|
|
|
|
metrics and the corresponding flags are: |
|
1878
|
|
|
|
|
|
|
|
|
1879
|
|
|
|
|
|
|
=over 4 |
|
1880
|
|
|
|
|
|
|
|
|
1881
|
|
|
|
|
|
|
=item e |
|
1882
|
|
|
|
|
|
|
|
|
1883
|
|
|
|
|
|
|
Pseudo-Euclidean distance: |
|
1884
|
|
|
|
|
|
|
|
|
1885
|
|
|
|
|
|
|
dist_e(x,y) = 1/W * sum_{i=1..d} w[i] * (x[i] - y[i])^2 |
|
1886
|
|
|
|
|
|
|
|
|
1887
|
|
|
|
|
|
|
Note that this is not the "true" Euclidean distance, which is defined as: |
|
1888
|
|
|
|
|
|
|
|
|
1889
|
|
|
|
|
|
|
dist_E(x,y) = sqrt( sum_{i=1..d} (x[i] - y[i])^2 ) |
|
1890
|
|
|
|
|
|
|
|
|
1891
|
|
|
|
|
|
|
=item b |
|
1892
|
|
|
|
|
|
|
|
|
1893
|
|
|
|
|
|
|
City-block ("Manhattan") distance: |
|
1894
|
|
|
|
|
|
|
|
|
1895
|
|
|
|
|
|
|
dist_b(x,y) = 1/W * sum_{i=1..d} w[i] * |x[i] - y[i]| |
|
1896
|
|
|
|
|
|
|
|
|
1897
|
|
|
|
|
|
|
=item c |
|
1898
|
|
|
|
|
|
|
|
|
1899
|
|
|
|
|
|
|
Pearson correlation distance: |
|
1900
|
|
|
|
|
|
|
|
|
1901
|
|
|
|
|
|
|
dist_c(x,y) = 1-r(x,y) |
|
1902
|
|
|
|
|
|
|
|
|
1903
|
|
|
|
|
|
|
where r is the Pearson correlation coefficient: |
|
1904
|
|
|
|
|
|
|
|
|
1905
|
|
|
|
|
|
|
r(x,y) = 1/d * sum_{i=1..d} (x[i]-mean(x))/stddev(x) * (y[i]-mean(y))/stddev(y) |
|
1906
|
|
|
|
|
|
|
|
|
1907
|
|
|
|
|
|
|
=item a |
|
1908
|
|
|
|
|
|
|
|
|
1909
|
|
|
|
|
|
|
Absolute value of the correlation, |
|
1910
|
|
|
|
|
|
|
|
|
1911
|
|
|
|
|
|
|
dist_a(x,y) = 1-|r(x,y)| |
|
1912
|
|
|
|
|
|
|
|
|
1913
|
|
|
|
|
|
|
where r(x,y) is the Pearson correlation coefficient. |
|
1914
|
|
|
|
|
|
|
|
|
1915
|
|
|
|
|
|
|
=item u |
|
1916
|
|
|
|
|
|
|
|
|
1917
|
|
|
|
|
|
|
Uncentered correlation (cosine of the angle): |
|
1918
|
|
|
|
|
|
|
|
|
1919
|
|
|
|
|
|
|
dist_u(x,y) = 1-r_u(x,y) |
|
1920
|
|
|
|
|
|
|
|
|
1921
|
|
|
|
|
|
|
where: |
|
1922
|
|
|
|
|
|
|
|
|
1923
|
|
|
|
|
|
|
r_u(x,y) = 1/d * sum_{i=1..d} (x[i]/sigma0(x)) * (y[i]/sigma0(y)) |
|
1924
|
|
|
|
|
|
|
|
|
1925
|
|
|
|
|
|
|
and: |
|
1926
|
|
|
|
|
|
|
|
|
1927
|
|
|
|
|
|
|
sigma0(w) = sqrt( 1/d * sum_{i=1..d} w[i]^2 ) |
|
1928
|
|
|
|
|
|
|
|
|
1929
|
|
|
|
|
|
|
=item x |
|
1930
|
|
|
|
|
|
|
|
|
1931
|
|
|
|
|
|
|
Absolute uncentered correlation, |
|
1932
|
|
|
|
|
|
|
|
|
1933
|
|
|
|
|
|
|
dist_x(x,y) = 1-|r_u(x,y)| |
|
1934
|
|
|
|
|
|
|
|
|
1935
|
|
|
|
|
|
|
=item s |
|
1936
|
|
|
|
|
|
|
|
|
1937
|
|
|
|
|
|
|
Spearman's rank correlation. |
|
1938
|
|
|
|
|
|
|
|
|
1939
|
|
|
|
|
|
|
dist_s(x,y) = 1-r_s(x,y) ~= dist_c(ranks(x),ranks(y)) |
|
1940
|
|
|
|
|
|
|
|
|
1941
|
|
|
|
|
|
|
where r_s(x,y) is the Spearman rank correlation. Weights are ignored. |
|
1942
|
|
|
|
|
|
|
|
|
1943
|
|
|
|
|
|
|
=item k |
|
1944
|
|
|
|
|
|
|
|
|
1945
|
|
|
|
|
|
|
Kendall's tau (does not use weights). |
|
1946
|
|
|
|
|
|
|
|
|
1947
|
|
|
|
|
|
|
dist_k(x,y) = 1 - tau(x,y) |
|
1948
|
|
|
|
|
|
|
|
|
1949
|
|
|
|
|
|
|
=item (other values) |
|
1950
|
|
|
|
|
|
|
|
|
1951
|
|
|
|
|
|
|
For other values of dist, the default (Euclidean distance) is used. |
|
1952
|
|
|
|
|
|
|
|
|
1953
|
|
|
|
|
|
|
=back |
|
1954
|
|
|
|
|
|
|
|
|
1955
|
|
|
|
|
|
|
=cut |
|
1956
|
|
|
|
|
|
|
|
|
1957
|
|
|
|
|
|
|
##--------------------------------------------------------------------- |
|
1958
|
|
|
|
|
|
|
=pod |
|
1959
|
|
|
|
|
|
|
|
|
1960
|
|
|
|
|
|
|
=head2 Link Methods |
|
1961
|
|
|
|
|
|
|
|
|
1962
|
|
|
|
|
|
|
For hierarchical clustering, the 'link method' must be specified |
|
1963
|
|
|
|
|
|
|
by a character flag, denoted above as $methodFlag. |
|
1964
|
|
|
|
|
|
|
Known link methods are: |
|
1965
|
|
|
|
|
|
|
|
|
1966
|
|
|
|
|
|
|
=over 4 |
|
1967
|
|
|
|
|
|
|
|
|
1968
|
|
|
|
|
|
|
=item s |
|
1969
|
|
|
|
|
|
|
|
|
1970
|
|
|
|
|
|
|
Pairwise minimum-linkage ("single") clustering. |
|
1971
|
|
|
|
|
|
|
|
|
1972
|
|
|
|
|
|
|
Defines the distance between two clusters as the |
|
1973
|
|
|
|
|
|
|
least distance between any two of their respective elements. |
|
1974
|
|
|
|
|
|
|
|
|
1975
|
|
|
|
|
|
|
=item m |
|
1976
|
|
|
|
|
|
|
|
|
1977
|
|
|
|
|
|
|
Pairwise maximum-linkage ("complete") clustering. |
|
1978
|
|
|
|
|
|
|
|
|
1979
|
|
|
|
|
|
|
Defines the distance between two clusters as the |
|
1980
|
|
|
|
|
|
|
greatest distance between any two of their respective elements. |
|
1981
|
|
|
|
|
|
|
|
|
1982
|
|
|
|
|
|
|
=item a |
|
1983
|
|
|
|
|
|
|
|
|
1984
|
|
|
|
|
|
|
Pairwise average-linkage clustering (centroid distance using arithmetic mean). |
|
1985
|
|
|
|
|
|
|
|
|
1986
|
|
|
|
|
|
|
Defines the distance between two clusters as the |
|
1987
|
|
|
|
|
|
|
distance between their respective centroids, where each |
|
1988
|
|
|
|
|
|
|
cluster centroid is defined as the arithmetic mean of |
|
1989
|
|
|
|
|
|
|
that cluster's elements. |
|
1990
|
|
|
|
|
|
|
|
|
1991
|
|
|
|
|
|
|
=item c |
|
1992
|
|
|
|
|
|
|
|
|
1993
|
|
|
|
|
|
|
Pairwise centroid-linkage clustering (centroid distance using median). |
|
1994
|
|
|
|
|
|
|
|
|
1995
|
|
|
|
|
|
|
Identifies the distance between two clusters as the |
|
1996
|
|
|
|
|
|
|
distance between their respective centroids, where each |
|
1997
|
|
|
|
|
|
|
cluster centroid is computed as the median of |
|
1998
|
|
|
|
|
|
|
that cluster's elements. |
|
1999
|
|
|
|
|
|
|
|
|
2000
|
|
|
|
|
|
|
=item (other values) |
|
2001
|
|
|
|
|
|
|
|
|
2002
|
|
|
|
|
|
|
Behavior for other values is currently undefined. |
|
2003
|
|
|
|
|
|
|
|
|
2004
|
|
|
|
|
|
|
=back |
|
2005
|
|
|
|
|
|
|
|
|
2006
|
|
|
|
|
|
|
For the first three, either the distance matrix or the gene expression data is |
|
2007
|
|
|
|
|
|
|
sufficient to perform the clustering algorithm. For pairwise centroid-linkage |
|
2008
|
|
|
|
|
|
|
clustering, however, the gene expression data are always needed, even if the |
|
2009
|
|
|
|
|
|
|
distance matrix itself is available. |
|
2010
|
|
|
|
|
|
|
|
|
2011
|
|
|
|
|
|
|
=cut |
|
2012
|
|
|
|
|
|
|
|
|
2013
|
|
|
|
|
|
|
##--------------------------------------------------------------------- |
|
2014
|
|
|
|
|
|
|
=pod |
|
2015
|
|
|
|
|
|
|
|
|
2016
|
|
|
|
|
|
|
=head1 ACKNOWLEDGEMENTS |
|
2017
|
|
|
|
|
|
|
|
|
2018
|
|
|
|
|
|
|
Perl by Larry Wall. |
|
2019
|
|
|
|
|
|
|
|
|
2020
|
|
|
|
|
|
|
PDL by Karl Glazebrook, Tuomas J. Lukka, Christian Soeller, and others. |
|
2021
|
|
|
|
|
|
|
|
|
2022
|
|
|
|
|
|
|
C Clustering Library by |
|
2023
|
|
|
|
|
|
|
Michiel de Hoon, |
|
2024
|
|
|
|
|
|
|
Seiya Imoto, |
|
2025
|
|
|
|
|
|
|
and Satoru Miyano. |
|
2026
|
|
|
|
|
|
|
|
|
2027
|
|
|
|
|
|
|
Original Algorithm::Cluster module by John Nolan and Michiel de Hoon. |
|
2028
|
|
|
|
|
|
|
|
|
2029
|
|
|
|
|
|
|
=cut |
|
2030
|
|
|
|
|
|
|
|
|
2031
|
|
|
|
|
|
|
##---------------------------------------------------------------------- |
|
2032
|
|
|
|
|
|
|
=pod |
|
2033
|
|
|
|
|
|
|
|
|
2034
|
|
|
|
|
|
|
=head1 KNOWN BUGS |
|
2035
|
|
|
|
|
|
|
|
|
2036
|
|
|
|
|
|
|
Dimensional requirements are sometimes too strict. |
|
2037
|
|
|
|
|
|
|
|
|
2038
|
|
|
|
|
|
|
Passing weights to Spearman and Kendall distance metrics wastes space. |
|
2039
|
|
|
|
|
|
|
|
|
2040
|
|
|
|
|
|
|
=cut |
|
2041
|
|
|
|
|
|
|
|
|
2042
|
|
|
|
|
|
|
##--------------------------------------------------------------------- |
|
2043
|
|
|
|
|
|
|
=pod |
|
2044
|
|
|
|
|
|
|
|
|
2045
|
|
|
|
|
|
|
=head1 AUTHOR |
|
2046
|
|
|
|
|
|
|
|
|
2047
|
|
|
|
|
|
|
Bryan Jurish E<lt>moocow@cpan.orgE<gt> wrote and maintains the PDL::Cluster distribution. |
|
2048
|
|
|
|
|
|
|
|
|
2049
|
|
|
|
|
|
|
Michiel de Hoon wrote the underlying C clustering library for cDNA microarray data. |
|
2050
|
|
|
|
|
|
|
|
|
2051
|
|
|
|
|
|
|
=head1 COPYRIGHT |
|
2052
|
|
|
|
|
|
|
|
|
2053
|
|
|
|
|
|
|
PDL::Cluster is a set of wrappers around the C Clustering library for cDNA microarray data. |
|
2054
|
|
|
|
|
|
|
|
|
2055
|
|
|
|
|
|
|
=over 4 |
|
2056
|
|
|
|
|
|
|
|
|
2057
|
|
|
|
|
|
|
=item * |
|
2058
|
|
|
|
|
|
|
|
|
2059
|
|
|
|
|
|
|
The C clustering library for cDNA microarray data. |
|
2060
|
|
|
|
|
|
|
Copyright (C) 2002-2005 Michiel Jan Laurens de Hoon. |
|
2061
|
|
|
|
|
|
|
|
|
2062
|
|
|
|
|
|
|
This library was written at the Laboratory of DNA Information Analysis, |
|
2063
|
|
|
|
|
|
|
Human Genome Center, Institute of Medical Science, University of Tokyo, |
|
2064
|
|
|
|
|
|
|
4-6-1 Shirokanedai, Minato-ku, Tokyo 108-8639, Japan. |
|
2065
|
|
|
|
|
|
|
Contact: michiel.dehoon 'AT' riken.jp |
|
2066
|
|
|
|
|
|
|
|
|
2067
|
|
|
|
|
|
|
See the files F, F and F in the PDL::Cluster distribution |
|
2068
|
|
|
|
|
|
|
for details. |
|
2069
|
|
|
|
|
|
|
|
|
2070
|
|
|
|
|
|
|
=item * |
|
2071
|
|
|
|
|
|
|
|
|
2072
|
|
|
|
|
|
|
PDL::Cluster wrappers copyright (C) Bryan Jurish 2005-2018. All rights reserved. |
|
2073
|
|
|
|
|
|
|
This package is free software, and entirely without warranty. |
|
2074
|
|
|
|
|
|
|
You may redistribute it and/or modify it under the same terms |
|
2075
|
|
|
|
|
|
|
as Perl itself. |
|
2076
|
|
|
|
|
|
|
|
|
2077
|
|
|
|
|
|
|
=back |
|
2078
|
|
|
|
|
|
|
|
|
2079
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
2080
|
|
|
|
|
|
|
|
|
2081
|
|
|
|
|
|
|
perl(1), PDL(3perl), Algorithm::Cluster(3perl), cluster(1), |
|
2082
|
|
|
|
|
|
|
L |
|
2083
|
|
|
|
|
|
|
|
|
2084
|
|
|
|
|
|
|
=cut |
|
2085
|
|
|
|
|
|
|
#line 2086 "Cluster.pm" |
|
2086
|
|
|
|
|
|
|
|
|
2087
|
|
|
|
|
|
|
# Exit with OK status |
|
2088
|
|
|
|
|
|
|
|
|
2089
|
|
|
|
|
|
|
1; |