2 |
26 Feb 07 |
jari |
1 |
/* |
2 |
26 Feb 07 |
jari |
Copyright @ 1999-2003, The Institute for Genomic Research (TIGR). |
2 |
26 Feb 07 |
jari |
All rights reserved. |
2 |
26 Feb 07 |
jari |
4 |
*/ |
2 |
26 Feb 07 |
jari |
5 |
/* |
2 |
26 Feb 07 |
jari |
* $RCSfile: KMC.java,v $ |
2 |
26 Feb 07 |
jari |
* $Revision: 1.5 $ |
2 |
26 Feb 07 |
jari |
* $Date: 2006/06/30 15:22:27 $ |
2 |
26 Feb 07 |
jari |
* $Author: braistedj $ |
2 |
26 Feb 07 |
jari |
* $State: Exp $ |
2 |
26 Feb 07 |
jari |
11 |
*/ |
2 |
26 Feb 07 |
jari |
12 |
package org.tigr.microarray.mev.cluster.algorithm.impl; |
2 |
26 Feb 07 |
jari |
13 |
|
2 |
26 Feb 07 |
jari |
14 |
import java.util.ArrayList; |
2 |
26 Feb 07 |
jari |
15 |
import java.util.Arrays; |
2 |
26 Feb 07 |
jari |
16 |
import java.util.Random; |
2 |
26 Feb 07 |
jari |
17 |
|
2 |
26 Feb 07 |
jari |
18 |
import org.tigr.microarray.mev.cluster.Cluster; |
2 |
26 Feb 07 |
jari |
19 |
import org.tigr.microarray.mev.cluster.Node; |
2 |
26 Feb 07 |
jari |
20 |
import org.tigr.microarray.mev.cluster.NodeList; |
2 |
26 Feb 07 |
jari |
21 |
import org.tigr.microarray.mev.cluster.NodeValue; |
2 |
26 Feb 07 |
jari |
22 |
import org.tigr.microarray.mev.cluster.NodeValueList; |
2 |
26 Feb 07 |
jari |
23 |
import org.tigr.microarray.mev.cluster.algorithm.AbortException; |
2 |
26 Feb 07 |
jari |
24 |
import org.tigr.microarray.mev.cluster.algorithm.AbstractAlgorithm; |
2 |
26 Feb 07 |
jari |
25 |
import org.tigr.microarray.mev.cluster.algorithm.AlgorithmData; |
2 |
26 Feb 07 |
jari |
26 |
import org.tigr.microarray.mev.cluster.algorithm.AlgorithmEvent; |
2 |
26 Feb 07 |
jari |
27 |
import org.tigr.microarray.mev.cluster.algorithm.AlgorithmException; |
2 |
26 Feb 07 |
jari |
28 |
import org.tigr.microarray.mev.cluster.algorithm.AlgorithmParameters; |
2 |
26 Feb 07 |
jari |
29 |
import org.tigr.util.FloatMatrix; |
2 |
26 Feb 07 |
jari |
30 |
import org.tigr.util.QSort; |
2 |
26 Feb 07 |
jari |
31 |
|
2 |
26 Feb 07 |
jari |
32 |
public class KMC extends AbstractAlgorithm { |
2 |
26 Feb 07 |
jari |
33 |
|
2 |
26 Feb 07 |
jari |
34 |
private boolean stop = false; |
2 |
26 Feb 07 |
jari |
35 |
|
2 |
26 Feb 07 |
jari |
36 |
private int function; |
2 |
26 Feb 07 |
jari |
37 |
private float factor; |
2 |
26 Feb 07 |
jari |
38 |
private boolean absolute; |
2 |
26 Feb 07 |
jari |
39 |
|
2 |
26 Feb 07 |
jari |
40 |
private int number_of_genes; |
2 |
26 Feb 07 |
jari |
41 |
private int number_of_samples; |
2 |
26 Feb 07 |
jari |
42 |
|
2 |
26 Feb 07 |
jari |
43 |
private FloatMatrix expMatrix; |
2 |
26 Feb 07 |
jari |
44 |
private boolean calculateMeans; |
2 |
26 Feb 07 |
jari |
45 |
private int iterations; |
2 |
26 Feb 07 |
jari |
46 |
private boolean converged; |
2 |
26 Feb 07 |
jari |
47 |
private boolean kmcGenes; //indicates gene clustering |
2 |
26 Feb 07 |
jari |
48 |
private int validN; |
2 |
26 Feb 07 |
jari |
49 |
|
2 |
26 Feb 07 |
jari |
50 |
private int hcl_function; |
2 |
26 Feb 07 |
jari |
51 |
private boolean hcl_absolute; |
2 |
26 Feb 07 |
jari |
52 |
|
2 |
26 Feb 07 |
jari |
//captures the interation number of each cluster reorganization |
2 |
26 Feb 07 |
jari |
//this will indicate when particular clusters stabalized |
2 |
26 Feb 07 |
jari |
55 |
int [] clusterConvergence; |
2 |
26 Feb 07 |
jari |
56 |
|
2 |
26 Feb 07 |
jari |
57 |
public AlgorithmData execute(AlgorithmData data) throws AlgorithmException { |
2 |
26 Feb 07 |
jari |
58 |
|
2 |
26 Feb 07 |
jari |
59 |
AlgorithmParameters map = data.getParams(); |
2 |
26 Feb 07 |
jari |
60 |
|
2 |
26 Feb 07 |
jari |
61 |
function = map.getInt("distance-function", EUCLIDEAN); |
2 |
26 Feb 07 |
jari |
62 |
factor = map.getFloat("distance-factor", 1.0f); |
2 |
26 Feb 07 |
jari |
63 |
absolute = map.getBoolean("distance-absolute", false); |
2 |
26 Feb 07 |
jari |
64 |
calculateMeans = map.getBoolean("calculate-means", true); |
2 |
26 Feb 07 |
jari |
65 |
kmcGenes = map.getBoolean("kmc-cluster-genes", true); |
2 |
26 Feb 07 |
jari |
66 |
|
2 |
26 Feb 07 |
jari |
67 |
hcl_function = map.getInt("hcl-distance-function", EUCLIDEAN); |
2 |
26 Feb 07 |
jari |
68 |
hcl_absolute = map.getBoolean("hcl-distance-absolute", false); |
2 |
26 Feb 07 |
jari |
69 |
|
2 |
26 Feb 07 |
jari |
70 |
int number_of_iterations = map.getInt("number-of-iterations", 0); |
2 |
26 Feb 07 |
jari |
71 |
int number_of_clusters = map.getInt("number-of-clusters", 0); |
2 |
26 Feb 07 |
jari |
72 |
boolean hierarchical_tree = map.getBoolean("hierarchical-tree", false); |
2 |
26 Feb 07 |
jari |
73 |
int method_linkage = map.getInt("method-linkage", 0); |
2 |
26 Feb 07 |
jari |
74 |
boolean calculate_genes = map.getBoolean("calculate-genes", false); |
2 |
26 Feb 07 |
jari |
75 |
boolean calculate_experiments = map.getBoolean("calculate-experiments", false); |
2 |
26 Feb 07 |
jari |
76 |
|
2 |
26 Feb 07 |
jari |
77 |
this.expMatrix = data.getMatrix("experiment"); |
2 |
26 Feb 07 |
jari |
78 |
|
2 |
26 Feb 07 |
jari |
79 |
number_of_genes = this.expMatrix.getRowDimension(); |
2 |
26 Feb 07 |
jari |
80 |
number_of_samples = this.expMatrix.getColumnDimension(); |
2 |
26 Feb 07 |
jari |
81 |
|
2 |
26 Feb 07 |
jari |
82 |
this.clusterConvergence = new int[number_of_clusters]; |
2 |
26 Feb 07 |
jari |
83 |
|
2 |
26 Feb 07 |
jari |
//JCB MODIFIED FOR KMEDIANS |
2 |
26 Feb 07 |
jari |
85 |
KMCluster[] clusters; |
2 |
26 Feb 07 |
jari |
86 |
FloatMatrix means = null; |
2 |
26 Feb 07 |
jari |
87 |
FloatMatrix medians = null; |
2 |
26 Feb 07 |
jari |
88 |
FloatMatrix variances = null; |
2 |
26 Feb 07 |
jari |
89 |
|
2 |
26 Feb 07 |
jari |
90 |
if(calculateMeans){ |
2 |
26 Feb 07 |
jari |
91 |
clusters = calculate(number_of_genes, number_of_clusters, number_of_iterations); |
2 |
26 Feb 07 |
jari |
92 |
means = getMeans(clusters); |
2 |
26 Feb 07 |
jari |
93 |
variances = getVariances(clusters, means); |
2 |
26 Feb 07 |
jari |
94 |
} |
2 |
26 Feb 07 |
jari |
//or else calculating k-medians |
2 |
26 Feb 07 |
jari |
96 |
else{ |
2 |
26 Feb 07 |
jari |
97 |
clusters = calculateMedians(number_of_genes, number_of_clusters, number_of_iterations); |
2 |
26 Feb 07 |
jari |
98 |
medians = getMedians(clusters); |
2 |
26 Feb 07 |
jari |
99 |
variances = getVariances(clusters, medians); |
2 |
26 Feb 07 |
jari |
100 |
} |
2 |
26 Feb 07 |
jari |
101 |
|
2 |
26 Feb 07 |
jari |
102 |
|
2 |
26 Feb 07 |
jari |
//clusterConvergence captures the last iteration that a cluster |
2 |
26 Feb 07 |
jari |
//gave up or recieved an element |
2 |
26 Feb 07 |
jari |
//Use the results to sort based on convergence iteration |
2 |
26 Feb 07 |
jari |
//if two clusters converged on the iteration rank by decreasing size |
2 |
26 Feb 07 |
jari |
107 |
|
2 |
26 Feb 07 |
jari |
//sort clusters (before posible HCL), means and variances |
2 |
26 Feb 07 |
jari |
109 |
|
2 |
26 Feb 07 |
jari |
//inefficient but convert to a matrix of floats to use our quicksort |
2 |
26 Feb 07 |
jari |
//and use the sorted indices array from qsort |
2 |
26 Feb 07 |
jari |
112 |
float [] tempConv = new float[clusterConvergence.length]; |
2 |
26 Feb 07 |
jari |
113 |
for(int i = 0; i < clusterConvergence.length; i++) { |
2 |
26 Feb 07 |
jari |
114 |
tempConv[i] = (float)clusterConvergence[i]; |
2 |
26 Feb 07 |
jari |
115 |
} |
2 |
26 Feb 07 |
jari |
116 |
|
2 |
26 Feb 07 |
jari |
117 |
QSort qsort = new QSort(tempConv); //automatically sorts |
2 |
26 Feb 07 |
jari |
118 |
tempConv = qsort.getSorted(); |
2 |
26 Feb 07 |
jari |
119 |
int [] sortedClusterIndices = qsort.getOrigIndx(); // sorted ordering |
2 |
26 Feb 07 |
jari |
120 |
int temp; |
2 |
26 Feb 07 |
jari |
121 |
|
2 |
26 Feb 07 |
jari |
//maybe bubble through array to sort out ties based on cluster size |
2 |
26 Feb 07 |
jari |
123 |
for (int i = 0; i < number_of_clusters-1; i++) { |
2 |
26 Feb 07 |
jari |
124 |
for(int j = 0; j < number_of_clusters-1-i; j++) { |
2 |
26 Feb 07 |
jari |
125 |
|
2 |
26 Feb 07 |
jari |
//if they have the sampe iteration count we might swap the order |
2 |
26 Feb 07 |
jari |
127 |
if(tempConv[j] == tempConv[j+1]) { |
2 |
26 Feb 07 |
jari |
128 |
|
2 |
26 Feb 07 |
jari |
//check sizes |
2 |
26 Feb 07 |
jari |
130 |
if(clusters[sortedClusterIndices[j]].size() |
2 |
26 Feb 07 |
jari |
131 |
< clusters[sortedClusterIndices[j+1]].size()) { |
2 |
26 Feb 07 |
jari |
//if we need to swap swap sorted indices |
2 |
26 Feb 07 |
jari |
//and swap the conversion indices will not swap since |
2 |
26 Feb 07 |
jari |
//the values are already equal |
2 |
26 Feb 07 |
jari |
135 |
temp = sortedClusterIndices[j]; |
2 |
26 Feb 07 |
jari |
136 |
sortedClusterIndices[j] = sortedClusterIndices[j+1]; |
2 |
26 Feb 07 |
jari |
137 |
sortedClusterIndices[j+1] = temp; |
2 |
26 Feb 07 |
jari |
138 |
} |
2 |
26 Feb 07 |
jari |
139 |
} |
2 |
26 Feb 07 |
jari |
140 |
} |
2 |
26 Feb 07 |
jari |
141 |
} |
2 |
26 Feb 07 |
jari |
142 |
|
2 |
26 Feb 07 |
jari |
143 |
|
2 |
26 Feb 07 |
jari |
//sort it out... |
2 |
26 Feb 07 |
jari |
145 |
KMCluster [] newClusterOrder = new KMCluster[clusters.length]; |
2 |
26 Feb 07 |
jari |
146 |
FloatMatrix newMeansMedsOrder = new FloatMatrix(clusters.length, number_of_samples); |
2 |
26 Feb 07 |
jari |
147 |
FloatMatrix newVariancesOrder = new FloatMatrix(clusters.length, number_of_samples); |
2 |
26 Feb 07 |
jari |
148 |
|
2 |
26 Feb 07 |
jari |
149 |
for(int i = 0; i < clusters.length; i++) { |
2 |
26 Feb 07 |
jari |
//sort clusters |
2 |
26 Feb 07 |
jari |
151 |
newClusterOrder[i] = clusters[sortedClusterIndices[i]]; |
2 |
26 Feb 07 |
jari |
//sort variances |
2 |
26 Feb 07 |
jari |
153 |
newVariancesOrder.A[i] = variances.A[sortedClusterIndices[i]]; |
2 |
26 Feb 07 |
jari |
154 |
|
2 |
26 Feb 07 |
jari |
//sort means or medians |
2 |
26 Feb 07 |
jari |
156 |
if(calculateMeans) |
2 |
26 Feb 07 |
jari |
157 |
newMeansMedsOrder.A[i] = means.A[sortedClusterIndices[i]]; |
2 |
26 Feb 07 |
jari |
158 |
else |
2 |
26 Feb 07 |
jari |
159 |
newMeansMedsOrder.A[i] = medians.A[sortedClusterIndices[i]]; |
2 |
26 Feb 07 |
jari |
160 |
|
2 |
26 Feb 07 |
jari |
//use this loop to order the cluster convergence information |
2 |
26 Feb 07 |
jari |
162 |
clusterConvergence[i] = (int)(tempConv[i]); |
2 |
26 Feb 07 |
jari |
163 |
} |
2 |
26 Feb 07 |
jari |
164 |
|
2 |
26 Feb 07 |
jari |
//reassign sorted object to main objects |
2 |
26 Feb 07 |
jari |
166 |
clusters = newClusterOrder; |
2 |
26 Feb 07 |
jari |
167 |
variances = newVariancesOrder; |
2 |
26 Feb 07 |
jari |
168 |
if(calculateMeans) |
2 |
26 Feb 07 |
jari |
169 |
means = newMeansMedsOrder; |
2 |
26 Feb 07 |
jari |
170 |
else |
2 |
26 Feb 07 |
jari |
171 |
medians = newMeansMedsOrder; |
2 |
26 Feb 07 |
jari |
172 |
|
2 |
26 Feb 07 |
jari |
173 |
AlgorithmEvent event = null; |
2 |
26 Feb 07 |
jari |
174 |
if (hierarchical_tree) { |
2 |
26 Feb 07 |
jari |
175 |
event = new AlgorithmEvent(this, AlgorithmEvent.SET_UNITS, clusters.length, "Calculate Hierarchical Trees"); |
2 |
26 Feb 07 |
jari |
176 |
fireValueChanged(event); |
2 |
26 Feb 07 |
jari |
177 |
event.setIntValue(0); |
2 |
26 Feb 07 |
jari |
178 |
event.setId(AlgorithmEvent.PROGRESS_VALUE); |
2 |
26 Feb 07 |
jari |
179 |
fireValueChanged(event); |
2 |
26 Feb 07 |
jari |
180 |
} |
2 |
26 Feb 07 |
jari |
181 |
|
2 |
26 Feb 07 |
jari |
182 |
Cluster result_cluster = new Cluster(); |
2 |
26 Feb 07 |
jari |
183 |
NodeList nodeList = result_cluster.getNodeList(); |
2 |
26 Feb 07 |
jari |
184 |
int[] features; |
2 |
26 Feb 07 |
jari |
185 |
for (int i=0; i<clusters.length; i++) { |
2 |
26 Feb 07 |
jari |
186 |
if (stop) { |
2 |
26 Feb 07 |
jari |
187 |
throw new AbortException(); |
2 |
26 Feb 07 |
jari |
188 |
} |
2 |
26 Feb 07 |
jari |
189 |
features = convert2int(clusters[i]); |
2 |
26 Feb 07 |
jari |
190 |
|
2 |
26 Feb 07 |
jari |
191 |
Node node = new Node(features); |
2 |
26 Feb 07 |
jari |
192 |
nodeList.addNode(node); |
2 |
26 Feb 07 |
jari |
193 |
if (hierarchical_tree) { |
2 |
26 Feb 07 |
jari |
194 |
node.setValues(calculateHierarchicalTree(features, method_linkage, calculate_genes, calculate_experiments)); |
2 |
26 Feb 07 |
jari |
195 |
event.setIntValue(i+1); |
2 |
26 Feb 07 |
jari |
196 |
fireValueChanged(event); |
2 |
26 Feb 07 |
jari |
197 |
} |
2 |
26 Feb 07 |
jari |
198 |
} |
2 |
26 Feb 07 |
jari |
199 |
|
2 |
26 Feb 07 |
jari |
// prepare the result |
2 |
26 Feb 07 |
jari |
201 |
AlgorithmData result = new AlgorithmData(); |
2 |
26 Feb 07 |
jari |
202 |
result.addCluster("cluster", result_cluster); |
2 |
26 Feb 07 |
jari |
203 |
|
2 |
26 Feb 07 |
jari |
204 |
|
2 |
26 Feb 07 |
jari |
//JCB K-Medians alteration |
2 |
26 Feb 07 |
jari |
206 |
if(calculateMeans) |
2 |
26 Feb 07 |
jari |
207 |
result.addMatrix("clusters_means", means); |
2 |
26 Feb 07 |
jari |
208 |
else |
2 |
26 Feb 07 |
jari |
209 |
result.addMatrix("clusters_means", medians); //use the same key so that KMCGUI can remain unchanged |
2 |
26 Feb 07 |
jari |
210 |
|
2 |
26 Feb 07 |
jari |
211 |
result.addMatrix("clusters_variances", variances); |
2 |
26 Feb 07 |
jari |
212 |
result.addParam("iterations", String.valueOf(getIterations())); |
2 |
26 Feb 07 |
jari |
213 |
result.addParam("converged", String.valueOf(getConverged())); |
2 |
26 Feb 07 |
jari |
214 |
result.addIntArray("convergence-iterations", clusterConvergence); |
2 |
26 Feb 07 |
jari |
215 |
|
2 |
26 Feb 07 |
jari |
216 |
return result; |
2 |
26 Feb 07 |
jari |
217 |
} |
2 |
26 Feb 07 |
jari |
218 |
|
2 |
26 Feb 07 |
jari |
219 |
private NodeValueList calculateHierarchicalTree(int[] features, int method, boolean genes, boolean experiments) throws AlgorithmException { |
2 |
26 Feb 07 |
jari |
220 |
NodeValueList nodeList = new NodeValueList(); |
2 |
26 Feb 07 |
jari |
221 |
AlgorithmData data = new AlgorithmData(); |
2 |
26 Feb 07 |
jari |
222 |
FloatMatrix experiment; |
2 |
26 Feb 07 |
jari |
223 |
|
2 |
26 Feb 07 |
jari |
// Create a sub experiment appropriate for gene clustering or experiment clusutering |
2 |
26 Feb 07 |
jari |
225 |
if(kmcGenes) |
2 |
26 Feb 07 |
jari |
226 |
experiment = getSubExperiment(this.expMatrix, features); |
2 |
26 Feb 07 |
jari |
227 |
else |
2 |
26 Feb 07 |
jari |
228 |
experiment = getSubExperimentReducedCols(this.expMatrix, features); |
2 |
26 Feb 07 |
jari |
229 |
|
2 |
26 Feb 07 |
jari |
230 |
data.addMatrix("experiment", experiment); |
2 |
26 Feb 07 |
jari |
231 |
data.addParam("hcl-distance-function", String.valueOf(this.hcl_function)); |
2 |
26 Feb 07 |
jari |
232 |
data.addParam("hcl-distance-absolute", String.valueOf(this.hcl_absolute)); |
2 |
26 Feb 07 |
jari |
233 |
data.addParam("method-linkage", String.valueOf(method)); |
2 |
26 Feb 07 |
jari |
234 |
HCL hcl = new HCL(); |
2 |
26 Feb 07 |
jari |
235 |
AlgorithmData result; |
2 |
26 Feb 07 |
jari |
236 |
|
2 |
26 Feb 07 |
jari |
237 |
if (genes) { |
2 |
26 Feb 07 |
jari |
238 |
data.addParam("calculate-genes", String.valueOf(true)); |
2 |
26 Feb 07 |
jari |
239 |
result = hcl.execute(data); |
2 |
26 Feb 07 |
jari |
240 |
validate(result); |
2 |
26 Feb 07 |
jari |
241 |
addNodeValues(nodeList, result); |
2 |
26 Feb 07 |
jari |
242 |
} |
2 |
26 Feb 07 |
jari |
243 |
if (experiments) { |
2 |
26 Feb 07 |
jari |
244 |
data.addParam("calculate-genes", String.valueOf(false)); |
2 |
26 Feb 07 |
jari |
245 |
result = hcl.execute(data); |
2 |
26 Feb 07 |
jari |
246 |
validate(result); |
2 |
26 Feb 07 |
jari |
247 |
addNodeValues(nodeList, result); |
2 |
26 Feb 07 |
jari |
248 |
} |
2 |
26 Feb 07 |
jari |
249 |
return nodeList; |
2 |
26 Feb 07 |
jari |
250 |
} |
2 |
26 Feb 07 |
jari |
251 |
|
2 |
26 Feb 07 |
jari |
252 |
private void addNodeValues(NodeValueList target_list, AlgorithmData source_result) { |
2 |
26 Feb 07 |
jari |
253 |
target_list.addNodeValue(new NodeValue("child-1-array", source_result.getIntArray("child-1-array"))); |
2 |
26 Feb 07 |
jari |
254 |
target_list.addNodeValue(new NodeValue("child-2-array", source_result.getIntArray("child-2-array"))); |
2 |
26 Feb 07 |
jari |
255 |
target_list.addNodeValue(new NodeValue("node-order", source_result.getIntArray("node-order"))); |
2 |
26 Feb 07 |
jari |
256 |
target_list.addNodeValue(new NodeValue("height", source_result.getMatrix("height").getRowPackedCopy())); |
2 |
26 Feb 07 |
jari |
257 |
} |
2 |
26 Feb 07 |
jari |
258 |
|
2 |
26 Feb 07 |
jari |
259 |
private FloatMatrix getSubExperiment(FloatMatrix experiment, int[] features) { |
2 |
26 Feb 07 |
jari |
260 |
FloatMatrix subExperiment = new FloatMatrix(features.length, experiment.getColumnDimension()); |
2 |
26 Feb 07 |
jari |
261 |
for (int i=0; i<features.length; i++) { |
2 |
26 Feb 07 |
jari |
262 |
subExperiment.A[i] = experiment.A[features[i]]; |
2 |
26 Feb 07 |
jari |
263 |
} |
2 |
26 Feb 07 |
jari |
264 |
return subExperiment; |
2 |
26 Feb 07 |
jari |
265 |
} |
2 |
26 Feb 07 |
jari |
266 |
|
2 |
26 Feb 07 |
jari |
267 |
/** |
2 |
26 Feb 07 |
jari |
* Creates a matrix with reduced columns (samples) as during experiment clustering |
2 |
26 Feb 07 |
jari |
269 |
*/ |
2 |
26 Feb 07 |
jari |
270 |
private FloatMatrix getSubExperimentReducedCols(FloatMatrix experiment, int[] features) { |
2 |
26 Feb 07 |
jari |
271 |
FloatMatrix copyMatrix = experiment.copy(); |
2 |
26 Feb 07 |
jari |
272 |
FloatMatrix subExperiment = new FloatMatrix(features.length, copyMatrix.getColumnDimension()); |
2 |
26 Feb 07 |
jari |
273 |
for (int i=0; i<features.length; i++) { |
2 |
26 Feb 07 |
jari |
274 |
subExperiment.A[i] = copyMatrix.A[features[i]]; |
2 |
26 Feb 07 |
jari |
275 |
} |
2 |
26 Feb 07 |
jari |
276 |
subExperiment = subExperiment.transpose(); |
2 |
26 Feb 07 |
jari |
277 |
return subExperiment; |
2 |
26 Feb 07 |
jari |
278 |
} |
2 |
26 Feb 07 |
jari |
279 |
|
2 |
26 Feb 07 |
jari |
280 |
/** |
2 |
26 Feb 07 |
jari |
* Checking the result of hcl algorithm calculation. |
2 |
26 Feb 07 |
jari |
* @throws AlgorithmException, if the result is incorrect. |
2 |
26 Feb 07 |
jari |
283 |
*/ |
2 |
26 Feb 07 |
jari |
284 |
private void validate(AlgorithmData result) throws AlgorithmException { |
2 |
26 Feb 07 |
jari |
285 |
if (result.getIntArray("child-1-array") == null) { |
2 |
26 Feb 07 |
jari |
286 |
throw new AlgorithmException("parameter 'child-1-array' is null"); |
2 |
26 Feb 07 |
jari |
287 |
} |
2 |
26 Feb 07 |
jari |
288 |
if (result.getIntArray("child-2-array") == null) { |
2 |
26 Feb 07 |
jari |
289 |
throw new AlgorithmException("parameter 'child-2-array' is null"); |
2 |
26 Feb 07 |
jari |
290 |
} |
2 |
26 Feb 07 |
jari |
291 |
if (result.getIntArray("node-order") == null) { |
2 |
26 Feb 07 |
jari |
292 |
throw new AlgorithmException("parameter 'node-order' is null"); |
2 |
26 Feb 07 |
jari |
293 |
} |
2 |
26 Feb 07 |
jari |
294 |
if (result.getMatrix("height") == null) { |
2 |
26 Feb 07 |
jari |
295 |
throw new AlgorithmException("parameter 'height' is null"); |
2 |
26 Feb 07 |
jari |
296 |
} |
2 |
26 Feb 07 |
jari |
297 |
} |
2 |
26 Feb 07 |
jari |
298 |
|
2 |
26 Feb 07 |
jari |
299 |
private int[] convert2int(ArrayList source) { |
2 |
26 Feb 07 |
jari |
300 |
int[] int_matrix = new int[source.size()]; |
2 |
26 Feb 07 |
jari |
301 |
for (int i=0; i<int_matrix.length; i++) { |
2 |
26 Feb 07 |
jari |
302 |
int_matrix[i] = (int)((Float)source.get(i)).floatValue(); |
2 |
26 Feb 07 |
jari |
303 |
} |
2 |
26 Feb 07 |
jari |
304 |
return int_matrix; |
2 |
26 Feb 07 |
jari |
305 |
} |
2 |
26 Feb 07 |
jari |
306 |
|
2 |
26 Feb 07 |
jari |
307 |
private KMCluster[] calculate(int number_of_genes, int number_of_clusters, int number_of_iterations) throws AlgorithmException { |
2 |
26 Feb 07 |
jari |
308 |
int current; // index for a tuple being checking |
2 |
26 Feb 07 |
jari |
309 |
int i; |
2 |
26 Feb 07 |
jari |
310 |
int address = 0; // index for a cluster that is nearest to an object |
2 |
26 Feb 07 |
jari |
311 |
int counter = 0; |
2 |
26 Feb 07 |
jari |
312 |
float dissim[] = new float[number_of_clusters]; // dissimilarities between two data items |
2 |
26 Feb 07 |
jari |
313 |
int[] location = new int[number_of_genes]; |
2 |
26 Feb 07 |
jari |
314 |
Float[] elements = new Float[number_of_genes]; |
2 |
26 Feb 07 |
jari |
315 |
KMCluster[] clusters = new KMCluster[number_of_clusters]; |
2 |
26 Feb 07 |
jari |
316 |
for (i=0; i<clusters.length; i++) { // initialize array clusters |
2 |
26 Feb 07 |
jari |
317 |
clusters[i] = new KMCluster(); |
2 |
26 Feb 07 |
jari |
318 |
} |
2 |
26 Feb 07 |
jari |
319 |
int clusterIndex = 0; |
2 |
26 Feb 07 |
jari |
320 |
Random random = new Random(); |
2 |
26 Feb 07 |
jari |
321 |
for (i=0; i<number_of_genes; i++) { |
2 |
26 Feb 07 |
jari |
322 |
clusterIndex = (int)Math.floor(random.nextFloat()*number_of_clusters); |
2 |
26 Feb 07 |
jari |
323 |
clusterIndex = Math.min(clusterIndex, number_of_clusters-1); |
2 |
26 Feb 07 |
jari |
324 |
elements[i] = new Float(i); |
2 |
26 Feb 07 |
jari |
325 |
location[i] = clusterIndex; |
2 |
26 Feb 07 |
jari |
326 |
clusters[clusterIndex].add(elements[i]); |
2 |
26 Feb 07 |
jari |
327 |
} |
2 |
26 Feb 07 |
jari |
328 |
|
2 |
26 Feb 07 |
jari |
329 |
AlgorithmEvent event = new AlgorithmEvent(this, AlgorithmEvent.SET_UNITS, 200); |
2 |
26 Feb 07 |
jari |
330 |
fireValueChanged(event); |
2 |
26 Feb 07 |
jari |
331 |
|
2 |
26 Feb 07 |
jari |
332 |
int currentProgress = 0; |
2 |
26 Feb 07 |
jari |
333 |
int oldCurrentProgress = 0; |
2 |
26 Feb 07 |
jari |
334 |
double Factor=200/(double)(number_of_genes*number_of_iterations); |
2 |
26 Feb 07 |
jari |
335 |
for (i=0; i<number_of_clusters; i++) { |
2 |
26 Feb 07 |
jari |
336 |
clusters[i].calculateMean(); |
2 |
26 Feb 07 |
jari |
337 |
} |
2 |
26 Feb 07 |
jari |
338 |
current = 0; |
2 |
26 Feb 07 |
jari |
339 |
int iterations = 0; |
2 |
26 Feb 07 |
jari |
340 |
boolean converged = false; |
2 |
26 Feb 07 |
jari |
341 |
int reallocations = 0; |
2 |
26 Feb 07 |
jari |
342 |
while (counter != number_of_genes*number_of_iterations) { |
2 |
26 Feb 07 |
jari |
343 |
if (stop) { |
2 |
26 Feb 07 |
jari |
344 |
throw new AbortException(); |
2 |
26 Feb 07 |
jari |
345 |
} |
2 |
26 Feb 07 |
jari |
346 |
currentProgress = (int)(counter*Factor); |
2 |
26 Feb 07 |
jari |
347 |
if (currentProgress > oldCurrentProgress) { |
2 |
26 Feb 07 |
jari |
348 |
event.setId(AlgorithmEvent.PROGRESS_VALUE); |
2 |
26 Feb 07 |
jari |
349 |
event.setIntValue(currentProgress); |
2 |
26 Feb 07 |
jari |
350 |
fireValueChanged(event); |
2 |
26 Feb 07 |
jari |
351 |
oldCurrentProgress = currentProgress; |
2 |
26 Feb 07 |
jari |
352 |
} |
2 |
26 Feb 07 |
jari |
353 |
for (i=0; i<number_of_clusters; i++) { |
2 |
26 Feb 07 |
jari |
354 |
dissim[i] = ExperimentUtil.geneDistance(expMatrix, clusters[i].getMean(), current, 0, function, factor, absolute); |
2 |
26 Feb 07 |
jari |
355 |
} |
2 |
26 Feb 07 |
jari |
356 |
address = findNearest(dissim); |
2 |
26 Feb 07 |
jari |
357 |
if (address != location[current]) { |
2 |
26 Feb 07 |
jari |
358 |
reallocations++; |
2 |
26 Feb 07 |
jari |
359 |
|
2 |
26 Feb 07 |
jari |
360 |
clusters[location[current]].updateMeanForLoosingCluster((int)(elements[current].intValue())); |
2 |
26 Feb 07 |
jari |
361 |
clusters[location[current]].remove(elements[current]); |
2 |
26 Feb 07 |
jari |
362 |
|
2 |
26 Feb 07 |
jari |
363 |
clusters[address].updateMeanForWinningCluster(elements[current].intValue()); |
2 |
26 Feb 07 |
jari |
364 |
clusters[address].add(elements[current]); |
2 |
26 Feb 07 |
jari |
365 |
|
2 |
26 Feb 07 |
jari |
//clusters[location[current]].calculateMean(); |
2 |
26 Feb 07 |
jari |
//clusters[address].calculateMean(); |
2 |
26 Feb 07 |
jari |
368 |
location[current]=address; |
2 |
26 Feb 07 |
jari |
369 |
|
2 |
26 Feb 07 |
jari |
//tally the exchange of gene from old loc to current loc |
2 |
26 Feb 07 |
jari |
//this indicates that an exchange occurred on this iteration |
2 |
26 Feb 07 |
jari |
//used +1 so the first iteration is labeled as 1 for reporting to user |
2 |
26 Feb 07 |
jari |
373 |
this.clusterConvergence[location[current]] = iterations+1; |
2 |
26 Feb 07 |
jari |
374 |
this.clusterConvergence[address] = iterations+1; |
2 |
26 Feb 07 |
jari |
375 |
} |
2 |
26 Feb 07 |
jari |
376 |
current++; |
2 |
26 Feb 07 |
jari |
377 |
if (current == number_of_genes) { |
2 |
26 Feb 07 |
jari |
378 |
current = 0; |
2 |
26 Feb 07 |
jari |
379 |
} |
2 |
26 Feb 07 |
jari |
380 |
counter++; |
2 |
26 Feb 07 |
jari |
381 |
if (counter%number_of_genes == 0) { |
2 |
26 Feb 07 |
jari |
382 |
iterations++; |
2 |
26 Feb 07 |
jari |
383 |
if (reallocations == 0) { |
2 |
26 Feb 07 |
jari |
384 |
converged = true; |
2 |
26 Feb 07 |
jari |
385 |
break; |
2 |
26 Feb 07 |
jari |
386 |
} else { |
2 |
26 Feb 07 |
jari |
387 |
event.setId(AlgorithmEvent.MONITOR_VALUE); |
2 |
26 Feb 07 |
jari |
388 |
event.setIntValue(reallocations); |
2 |
26 Feb 07 |
jari |
389 |
fireValueChanged(event); |
2 |
26 Feb 07 |
jari |
390 |
reallocations = 0; |
2 |
26 Feb 07 |
jari |
391 |
} |
2 |
26 Feb 07 |
jari |
392 |
} |
2 |
26 Feb 07 |
jari |
393 |
} |
2 |
26 Feb 07 |
jari |
394 |
|
2 |
26 Feb 07 |
jari |
395 |
event.setId(AlgorithmEvent.MONITOR_VALUE); |
2 |
26 Feb 07 |
jari |
396 |
event.setIntValue(-1); |
2 |
26 Feb 07 |
jari |
397 |
fireValueChanged(event); |
2 |
26 Feb 07 |
jari |
398 |
|
2 |
26 Feb 07 |
jari |
399 |
setIterations(iterations); |
2 |
26 Feb 07 |
jari |
400 |
setConverged(converged); |
2 |
26 Feb 07 |
jari |
401 |
|
2 |
26 Feb 07 |
jari |
402 |
return clusters; |
2 |
26 Feb 07 |
jari |
403 |
} |
2 |
26 Feb 07 |
jari |
404 |
|
2 |
26 Feb 07 |
jari |
405 |
|
2 |
26 Feb 07 |
jari |
406 |
|
2 |
26 Feb 07 |
jari |
407 |
/** |
2 |
26 Feb 07 |
jari |
* Code added to allow running of K-medians |
2 |
26 Feb 07 |
jari |
409 |
*/ |
2 |
26 Feb 07 |
jari |
410 |
private KMCluster[] calculateMedians(int number_of_genes, int number_of_clusters, int number_of_iterations) throws AlgorithmException { |
2 |
26 Feb 07 |
jari |
411 |
int current; // index for a tuple being checking |
2 |
26 Feb 07 |
jari |
412 |
int i; |
2 |
26 Feb 07 |
jari |
413 |
int address = 0; // index for a cluster that is nearest to an object |
2 |
26 Feb 07 |
jari |
414 |
int counter = 0; |
2 |
26 Feb 07 |
jari |
415 |
float dissim[] = new float[number_of_clusters]; // dissimilarities between two data items |
2 |
26 Feb 07 |
jari |
416 |
int[] location = new int[number_of_genes]; |
2 |
26 Feb 07 |
jari |
417 |
Float[] elements = new Float[number_of_genes]; |
2 |
26 Feb 07 |
jari |
418 |
KMCluster[] clusters = new KMCluster[number_of_clusters]; |
2 |
26 Feb 07 |
jari |
419 |
for (i=0; i<clusters.length; i++) { // initialize array clusters |
2 |
26 Feb 07 |
jari |
420 |
clusters[i] = new KMCluster(); |
2 |
26 Feb 07 |
jari |
421 |
} |
2 |
26 Feb 07 |
jari |
422 |
int clusterIndex = 0; |
2 |
26 Feb 07 |
jari |
423 |
Random random = new Random(); |
2 |
26 Feb 07 |
jari |
424 |
for (i=0; i<number_of_genes; i++) { |
2 |
26 Feb 07 |
jari |
425 |
clusterIndex = (int)Math.floor(random.nextFloat()*number_of_clusters); |
2 |
26 Feb 07 |
jari |
426 |
clusterIndex = Math.min(clusterIndex, number_of_clusters-1); |
2 |
26 Feb 07 |
jari |
427 |
elements[i] = new Float(i); |
2 |
26 Feb 07 |
jari |
428 |
location[i] = clusterIndex; |
2 |
26 Feb 07 |
jari |
429 |
clusters[clusterIndex].add(elements[i]); |
2 |
26 Feb 07 |
jari |
430 |
} |
2 |
26 Feb 07 |
jari |
431 |
|
2 |
26 Feb 07 |
jari |
432 |
AlgorithmEvent event = new AlgorithmEvent(this, AlgorithmEvent.SET_UNITS, 200); |
2 |
26 Feb 07 |
jari |
433 |
fireValueChanged(event); |
2 |
26 Feb 07 |
jari |
434 |
|
2 |
26 Feb 07 |
jari |
435 |
int currentProgress = 0; |
2 |
26 Feb 07 |
jari |
436 |
int oldCurrentProgress = 0; |
2 |
26 Feb 07 |
jari |
437 |
double Factor=200/(double)(number_of_genes*number_of_iterations); |
2 |
26 Feb 07 |
jari |
438 |
for (i=0; i<number_of_clusters; i++) { |
2 |
26 Feb 07 |
jari |
439 |
clusters[i].calculateMedian(); //set cluster median |
2 |
26 Feb 07 |
jari |
440 |
} |
2 |
26 Feb 07 |
jari |
441 |
|
2 |
26 Feb 07 |
jari |
442 |
current = 0; |
2 |
26 Feb 07 |
jari |
443 |
int iterations = 0; |
2 |
26 Feb 07 |
jari |
444 |
boolean converged = false; |
2 |
26 Feb 07 |
jari |
445 |
int reallocations = 0; |
2 |
26 Feb 07 |
jari |
446 |
while (counter != number_of_genes*number_of_iterations) { |
2 |
26 Feb 07 |
jari |
447 |
if (stop) { |
2 |
26 Feb 07 |
jari |
448 |
throw new AbortException(); |
2 |
26 Feb 07 |
jari |
449 |
} |
2 |
26 Feb 07 |
jari |
450 |
currentProgress = (int)(counter*Factor); |
2 |
26 Feb 07 |
jari |
451 |
if (currentProgress > oldCurrentProgress) { |
2 |
26 Feb 07 |
jari |
452 |
event.setId(AlgorithmEvent.PROGRESS_VALUE); |
2 |
26 Feb 07 |
jari |
453 |
event.setIntValue(currentProgress); |
2 |
26 Feb 07 |
jari |
454 |
fireValueChanged(event); |
2 |
26 Feb 07 |
jari |
455 |
oldCurrentProgress = currentProgress; |
2 |
26 Feb 07 |
jari |
456 |
} |
2 |
26 Feb 07 |
jari |
457 |
for (i=0; i<number_of_clusters; i++) { |
2 |
26 Feb 07 |
jari |
458 |
dissim[i] = ExperimentUtil.geneDistance(expMatrix, clusters[i].getMedian(), current, 0, function, factor, absolute); |
2 |
26 Feb 07 |
jari |
459 |
} |
2 |
26 Feb 07 |
jari |
460 |
address = findNearest(dissim); |
2 |
26 Feb 07 |
jari |
461 |
if (address != location[current]) { |
2 |
26 Feb 07 |
jari |
462 |
reallocations++; |
2 |
26 Feb 07 |
jari |
463 |
clusters[location[current]].remove(elements[current]); |
2 |
26 Feb 07 |
jari |
464 |
clusters[address].add(elements[current]); |
2 |
26 Feb 07 |
jari |
465 |
clusters[location[current]].calculateMedian(); |
2 |
26 Feb 07 |
jari |
466 |
clusters[address].calculateMedian(); |
2 |
26 Feb 07 |
jari |
467 |
location[current]=address; |
2 |
26 Feb 07 |
jari |
468 |
} |
2 |
26 Feb 07 |
jari |
469 |
current++; |
2 |
26 Feb 07 |
jari |
470 |
if (current == number_of_genes) { |
2 |
26 Feb 07 |
jari |
471 |
current = 0; |
2 |
26 Feb 07 |
jari |
472 |
} |
2 |
26 Feb 07 |
jari |
473 |
counter++; |
2 |
26 Feb 07 |
jari |
474 |
if (counter%number_of_genes == 0) { |
2 |
26 Feb 07 |
jari |
475 |
iterations++; |
2 |
26 Feb 07 |
jari |
476 |
if (reallocations == 0) { |
2 |
26 Feb 07 |
jari |
477 |
converged = true; |
2 |
26 Feb 07 |
jari |
478 |
break; |
2 |
26 Feb 07 |
jari |
479 |
} else { |
2 |
26 Feb 07 |
jari |
480 |
event.setId(AlgorithmEvent.MONITOR_VALUE); |
2 |
26 Feb 07 |
jari |
481 |
event.setIntValue(reallocations); |
2 |
26 Feb 07 |
jari |
482 |
fireValueChanged(event); |
2 |
26 Feb 07 |
jari |
483 |
reallocations = 0; |
2 |
26 Feb 07 |
jari |
484 |
} |
2 |
26 Feb 07 |
jari |
485 |
} |
2 |
26 Feb 07 |
jari |
486 |
} |
2 |
26 Feb 07 |
jari |
487 |
|
2 |
26 Feb 07 |
jari |
488 |
event.setId(AlgorithmEvent.MONITOR_VALUE); |
2 |
26 Feb 07 |
jari |
489 |
event.setIntValue(-1); |
2 |
26 Feb 07 |
jari |
490 |
fireValueChanged(event); |
2 |
26 Feb 07 |
jari |
491 |
|
2 |
26 Feb 07 |
jari |
492 |
setIterations(iterations); |
2 |
26 Feb 07 |
jari |
493 |
setConverged(converged); |
2 |
26 Feb 07 |
jari |
494 |
|
2 |
26 Feb 07 |
jari |
495 |
return clusters; |
2 |
26 Feb 07 |
jari |
496 |
} |
2 |
26 Feb 07 |
jari |
497 |
|
2 |
26 Feb 07 |
jari |
498 |
|
2 |
26 Feb 07 |
jari |
499 |
|
2 |
26 Feb 07 |
jari |
500 |
|
2 |
26 Feb 07 |
jari |
501 |
private void setIterations(int iterations) { |
2 |
26 Feb 07 |
jari |
502 |
this.iterations = iterations; |
2 |
26 Feb 07 |
jari |
503 |
} |
2 |
26 Feb 07 |
jari |
504 |
|
2 |
26 Feb 07 |
jari |
505 |
private int getIterations() { |
2 |
26 Feb 07 |
jari |
506 |
return iterations; |
2 |
26 Feb 07 |
jari |
507 |
} |
2 |
26 Feb 07 |
jari |
508 |
|
2 |
26 Feb 07 |
jari |
509 |
private void setConverged(boolean converged) { |
2 |
26 Feb 07 |
jari |
510 |
this.converged = converged; |
2 |
26 Feb 07 |
jari |
511 |
} |
2 |
26 Feb 07 |
jari |
512 |
|
2 |
26 Feb 07 |
jari |
513 |
private boolean getConverged() { |
2 |
26 Feb 07 |
jari |
514 |
return converged; |
2 |
26 Feb 07 |
jari |
515 |
} |
2 |
26 Feb 07 |
jari |
516 |
|
2 |
26 Feb 07 |
jari |
// "nearest" returns address of the nearest cluster |
2 |
26 Feb 07 |
jari |
518 |
private int findNearest(float x[]) { |
2 |
26 Feb 07 |
jari |
519 |
int address = 0; |
2 |
26 Feb 07 |
jari |
520 |
float smallest = x[0]; |
2 |
26 Feb 07 |
jari |
521 |
for (int i=1; i<x.length; i++) { |
2 |
26 Feb 07 |
jari |
522 |
if (x[i] < smallest) { |
2 |
26 Feb 07 |
jari |
523 |
smallest = x[i]; |
2 |
26 Feb 07 |
jari |
524 |
address = i; |
2 |
26 Feb 07 |
jari |
525 |
} |
2 |
26 Feb 07 |
jari |
526 |
} |
2 |
26 Feb 07 |
jari |
527 |
return address; |
2 |
26 Feb 07 |
jari |
528 |
} |
2 |
26 Feb 07 |
jari |
529 |
|
2 |
26 Feb 07 |
jari |
530 |
public void abort() { |
2 |
26 Feb 07 |
jari |
531 |
stop = true; |
2 |
26 Feb 07 |
jari |
532 |
} |
2 |
26 Feb 07 |
jari |
533 |
|
2 |
26 Feb 07 |
jari |
534 |
|
2 |
26 Feb 07 |
jari |
535 |
private class KMCluster extends ArrayList { |
2 |
26 Feb 07 |
jari |
536 |
|
2 |
26 Feb 07 |
jari |
537 |
private FloatMatrix mean = new FloatMatrix(1, number_of_samples); |
2 |
26 Feb 07 |
jari |
538 |
private FloatMatrix median = new FloatMatrix(1, number_of_samples); |
2 |
26 Feb 07 |
jari |
539 |
|
2 |
26 Feb 07 |
jari |
//JB optimize Mean updates |
2 |
26 Feb 07 |
jari |
541 |
private float [] sums = new float[number_of_samples]; |
2 |
26 Feb 07 |
jari |
542 |
private int [] validNList = new int[number_of_samples]; |
2 |
26 Feb 07 |
jari |
543 |
|
2 |
26 Feb 07 |
jari |
544 |
public KMCluster() {} |
2 |
26 Feb 07 |
jari |
545 |
|
2 |
26 Feb 07 |
jari |
546 |
public void calculateMean() { |
2 |
26 Feb 07 |
jari |
547 |
float currentMean; |
2 |
26 Feb 07 |
jari |
548 |
int n = size(); |
2 |
26 Feb 07 |
jari |
549 |
float value; |
2 |
26 Feb 07 |
jari |
550 |
for (int i=0; i<number_of_samples; i++) { |
2 |
26 Feb 07 |
jari |
551 |
currentMean = 0f; |
2 |
26 Feb 07 |
jari |
552 |
validN = 0; |
2 |
26 Feb 07 |
jari |
553 |
for (int j=0; j<n; j++) { |
2 |
26 Feb 07 |
jari |
554 |
value = expMatrix.get(((Float)get(j)).intValue(), i); |
2 |
26 Feb 07 |
jari |
555 |
if (!Float.isNaN(value)) { |
2 |
26 Feb 07 |
jari |
556 |
currentMean += value; |
2 |
26 Feb 07 |
jari |
557 |
validN++; |
2 |
26 Feb 07 |
jari |
558 |
} |
2 |
26 Feb 07 |
jari |
559 |
} |
2 |
26 Feb 07 |
jari |
560 |
|
2 |
26 Feb 07 |
jari |
//jb accounting |
2 |
26 Feb 07 |
jari |
562 |
sums[i] = currentMean; |
2 |
26 Feb 07 |
jari |
563 |
validNList[i] = validN; |
2 |
26 Feb 07 |
jari |
564 |
|
2 |
26 Feb 07 |
jari |
565 |
mean.set(0, i, currentMean/(float)validN); |
2 |
26 Feb 07 |
jari |
566 |
} |
2 |
26 Feb 07 |
jari |
567 |
} |
2 |
26 Feb 07 |
jari |
568 |
|
2 |
26 Feb 07 |
jari |
569 |
public void updateMeanForLoosingCluster(int index) { |
2 |
26 Feb 07 |
jari |
// int dataIndex = (int)(((Float)get(index)).floatValue()); |
2 |
26 Feb 07 |
jari |
571 |
float [] currValues = expMatrix.A[index]; |
2 |
26 Feb 07 |
jari |
572 |
|
2 |
26 Feb 07 |
jari |
//update validNList |
2 |
26 Feb 07 |
jari |
574 |
for(int i = 0; i < number_of_samples; i++) { |
2 |
26 Feb 07 |
jari |
575 |
if(!Float.isNaN(currValues[i])) { |
2 |
26 Feb 07 |
jari |
576 |
validNList[i]--; |
2 |
26 Feb 07 |
jari |
577 |
sums[i] -= currValues[i]; |
2 |
26 Feb 07 |
jari |
578 |
mean.set(0, i, sums[i]/(float)validNList[i]); |
2 |
26 Feb 07 |
jari |
579 |
} |
2 |
26 Feb 07 |
jari |
580 |
} |
2 |
26 Feb 07 |
jari |
581 |
} |
2 |
26 Feb 07 |
jari |
582 |
|
2 |
26 Feb 07 |
jari |
583 |
|
2 |
26 Feb 07 |
jari |
584 |
public void updateMeanForWinningCluster(int index) { |
2 |
26 Feb 07 |
jari |
// int dataIndex = (int)(((Float)get(index)).floatValue()); |
2 |
26 Feb 07 |
jari |
586 |
float [] currValues = expMatrix.A[index]; |
2 |
26 Feb 07 |
jari |
587 |
|
2 |
26 Feb 07 |
jari |
//update validNList |
2 |
26 Feb 07 |
jari |
589 |
for(int i = 0; i < number_of_samples; i++) { |
2 |
26 Feb 07 |
jari |
590 |
if(!Float.isNaN(currValues[i])) { |
2 |
26 Feb 07 |
jari |
591 |
validNList[i]++; |
2 |
26 Feb 07 |
jari |
592 |
sums[i] += currValues[i]; |
2 |
26 Feb 07 |
jari |
593 |
mean.set(0, i, sums[i]/(float)validNList[i]); |
2 |
26 Feb 07 |
jari |
594 |
} |
2 |
26 Feb 07 |
jari |
595 |
} |
2 |
26 Feb 07 |
jari |
596 |
} |
2 |
26 Feb 07 |
jari |
597 |
|
2 |
26 Feb 07 |
jari |
598 |
public FloatMatrix getMean() { |
2 |
26 Feb 07 |
jari |
599 |
return mean; |
2 |
26 Feb 07 |
jari |
600 |
} |
2 |
26 Feb 07 |
jari |
601 |
|
2 |
26 Feb 07 |
jari |
602 |
|
2 |
26 Feb 07 |
jari |
//JCB for K-means |
2 |
26 Feb 07 |
jari |
604 |
private void calculateMedian(){ |
2 |
26 Feb 07 |
jari |
605 |
float currentMedian; |
2 |
26 Feb 07 |
jari |
606 |
float [] values; |
2 |
26 Feb 07 |
jari |
607 |
int numberOfValidValues; |
2 |
26 Feb 07 |
jari |
608 |
|
2 |
26 Feb 07 |
jari |
609 |
for(int i = 0; i < number_of_samples; i++){ |
2 |
26 Feb 07 |
jari |
610 |
values = new float[size()]; |
2 |
26 Feb 07 |
jari |
611 |
numberOfValidValues = getValues(values, i); |
2 |
26 Feb 07 |
jari |
612 |
|
2 |
26 Feb 07 |
jari |
613 |
if(numberOfValidValues == 0) |
2 |
26 Feb 07 |
jari |
614 |
break; |
2 |
26 Feb 07 |
jari |
615 |
|
2 |
26 Feb 07 |
jari |
// median.set(0, i, getElementMedian(values, 0, numberOfValidValues-1, (int)(numberOfValidValues/2.0))); |
2 |
26 Feb 07 |
jari |
617 |
median.set(0, i, computeMedian(values)); |
2 |
26 Feb 07 |
jari |
618 |
} |
2 |
26 Feb 07 |
jari |
619 |
} |
2 |
26 Feb 07 |
jari |
620 |
|
2 |
26 Feb 07 |
jari |
621 |
|
2 |
26 Feb 07 |
jari |
622 |
/* |
2 |
26 Feb 07 |
jari |
* Compute the median value in an array. |
2 |
26 Feb 07 |
jari |
624 |
* |
2 |
26 Feb 07 |
jari |
* Sorts the array as a side effect. |
2 |
26 Feb 07 |
jari |
626 |
* |
2 |
26 Feb 07 |
jari |
* Non-recursive solution |
2 |
26 Feb 07 |
jari |
628 |
*/ |
2 |
26 Feb 07 |
jari |
629 |
private float computeMedian(float[] array) { |
2 |
26 Feb 07 |
jari |
630 |
int numberOfItems; |
2 |
26 Feb 07 |
jari |
631 |
float returnValue; |
2 |
26 Feb 07 |
jari |
632 |
|
2 |
26 Feb 07 |
jari |
// Sort the array. |
2 |
26 Feb 07 |
jari |
634 |
Arrays.sort(array); |
2 |
26 Feb 07 |
jari |
635 |
numberOfItems = array.length; |
2 |
26 Feb 07 |
jari |
636 |
if (numberOfItems % 2 == 1) { |
2 |
26 Feb 07 |
jari |
// If there are an odd number of elements, return the middle one. |
2 |
26 Feb 07 |
jari |
638 |
returnValue = array[numberOfItems/2]; |
2 |
26 Feb 07 |
jari |
639 |
} else { |
2 |
26 Feb 07 |
jari |
// Otherwise, return the average of the two middle ones. |
2 |
26 Feb 07 |
jari |
641 |
returnValue = array[numberOfItems/2 - 1]; |
2 |
26 Feb 07 |
jari |
642 |
returnValue += array[numberOfItems/2]; |
2 |
26 Feb 07 |
jari |
643 |
returnValue /= 2.0; |
2 |
26 Feb 07 |
jari |
644 |
} |
2 |
26 Feb 07 |
jari |
645 |
return returnValue; |
2 |
26 Feb 07 |
jari |
646 |
} |
2 |
26 Feb 07 |
jari |
647 |
|
2 |
26 Feb 07 |
jari |
648 |
/** |
2 |
26 Feb 07 |
jari |
* strips values to use to calculate median |
2 |
26 Feb 07 |
jari |
650 |
*/ |
2 |
26 Feb 07 |
jari |
651 |
private int getValues(float [] values, int sampleIndex){ |
2 |
26 Feb 07 |
jari |
652 |
float currVal; |
2 |
26 Feb 07 |
jari |
653 |
int currIndex = 0; |
2 |
26 Feb 07 |
jari |
654 |
int numberOfValues = 0; |
2 |
26 Feb 07 |
jari |
655 |
|
2 |
26 Feb 07 |
jari |
656 |
|
2 |
26 Feb 07 |
jari |
657 |
for(int i = 0; i < values.length; i++){ |
2 |
26 Feb 07 |
jari |
658 |
currVal = expMatrix.get(((Float)get(i)).intValue(), sampleIndex); |
2 |
26 Feb 07 |
jari |
659 |
if(!Float.isNaN(currVal)){ |
2 |
26 Feb 07 |
jari |
660 |
values[currIndex] = currVal; |
2 |
26 Feb 07 |
jari |
661 |
numberOfValues++; |
2 |
26 Feb 07 |
jari |
662 |
currIndex++; |
2 |
26 Feb 07 |
jari |
663 |
} |
2 |
26 Feb 07 |
jari |
664 |
} |
2 |
26 Feb 07 |
jari |
665 |
return numberOfValues; |
2 |
26 Feb 07 |
jari |
666 |
} |
2 |
26 Feb 07 |
jari |
667 |
|
2 |
26 Feb 07 |
jari |
668 |
|
2 |
26 Feb 07 |
jari |
//JCB recursive 'randomized' method for getting median |
2 |
26 Feb 07 |
jari |
//Uses randomized partition |
2 |
26 Feb 07 |
jari |
671 |
// |
2 |
26 Feb 07 |
jari |
//Used computeMedian(float [] array) since it handles odd and even arrays properly |
2 |
26 Feb 07 |
jari |
673 |
// |
2 |
26 Feb 07 |
jari |
674 |
public float getElementMedian(float [] array, int start, int end, int medNum){ |
2 |
26 Feb 07 |
jari |
675 |
if(start == end) |
2 |
26 Feb 07 |
jari |
676 |
return array[start]; |
2 |
26 Feb 07 |
jari |
677 |
|
2 |
26 Feb 07 |
jari |
678 |
int part = randomPartition(array, start, end); |
2 |
26 Feb 07 |
jari |
679 |
int k = part - start + 1; |
2 |
26 Feb 07 |
jari |
680 |
if(medNum <= k) |
2 |
26 Feb 07 |
jari |
681 |
return getElementMedian(array, start, part, medNum); |
2 |
26 Feb 07 |
jari |
682 |
else |
2 |
26 Feb 07 |
jari |
683 |
return getElementMedian(array, part+1, end, medNum-k); |
2 |
26 Feb 07 |
jari |
684 |
} |
2 |
26 Feb 07 |
jari |
685 |
|
2 |
26 Feb 07 |
jari |
686 |
private int randomPartition(float [] array, int start, int end){ |
2 |
26 Feb 07 |
jari |
687 |
int i = (int)((end - start + 1) * Math.random()) + start; |
2 |
26 Feb 07 |
jari |
688 |
|
2 |
26 Feb 07 |
jari |
// System.out.println(array.length+" "+i+" "+start); |
2 |
26 Feb 07 |
jari |
690 |
swap(array, i, start); |
2 |
26 Feb 07 |
jari |
691 |
|
2 |
26 Feb 07 |
jari |
692 |
return partition(array, start, end); |
2 |
26 Feb 07 |
jari |
693 |
} |
2 |
26 Feb 07 |
jari |
694 |
|
2 |
26 Feb 07 |
jari |
695 |
private void swap(float [] array, int i, int j){ |
2 |
26 Feb 07 |
jari |
696 |
float temp = array[i]; |
2 |
26 Feb 07 |
jari |
697 |
array[i] = array[j]; |
2 |
26 Feb 07 |
jari |
698 |
array[j] = temp; |
2 |
26 Feb 07 |
jari |
699 |
} |
2 |
26 Feb 07 |
jari |
700 |
|
2 |
26 Feb 07 |
jari |
701 |
|
2 |
26 Feb 07 |
jari |
702 |
public int partition(float [] array, int start, int end){ |
2 |
26 Feb 07 |
jari |
703 |
|
2 |
26 Feb 07 |
jari |
//pivot value |
2 |
26 Feb 07 |
jari |
705 |
float x = array[start]; |
2 |
26 Feb 07 |
jari |
706 |
|
2 |
26 Feb 07 |
jari |
707 |
int i = start - 1; |
2 |
26 Feb 07 |
jari |
708 |
int j = end + 1; |
2 |
26 Feb 07 |
jari |
709 |
|
2 |
26 Feb 07 |
jari |
710 |
int currExpIndex = 0; |
2 |
26 Feb 07 |
jari |
711 |
|
2 |
26 Feb 07 |
jari |
712 |
while(true){ |
2 |
26 Feb 07 |
jari |
713 |
do{ |
2 |
26 Feb 07 |
jari |
714 |
j--; |
2 |
26 Feb 07 |
jari |
715 |
}while(array[j] > x); |
2 |
26 Feb 07 |
jari |
716 |
do{ |
2 |
26 Feb 07 |
jari |
717 |
i++; |
2 |
26 Feb 07 |
jari |
718 |
}while(array[i] < x); |
2 |
26 Feb 07 |
jari |
719 |
|
2 |
26 Feb 07 |
jari |
720 |
if( i < j ) |
2 |
26 Feb 07 |
jari |
721 |
swap(array, i, j); |
2 |
26 Feb 07 |
jari |
722 |
else{ |
2 |
26 Feb 07 |
jari |
723 |
return j; |
2 |
26 Feb 07 |
jari |
724 |
} |
2 |
26 Feb 07 |
jari |
725 |
|
2 |
26 Feb 07 |
jari |
726 |
} |
2 |
26 Feb 07 |
jari |
727 |
} |
2 |
26 Feb 07 |
jari |
728 |
|
2 |
26 Feb 07 |
jari |
729 |
|
2 |
26 Feb 07 |
jari |
//JCB for k-means |
2 |
26 Feb 07 |
jari |
731 |
public FloatMatrix getMedian(){ |
2 |
26 Feb 07 |
jari |
732 |
return median; |
2 |
26 Feb 07 |
jari |
733 |
} |
2 |
26 Feb 07 |
jari |
734 |
} |
2 |
26 Feb 07 |
jari |
735 |
|
2 |
26 Feb 07 |
jari |
736 |
|
2 |
26 Feb 07 |
jari |
737 |
|
2 |
26 Feb 07 |
jari |
738 |
private FloatMatrix getMeans(KMCluster[] clusters) { |
2 |
26 Feb 07 |
jari |
739 |
FloatMatrix means = new FloatMatrix(clusters.length, number_of_samples); |
2 |
26 Feb 07 |
jari |
740 |
FloatMatrix mean; |
2 |
26 Feb 07 |
jari |
741 |
for (int i=0; i<clusters.length; i++) { |
2 |
26 Feb 07 |
jari |
742 |
mean = clusters[i].getMean(); |
2 |
26 Feb 07 |
jari |
743 |
means.A[i] = mean.A[0]; |
2 |
26 Feb 07 |
jari |
744 |
} |
2 |
26 Feb 07 |
jari |
745 |
return means; |
2 |
26 Feb 07 |
jari |
746 |
} |
2 |
26 Feb 07 |
jari |
747 |
|
2 |
26 Feb 07 |
jari |
748 |
private FloatMatrix getMedians(KMCluster[] clusters) { |
2 |
26 Feb 07 |
jari |
749 |
FloatMatrix medians = new FloatMatrix(clusters.length, number_of_samples); |
2 |
26 Feb 07 |
jari |
750 |
FloatMatrix median; |
2 |
26 Feb 07 |
jari |
751 |
for (int i=0; i<clusters.length; i++) { |
2 |
26 Feb 07 |
jari |
752 |
median = clusters[i].getMedian(); |
2 |
26 Feb 07 |
jari |
753 |
medians.A[i] = median.A[0]; |
2 |
26 Feb 07 |
jari |
754 |
} |
2 |
26 Feb 07 |
jari |
755 |
return medians; |
2 |
26 Feb 07 |
jari |
756 |
} |
2 |
26 Feb 07 |
jari |
757 |
|
2 |
26 Feb 07 |
jari |
758 |
|
2 |
26 Feb 07 |
jari |
759 |
private FloatMatrix getVariances(KMCluster[] clusters, FloatMatrix means) { |
2 |
26 Feb 07 |
jari |
760 |
final int rows = means.getRowDimension(); |
2 |
26 Feb 07 |
jari |
761 |
final int columns = means.getColumnDimension(); |
2 |
26 Feb 07 |
jari |
762 |
FloatMatrix variances = new FloatMatrix(rows, columns); |
2 |
26 Feb 07 |
jari |
763 |
for (int row=0; row<rows; row++) { |
2 |
26 Feb 07 |
jari |
764 |
for (int column=0; column<columns; column++) { |
2 |
26 Feb 07 |
jari |
765 |
variances.set(row, column, getSampleVariance(clusters[row], column, means.get(row, column))); |
2 |
26 Feb 07 |
jari |
766 |
} |
2 |
26 Feb 07 |
jari |
767 |
} |
2 |
26 Feb 07 |
jari |
768 |
return variances; |
2 |
26 Feb 07 |
jari |
769 |
} |
2 |
26 Feb 07 |
jari |
770 |
|
2 |
26 Feb 07 |
jari |
771 |
private float getSampleNormalizedSum(KMCluster cluster, int column, float mean) { |
2 |
26 Feb 07 |
jari |
772 |
final int size = cluster.size(); |
2 |
26 Feb 07 |
jari |
773 |
float sum = 0f; |
2 |
26 Feb 07 |
jari |
774 |
float value; |
2 |
26 Feb 07 |
jari |
775 |
validN = 0; |
2 |
26 Feb 07 |
jari |
776 |
for (int i=0; i<size; i++) { |
2 |
26 Feb 07 |
jari |
777 |
value = expMatrix.get(((Float)cluster.get(i)).intValue(), column); |
2 |
26 Feb 07 |
jari |
778 |
if (!Float.isNaN(value)) { |
2 |
26 Feb 07 |
jari |
779 |
sum += Math.pow(value-mean, 2); |
2 |
26 Feb 07 |
jari |
780 |
validN++; |
2 |
26 Feb 07 |
jari |
781 |
} |
2 |
26 Feb 07 |
jari |
782 |
} |
2 |
26 Feb 07 |
jari |
783 |
return sum; |
2 |
26 Feb 07 |
jari |
784 |
} |
2 |
26 Feb 07 |
jari |
785 |
|
2 |
26 Feb 07 |
jari |
786 |
private float getSampleVariance(KMCluster cluster, int column, float mean) { |
2 |
26 Feb 07 |
jari |
787 |
if(validN > 1) |
2 |
26 Feb 07 |
jari |
788 |
return(float)Math.sqrt(getSampleNormalizedSum(cluster, column, mean)/(float)(validN-1)); |
2 |
26 Feb 07 |
jari |
789 |
else |
2 |
26 Feb 07 |
jari |
790 |
return 0f; |
2 |
26 Feb 07 |
jari |
791 |
} |
2 |
26 Feb 07 |
jari |
792 |
|
2 |
26 Feb 07 |
jari |
793 |
|
2 |
26 Feb 07 |
jari |
794 |
|
2 |
26 Feb 07 |
jari |
/*private void printClusters(KMCluster[] clusters) { |
2 |
26 Feb 07 |
jari |
for (int i=0; i<clusters.length; i++) { |
2 |
26 Feb 07 |
jari |
clusters[i].getMean().print(5, 2); |
2 |
26 Feb 07 |
jari |
798 |
} |
2 |
26 Feb 07 |
jari |
799 |
}*/ |
2 |
26 Feb 07 |
jari |
800 |
} |