2 |
26 Feb 07 |
jari |
1 |
/* |
2 |
26 Feb 07 |
jari |
Copyright @ 1999-2003, The Institute for Genomic Research (TIGR). |
2 |
26 Feb 07 |
jari |
All rights reserved. |
2 |
26 Feb 07 |
jari |
4 |
*/ |
2 |
26 Feb 07 |
jari |
5 |
/* |
2 |
26 Feb 07 |
jari |
* $RCSfile: KNNClassify.java,v $ |
2 |
26 Feb 07 |
jari |
 * $Revision: 1.9 $
 * $Date: 2005/03/10 15:45:20 $
 * $Author: braistedj $
2 |
26 Feb 07 |
jari |
* $State: Exp $ |
2 |
26 Feb 07 |
jari |
17 |
*/ |
2 |
26 Feb 07 |
jari |
18 |
/* |
2 |
26 Feb 07 |
jari |
* KNNClassify.java |
2 |
26 Feb 07 |
jari |
20 |
* |
2 |
26 Feb 07 |
jari |
* Created on September 2, 2003, 4:37 PM |
2 |
26 Feb 07 |
jari |
22 |
*/ |
2 |
26 Feb 07 |
jari |
23 |
|
2 |
26 Feb 07 |
jari |
24 |
package org.tigr.microarray.mev.cluster.algorithm.impl; |
2 |
26 Feb 07 |
jari |
//THIS IS A TEST COMMENT |
2 |
26 Feb 07 |
jari |
//TEST COMMENT FOR V 1.6 |
2 |
26 Feb 07 |
jari |
27 |
import java.util.Random; |
2 |
26 Feb 07 |
jari |
28 |
import java.util.Vector; |
2 |
26 Feb 07 |
jari |
29 |
|
2 |
26 Feb 07 |
jari |
30 |
import org.tigr.microarray.mev.cluster.Cluster; |
2 |
26 Feb 07 |
jari |
31 |
import org.tigr.microarray.mev.cluster.Node; |
2 |
26 Feb 07 |
jari |
32 |
import org.tigr.microarray.mev.cluster.NodeList; |
2 |
26 Feb 07 |
jari |
33 |
import org.tigr.microarray.mev.cluster.NodeValue; |
2 |
26 Feb 07 |
jari |
34 |
import org.tigr.microarray.mev.cluster.NodeValueList; |
2 |
26 Feb 07 |
jari |
35 |
import org.tigr.microarray.mev.cluster.algorithm.AbortException; |
2 |
26 Feb 07 |
jari |
36 |
import org.tigr.microarray.mev.cluster.algorithm.AbstractAlgorithm; |
2 |
26 Feb 07 |
jari |
37 |
import org.tigr.microarray.mev.cluster.algorithm.AlgorithmData; |
2 |
26 Feb 07 |
jari |
38 |
import org.tigr.microarray.mev.cluster.algorithm.AlgorithmEvent; |
2 |
26 Feb 07 |
jari |
39 |
import org.tigr.microarray.mev.cluster.algorithm.AlgorithmException; |
2 |
26 Feb 07 |
jari |
40 |
import org.tigr.microarray.mev.cluster.algorithm.AlgorithmParameters; |
2 |
26 Feb 07 |
jari |
41 |
import org.tigr.util.FloatMatrix; |
2 |
26 Feb 07 |
jari |
42 |
import org.tigr.util.QSort; |
2 |
26 Feb 07 |
jari |
43 |
|
2 |
26 Feb 07 |
jari |
44 |
/** |
2 |
26 Feb 07 |
jari |
45 |
* |
2 |
26 Feb 07 |
jari |
* @author nbhagaba |
2 |
26 Feb 07 |
jari |
47 |
*/ |
2 |
26 Feb 07 |
jari |
48 |
public class KNNClassify extends AbstractAlgorithm { |
2 |
26 Feb 07 |
jari |
49 |
|
2 |
26 Feb 07 |
jari |
50 |
private boolean stop = false; |
2 |
26 Feb 07 |
jari |
51 |
private int function; |
2 |
26 Feb 07 |
jari |
52 |
private float factor; |
2 |
26 Feb 07 |
jari |
53 |
private boolean absolute; |
2 |
26 Feb 07 |
jari |
54 |
private FloatMatrix expMatrix; |
2 |
26 Feb 07 |
jari |
55 |
|
2 |
26 Feb 07 |
jari |
56 |
private Vector[] clusters, filteredClassifiersByClass; |
2 |
26 Feb 07 |
jari |
57 |
private int k; // # of clusters |
2 |
26 Feb 07 |
jari |
58 |
|
2 |
26 Feb 07 |
jari |
59 |
private int numRows, numCols; |
2 |
26 Feb 07 |
jari |
60 |
private int usedNumNeibs; |
2 |
26 Feb 07 |
jari |
61 |
|
2 |
26 Feb 07 |
jari |
62 |
private boolean validate, classifyGenes, useVarianceFilter, useCorrelFilter; |
2 |
26 Feb 07 |
jari |
63 |
private int numClasses, numVarFilteredVectors, numNeighbors, numPerms, postVarClassSetSize, postVarDataSetSize, postCorrDataSetSize, origDataSetSize, origClassSetSize; |
2 |
26 Feb 07 |
jari |
64 |
private double correlPValue; |
2 |
26 Feb 07 |
jari |
65 |
private int[] classIndices, classes; |
2 |
26 Feb 07 |
jari |
//private int[][] filteredClassifiersByClass; |
2 |
26 Feb 07 |
jari |
67 |
|
2 |
26 Feb 07 |
jari |
68 |
private Vector rowsInAnalysis, filteredClassifierSet, filteredClasses; |
2 |
26 Feb 07 |
jari |
69 |
|
2 |
26 Feb 07 |
jari |
70 |
private int hcl_function; |
2 |
26 Feb 07 |
jari |
71 |
private boolean hcl_absolute; |
2 |
26 Feb 07 |
jari |
72 |
|
2 |
26 Feb 07 |
jari |
//AlgorithmEvent event, event2; |
2 |
26 Feb 07 |
jari |
74 |
|
2 |
26 Feb 07 |
jari |
/** This method should interrupt the calculation. |
2 |
26 Feb 07 |
jari |
76 |
*/ |
2 |
26 Feb 07 |
jari |
77 |
|
2 |
26 Feb 07 |
jari |
78 |
|
2 |
26 Feb 07 |
jari |
/** This method execute calculation and return result, |
2 |
26 Feb 07 |
jari |
* stored in <code>AlgorithmData</code> class. |
2 |
26 Feb 07 |
jari |
81 |
* |
2 |
26 Feb 07 |
jari |
* @param data the data to be calculated. |
2 |
26 Feb 07 |
jari |
83 |
*/ |
2 |
26 Feb 07 |
jari |
84 |
public AlgorithmData execute(AlgorithmData data) throws AlgorithmException { |
2 |
26 Feb 07 |
jari |
85 |
AlgorithmParameters map = data.getParams(); |
2 |
26 Feb 07 |
jari |
86 |
function = map.getInt("distance-function", EUCLIDEAN); |
2 |
26 Feb 07 |
jari |
87 |
factor = map.getFloat("distance-factor", 1.0f); |
2 |
26 Feb 07 |
jari |
88 |
absolute = map.getBoolean("distance-absolute", false); |
2 |
26 Feb 07 |
jari |
89 |
|
2 |
26 Feb 07 |
jari |
90 |
boolean hierarchical_tree = map.getBoolean("hierarchical-tree", false); |
2 |
26 Feb 07 |
jari |
91 |
int method_linkage = map.getInt("method-linkage", 0); |
2 |
26 Feb 07 |
jari |
92 |
boolean calculate_genes = map.getBoolean("calculate-genes", false); |
2 |
26 Feb 07 |
jari |
93 |
boolean calculate_experiments = map.getBoolean("calculate-experiments", false); |
2 |
26 Feb 07 |
jari |
94 |
|
2 |
26 Feb 07 |
jari |
95 |
hcl_function = map.getInt("hcl-distance-function", EUCLIDEAN); |
2 |
26 Feb 07 |
jari |
96 |
hcl_absolute = map.getBoolean("hcl-distance-absolute", false); |
2 |
26 Feb 07 |
jari |
97 |
|
2 |
26 Feb 07 |
jari |
98 |
this.expMatrix = data.getMatrix("experiment"); |
2 |
26 Feb 07 |
jari |
99 |
|
2 |
26 Feb 07 |
jari |
100 |
numRows = this.expMatrix.getRowDimension(); |
2 |
26 Feb 07 |
jari |
101 |
numCols = this.expMatrix.getColumnDimension(); |
2 |
26 Feb 07 |
jari |
102 |
|
2 |
26 Feb 07 |
jari |
103 |
validate = map.getBoolean("validate", false); |
2 |
26 Feb 07 |
jari |
104 |
|
2 |
26 Feb 07 |
jari |
105 |
if (!validate) { |
2 |
26 Feb 07 |
jari |
106 |
classifyGenes = map.getBoolean("classifyGenes", true); |
2 |
26 Feb 07 |
jari |
107 |
useVarianceFilter = map.getBoolean("useVarianceFilter", false); |
2 |
26 Feb 07 |
jari |
108 |
useCorrelFilter = map.getBoolean("useCorrelFilter", false); |
2 |
26 Feb 07 |
jari |
109 |
if (useCorrelFilter) { |
2 |
26 Feb 07 |
jari |
110 |
correlPValue = map.getFloat("correlPValue", 0.01f); |
2 |
26 Feb 07 |
jari |
111 |
numPerms = map.getInt("numPerms", 1000); |
2 |
26 Feb 07 |
jari |
112 |
} |
2 |
26 Feb 07 |
jari |
113 |
numClasses = map.getInt("numClasses", 5); |
2 |
26 Feb 07 |
jari |
114 |
numNeighbors = map.getInt("numNeighbors", 3); |
2 |
26 Feb 07 |
jari |
115 |
classIndices = data.getIntArray("classIndices"); |
2 |
26 Feb 07 |
jari |
116 |
classes = data.getIntArray("classes"); |
2 |
26 Feb 07 |
jari |
117 |
numVarFilteredVectors = map.getInt("numVarFilteredVectors", numRows); |
2 |
26 Feb 07 |
jari |
118 |
/* |
2 |
26 Feb 07 |
jari |
if (classifyGenes) { |
2 |
26 Feb 07 |
jari |
if (useVarianceFilter) { |
2 |
26 Feb 07 |
jari |
numVarFilteredVectors = map.getInt("numVarFilteredVectors", numGenes); |
2 |
26 Feb 07 |
jari |
122 |
} |
2 |
26 Feb 07 |
jari |
} else {// if (!classifyGenes) |
2 |
26 Feb 07 |
jari |
if (useVarianceFilter) { |
2 |
26 Feb 07 |
jari |
numVarFilteredVectors = map.getInt("numVarFilteredVectors", numExps); |
2 |
26 Feb 07 |
jari |
126 |
} |
2 |
26 Feb 07 |
jari |
127 |
} |
2 |
26 Feb 07 |
jari |
128 |
*/ |
2 |
26 Feb 07 |
jari |
129 |
|
2 |
26 Feb 07 |
jari |
130 |
AlgorithmEvent event = new AlgorithmEvent(this, AlgorithmEvent.SET_UNITS, numRows); |
2 |
26 Feb 07 |
jari |
131 |
fireValueChanged(event); |
2 |
26 Feb 07 |
jari |
132 |
event.setId(AlgorithmEvent.PROGRESS_VALUE); |
2 |
26 Feb 07 |
jari |
133 |
|
2 |
26 Feb 07 |
jari |
134 |
rowsInAnalysis = new Vector(); |
2 |
26 Feb 07 |
jari |
135 |
for (int i = 0; i < numRows; i++) { |
2 |
26 Feb 07 |
jari |
136 |
rowsInAnalysis.add(new Integer(i)); |
2 |
26 Feb 07 |
jari |
137 |
} |
2 |
26 Feb 07 |
jari |
138 |
|
2 |
26 Feb 07 |
jari |
139 |
filteredClassifierSet = new Vector(); |
2 |
26 Feb 07 |
jari |
140 |
filteredClasses = new Vector(); |
2 |
26 Feb 07 |
jari |
141 |
for (int i = 0; i < classIndices.length; i++) { |
2 |
26 Feb 07 |
jari |
142 |
filteredClassifierSet.add(new Integer(classIndices[i])); |
2 |
26 Feb 07 |
jari |
143 |
filteredClasses.add(new Integer(classes[i])); |
2 |
26 Feb 07 |
jari |
144 |
} |
2 |
26 Feb 07 |
jari |
145 |
|
2 |
26 Feb 07 |
jari |
146 |
/* |
2 |
26 Feb 07 |
jari |
System.out.println("Before variance filter:"); |
2 |
26 Feb 07 |
jari |
System.out.println("rowsInAnalysis.size() = " + rowsInAnalysis.size() + ", filteredClassifierSet.size() = " + filteredClassifierSet.size()); |
2 |
26 Feb 07 |
jari |
149 |
*/ |
2 |
26 Feb 07 |
jari |
150 |
|
2 |
26 Feb 07 |
jari |
151 |
origDataSetSize = rowsInAnalysis.size(); |
2 |
26 Feb 07 |
jari |
152 |
origClassSetSize = filteredClassifierSet.size(); |
2 |
26 Feb 07 |
jari |
153 |
|
2 |
26 Feb 07 |
jari |
154 |
if (useVarianceFilter) { |
2 |
26 Feb 07 |
jari |
155 |
double[] variances = new double[rowsInAnalysis.size()]; |
2 |
26 Feb 07 |
jari |
156 |
|
2 |
26 Feb 07 |
jari |
157 |
for (int i = 0; i < variances.length; i++) { |
2 |
26 Feb 07 |
jari |
158 |
if (stop) { |
2 |
26 Feb 07 |
jari |
159 |
throw new AbortException(); |
2 |
26 Feb 07 |
jari |
160 |
} |
2 |
26 Feb 07 |
jari |
161 |
event.setIntValue(i); |
2 |
26 Feb 07 |
jari |
162 |
event.setDescription("Calculating variance of element = " + (i + 1)); |
2 |
26 Feb 07 |
jari |
163 |
fireValueChanged(event); |
2 |
26 Feb 07 |
jari |
//System.out.println("Calculating variance of gene = " + (i + 1)); |
2 |
26 Feb 07 |
jari |
165 |
variances[i] = getVar(i); |
2 |
26 Feb 07 |
jari |
166 |
} |
2 |
26 Feb 07 |
jari |
167 |
|
2 |
26 Feb 07 |
jari |
168 |
QSort sortVariances = new QSort(variances); |
2 |
26 Feb 07 |
jari |
169 |
int[] sortedIndices = sortVariances.getOrigIndx(); |
2 |
26 Feb 07 |
jari |
170 |
int[] sortDesc = reverse(sortedIndices); |
2 |
26 Feb 07 |
jari |
171 |
|
2 |
26 Feb 07 |
jari |
172 |
rowsInAnalysis = new Vector(); |
2 |
26 Feb 07 |
jari |
173 |
for (int i = 0; i < numVarFilteredVectors; i++) { |
2 |
26 Feb 07 |
jari |
174 |
rowsInAnalysis.add(new Integer(sortDesc[i])); |
2 |
26 Feb 07 |
jari |
175 |
} |
2 |
26 Feb 07 |
jari |
176 |
|
2 |
26 Feb 07 |
jari |
177 |
filteredClassifierSet = new Vector(); |
2 |
26 Feb 07 |
jari |
178 |
filteredClasses = new Vector(); |
2 |
26 Feb 07 |
jari |
179 |
|
2 |
26 Feb 07 |
jari |
180 |
for (int i = 0; i < classIndices.length; i++) { |
2 |
26 Feb 07 |
jari |
181 |
if (isFoundInVector(classIndices[i], rowsInAnalysis)) { |
2 |
26 Feb 07 |
jari |
182 |
filteredClassifierSet.add(new Integer(classIndices[i])); |
2 |
26 Feb 07 |
jari |
183 |
filteredClasses.add(new Integer(classes[i])); |
2 |
26 Feb 07 |
jari |
//rowsInAnalysis.remove(new Integer(classIndices[i])); |
2 |
26 Feb 07 |
jari |
185 |
} |
2 |
26 Feb 07 |
jari |
186 |
} |
2 |
26 Feb 07 |
jari |
187 |
|
2 |
26 Feb 07 |
jari |
188 |
} |
2 |
26 Feb 07 |
jari |
189 |
|
2 |
26 Feb 07 |
jari |
190 |
for (int i = 0; i < classIndices.length; i++) { |
2 |
26 Feb 07 |
jari |
191 |
if (isFoundInVector(classIndices[i], rowsInAnalysis)) { |
2 |
26 Feb 07 |
jari |
192 |
rowsInAnalysis.remove(new Integer(classIndices[i])); |
2 |
26 Feb 07 |
jari |
193 |
} |
2 |
26 Feb 07 |
jari |
194 |
} |
2 |
26 Feb 07 |
jari |
195 |
|
2 |
26 Feb 07 |
jari |
196 |
/* |
2 |
26 Feb 07 |
jari |
System.out.println("After variance filter: "); |
2 |
26 Feb 07 |
jari |
System.out.println("rowsInAnalysis.size() = " + rowsInAnalysis.size() + ", filteredClassifierSet.size() = " + filteredClassifierSet.size()); |
2 |
26 Feb 07 |
jari |
199 |
*/ |
2 |
26 Feb 07 |
jari |
200 |
postVarDataSetSize = rowsInAnalysis.size(); |
2 |
26 Feb 07 |
jari |
201 |
postVarClassSetSize = filteredClassifierSet.size(); |
2 |
26 Feb 07 |
jari |
202 |
|
2 |
26 Feb 07 |
jari |
203 |
AlgorithmEvent event2 = new AlgorithmEvent(this, AlgorithmEvent.SET_UNITS, rowsInAnalysis.size()); |
2 |
26 Feb 07 |
jari |
204 |
fireValueChanged(event2); |
2 |
26 Feb 07 |
jari |
205 |
event2.setId(AlgorithmEvent.PROGRESS_VALUE); |
2 |
26 Feb 07 |
jari |
206 |
|
2 |
26 Feb 07 |
jari |
207 |
if (useCorrelFilter) { |
2 |
26 Feb 07 |
jari |
208 |
Vector correlFilteredRows = new Vector(); |
2 |
26 Feb 07 |
jari |
209 |
for (int i = 0; i < rowsInAnalysis.size(); i++) { |
2 |
26 Feb 07 |
jari |
210 |
if (stop) { |
2 |
26 Feb 07 |
jari |
211 |
throw new AbortException(); |
2 |
26 Feb 07 |
jari |
212 |
} |
2 |
26 Feb 07 |
jari |
213 |
event2.setIntValue(i); |
2 |
26 Feb 07 |
jari |
214 |
event2.setDescription("Applying correlation filter: element " + (i + 1)); |
2 |
26 Feb 07 |
jari |
215 |
fireValueChanged(event2); |
2 |
26 Feb 07 |
jari |
216 |
int currentRow = ((Integer)(rowsInAnalysis.get(i))).intValue(); |
2 |
26 Feb 07 |
jari |
217 |
Random rnd1 = new Random(); |
2 |
26 Feb 07 |
jari |
218 |
if (passesCorrelationFilter(currentRow, filteredClassifierSet, correlPValue, numPerms, rnd1.nextLong()) ) { |
2 |
26 Feb 07 |
jari |
219 |
correlFilteredRows.add(new Integer(currentRow)); |
2 |
26 Feb 07 |
jari |
220 |
} |
2 |
26 Feb 07 |
jari |
221 |
} |
2 |
26 Feb 07 |
jari |
222 |
|
2 |
26 Feb 07 |
jari |
223 |
rowsInAnalysis = new Vector(); |
2 |
26 Feb 07 |
jari |
224 |
|
2 |
26 Feb 07 |
jari |
225 |
for (int i = 0; i < correlFilteredRows.size(); i++) { |
2 |
26 Feb 07 |
jari |
226 |
rowsInAnalysis.add((Integer)(correlFilteredRows.get(i))); |
2 |
26 Feb 07 |
jari |
227 |
} |
2 |
26 Feb 07 |
jari |
228 |
} |
2 |
26 Feb 07 |
jari |
229 |
|
2 |
26 Feb 07 |
jari |
//System.out.println("After correlation filter: "); |
2 |
26 Feb 07 |
jari |
//System.out.println("rowsInAnalysis.size() = " + rowsInAnalysis.size() + ", filteredClassifierSet.size() = " + filteredClassifierSet.size()); |
2 |
26 Feb 07 |
jari |
232 |
|
2 |
26 Feb 07 |
jari |
233 |
postCorrDataSetSize = rowsInAnalysis.size(); |
2 |
26 Feb 07 |
jari |
234 |
|
2 |
26 Feb 07 |
jari |
235 |
Vector[] classSets = new Vector[numClasses + 1]; // classSets[0] contains unclassified elements |
2 |
26 Feb 07 |
jari |
236 |
for (int i = 0; i < classSets.length; i++) { |
2 |
26 Feb 07 |
jari |
237 |
classSets[i] = new Vector(); |
2 |
26 Feb 07 |
jari |
238 |
} |
2 |
26 Feb 07 |
jari |
239 |
|
2 |
26 Feb 07 |
jari |
//filteredClassifiersByClass = new int[numClasses + 1][]; // first element stays empty; the remaining elements contain the memebers of the training set grouped by class |
2 |
26 Feb 07 |
jari |
241 |
filteredClassifiersByClass = new Vector[numClasses + 1]; // first element stays empty; the remaining elements contain the members of the training set grouped by class |
2 |
26 Feb 07 |
jari |
242 |
for (int i = 0; i < numClasses + 1; i++) { |
2 |
26 Feb 07 |
jari |
243 |
filteredClassifiersByClass[i] = new Vector(); |
2 |
26 Feb 07 |
jari |
244 |
} |
2 |
26 Feb 07 |
jari |
245 |
for (int i = 0; i < filteredClassifierSet.size(); i++) { |
2 |
26 Feb 07 |
jari |
246 |
int currClassifier = ((Integer)(filteredClassifierSet.get(i))).intValue(); |
2 |
26 Feb 07 |
jari |
247 |
int currClass = ((Integer)(filteredClasses.get(i))).intValue(); |
2 |
26 Feb 07 |
jari |
248 |
filteredClassifiersByClass[currClass].add(new Integer(currClassifier)); |
2 |
26 Feb 07 |
jari |
249 |
} |
2 |
26 Feb 07 |
jari |
250 |
|
2 |
26 Feb 07 |
jari |
251 |
|
2 |
26 Feb 07 |
jari |
252 |
for (int i = 0; i < rowsInAnalysis.size(); i++) { |
2 |
26 Feb 07 |
jari |
253 |
int currRow = ((Integer)(rowsInAnalysis.get(i))).intValue(); |
2 |
26 Feb 07 |
jari |
254 |
int currClass = getClassification(currRow, numNeighbors); |
2 |
26 Feb 07 |
jari |
255 |
classSets[currClass].add(new Integer(currRow)); |
2 |
26 Feb 07 |
jari |
256 |
|
2 |
26 Feb 07 |
jari |
257 |
} |
2 |
26 Feb 07 |
jari |
258 |
|
2 |
26 Feb 07 |
jari |
259 |
for (int i = 0; i < numRows; i++) { |
2 |
26 Feb 07 |
jari |
260 |
if( !(rowsInAnalysis.contains(new Integer(i))) && !(filteredClassifierSet.contains(new Integer(i))) ) { |
2 |
26 Feb 07 |
jari |
261 |
classSets[0].add(new Integer(i)); |
2 |
26 Feb 07 |
jari |
262 |
} |
2 |
26 Feb 07 |
jari |
263 |
} |
2 |
26 Feb 07 |
jari |
264 |
|
2 |
26 Feb 07 |
jari |
265 |
Vector[] unusedClassifiers = new Vector[numClasses + 1]; |
2 |
26 Feb 07 |
jari |
266 |
Vector[] usedClassifiers = new Vector[numClasses + 1]; |
2 |
26 Feb 07 |
jari |
267 |
for (int i = 1; i < unusedClassifiers.length; i++) { |
2 |
26 Feb 07 |
jari |
268 |
unusedClassifiers[i] = new Vector(); |
2 |
26 Feb 07 |
jari |
269 |
usedClassifiers[i] = new Vector(); |
2 |
26 Feb 07 |
jari |
270 |
} |
2 |
26 Feb 07 |
jari |
271 |
|
2 |
26 Feb 07 |
jari |
//DONE UP TO HERE 10/01/03. NEED TO EXTRACT UNUSED CLASSIFIERS. |
2 |
26 Feb 07 |
jari |
273 |
|
2 |
26 Feb 07 |
jari |
274 |
for (int i = 0; i < classIndices.length; i++) { |
2 |
26 Feb 07 |
jari |
275 |
if (!isFoundInVector(classIndices[i], filteredClassifierSet)) { |
2 |
26 Feb 07 |
jari |
276 |
unusedClassifiers[classes[i]].add(new Integer(classIndices[i])); |
2 |
26 Feb 07 |
jari |
277 |
} else { |
2 |
26 Feb 07 |
jari |
278 |
usedClassifiers[classes[i]].add(new Integer(classIndices[i])); |
2 |
26 Feb 07 |
jari |
279 |
} |
2 |
26 Feb 07 |
jari |
280 |
} |
2 |
26 Feb 07 |
jari |
281 |
|
2 |
26 Feb 07 |
jari |
282 |
Vector[] usedPlusClassified = new Vector[numClasses + 1]; |
2 |
26 Feb 07 |
jari |
283 |
|
2 |
26 Feb 07 |
jari |
284 |
for (int i = 1; i < usedPlusClassified.length; i++) { |
2 |
26 Feb 07 |
jari |
285 |
usedPlusClassified[i] = new Vector(); |
2 |
26 Feb 07 |
jari |
286 |
for (int j = 0; j < usedClassifiers[i].size(); j++) { |
2 |
26 Feb 07 |
jari |
287 |
usedPlusClassified[i].add( (Integer)(usedClassifiers[i].get(j)) ); |
2 |
26 Feb 07 |
jari |
288 |
} |
2 |
26 Feb 07 |
jari |
289 |
for (int j = 0; j < classSets[i].size(); j++) { |
2 |
26 Feb 07 |
jari |
290 |
usedPlusClassified[i].add( (Integer)(classSets[i].get(j)) ); |
2 |
26 Feb 07 |
jari |
291 |
} |
2 |
26 Feb 07 |
jari |
292 |
} |
2 |
26 Feb 07 |
jari |
293 |
|
2 |
26 Feb 07 |
jari |
294 |
clusters = new Vector[numClasses*4 + 1]; |
2 |
26 Feb 07 |
jari |
295 |
|
2 |
26 Feb 07 |
jari |
296 |
for (int i = 1; i <= numClasses; i++) { |
2 |
26 Feb 07 |
jari |
297 |
clusters[i - 1] = usedClassifiers[i]; |
2 |
26 Feb 07 |
jari |
298 |
clusters[i - 1 + numClasses] = unusedClassifiers[i]; |
2 |
26 Feb 07 |
jari |
299 |
clusters[i - 1 + 2*numClasses] = classSets[i]; |
2 |
26 Feb 07 |
jari |
300 |
clusters[i - 1 + 3*numClasses] = usedPlusClassified[i]; |
2 |
26 Feb 07 |
jari |
301 |
} |
2 |
26 Feb 07 |
jari |
302 |
|
2 |
26 Feb 07 |
jari |
303 |
clusters[numClasses*4] = classSets[0]; |
2 |
26 Feb 07 |
jari |
304 |
|
2 |
26 Feb 07 |
jari |
305 |
int[][] crossValidationStats = getCrossValidationStats(); |
2 |
26 Feb 07 |
jari |
306 |
int[] numberCorrectlyClassifiedByClass = crossValidationStats[0]; |
2 |
26 Feb 07 |
jari |
307 |
int[] numberIncorrectlyClassifiedByClass = crossValidationStats[1]; |
2 |
26 Feb 07 |
jari |
308 |
int[] origNumInFiltTrgSetByClass = new int[numClasses + 1]; |
2 |
26 Feb 07 |
jari |
309 |
|
2 |
26 Feb 07 |
jari |
310 |
for (int i = 0; i < origNumInFiltTrgSetByClass.length; i++) { |
2 |
26 Feb 07 |
jari |
311 |
origNumInFiltTrgSetByClass[i] = filteredClassifiersByClass[i].size(); |
2 |
26 Feb 07 |
jari |
312 |
} |
2 |
26 Feb 07 |
jari |
313 |
/* |
2 |
26 Feb 07 |
jari |
for (int i = 0; i < clusters.length; i++) { |
2 |
26 Feb 07 |
jari |
System.out.println("clusters[" + i + "].size() = " + clusters[i].size()); |
2 |
26 Feb 07 |
jari |
316 |
} |
2 |
26 Feb 07 |
jari |
317 |
*/ |
2 |
26 Feb 07 |
jari |
318 |
|
2 |
26 Feb 07 |
jari |
// for each class, report 1) used classifiers, 2) classified vectors only (no classifiers) 3) classifiers + classified, and 4) unused classisiers. |
2 |
26 Feb 07 |
jari |
// The last cluster is all the unassigned genes |
2 |
26 Feb 07 |
jari |
321 |
|
2 |
26 Feb 07 |
jari |
322 |
FloatMatrix means = getMeans(clusters); |
2 |
26 Feb 07 |
jari |
323 |
FloatMatrix variances = getVariances(clusters, means); |
2 |
26 Feb 07 |
jari |
324 |
|
2 |
26 Feb 07 |
jari |
325 |
AlgorithmEvent event3 = null; |
2 |
26 Feb 07 |
jari |
326 |
if (hierarchical_tree) { |
2 |
26 Feb 07 |
jari |
327 |
event3 = new AlgorithmEvent(this, AlgorithmEvent.SET_UNITS, clusters.length, "Calculate Hierarchical Trees"); |
2 |
26 Feb 07 |
jari |
328 |
fireValueChanged(event3); |
2 |
26 Feb 07 |
jari |
329 |
event3.setIntValue(0); |
2 |
26 Feb 07 |
jari |
330 |
event3.setId(AlgorithmEvent.PROGRESS_VALUE); |
2 |
26 Feb 07 |
jari |
331 |
fireValueChanged(event3); |
2 |
26 Feb 07 |
jari |
332 |
} |
2 |
26 Feb 07 |
jari |
333 |
|
2 |
26 Feb 07 |
jari |
334 |
Cluster result_cluster = new Cluster(); |
2 |
26 Feb 07 |
jari |
335 |
NodeList nodeList = result_cluster.getNodeList(); |
2 |
26 Feb 07 |
jari |
336 |
int[] features; |
2 |
26 Feb 07 |
jari |
337 |
for (int i=0; i<clusters.length; i++) { |
2 |
26 Feb 07 |
jari |
338 |
if (stop) { |
2 |
26 Feb 07 |
jari |
339 |
throw new AbortException(); |
2 |
26 Feb 07 |
jari |
340 |
} |
2 |
26 Feb 07 |
jari |
341 |
features = convert2int(clusters[i]); |
2 |
26 Feb 07 |
jari |
342 |
Node node = new Node(features); |
2 |
26 Feb 07 |
jari |
343 |
nodeList.addNode(node); |
2 |
26 Feb 07 |
jari |
344 |
if (hierarchical_tree) { |
2 |
26 Feb 07 |
jari |
345 |
node.setValues(calculateHierarchicalTree(features, method_linkage, calculate_genes, calculate_experiments)); |
2 |
26 Feb 07 |
jari |
346 |
event.setIntValue(i+1); |
2 |
26 Feb 07 |
jari |
347 |
fireValueChanged(event); |
2 |
26 Feb 07 |
jari |
348 |
} |
2 |
26 Feb 07 |
jari |
349 |
} |
2 |
26 Feb 07 |
jari |
350 |
|
2 |
26 Feb 07 |
jari |
// prepare the result |
2 |
26 Feb 07 |
jari |
352 |
AlgorithmData result = new AlgorithmData(); |
2 |
26 Feb 07 |
jari |
353 |
result.addParam("usedNumNeibs", String.valueOf(usedNumNeibs)); |
2 |
26 Feb 07 |
jari |
354 |
result.addParam("origDataSetSize", String.valueOf(origDataSetSize)); |
2 |
26 Feb 07 |
jari |
355 |
result.addParam("origClassSetSize", String.valueOf(origClassSetSize)); |
2 |
26 Feb 07 |
jari |
356 |
if (this.useVarianceFilter) { |
2 |
26 Feb 07 |
jari |
357 |
result.addParam("postVarDataSetSize", String.valueOf(postVarDataSetSize)); |
2 |
26 Feb 07 |
jari |
358 |
result.addParam("postVarClassSetSize", String.valueOf(postVarClassSetSize)); |
2 |
26 Feb 07 |
jari |
359 |
} |
2 |
26 Feb 07 |
jari |
360 |
if (this.useCorrelFilter) { |
2 |
26 Feb 07 |
jari |
361 |
result.addParam("postCorrDataSetSize", String.valueOf(postCorrDataSetSize)); |
2 |
26 Feb 07 |
jari |
362 |
} |
2 |
26 Feb 07 |
jari |
363 |
result.addIntArray("origNumInFiltTrgSetByClass", origNumInFiltTrgSetByClass); |
2 |
26 Feb 07 |
jari |
364 |
result.addIntArray("numberCorrectlyClassifiedByClass", numberCorrectlyClassifiedByClass); |
2 |
26 Feb 07 |
jari |
365 |
result.addIntArray("numberIncorrectlyClassifiedByClass", numberIncorrectlyClassifiedByClass); |
2 |
26 Feb 07 |
jari |
366 |
result.addCluster("cluster", result_cluster); |
2 |
26 Feb 07 |
jari |
367 |
result.addMatrix("clusters_means", means); |
2 |
26 Feb 07 |
jari |
368 |
result.addMatrix("clusters_variances", variances); |
2 |
26 Feb 07 |
jari |
369 |
return result; |
2 |
26 Feb 07 |
jari |
370 |
|
2 |
26 Feb 07 |
jari |
371 |
} else {// if (validate) |
2 |
26 Feb 07 |
jari |
372 |
classifyGenes = map.getBoolean("classifyGenes", true); |
2 |
26 Feb 07 |
jari |
373 |
useCorrelFilter = map.getBoolean("useCorrelFilter", false); |
2 |
26 Feb 07 |
jari |
374 |
if (useCorrelFilter) { |
2 |
26 Feb 07 |
jari |
375 |
correlPValue = map.getFloat("correlPValue", 0.01f); |
2 |
26 Feb 07 |
jari |
376 |
numPerms = map.getInt("numPerms", 1000); |
2 |
26 Feb 07 |
jari |
377 |
} |
2 |
26 Feb 07 |
jari |
378 |
numClasses = map.getInt("numClasses", 5); |
2 |
26 Feb 07 |
jari |
379 |
numNeighbors = map.getInt("numNeighbors", 3); |
2 |
26 Feb 07 |
jari |
380 |
classIndices = data.getIntArray("classIndices"); |
2 |
26 Feb 07 |
jari |
381 |
classes = data.getIntArray("classes"); |
2 |
26 Feb 07 |
jari |
382 |
|
2 |
26 Feb 07 |
jari |
383 |
filteredClassifierSet = new Vector(); |
2 |
26 Feb 07 |
jari |
384 |
filteredClasses = new Vector(); |
2 |
26 Feb 07 |
jari |
385 |
for (int i = 0; i < classIndices.length; i++) { |
2 |
26 Feb 07 |
jari |
386 |
filteredClassifierSet.add(new Integer(classIndices[i])); |
2 |
26 Feb 07 |
jari |
387 |
filteredClasses.add(new Integer(classes[i])); |
2 |
26 Feb 07 |
jari |
388 |
} |
2 |
26 Feb 07 |
jari |
389 |
|
2 |
26 Feb 07 |
jari |
390 |
/* |
2 |
26 Feb 07 |
jari |
System.out.println("Before variance filter:"); |
2 |
26 Feb 07 |
jari |
System.out.println("rowsInAnalysis.size() = " + rowsInAnalysis.size() + ", filteredClassifierSet.size() = " + filteredClassifierSet.size()); |
2 |
26 Feb 07 |
jari |
393 |
*/ |
2 |
26 Feb 07 |
jari |
394 |
|
2 |
26 Feb 07 |
jari |
//origDataSetSize = rowsInAnalysis.size(); |
2 |
26 Feb 07 |
jari |
396 |
origClassSetSize = filteredClassifierSet.size(); |
2 |
26 Feb 07 |
jari |
397 |
|
2 |
26 Feb 07 |
jari |
398 |
filteredClassifiersByClass = new Vector[numClasses + 1]; // first element stays empty; the remaining elements contain the members of the training set grouped by class |
2 |
26 Feb 07 |
jari |
399 |
for (int i = 0; i < numClasses + 1; i++) { |
2 |
26 Feb 07 |
jari |
400 |
filteredClassifiersByClass[i] = new Vector(); |
2 |
26 Feb 07 |
jari |
401 |
} |
2 |
26 Feb 07 |
jari |
402 |
for (int i = 0; i < filteredClassifierSet.size(); i++) { |
2 |
26 Feb 07 |
jari |
403 |
int currClassifier = ((Integer)(filteredClassifierSet.get(i))).intValue(); |
2 |
26 Feb 07 |
jari |
404 |
int currClass = ((Integer)(filteredClasses.get(i))).intValue(); |
2 |
26 Feb 07 |
jari |
405 |
filteredClassifiersByClass[currClass].add(new Integer(currClassifier)); |
2 |
26 Feb 07 |
jari |
406 |
} |
2 |
26 Feb 07 |
jari |
407 |
|
2 |
26 Feb 07 |
jari |
408 |
int[][] crossValidationStats = getCrossValidationStats(); |
2 |
26 Feb 07 |
jari |
409 |
int[] numberCorrectlyClassifiedByClass = crossValidationStats[0]; |
2 |
26 Feb 07 |
jari |
410 |
int[] numberIncorrectlyClassifiedByClass = crossValidationStats[1]; |
2 |
26 Feb 07 |
jari |
411 |
int[] origNumInFiltTrgSetByClass = new int[numClasses + 1]; |
2 |
26 Feb 07 |
jari |
412 |
|
2 |
26 Feb 07 |
jari |
413 |
for (int i = 0; i < origNumInFiltTrgSetByClass.length; i++) { |
2 |
26 Feb 07 |
jari |
414 |
origNumInFiltTrgSetByClass[i] = filteredClassifiersByClass[i].size(); |
2 |
26 Feb 07 |
jari |
415 |
} |
2 |
26 Feb 07 |
jari |
416 |
|
2 |
26 Feb 07 |
jari |
417 |
Vector nonTrainingRows = new Vector(); |
2 |
26 Feb 07 |
jari |
418 |
for (int i = 0; i < numRows; i++) { |
2 |
26 Feb 07 |
jari |
419 |
nonTrainingRows.add(new Integer(i)); |
2 |
26 Feb 07 |
jari |
420 |
} |
2 |
26 Feb 07 |
jari |
421 |
nonTrainingRows.removeAll(filteredClassifierSet); |
2 |
26 Feb 07 |
jari |
422 |
|
2 |
26 Feb 07 |
jari |
423 |
clusters = new Vector[numClasses + 1]; |
2 |
26 Feb 07 |
jari |
424 |
clusters[0] = nonTrainingRows; |
2 |
26 Feb 07 |
jari |
425 |
for (int i = 1; i < numClasses + 1; i++) { |
2 |
26 Feb 07 |
jari |
426 |
clusters[i] = filteredClassifiersByClass[i]; |
2 |
26 Feb 07 |
jari |
427 |
} |
2 |
26 Feb 07 |
jari |
428 |
|
2 |
26 Feb 07 |
jari |
429 |
FloatMatrix means = getMeans(clusters); |
2 |
26 Feb 07 |
jari |
430 |
FloatMatrix variances = getVariances(clusters, means); |
2 |
26 Feb 07 |
jari |
431 |
|
2 |
26 Feb 07 |
jari |
432 |
AlgorithmEvent event3 = null; |
2 |
26 Feb 07 |
jari |
433 |
if (hierarchical_tree) { |
2 |
26 Feb 07 |
jari |
434 |
event3 = new AlgorithmEvent(this, AlgorithmEvent.SET_UNITS, clusters.length, "Calculate Hierarchical Trees"); |
2 |
26 Feb 07 |
jari |
435 |
fireValueChanged(event3); |
2 |
26 Feb 07 |
jari |
436 |
event3.setIntValue(0); |
2 |
26 Feb 07 |
jari |
437 |
event3.setId(AlgorithmEvent.PROGRESS_VALUE); |
2 |
26 Feb 07 |
jari |
438 |
fireValueChanged(event3); |
2 |
26 Feb 07 |
jari |
439 |
} |
2 |
26 Feb 07 |
jari |
440 |
|
2 |
26 Feb 07 |
jari |
441 |
AlgorithmEvent event = new AlgorithmEvent(this, AlgorithmEvent.SET_UNITS, numRows); |
2 |
26 Feb 07 |
jari |
442 |
fireValueChanged(event); |
2 |
26 Feb 07 |
jari |
443 |
event.setId(AlgorithmEvent.PROGRESS_VALUE); |
2 |
26 Feb 07 |
jari |
444 |
|
2 |
26 Feb 07 |
jari |
445 |
Cluster result_cluster = new Cluster(); |
2 |
26 Feb 07 |
jari |
446 |
NodeList nodeList = result_cluster.getNodeList(); |
2 |
26 Feb 07 |
jari |
447 |
int[] features; |
2 |
26 Feb 07 |
jari |
448 |
for (int i=0; i<clusters.length; i++) { |
2 |
26 Feb 07 |
jari |
449 |
if (stop) { |
2 |
26 Feb 07 |
jari |
450 |
throw new AbortException(); |
2 |
26 Feb 07 |
jari |
451 |
} |
2 |
26 Feb 07 |
jari |
452 |
features = convert2int(clusters[i]); |
2 |
26 Feb 07 |
jari |
453 |
Node node = new Node(features); |
2 |
26 Feb 07 |
jari |
454 |
nodeList.addNode(node); |
2 |
26 Feb 07 |
jari |
455 |
if (hierarchical_tree) { |
2 |
26 Feb 07 |
jari |
456 |
node.setValues(calculateHierarchicalTree(features, method_linkage, calculate_genes, calculate_experiments)); |
2 |
26 Feb 07 |
jari |
457 |
event.setIntValue(i+1); |
2 |
26 Feb 07 |
jari |
458 |
fireValueChanged(event); |
2 |
26 Feb 07 |
jari |
459 |
} |
2 |
26 Feb 07 |
jari |
460 |
} |
2 |
26 Feb 07 |
jari |
461 |
|
2 |
26 Feb 07 |
jari |
// prepare the result |
2 |
26 Feb 07 |
jari |
463 |
AlgorithmData result = new AlgorithmData(); |
2 |
26 Feb 07 |
jari |
464 |
result.addParam("usedNumNeibs", String.valueOf(usedNumNeibs)); |
2 |
26 Feb 07 |
jari |
465 |
|
2 |
26 Feb 07 |
jari |
466 |
result.addIntArray("origNumInFiltTrgSetByClass", origNumInFiltTrgSetByClass); |
2 |
26 Feb 07 |
jari |
467 |
result.addIntArray("numberCorrectlyClassifiedByClass", numberCorrectlyClassifiedByClass); |
2 |
26 Feb 07 |
jari |
468 |
result.addIntArray("numberIncorrectlyClassifiedByClass", numberIncorrectlyClassifiedByClass); |
2 |
26 Feb 07 |
jari |
469 |
result.addCluster("cluster", result_cluster); |
2 |
26 Feb 07 |
jari |
470 |
result.addMatrix("clusters_means", means); |
2 |
26 Feb 07 |
jari |
471 |
result.addMatrix("clusters_variances", variances); |
2 |
26 Feb 07 |
jari |
472 |
return result; |
2 |
26 Feb 07 |
jari |
473 |
} // end if (validate) |
2 |
26 Feb 07 |
jari |
474 |
|
2 |
26 Feb 07 |
jari |
//return null; //for now |
2 |
26 Feb 07 |
jari |
476 |
} |
2 |
26 Feb 07 |
jari |
477 |
|
2 |
26 Feb 07 |
jari |
478 |
private NodeValueList calculateHierarchicalTree(int[] features, int method, boolean genes, boolean experiments) throws AlgorithmException { |
2 |
26 Feb 07 |
jari |
479 |
NodeValueList nodeList = new NodeValueList(); |
2 |
26 Feb 07 |
jari |
480 |
AlgorithmData data = new AlgorithmData(); |
2 |
26 Feb 07 |
jari |
481 |
FloatMatrix experiment; |
2 |
26 Feb 07 |
jari |
482 |
|
2 |
26 Feb 07 |
jari |
483 |
if(classifyGenes) |
2 |
26 Feb 07 |
jari |
484 |
experiment = getSubExperiment(this.expMatrix, features); |
2 |
26 Feb 07 |
jari |
485 |
else |
2 |
26 Feb 07 |
jari |
486 |
experiment = this.getSubExperimentReducedCols(this.expMatrix, features); |
2 |
26 Feb 07 |
jari |
487 |
|
2 |
26 Feb 07 |
jari |
488 |
data.addMatrix("experiment", experiment); |
2 |
26 Feb 07 |
jari |
489 |
data.addParam("hcl-distance-function", String.valueOf(this.hcl_function)); |
2 |
26 Feb 07 |
jari |
490 |
data.addParam("hcl-distance-absolute", String.valueOf(this.hcl_absolute)); |
2 |
26 Feb 07 |
jari |
491 |
data.addParam("method-linkage", String.valueOf(method)); |
2 |
26 Feb 07 |
jari |
492 |
HCL hcl = new HCL(); |
2 |
26 Feb 07 |
jari |
493 |
AlgorithmData result; |
2 |
26 Feb 07 |
jari |
494 |
if (genes) { |
2 |
26 Feb 07 |
jari |
495 |
data.addParam("calculate-genes", String.valueOf(true)); |
2 |
26 Feb 07 |
jari |
496 |
result = hcl.execute(data); |
2 |
26 Feb 07 |
jari |
497 |
validate(result); |
2 |
26 Feb 07 |
jari |
498 |
addNodeValues(nodeList, result); |
2 |
26 Feb 07 |
jari |
499 |
} |
2 |
26 Feb 07 |
jari |
500 |
if (experiments) { |
2 |
26 Feb 07 |
jari |
501 |
data.addParam("calculate-genes", String.valueOf(false)); |
2 |
26 Feb 07 |
jari |
502 |
result = hcl.execute(data); |
2 |
26 Feb 07 |
jari |
503 |
validate(result); |
2 |
26 Feb 07 |
jari |
504 |
addNodeValues(nodeList, result); |
2 |
26 Feb 07 |
jari |
505 |
} |
2 |
26 Feb 07 |
jari |
506 |
return nodeList; |
2 |
26 Feb 07 |
jari |
507 |
} |
2 |
26 Feb 07 |
jari |
508 |
|
2 |
26 Feb 07 |
jari |
509 |
private void addNodeValues(NodeValueList target_list, AlgorithmData source_result) { |
2 |
26 Feb 07 |
jari |
510 |
target_list.addNodeValue(new NodeValue("child-1-array", source_result.getIntArray("child-1-array"))); |
2 |
26 Feb 07 |
jari |
511 |
target_list.addNodeValue(new NodeValue("child-2-array", source_result.getIntArray("child-2-array"))); |
2 |
26 Feb 07 |
jari |
512 |
target_list.addNodeValue(new NodeValue("node-order", source_result.getIntArray("node-order"))); |
2 |
26 Feb 07 |
jari |
513 |
target_list.addNodeValue(new NodeValue("height", source_result.getMatrix("height").getRowPackedCopy())); |
2 |
26 Feb 07 |
jari |
514 |
} |
2 |
26 Feb 07 |
jari |
515 |
|
2 |
26 Feb 07 |
jari |
516 |
private FloatMatrix getSubExperiment(FloatMatrix experiment, int[] features) { |
2 |
26 Feb 07 |
jari |
517 |
FloatMatrix subExperiment = new FloatMatrix(features.length, experiment.getColumnDimension()); |
2 |
26 Feb 07 |
jari |
518 |
for (int i=0; i<features.length; i++) { |
2 |
26 Feb 07 |
jari |
519 |
subExperiment.A[i] = experiment.A[features[i]]; |
2 |
26 Feb 07 |
jari |
520 |
} |
2 |
26 Feb 07 |
jari |
521 |
return subExperiment; |
2 |
26 Feb 07 |
jari |
522 |
} |
2 |
26 Feb 07 |
jari |
523 |
|
2 |
26 Feb 07 |
jari |
524 |
/** |
2 |
26 Feb 07 |
jari |
* Creates a matrix with reduced columns (samples) as during experiment clustering |
2 |
26 Feb 07 |
jari |
526 |
*/ |
2 |
26 Feb 07 |
jari |
527 |
private FloatMatrix getSubExperimentReducedCols(FloatMatrix experiment, int[] features) { |
2 |
26 Feb 07 |
jari |
528 |
FloatMatrix copyMatrix = experiment.copy(); |
2 |
26 Feb 07 |
jari |
529 |
FloatMatrix subExperiment = new FloatMatrix(features.length, copyMatrix.getColumnDimension()); |
2 |
26 Feb 07 |
jari |
530 |
for (int i=0; i<features.length; i++) { |
2 |
26 Feb 07 |
jari |
531 |
subExperiment.A[i] = copyMatrix.A[features[i]]; |
2 |
26 Feb 07 |
jari |
532 |
} |
2 |
26 Feb 07 |
jari |
533 |
subExperiment = subExperiment.transpose(); |
2 |
26 Feb 07 |
jari |
534 |
return subExperiment; |
2 |
26 Feb 07 |
jari |
535 |
} |
2 |
26 Feb 07 |
jari |
536 |
|
2 |
26 Feb 07 |
jari |
537 |
/** |
2 |
26 Feb 07 |
jari |
* Checking the result of hcl algorithm calculation. |
2 |
26 Feb 07 |
jari |
* @throws AlgorithmException, if the result is incorrect. |
2 |
26 Feb 07 |
jari |
540 |
*/ |
2 |
26 Feb 07 |
jari |
541 |
private void validate(AlgorithmData result) throws AlgorithmException { |
2 |
26 Feb 07 |
jari |
542 |
if (result.getIntArray("child-1-array") == null) { |
2 |
26 Feb 07 |
jari |
543 |
throw new AlgorithmException("parameter 'child-1-array' is null"); |
2 |
26 Feb 07 |
jari |
544 |
} |
2 |
26 Feb 07 |
jari |
545 |
if (result.getIntArray("child-2-array") == null) { |
2 |
26 Feb 07 |
jari |
546 |
throw new AlgorithmException("parameter 'child-2-array' is null"); |
2 |
26 Feb 07 |
jari |
547 |
} |
2 |
26 Feb 07 |
jari |
548 |
if (result.getIntArray("node-order") == null) { |
2 |
26 Feb 07 |
jari |
549 |
throw new AlgorithmException("parameter 'node-order' is null"); |
2 |
26 Feb 07 |
jari |
550 |
} |
2 |
26 Feb 07 |
jari |
551 |
if (result.getMatrix("height") == null) { |
2 |
26 Feb 07 |
jari |
552 |
throw new AlgorithmException("parameter 'height' is null"); |
2 |
26 Feb 07 |
jari |
553 |
} |
2 |
26 Feb 07 |
jari |
554 |
} |
2 |
26 Feb 07 |
jari |
555 |
|
2 |
26 Feb 07 |
jari |
556 |
private int[] convert2int(Vector source) { |
2 |
26 Feb 07 |
jari |
557 |
int[] int_matrix = new int[source.size()]; |
2 |
26 Feb 07 |
jari |
558 |
for (int i=0; i<int_matrix.length; i++) { |
2 |
26 Feb 07 |
jari |
559 |
int_matrix[i] = ((Integer) source.get(i)).intValue(); |
2 |
26 Feb 07 |
jari |
560 |
} |
2 |
26 Feb 07 |
jari |
561 |
return int_matrix; |
2 |
26 Feb 07 |
jari |
562 |
} |
2 |
26 Feb 07 |
jari |
563 |
|
2 |
26 Feb 07 |
jari |
564 |
public void abort() { |
2 |
26 Feb 07 |
jari |
565 |
stop = true; |
2 |
26 Feb 07 |
jari |
566 |
} |
2 |
26 Feb 07 |
jari |
567 |
|
2 |
26 Feb 07 |
jari |
568 |
private FloatMatrix getMeans(Vector[] clusters) { |
2 |
26 Feb 07 |
jari |
569 |
FloatMatrix means = new FloatMatrix(clusters.length, numCols); |
2 |
26 Feb 07 |
jari |
570 |
FloatMatrix mean; |
2 |
26 Feb 07 |
jari |
571 |
for (int i=0; i<clusters.length; i++) { |
2 |
26 Feb 07 |
jari |
572 |
mean = getMean(clusters[i]); |
2 |
26 Feb 07 |
jari |
573 |
means.A[i] = mean.A[0]; |
2 |
26 Feb 07 |
jari |
574 |
} |
2 |
26 Feb 07 |
jari |
575 |
return means; |
2 |
26 Feb 07 |
jari |
576 |
} |
2 |
26 Feb 07 |
jari |
577 |
|
2 |
26 Feb 07 |
jari |
578 |
private FloatMatrix getMean(Vector cluster) { |
2 |
26 Feb 07 |
jari |
579 |
FloatMatrix mean = new FloatMatrix(1, numCols); |
2 |
26 Feb 07 |
jari |
580 |
float currentMean; |
2 |
26 Feb 07 |
jari |
581 |
int n = cluster.size(); |
2 |
26 Feb 07 |
jari |
582 |
int denom = 0; |
2 |
26 Feb 07 |
jari |
583 |
float value; |
2 |
26 Feb 07 |
jari |
584 |
for (int i=0; i<numCols; i++) { |
2 |
26 Feb 07 |
jari |
585 |
currentMean = 0f; |
2 |
26 Feb 07 |
jari |
586 |
denom = 0; |
2 |
26 Feb 07 |
jari |
587 |
for (int j=0; j<n; j++) { |
2 |
26 Feb 07 |
jari |
588 |
value = expMatrix.get(((Integer) cluster.get(j)).intValue(), i); |
2 |
26 Feb 07 |
jari |
589 |
if (!Float.isNaN(value)) { |
2 |
26 Feb 07 |
jari |
590 |
currentMean += value; |
2 |
26 Feb 07 |
jari |
591 |
denom++; |
2 |
26 Feb 07 |
jari |
592 |
} |
2 |
26 Feb 07 |
jari |
593 |
} |
2 |
26 Feb 07 |
jari |
594 |
mean.set(0, i, currentMean/(float)denom); |
2 |
26 Feb 07 |
jari |
595 |
} |
2 |
26 Feb 07 |
jari |
596 |
|
2 |
26 Feb 07 |
jari |
597 |
return mean; |
2 |
26 Feb 07 |
jari |
598 |
} |
2 |
26 Feb 07 |
jari |
599 |
|
2 |
26 Feb 07 |
jari |
600 |
private FloatMatrix getVariances(Vector[] clusters, FloatMatrix means) { |
2 |
26 Feb 07 |
jari |
601 |
final int rows = means.getRowDimension(); |
2 |
26 Feb 07 |
jari |
602 |
final int columns = means.getColumnDimension(); |
2 |
26 Feb 07 |
jari |
603 |
FloatMatrix variances = new FloatMatrix(rows, columns); |
2 |
26 Feb 07 |
jari |
604 |
for (int row=0; row<rows; row++) { |
2 |
26 Feb 07 |
jari |
605 |
for (int column=0; column<columns; column++) { |
2 |
26 Feb 07 |
jari |
606 |
variances.set(row, column, getSampleVariance(clusters[row], column, means.get(row, column))); |
2 |
26 Feb 07 |
jari |
607 |
} |
2 |
26 Feb 07 |
jari |
608 |
} |
2 |
26 Feb 07 |
jari |
609 |
return variances; |
2 |
26 Feb 07 |
jari |
610 |
} |
2 |
26 Feb 07 |
jari |
611 |
|
2 |
26 Feb 07 |
jari |
612 |
int validN; |
2 |
26 Feb 07 |
jari |
613 |
|
2 |
26 Feb 07 |
jari |
614 |
private float getSampleNormalizedSum(Vector cluster, int column, float mean) { |
2 |
26 Feb 07 |
jari |
615 |
final int size = cluster.size(); |
2 |
26 Feb 07 |
jari |
616 |
float sum = 0f; |
2 |
26 Feb 07 |
jari |
617 |
validN = 0; |
2 |
26 Feb 07 |
jari |
618 |
float value; |
2 |
26 Feb 07 |
jari |
619 |
for (int i=0; i<size; i++) { |
2 |
26 Feb 07 |
jari |
620 |
value = expMatrix.get(((Integer) cluster.get(i)).intValue(), column); |
2 |
26 Feb 07 |
jari |
621 |
if (!Float.isNaN(value)) { |
2 |
26 Feb 07 |
jari |
622 |
sum += Math.pow(value-mean, 2); |
2 |
26 Feb 07 |
jari |
623 |
validN++; |
2 |
26 Feb 07 |
jari |
624 |
} |
2 |
26 Feb 07 |
jari |
625 |
} |
2 |
26 Feb 07 |
jari |
626 |
return sum; |
2 |
26 Feb 07 |
jari |
627 |
} |
2 |
26 Feb 07 |
jari |
628 |
|
2 |
26 Feb 07 |
jari |
629 |
private float getSampleVariance(Vector cluster, int column, float mean) { |
2 |
26 Feb 07 |
jari |
630 |
return(float)Math.sqrt(getSampleNormalizedSum(cluster, column, mean)/(float)(validN-1)); |
2 |
26 Feb 07 |
jari |
631 |
} |
2 |
26 Feb 07 |
jari |
632 |
|
2 |
26 Feb 07 |
jari |
633 |
private int getClassification(int row, int numNeibs) { // return zero if unclassified (in case of a tie) |
2 |
26 Feb 07 |
jari |
634 |
int[] classCounts = new int[numClasses + 1]; |
2 |
26 Feb 07 |
jari |
635 |
for (int i = 0; i < classCounts.length; i++) { |
2 |
26 Feb 07 |
jari |
636 |
classCounts[i] = 0; |
2 |
26 Feb 07 |
jari |
637 |
} |
2 |
26 Feb 07 |
jari |
638 |
|
2 |
26 Feb 07 |
jari |
639 |
float[] distances = new float[filteredClassifierSet.size()]; |
2 |
26 Feb 07 |
jari |
640 |
int numNeibsUsed; |
2 |
26 Feb 07 |
jari |
641 |
|
2 |
26 Feb 07 |
jari |
642 |
if (numNeibs <= filteredClassifierSet.size()) { |
2 |
26 Feb 07 |
jari |
643 |
numNeibsUsed = numNeibs; |
2 |
26 Feb 07 |
jari |
644 |
} else { |
2 |
26 Feb 07 |
jari |
645 |
numNeibsUsed = filteredClassifierSet.size(); |
2 |
26 Feb 07 |
jari |
646 |
} |
2 |
26 Feb 07 |
jari |
647 |
|
2 |
26 Feb 07 |
jari |
648 |
usedNumNeibs = numNeibsUsed; |
2 |
26 Feb 07 |
jari |
649 |
|
2 |
26 Feb 07 |
jari |
650 |
for (int i = 0; i < filteredClassifierSet.size(); i++) { |
2 |
26 Feb 07 |
jari |
651 |
int currentClassifier = ((Integer)(filteredClassifierSet.get(i))).intValue(); |
2 |
26 Feb 07 |
jari |
652 |
float currDist = ExperimentUtil.geneEuclidianDistance(expMatrix, null, row, currentClassifier, factor); |
2 |
26 Feb 07 |
jari |
653 |
distances[i] = currDist; |
2 |
26 Feb 07 |
jari |
654 |
} |
2 |
26 Feb 07 |
jari |
655 |
|
2 |
26 Feb 07 |
jari |
656 |
QSort sortDistances = new QSort(distances); |
2 |
26 Feb 07 |
jari |
657 |
int[] sortedDistIndices = sortDistances.getOrigIndx(); |
2 |
26 Feb 07 |
jari |
658 |
|
2 |
26 Feb 07 |
jari |
659 |
for (int i = 0; i < numNeibsUsed; i++) { |
2 |
26 Feb 07 |
jari |
660 |
int currClassifierIndex = sortedDistIndices[i]; |
2 |
26 Feb 07 |
jari |
661 |
int currClass = ((Integer)(filteredClasses.get(currClassifierIndex))).intValue(); |
2 |
26 Feb 07 |
jari |
662 |
classCounts[currClass] = classCounts[currClass] + 1; |
2 |
26 Feb 07 |
jari |
663 |
} |
2 |
26 Feb 07 |
jari |
664 |
|
2 |
26 Feb 07 |
jari |
665 |
int maxCount = 0; |
2 |
26 Feb 07 |
jari |
666 |
for (int i = 1; i < classCounts.length; i++) { |
2 |
26 Feb 07 |
jari |
667 |
maxCount = Math.max(maxCount, classCounts[i]); |
2 |
26 Feb 07 |
jari |
668 |
} |
2 |
26 Feb 07 |
jari |
669 |
|
2 |
26 Feb 07 |
jari |
670 |
int numMaxCountEncountered = 0; |
2 |
26 Feb 07 |
jari |
671 |
int assignedClass = 0; |
2 |
26 Feb 07 |
jari |
672 |
for (int i = 1; i < classCounts.length; i++) { |
2 |
26 Feb 07 |
jari |
673 |
if (maxCount == classCounts[i]) { |
2 |
26 Feb 07 |
jari |
674 |
numMaxCountEncountered++; |
2 |
26 Feb 07 |
jari |
675 |
assignedClass = i; |
2 |
26 Feb 07 |
jari |
676 |
} |
2 |
26 Feb 07 |
jari |
677 |
} |
2 |
26 Feb 07 |
jari |
678 |
|
2 |
26 Feb 07 |
jari |
679 |
if (numMaxCountEncountered == 1) { |
2 |
26 Feb 07 |
jari |
680 |
return assignedClass; |
2 |
26 Feb 07 |
jari |
681 |
} else { |
2 |
26 Feb 07 |
jari |
682 |
return 0; |
2 |
26 Feb 07 |
jari |
683 |
} |
2 |
26 Feb 07 |
jari |
684 |
} |
2 |
26 Feb 07 |
jari |
685 |
|
2 |
26 Feb 07 |
jari |
686 |
private int getClassificationForCrossValid(int row, int numNeibs, Vector reducedClassifierSet, Vector reducedClasses) {// return zero if unclassified (in case of a tie) |
2 |
26 Feb 07 |
jari |
687 |
int[] classCounts = new int[numClasses + 1]; |
2 |
26 Feb 07 |
jari |
688 |
for (int i = 0; i < classCounts.length; i++) { |
2 |
26 Feb 07 |
jari |
689 |
classCounts[i] = 0; |
2 |
26 Feb 07 |
jari |
690 |
} |
2 |
26 Feb 07 |
jari |
691 |
|
2 |
26 Feb 07 |
jari |
692 |
float[] distances = new float[reducedClassifierSet.size()]; |
2 |
26 Feb 07 |
jari |
693 |
int numNeibsUsed; |
2 |
26 Feb 07 |
jari |
694 |
|
2 |
26 Feb 07 |
jari |
695 |
if (numNeibs <= reducedClassifierSet.size()) { |
2 |
26 Feb 07 |
jari |
696 |
numNeibsUsed = numNeibs; |
2 |
26 Feb 07 |
jari |
697 |
} else { |
2 |
26 Feb 07 |
jari |
698 |
numNeibsUsed = reducedClassifierSet.size(); |
2 |
26 Feb 07 |
jari |
699 |
} |
2 |
26 Feb 07 |
jari |
700 |
|
2 |
26 Feb 07 |
jari |
701 |
usedNumNeibs = numNeibsUsed; |
2 |
26 Feb 07 |
jari |
702 |
|
2 |
26 Feb 07 |
jari |
703 |
for (int i = 0; i < reducedClassifierSet.size(); i++) { |
2 |
26 Feb 07 |
jari |
704 |
int currentClassifier = ((Integer)(reducedClassifierSet.get(i))).intValue(); |
2 |
26 Feb 07 |
jari |
705 |
float currDist = ExperimentUtil.geneEuclidianDistance(expMatrix, null, row, currentClassifier, factor); |
2 |
26 Feb 07 |
jari |
706 |
distances[i] = currDist; |
2 |
26 Feb 07 |
jari |
707 |
} |
2 |
26 Feb 07 |
jari |
708 |
|
2 |
26 Feb 07 |
jari |
709 |
QSort sortDistances = new QSort(distances); |
2 |
26 Feb 07 |
jari |
710 |
int[] sortedDistIndices = sortDistances.getOrigIndx(); |
2 |
26 Feb 07 |
jari |
711 |
|
2 |
26 Feb 07 |
jari |
712 |
for (int i = 0; i < numNeibsUsed; i++) { |
2 |
26 Feb 07 |
jari |
713 |
int currClassifierIndex = sortedDistIndices[i]; |
2 |
26 Feb 07 |
jari |
714 |
int currClass = ((Integer)(reducedClasses.get(currClassifierIndex))).intValue(); |
2 |
26 Feb 07 |
jari |
715 |
classCounts[currClass] = classCounts[currClass] + 1; |
2 |
26 Feb 07 |
jari |
716 |
} |
2 |
26 Feb 07 |
jari |
717 |
|
2 |
26 Feb 07 |
jari |
718 |
int maxCount = 0; |
2 |
26 Feb 07 |
jari |
719 |
for (int i = 1; i < classCounts.length; i++) { |
2 |
26 Feb 07 |
jari |
720 |
maxCount = Math.max(maxCount, classCounts[i]); |
2 |
26 Feb 07 |
jari |
721 |
} |
2 |
26 Feb 07 |
jari |
722 |
|
2 |
26 Feb 07 |
jari |
723 |
int numMaxCountEncountered = 0; |
2 |
26 Feb 07 |
jari |
724 |
int assignedClass = 0; |
2 |
26 Feb 07 |
jari |
725 |
for (int i = 1; i < classCounts.length; i++) { |
2 |
26 Feb 07 |
jari |
726 |
if (maxCount == classCounts[i]) { |
2 |
26 Feb 07 |
jari |
727 |
numMaxCountEncountered++; |
2 |
26 Feb 07 |
jari |
728 |
assignedClass = i; |
2 |
26 Feb 07 |
jari |
729 |
} |
2 |
26 Feb 07 |
jari |
730 |
} |
2 |
26 Feb 07 |
jari |
731 |
|
2 |
26 Feb 07 |
jari |
732 |
if (numMaxCountEncountered == 1) { |
2 |
26 Feb 07 |
jari |
733 |
return assignedClass; |
2 |
26 Feb 07 |
jari |
734 |
} else { |
2 |
26 Feb 07 |
jari |
735 |
return 0; |
2 |
26 Feb 07 |
jari |
736 |
} |
2 |
26 Feb 07 |
jari |
737 |
} |
2 |
26 Feb 07 |
jari |
738 |
|
2 |
26 Feb 07 |
jari |
739 |
private int[][] getCrossValidationStats() throws AlgorithmException { |
2 |
26 Feb 07 |
jari |
740 |
int[] numCorrectlyClassifiedByClass = new int[numClasses + 1]; // first element of array is not used, so the first class is index [1] |
2 |
26 Feb 07 |
jari |
741 |
int[] numIncorrectlyClassifiedByClass = new int[numClasses + 1]; // first element of array is not used, so the first class is index [1] |
2 |
26 Feb 07 |
jari |
742 |
for (int i = 0; i < numCorrectlyClassifiedByClass.length; i++) { |
2 |
26 Feb 07 |
jari |
743 |
numCorrectlyClassifiedByClass[i] = 0; |
2 |
26 Feb 07 |
jari |
744 |
numIncorrectlyClassifiedByClass[i] = 0; |
2 |
26 Feb 07 |
jari |
745 |
} |
2 |
26 Feb 07 |
jari |
746 |
|
2 |
26 Feb 07 |
jari |
747 |
AlgorithmEvent algEvent = new AlgorithmEvent(this, AlgorithmEvent.SET_UNITS, filteredClassifierSet.size()); |
2 |
26 Feb 07 |
jari |
748 |
fireValueChanged(algEvent); |
2 |
26 Feb 07 |
jari |
749 |
algEvent.setId(AlgorithmEvent.PROGRESS_VALUE); |
2 |
26 Feb 07 |
jari |
750 |
|
2 |
26 Feb 07 |
jari |
751 |
for (int i = 0; i < filteredClassifierSet.size(); i++) { |
2 |
26 Feb 07 |
jari |
752 |
if (stop) { |
2 |
26 Feb 07 |
jari |
753 |
throw new AbortException(); |
2 |
26 Feb 07 |
jari |
754 |
} |
2 |
26 Feb 07 |
jari |
755 |
algEvent.setIntValue(i); |
2 |
26 Feb 07 |
jari |
756 |
algEvent.setDescription("Cross-validation: testing classifier " + (i + 1)); |
2 |
26 Feb 07 |
jari |
757 |
fireValueChanged(algEvent); |
2 |
26 Feb 07 |
jari |
758 |
Vector reducedFilteredClassifierSet = (Vector)(filteredClassifierSet.clone()); |
2 |
26 Feb 07 |
jari |
759 |
reducedFilteredClassifierSet.remove(i); |
2 |
26 Feb 07 |
jari |
760 |
Vector reducedFilteredClasses = (Vector)(filteredClasses.clone()); |
2 |
26 Feb 07 |
jari |
761 |
reducedFilteredClasses.remove(i); |
2 |
26 Feb 07 |
jari |
762 |
|
2 |
26 Feb 07 |
jari |
763 |
int rowToClassify = ((Integer)(filteredClassifierSet.get(i))).intValue(); |
2 |
26 Feb 07 |
jari |
764 |
if(!useCorrelFilter) { |
2 |
26 Feb 07 |
jari |
765 |
int currClass = getClassificationForCrossValid(rowToClassify, numNeighbors, reducedFilteredClassifierSet, reducedFilteredClasses); |
2 |
26 Feb 07 |
jari |
766 |
if (filteredClassifiersByClass[currClass].contains(new Integer(rowToClassify))) { |
2 |
26 Feb 07 |
jari |
767 |
numCorrectlyClassifiedByClass[currClass]++; |
2 |
26 Feb 07 |
jari |
768 |
} else { |
2 |
26 Feb 07 |
jari |
769 |
numIncorrectlyClassifiedByClass[currClass]++; |
2 |
26 Feb 07 |
jari |
770 |
} |
2 |
26 Feb 07 |
jari |
771 |
|
2 |
26 Feb 07 |
jari |
772 |
} else { // if (useCorrelFilter) |
2 |
26 Feb 07 |
jari |
773 |
Random rnd1 = new Random(); |
2 |
26 Feb 07 |
jari |
774 |
if (passesCorrelationFilter(rowToClassify, reducedFilteredClassifierSet, correlPValue, numPerms, rnd1.nextLong())) { |
2 |
26 Feb 07 |
jari |
775 |
int currClass = getClassificationForCrossValid(rowToClassify, numNeighbors, reducedFilteredClassifierSet, reducedFilteredClasses); |
2 |
26 Feb 07 |
jari |
776 |
if (filteredClassifiersByClass[currClass].contains(new Integer(rowToClassify))) { |
2 |
26 Feb 07 |
jari |
777 |
numCorrectlyClassifiedByClass[currClass]++; |
2 |
26 Feb 07 |
jari |
778 |
} else { |
2 |
26 Feb 07 |
jari |
779 |
numIncorrectlyClassifiedByClass[currClass]++; |
2 |
26 Feb 07 |
jari |
780 |
} |
2 |
26 Feb 07 |
jari |
781 |
} else {// if (!passesCorrelationFilter) |
2 |
26 Feb 07 |
jari |
//do nothing, since rowToClassify cannot be classified |
2 |
26 Feb 07 |
jari |
783 |
} |
2 |
26 Feb 07 |
jari |
784 |
} |
2 |
26 Feb 07 |
jari |
785 |
} |
2 |
26 Feb 07 |
jari |
786 |
|
2 |
26 Feb 07 |
jari |
787 |
int[][] crossValidationStats = new int[2][]; |
2 |
26 Feb 07 |
jari |
788 |
crossValidationStats[0] = numCorrectlyClassifiedByClass; |
2 |
26 Feb 07 |
jari |
789 |
crossValidationStats[1] = numIncorrectlyClassifiedByClass; |
2 |
26 Feb 07 |
jari |
790 |
/* |
2 |
26 Feb 07 |
jari |
for (int i = 1; i < numClasses + 1; i++) { |
2 |
26 Feb 07 |
jari |
System.out.println("Class " + i + ":"); |
2 |
26 Feb 07 |
jari |
System.out.println("Original number in training set = " + filteredClassifiersByClass[i].size()); |
2 |
26 Feb 07 |
jari |
System.out.println("Number correctly classified = " + numCorrectlyClassifiedByClass[i]); |
2 |
26 Feb 07 |
jari |
System.out.println("Number falsely assigned = " + numIncorrectlyClassifiedByClass[i]); |
2 |
26 Feb 07 |
jari |
796 |
|
2 |
26 Feb 07 |
jari |
797 |
} |
2 |
26 Feb 07 |
jari |
798 |
*/ |
2 |
26 Feb 07 |
jari |
799 |
|
2 |
26 Feb 07 |
jari |
800 |
return crossValidationStats; |
2 |
26 Feb 07 |
jari |
801 |
} |
2 |
26 Feb 07 |
jari |
802 |
|
2 |
26 Feb 07 |
jari |
803 |
private boolean passesCorrelationFilter(int row, Vector classifiers, double thresholdP, int permutations, long seed) { |
2 |
26 Feb 07 |
jari |
804 |
boolean passes = false; |
2 |
26 Feb 07 |
jari |
805 |
double rMax = getRMax(row, classifiers); |
2 |
26 Feb 07 |
jari |
806 |
float[] currentRow = new float[numCols]; |
2 |
26 Feb 07 |
jari |
807 |
int timesExceeded = 0; |
2 |
26 Feb 07 |
jari |
808 |
|
2 |
26 Feb 07 |
jari |
809 |
long[] seedsArray = new long[permutations]; |
2 |
26 Feb 07 |
jari |
810 |
Random rand = new Random(seed); |
2 |
26 Feb 07 |
jari |
811 |
for (int i = 0; i < seedsArray.length; i++) { |
2 |
26 Feb 07 |
jari |
812 |
seedsArray[i] = rand.nextLong(); |
2 |
26 Feb 07 |
jari |
813 |
} |
2 |
26 Feb 07 |
jari |
814 |
|
2 |
26 Feb 07 |
jari |
815 |
for (int i = 0; i < permutations; i++) { |
2 |
26 Feb 07 |
jari |
816 |
for (int j = 0; j < currentRow.length; j++) { |
2 |
26 Feb 07 |
jari |
817 |
currentRow[j] = expMatrix.A[row][j]; |
2 |
26 Feb 07 |
jari |
818 |
} |
2 |
26 Feb 07 |
jari |
819 |
|
2 |
26 Feb 07 |
jari |
820 |
float[] permutedRow = getPermutedValues(row, seedsArray[i]); |
2 |
26 Feb 07 |
jari |
//DONE UP TO HERE 9_30_03 |
2 |
26 Feb 07 |
jari |
822 |
double permRMax = getPermRMax(permutedRow, classifiers); |
2 |
26 Feb 07 |
jari |
823 |
if (permRMax > rMax) { |
2 |
26 Feb 07 |
jari |
824 |
timesExceeded++; |
2 |
26 Feb 07 |
jari |
825 |
} |
2 |
26 Feb 07 |
jari |
826 |
|
2 |
26 Feb 07 |
jari |
827 |
} |
2 |
26 Feb 07 |
jari |
828 |
|
2 |
26 Feb 07 |
jari |
829 |
double permPValue = (double)(timesExceeded)/(double)(permutations); |
2 |
26 Feb 07 |
jari |
830 |
if (permPValue <= thresholdP) { |
2 |
26 Feb 07 |
jari |
831 |
passes = true; |
2 |
26 Feb 07 |
jari |
832 |
} else { |
2 |
26 Feb 07 |
jari |
833 |
passes = false; |
2 |
26 Feb 07 |
jari |
834 |
} |
2 |
26 Feb 07 |
jari |
835 |
|
2 |
26 Feb 07 |
jari |
836 |
return passes; |
2 |
26 Feb 07 |
jari |
837 |
} |
2 |
26 Feb 07 |
jari |
838 |
|
2 |
26 Feb 07 |
jari |
839 |
private double getPermRMax(float[] rowValues, Vector classifiers) { |
2 |
26 Feb 07 |
jari |
840 |
double permRMax = Double.NEGATIVE_INFINITY; |
2 |
26 Feb 07 |
jari |
841 |
|
2 |
26 Feb 07 |
jari |
842 |
for (int i = 0; i < classifiers.size(); i++) { |
2 |
26 Feb 07 |
jari |
843 |
int currRow = ((Integer)(classifiers.get(i))).intValue(); |
2 |
26 Feb 07 |
jari |
844 |
float[] currentRowValues = getRowValues(currRow); |
2 |
26 Feb 07 |
jari |
845 |
double currentR = getCorr(rowValues, currentRowValues); |
2 |
26 Feb 07 |
jari |
846 |
permRMax = Math.max(permRMax, currentR); |
2 |
26 Feb 07 |
jari |
847 |
} |
2 |
26 Feb 07 |
jari |
848 |
|
2 |
26 Feb 07 |
jari |
849 |
return permRMax; |
2 |
26 Feb 07 |
jari |
850 |
} |
2 |
26 Feb 07 |
jari |
851 |
|
2 |
26 Feb 07 |
jari |
852 |
private double getCorr(float[] arrX, float[] arrY) { |
2 |
26 Feb 07 |
jari |
//double corr; |
2 |
26 Feb 07 |
jari |
854 |
int nArrSize = arrX.length; |
2 |
26 Feb 07 |
jari |
855 |
|
2 |
26 Feb 07 |
jari |
856 |
double dblXY = 0f; |
2 |
26 Feb 07 |
jari |
857 |
double dblX = 0f; |
2 |
26 Feb 07 |
jari |
858 |
double dblXX = 0f; |
2 |
26 Feb 07 |
jari |
859 |
double dblY = 0f; |
2 |
26 Feb 07 |
jari |
860 |
double dblYY = 0f; |
2 |
26 Feb 07 |
jari |
861 |
|
2 |
26 Feb 07 |
jari |
862 |
double v_1, v_2; |
2 |
26 Feb 07 |
jari |
863 |
int iValidValCount = 0; |
2 |
26 Feb 07 |
jari |
864 |
for (int i=0; i<nArrSize; i++) { |
2 |
26 Feb 07 |
jari |
865 |
v_1 = arrX[i]; |
2 |
26 Feb 07 |
jari |
866 |
v_2 = arrY[i]; |
2 |
26 Feb 07 |
jari |
867 |
if (Double.isNaN(v_1) || Double.isNaN(v_2)) { |
2 |
26 Feb 07 |
jari |
868 |
continue; |
2 |
26 Feb 07 |
jari |
869 |
} |
2 |
26 Feb 07 |
jari |
870 |
iValidValCount++; |
2 |
26 Feb 07 |
jari |
871 |
dblXY += v_1*v_2; |
2 |
26 Feb 07 |
jari |
872 |
dblXX += v_1*v_1; |
2 |
26 Feb 07 |
jari |
873 |
dblYY += v_2*v_2; |
2 |
26 Feb 07 |
jari |
874 |
dblX += v_1; |
2 |
26 Feb 07 |
jari |
875 |
dblY += v_2; |
2 |
26 Feb 07 |
jari |
876 |
} |
2 |
26 Feb 07 |
jari |
877 |
if (iValidValCount == 0) |
2 |
26 Feb 07 |
jari |
878 |
return 0d; |
2 |
26 Feb 07 |
jari |
879 |
|
2 |
26 Feb 07 |
jari |
//Allows for a comparison of two 'flat' genes (genes with no variability in their |
2 |
26 Feb 07 |
jari |
// expression values), ie. 0, 0, 0, 0, 0 |
2 |
26 Feb 07 |
jari |
882 |
boolean nonFlat = false; |
2 |
26 Feb 07 |
jari |
883 |
NON_FLAT_CHECK: for (int j = 1; j < nArrSize; j++) { |
2 |
26 Feb 07 |
jari |
884 |
if ((!Float.isNaN(arrX[j])) && (!Float.isNaN(arrY[j]))) { |
2 |
26 Feb 07 |
jari |
885 |
if (arrX[j] != arrX[j-1]) { |
2 |
26 Feb 07 |
jari |
886 |
nonFlat = true; |
2 |
26 Feb 07 |
jari |
887 |
break NON_FLAT_CHECK; |
2 |
26 Feb 07 |
jari |
888 |
} |
2 |
26 Feb 07 |
jari |
889 |
if (arrY[j] != arrY[j-1]) { |
2 |
26 Feb 07 |
jari |
890 |
nonFlat = true; |
2 |
26 Feb 07 |
jari |
891 |
break NON_FLAT_CHECK; |
2 |
26 Feb 07 |
jari |
892 |
} |
2 |
26 Feb 07 |
jari |
893 |
} |
2 |
26 Feb 07 |
jari |
894 |
} |
2 |
26 Feb 07 |
jari |
895 |
|
2 |
26 Feb 07 |
jari |
896 |
if (nonFlat == false) { |
2 |
26 Feb 07 |
jari |
897 |
return 1.0d; |
2 |
26 Feb 07 |
jari |
898 |
} |
2 |
26 Feb 07 |
jari |
899 |
|
2 |
26 Feb 07 |
jari |
900 |
|
2 |
26 Feb 07 |
jari |
901 |
double dblAvgX = dblX/iValidValCount; |
2 |
26 Feb 07 |
jari |
902 |
double dblAvgY = dblY/iValidValCount; |
2 |
26 Feb 07 |
jari |
903 |
double dblUpper = dblXY-dblX*dblAvgY-dblAvgX*dblY+dblAvgX*dblAvgY*((double)iValidValCount); |
2 |
26 Feb 07 |
jari |
904 |
double p1 = (dblXX-dblAvgX*dblX*2d+dblAvgX*dblAvgX*((double)iValidValCount)); |
2 |
26 Feb 07 |
jari |
905 |
double p2 = (dblYY-dblAvgY*dblY*2d+dblAvgY*dblAvgY*((double)iValidValCount)); |
2 |
26 Feb 07 |
jari |
906 |
double dblLower = p1*p2; |
2 |
26 Feb 07 |
jari |
907 |
return(double)(dblUpper/(Math.sqrt(dblLower)+Double.MIN_VALUE)*(double)factor); |
2 |
26 Feb 07 |
jari |
908 |
|
2 |
26 Feb 07 |
jari |
//return corr; |
2 |
26 Feb 07 |
jari |
910 |
} |
2 |
26 Feb 07 |
jari |
911 |
|
2 |
26 Feb 07 |
jari |
912 |
private float[] getRowValues(int row) { |
2 |
26 Feb 07 |
jari |
913 |
float[] rowValues = new float[numCols]; |
2 |
26 Feb 07 |
jari |
914 |
|
2 |
26 Feb 07 |
jari |
915 |
for (int i = 0; i < rowValues.length; i++) { |
2 |
26 Feb 07 |
jari |
916 |
rowValues[i] = expMatrix.A[row][i]; |
2 |
26 Feb 07 |
jari |
917 |
} |
2 |
26 Feb 07 |
jari |
918 |
|
2 |
26 Feb 07 |
jari |
919 |
return rowValues; |
2 |
26 Feb 07 |
jari |
920 |
} |
2 |
26 Feb 07 |
jari |
921 |
|
2 |
26 Feb 07 |
jari |
922 |
private float[] getPermutedValues(int row, long seed) { |
2 |
26 Feb 07 |
jari |
923 |
float[] rowValues = new float[numCols]; |
2 |
26 Feb 07 |
jari |
924 |
float[] permutedRowValues = new float[numCols]; |
2 |
26 Feb 07 |
jari |
925 |
|
2 |
26 Feb 07 |
jari |
926 |
for (int i = 0; i < rowValues.length; i++) { |
2 |
26 Feb 07 |
jari |
927 |
rowValues[i] = expMatrix.A[row][i]; |
2 |
26 Feb 07 |
jari |
928 |
} |
2 |
26 Feb 07 |
jari |
929 |
|
2 |
26 Feb 07 |
jari |
930 |
/* |
2 |
26 Feb 07 |
jari |
System.out.print("Original row: "); |
2 |
26 Feb 07 |
jari |
for (int i = 0; i < rowValues.length; i++) { |
2 |
26 Feb 07 |
jari |
System.out.print(rowValues[i] + " "); |
2 |
26 Feb 07 |
jari |
934 |
} |
2 |
26 Feb 07 |
jari |
System.out.println(); |
2 |
26 Feb 07 |
jari |
936 |
*/ |
2 |
26 Feb 07 |
jari |
937 |
Random generator2 = new Random(seed); |
2 |
26 Feb 07 |
jari |
938 |
for (int i = rowValues.length; i > 1; i--) { |
2 |
26 Feb 07 |
jari |
//Random generator2 = new Random(); |
2 |
26 Feb 07 |
jari |
940 |
int randVal = generator2.nextInt(i - 1); |
2 |
26 Feb 07 |
jari |
941 |
float temp = rowValues[randVal]; |
2 |
26 Feb 07 |
jari |
942 |
rowValues[randVal] = rowValues[i - 1]; |
2 |
26 Feb 07 |
jari |
943 |
rowValues[i - 1] = temp; |
2 |
26 Feb 07 |
jari |
944 |
} |
2 |
26 Feb 07 |
jari |
945 |
/* |
2 |
26 Feb 07 |
jari |
System.out.print("Permuted row: "); |
2 |
26 Feb 07 |
jari |
for (int i = 0; i < rowValues.length; i++) { |
2 |
26 Feb 07 |
jari |
System.out.print(rowValues[i] + " "); |
2 |
26 Feb 07 |
jari |
949 |
} |
2 |
26 Feb 07 |
jari |
System.out.println(); |
2 |
26 Feb 07 |
jari |
System.out.println(); |
2 |
26 Feb 07 |
jari |
952 |
*/ |
2 |
26 Feb 07 |
jari |
953 |
|
2 |
26 Feb 07 |
jari |
954 |
/* |
2 |
26 Feb 07 |
jari |
try { |
2 |
26 Feb 07 |
jari |
Thread.sleep(10); |
2 |
26 Feb 07 |
jari |
} catch (Exception exc) { |
2 |
26 Feb 07 |
jari |
exc.printStackTrace(); |
2 |
26 Feb 07 |
jari |
959 |
} |
2 |
26 Feb 07 |
jari |
960 |
*/ |
2 |
26 Feb 07 |
jari |
961 |
|
2 |
26 Feb 07 |
jari |
962 |
return rowValues; |
2 |
26 Feb 07 |
jari |
963 |
|
2 |
26 Feb 07 |
jari |
964 |
} |
2 |
26 Feb 07 |
jari |
965 |
|
2 |
26 Feb 07 |
jari |
966 |
private double getRMax(int row, Vector classifiers) { |
2 |
26 Feb 07 |
jari |
967 |
double rMax = Double.NEGATIVE_INFINITY; |
2 |
26 Feb 07 |
jari |
968 |
|
2 |
26 Feb 07 |
jari |
969 |
for (int i = 0; i < classifiers.size(); i++) { |
2 |
26 Feb 07 |
jari |
970 |
int currRow = ((Integer)(classifiers.get(i))).intValue(); |
2 |
26 Feb 07 |
jari |
971 |
double currentR = ExperimentUtil.genePearson(expMatrix, null, currRow, row, factor); |
2 |
26 Feb 07 |
jari |
972 |
rMax = Math.max(rMax, currentR); |
2 |
26 Feb 07 |
jari |
973 |
} |
2 |
26 Feb 07 |
jari |
974 |
|
2 |
26 Feb 07 |
jari |
975 |
return rMax; |
2 |
26 Feb 07 |
jari |
976 |
} |
2 |
26 Feb 07 |
jari |
977 |
|
2 |
26 Feb 07 |
jari |
978 |
private boolean isFoundInVector(int element, Vector vect) { |
2 |
26 Feb 07 |
jari |
979 |
boolean found = false; |
2 |
26 Feb 07 |
jari |
980 |
for (int i = 0; i < vect.size(); i++) { |
2 |
26 Feb 07 |
jari |
981 |
if (element == ((Integer)(vect.get(i))).intValue()) { |
2 |
26 Feb 07 |
jari |
982 |
found = true; |
2 |
26 Feb 07 |
jari |
983 |
break; |
2 |
26 Feb 07 |
jari |
984 |
} |
2 |
26 Feb 07 |
jari |
985 |
} |
2 |
26 Feb 07 |
jari |
986 |
return found; |
2 |
26 Feb 07 |
jari |
987 |
} |
2 |
26 Feb 07 |
jari |
988 |
|
2 |
26 Feb 07 |
jari |
989 |
private int[] reverse(int[] arr) { |
2 |
26 Feb 07 |
jari |
990 |
int[] revArr = new int[arr.length]; |
2 |
26 Feb 07 |
jari |
991 |
int revCount = 0; |
2 |
26 Feb 07 |
jari |
992 |
int count = arr.length - 1; |
2 |
26 Feb 07 |
jari |
993 |
for (int i=0; i < arr.length; i++) { |
2 |
26 Feb 07 |
jari |
994 |
revArr[revCount] = arr[count]; |
2 |
26 Feb 07 |
jari |
995 |
revCount++; |
2 |
26 Feb 07 |
jari |
996 |
count--; |
2 |
26 Feb 07 |
jari |
997 |
} |
2 |
26 Feb 07 |
jari |
998 |
return revArr; |
2 |
26 Feb 07 |
jari |
999 |
} |
2 |
26 Feb 07 |
jari |
1000 |
|
2 |
26 Feb 07 |
jari |
1001 |
private double getVar(int row) { |
2 |
26 Feb 07 |
jari |
1002 |
float[] rowValues = new float[numCols]; |
2 |
26 Feb 07 |
jari |
1003 |
for (int i = 0; i < rowValues.length; i++) { |
2 |
26 Feb 07 |
jari |
1004 |
rowValues[i] = expMatrix.A[row][i]; |
2 |
26 Feb 07 |
jari |
1005 |
} |
2 |
26 Feb 07 |
jari |
1006 |
return getVar(rowValues); |
2 |
26 Feb 07 |
jari |
1007 |
} |
2 |
26 Feb 07 |
jari |
1008 |
|
2 |
26 Feb 07 |
jari |
1009 |
private double getVar(float[] rowValues) { |
2 |
26 Feb 07 |
jari |
1010 |
float mean = getMean(rowValues); |
2 |
26 Feb 07 |
jari |
1011 |
int n = 0; |
2 |
26 Feb 07 |
jari |
1012 |
|
2 |
26 Feb 07 |
jari |
1013 |
float sumSquares = 0; |
2 |
26 Feb 07 |
jari |
1014 |
|
2 |
26 Feb 07 |
jari |
1015 |
for (int i = 0; i < rowValues.length; i++) { |
2 |
26 Feb 07 |
jari |
1016 |
if (!Float.isNaN(rowValues[i])) { |
2 |
26 Feb 07 |
jari |
1017 |
sumSquares = (float)(sumSquares + Math.pow((rowValues[i] - mean), 2)); |
2 |
26 Feb 07 |
jari |
1018 |
n++; |
2 |
26 Feb 07 |
jari |
1019 |
} |
2 |
26 Feb 07 |
jari |
1020 |
} |
2 |
26 Feb 07 |
jari |
1021 |
|
2 |
26 Feb 07 |
jari |
1022 |
if (n < 2) { |
2 |
26 Feb 07 |
jari |
1023 |
return Float.NaN; |
2 |
26 Feb 07 |
jari |
1024 |
} |
2 |
26 Feb 07 |
jari |
1025 |
|
2 |
26 Feb 07 |
jari |
1026 |
float var = sumSquares / (float)(n - 1); |
2 |
26 Feb 07 |
jari |
1027 |
if (Float.isInfinite(var)) { |
2 |
26 Feb 07 |
jari |
1028 |
return Double.NaN; |
2 |
26 Feb 07 |
jari |
1029 |
} else { |
2 |
26 Feb 07 |
jari |
1030 |
return (double)var; |
2 |
26 Feb 07 |
jari |
1031 |
} |
2 |
26 Feb 07 |
jari |
1032 |
} |
2 |
26 Feb 07 |
jari |
1033 |
|
2 |
26 Feb 07 |
jari |
1034 |
private float getMean(float[] group) { |
2 |
26 Feb 07 |
jari |
1035 |
float sum = 0; |
2 |
26 Feb 07 |
jari |
1036 |
int n = 0; |
2 |
26 Feb 07 |
jari |
1037 |
|
2 |
26 Feb 07 |
jari |
1038 |
int z = 0; |
2 |
26 Feb 07 |
jari |
1039 |
|
2 |
26 Feb 07 |
jari |
1040 |
for (int i = 0; i < group.length; i++) { |
2 |
26 Feb 07 |
jari |
//System.out.println("getMean(): group[" + i + "] = " + group[i]); |
2 |
26 Feb 07 |
jari |
1042 |
if (!Float.isNaN(group[i])) { |
2 |
26 Feb 07 |
jari |
1043 |
sum = sum + group[i]; |
2 |
26 Feb 07 |
jari |
1044 |
n++; |
2 |
26 Feb 07 |
jari |
1045 |
z++; |
2 |
26 Feb 07 |
jari |
1046 |
} |
2 |
26 Feb 07 |
jari |
1047 |
} |
2 |
26 Feb 07 |
jari |
1048 |
|
2 |
26 Feb 07 |
jari |
//System.out.println("getMean(): sum = " +sum); |
2 |
26 Feb 07 |
jari |
1050 |
if (n == 0) { |
2 |
26 Feb 07 |
jari |
1051 |
return Float.NaN; |
2 |
26 Feb 07 |
jari |
1052 |
} |
2 |
26 Feb 07 |
jari |
1053 |
float mean = sum / (float)n; |
2 |
26 Feb 07 |
jari |
1054 |
|
2 |
26 Feb 07 |
jari |
1055 |
if (Float.isInfinite(mean)) { |
2 |
26 Feb 07 |
jari |
1056 |
return Float.NaN; |
2 |
26 Feb 07 |
jari |
1057 |
} |
2 |
26 Feb 07 |
jari |
1058 |
|
2 |
26 Feb 07 |
jari |
1059 |
return mean; |
2 |
26 Feb 07 |
jari |
1060 |
} |
2 |
26 Feb 07 |
jari |
1061 |
|
2 |
26 Feb 07 |
jari |
1062 |
} |
2 |
26 Feb 07 |
jari |
1063 |
|
2 |
26 Feb 07 |
jari |
1064 |
|
2 |
26 Feb 07 |
jari |
1065 |
|
2 |
26 Feb 07 |
jari |
1066 |
|
2 |
26 Feb 07 |
jari |
1067 |
|
2 |
26 Feb 07 |
jari |
1068 |
|
2 |
26 Feb 07 |
jari |
1069 |
|
2 |
26 Feb 07 |
jari |
1070 |
|
2 |
26 Feb 07 |
jari |
1071 |
|
2 |
26 Feb 07 |
jari |
1072 |
|
2 |
26 Feb 07 |
jari |
1073 |
|