2 |
26 Feb 07 |
jari |
1 |
/* |
2 |
26 Feb 07 |
jari |
Copyright @ 1999-2003, The Institute for Genomic Research (TIGR). |
2 |
26 Feb 07 |
jari |
All rights reserved. |
2 |
26 Feb 07 |
jari |
4 |
*/ |
2 |
26 Feb 07 |
jari |
5 |
/* |
2 |
26 Feb 07 |
jari |
* $RCSfile: SOTA.java,v $ |
2 |
26 Feb 07 |
jari |
* $Revision: 1.3 $ |
2 |
26 Feb 07 |
jari |
* $Date: 2005/03/10 15:45:20 $ |
2 |
26 Feb 07 |
jari |
* $Author: braistedj $ |
2 |
26 Feb 07 |
jari |
* $State: Exp $ |
2 |
26 Feb 07 |
jari |
11 |
*/ |
2 |
26 Feb 07 |
jari |
12 |
package org.tigr.microarray.mev.cluster.algorithm.impl; |
2 |
26 Feb 07 |
jari |
13 |
|
2 |
26 Feb 07 |
jari |
14 |
import java.awt.Dimension; |
2 |
26 Feb 07 |
jari |
15 |
import java.util.Vector; |
2 |
26 Feb 07 |
jari |
16 |
|
2 |
26 Feb 07 |
jari |
17 |
import org.tigr.microarray.mev.cluster.Cluster; |
2 |
26 Feb 07 |
jari |
18 |
import org.tigr.microarray.mev.cluster.Node; |
2 |
26 Feb 07 |
jari |
19 |
import org.tigr.microarray.mev.cluster.NodeList; |
2 |
26 Feb 07 |
jari |
20 |
import org.tigr.microarray.mev.cluster.NodeValue; |
2 |
26 Feb 07 |
jari |
21 |
import org.tigr.microarray.mev.cluster.NodeValueList; |
2 |
26 Feb 07 |
jari |
22 |
import org.tigr.microarray.mev.cluster.algorithm.AbortException; |
2 |
26 Feb 07 |
jari |
23 |
import org.tigr.microarray.mev.cluster.algorithm.AbstractAlgorithm; |
2 |
26 Feb 07 |
jari |
24 |
import org.tigr.microarray.mev.cluster.algorithm.AlgorithmData; |
2 |
26 Feb 07 |
jari |
25 |
import org.tigr.microarray.mev.cluster.algorithm.AlgorithmEvent; |
2 |
26 Feb 07 |
jari |
26 |
import org.tigr.microarray.mev.cluster.algorithm.AlgorithmException; |
2 |
26 Feb 07 |
jari |
27 |
import org.tigr.microarray.mev.cluster.algorithm.AlgorithmParameters; |
2 |
26 Feb 07 |
jari |
28 |
import org.tigr.util.FloatMatrix; |
2 |
26 Feb 07 |
jari |
29 |
|
2 |
26 Feb 07 |
jari |
30 |
|
2 |
26 Feb 07 |
jari |
31 |
public class SOTA extends AbstractAlgorithm{ |
2 |
26 Feb 07 |
jari |
32 |
|
2 |
26 Feb 07 |
jari |
33 |
private AlgorithmData inData; |
2 |
26 Feb 07 |
jari |
34 |
private FloatMatrix dataMatrix; |
2 |
26 Feb 07 |
jari |
35 |
|
2 |
26 Feb 07 |
jari |
36 |
private int numberOfGenes; |
2 |
26 Feb 07 |
jari |
37 |
private int numberOfSamples; |
2 |
26 Feb 07 |
jari |
38 |
public int cycleNum; |
2 |
26 Feb 07 |
jari |
39 |
private int numberOfClusters; |
2 |
26 Feb 07 |
jari |
40 |
private int utilCounter; |
2 |
26 Feb 07 |
jari |
41 |
private float initDivSum; |
2 |
26 Feb 07 |
jari |
42 |
|
2 |
26 Feb 07 |
jari |
43 |
private SOTACell root; |
2 |
26 Feb 07 |
jari |
44 |
private SOTACell head; //tree has treaded leaf nodes, head is head of this list |
2 |
26 Feb 07 |
jari |
45 |
private SOTACell mostDiverseCell; |
2 |
26 Feb 07 |
jari |
46 |
private SOTACell mostVariableCell; |
2 |
26 Feb 07 |
jari |
47 |
public SOTACell [] myNucleus; //assosiates an index (gene) to a particular cell |
2 |
26 Feb 07 |
jari |
48 |
private double treeDiversity; //sum of all gene to centroid distances |
2 |
26 Feb 07 |
jari |
49 |
private Vector cycleDiversity; //maintain a record of tree diversity |
2 |
26 Feb 07 |
jari |
50 |
Cluster clusters; //result clusters data structure |
2 |
26 Feb 07 |
jari |
51 |
|
2 |
26 Feb 07 |
jari |
//Sota parameters |
2 |
26 Feb 07 |
jari |
53 |
boolean sotaGenes = true; |
2 |
26 Feb 07 |
jari |
54 |
int maxNumEpochs; //max epochs per cycle |
2 |
26 Feb 07 |
jari |
55 |
int maxNumCycles; //max number of training cycles, equals a number of clusters |
2 |
26 Feb 07 |
jari |
56 |
float epochCriteria; //error improvement threshold to continue a cycle |
2 |
26 Feb 07 |
jari |
57 |
float endCriteria; //final hard tree diversity |
2 |
26 Feb 07 |
jari |
58 |
float migW; //migration weights for (w)Winning cell, (p)parental cell, (s)sister cell |
2 |
26 Feb 07 |
jari |
59 |
float migP; |
2 |
26 Feb 07 |
jari |
60 |
float migS; |
2 |
26 Feb 07 |
jari |
61 |
int neighborhoodLevel; //number of leves to ascendt to select assignment subtree |
2 |
26 Feb 07 |
jari |
62 |
boolean useClusterVariance; //use max gene to gene distance as cell division criteria |
2 |
26 Feb 07 |
jari |
63 |
float pValue; |
2 |
26 Feb 07 |
jari |
64 |
boolean runToMaxCycles; //unrestricted growth |
2 |
26 Feb 07 |
jari |
65 |
boolean stop = false; |
2 |
26 Feb 07 |
jari |
66 |
int function; //distance functin |
2 |
26 Feb 07 |
jari |
67 |
float factor; //factor, based on distance and scaling for a particular algorithm, normally 1.0 |
2 |
26 Feb 07 |
jari |
68 |
float myFactor; //keeps track of factor to apply to distance |
2 |
26 Feb 07 |
jari |
69 |
boolean absolute; //absolute value of distance? |
2 |
26 Feb 07 |
jari |
70 |
boolean calcClusterHCL; |
2 |
26 Feb 07 |
jari |
71 |
boolean calcFullTreeHCL; |
2 |
26 Feb 07 |
jari |
72 |
boolean calculate_genes; |
2 |
26 Feb 07 |
jari |
73 |
boolean calculate_experiments; |
2 |
26 Feb 07 |
jari |
74 |
int method; //HCL linkage method |
2 |
26 Feb 07 |
jari |
75 |
|
2 |
26 Feb 07 |
jari |
76 |
private int hcl_function; |
2 |
26 Feb 07 |
jari |
77 |
private boolean hcl_absolute; |
2 |
26 Feb 07 |
jari |
78 |
|
2 |
26 Feb 07 |
jari |
79 |
/** |
2 |
26 Feb 07 |
jari |
* Constructs a new SOTA object |
2 |
26 Feb 07 |
jari |
81 |
*/ |
2 |
26 Feb 07 |
jari |
82 |
public SOTA(){ } |
2 |
26 Feb 07 |
jari |
83 |
|
2 |
26 Feb 07 |
jari |
84 |
/** |
2 |
26 Feb 07 |
jari |
* This method should interrupt the calculation. |
2 |
26 Feb 07 |
jari |
86 |
*/ |
2 |
26 Feb 07 |
jari |
87 |
public void abort() { |
2 |
26 Feb 07 |
jari |
88 |
stop = true; |
2 |
26 Feb 07 |
jari |
89 |
} |
2 |
26 Feb 07 |
jari |
90 |
|
2 |
26 Feb 07 |
jari |
91 |
|
2 |
26 Feb 07 |
jari |
92 |
/** |
2 |
26 Feb 07 |
jari |
* Performs SOTA tree construction given parameters provided in <code>AlgorithmData</code>. |
2 |
26 Feb 07 |
jari |
* Results are returned in AlgorthmData |
2 |
26 Feb 07 |
jari |
95 |
*/ |
2 |
26 Feb 07 |
jari |
96 |
public AlgorithmData execute(AlgorithmData data) throws AlgorithmException{ |
2 |
26 Feb 07 |
jari |
97 |
|
2 |
26 Feb 07 |
jari |
//Get parameters |
2 |
26 Feb 07 |
jari |
99 |
AlgorithmParameters params = data.getParams(); |
2 |
26 Feb 07 |
jari |
100 |
sotaGenes = params.getBoolean("sota-cluster-genes", true); |
2 |
26 Feb 07 |
jari |
101 |
maxNumEpochs = params.getInt("max-epochs-per-cycle", 1000); |
2 |
26 Feb 07 |
jari |
102 |
maxNumCycles = params.getInt("max-number-of-cycles", 10); |
2 |
26 Feb 07 |
jari |
103 |
epochCriteria = params.getFloat("epoch-improvement-cutoff"); |
2 |
26 Feb 07 |
jari |
104 |
endCriteria = params.getFloat("end-training-diversity"); |
2 |
26 Feb 07 |
jari |
105 |
runToMaxCycles = params.getBoolean("run-to-max-cycles"); |
2 |
26 Feb 07 |
jari |
106 |
useClusterVariance = params.getBoolean("use-cluster-variance", false); |
2 |
26 Feb 07 |
jari |
107 |
function = params.getInt("distance-function", EUCLIDEAN); |
2 |
26 Feb 07 |
jari |
108 |
absolute = params.getBoolean("distance-absolute", true); |
2 |
26 Feb 07 |
jari |
109 |
calcClusterHCL = params.getBoolean("calcClusterHCL",false); |
2 |
26 Feb 07 |
jari |
110 |
calculate_genes = params.getBoolean("calculate-genes",false); |
2 |
26 Feb 07 |
jari |
111 |
calculate_experiments = params.getBoolean("calculate-experiments",false); |
2 |
26 Feb 07 |
jari |
112 |
calcFullTreeHCL = params.getBoolean("calcFullTreeHCL", false); |
2 |
26 Feb 07 |
jari |
113 |
method = params.getInt("method-linkage", 0); |
2 |
26 Feb 07 |
jari |
114 |
pValue = params.getFloat("pValue", (float)0.05); |
2 |
26 Feb 07 |
jari |
115 |
migW = params.getFloat("mig_w", (float)0.01); |
2 |
26 Feb 07 |
jari |
116 |
migP = params.getFloat("mig_p", (float)0.005); |
2 |
26 Feb 07 |
jari |
117 |
migS = params.getFloat("mig_s", (float)0.001); |
2 |
26 Feb 07 |
jari |
118 |
neighborhoodLevel = params.getInt("neighborhood-level", 5); |
2 |
26 Feb 07 |
jari |
119 |
|
2 |
26 Feb 07 |
jari |
120 |
hcl_function = params.getInt("hcl-distance-function", EUCLIDEAN); |
2 |
26 Feb 07 |
jari |
121 |
hcl_absolute = params.getBoolean("hcl-distance-absolute", false); |
2 |
26 Feb 07 |
jari |
122 |
|
2 |
26 Feb 07 |
jari |
123 |
inData = data; //keep a handle on AlgorithmData for return |
2 |
26 Feb 07 |
jari |
124 |
|
2 |
26 Feb 07 |
jari |
//Set factor based on function |
2 |
26 Feb 07 |
jari |
126 |
if ((function==PEARSON) || |
2 |
26 Feb 07 |
jari |
127 |
(function==PEARSONUNCENTERED) || |
2 |
26 Feb 07 |
jari |
128 |
(function==PEARSONSQARED) || |
2 |
26 Feb 07 |
jari |
129 |
(function==COSINE) || |
2 |
26 Feb 07 |
jari |
130 |
(function==COVARIANCE) || |
2 |
26 Feb 07 |
jari |
131 |
(function==DOTPRODUCT) || |
2 |
26 Feb 07 |
jari |
132 |
(function==SPEARMANRANK) || |
2 |
26 Feb 07 |
jari |
133 |
(function==KENDALLSTAU)) { |
2 |
26 Feb 07 |
jari |
134 |
myFactor = -1.0f; |
2 |
26 Feb 07 |
jari |
135 |
} else { |
2 |
26 Feb 07 |
jari |
136 |
myFactor = 1.0f; |
2 |
26 Feb 07 |
jari |
137 |
} |
2 |
26 Feb 07 |
jari |
138 |
|
2 |
26 Feb 07 |
jari |
139 |
factor = (float)1.0; //scaling factor sent to getDistance methods |
2 |
26 Feb 07 |
jari |
140 |
inData.addParam("factor", String.valueOf(myFactor)); //return factor |
2 |
26 Feb 07 |
jari |
141 |
endCriteria *= myFactor; //alter polarity fo endCriteria based on metric |
2 |
26 Feb 07 |
jari |
142 |
treeDiversity = Float.POSITIVE_INFINITY; |
2 |
26 Feb 07 |
jari |
143 |
dataMatrix = data.getMatrix("experiment"); //point dataMatrix at supplied matrix |
2 |
26 Feb 07 |
jari |
144 |
numberOfGenes = dataMatrix.getRowDimension(); |
2 |
26 Feb 07 |
jari |
145 |
numberOfSamples = dataMatrix.getColumnDimension(); |
2 |
26 Feb 07 |
jari |
146 |
myNucleus = new SOTACell[numberOfGenes]; //will be shortcut from gene index to a cell |
2 |
26 Feb 07 |
jari |
147 |
cycleDiversity = new Vector(); |
2 |
26 Feb 07 |
jari |
148 |
|
2 |
26 Feb 07 |
jari |
//reset max number of cycles if limited by number of genes |
2 |
26 Feb 07 |
jari |
150 |
if(maxNumCycles >= numberOfGenes) |
2 |
26 Feb 07 |
jari |
151 |
maxNumCycles = numberOfGenes - 1; |
2 |
26 Feb 07 |
jari |
152 |
|
2 |
26 Feb 07 |
jari |
//if using variablility, resample data, select cutoff based on p value supplied |
2 |
26 Feb 07 |
jari |
154 |
if(useClusterVariance){ |
2 |
26 Feb 07 |
jari |
155 |
endCriteria = resampleAndGetNewCutoff(dataMatrix, pValue); |
2 |
26 Feb 07 |
jari |
156 |
} |
2 |
26 Feb 07 |
jari |
157 |
|
2 |
26 Feb 07 |
jari |
//initialize first cell and two children |
2 |
26 Feb 07 |
jari |
159 |
root = new SOTACell(numberOfSamples, dataMatrix); |
2 |
26 Feb 07 |
jari |
160 |
root.right = new SOTACell(numberOfSamples, dataMatrix); |
2 |
26 Feb 07 |
jari |
161 |
root.left = new SOTACell(numberOfSamples, dataMatrix); |
2 |
26 Feb 07 |
jari |
162 |
numberOfClusters = 2; |
2 |
26 Feb 07 |
jari |
163 |
root.left.parent = root; |
2 |
26 Feb 07 |
jari |
164 |
root.right.parent = root; |
2 |
26 Feb 07 |
jari |
165 |
head = root.left; |
2 |
26 Feb 07 |
jari |
166 |
root.left.succ = root.right; |
2 |
26 Feb 07 |
jari |
167 |
root.right.pred = root.left; |
2 |
26 Feb 07 |
jari |
168 |
|
2 |
26 Feb 07 |
jari |
169 |
int [] numberOfValidGenesInSample = new int[numberOfSamples]; |
2 |
26 Feb 07 |
jari |
//set to zero |
2 |
26 Feb 07 |
jari |
171 |
for(int i = 0; i < numberOfSamples ; i++) |
2 |
26 Feb 07 |
jari |
172 |
numberOfValidGenesInSample[i] = 0; |
2 |
26 Feb 07 |
jari |
173 |
|
2 |
26 Feb 07 |
jari |
//Inialize centroid root centroid to zeros |
2 |
26 Feb 07 |
jari |
175 |
for(int i = 0; i < numberOfSamples; i++){ |
2 |
26 Feb 07 |
jari |
176 |
root.centroidGene.set(0,i,0); |
2 |
26 Feb 07 |
jari |
177 |
} |
2 |
26 Feb 07 |
jari |
178 |
|
2 |
26 Feb 07 |
jari |
179 |
for(int i = 0; i < numberOfGenes ; i++){ |
2 |
26 Feb 07 |
jari |
180 |
root.members.add(new Integer(i)); //add all gene indices to root |
2 |
26 Feb 07 |
jari |
181 |
myNucleus[i] = root; //set all gene nuclei to point to root |
2 |
26 Feb 07 |
jari |
182 |
for(int j = 0; j < numberOfSamples; j++){ |
2 |
26 Feb 07 |
jari |
183 |
if( !(Float.isNaN(dataMatrix.get(i,j)))){ |
2 |
26 Feb 07 |
jari |
184 |
numberOfValidGenesInSample[j]++; //count number of genes with valid data in each sample |
2 |
26 Feb 07 |
jari |
185 |
|
2 |
26 Feb 07 |
jari |
186 |
root.centroidGene.set(0,j, root.centroidGene.get(0,j) + dataMatrix.get(i,j)); //calcualtes sum |
2 |
26 Feb 07 |
jari |
187 |
} |
2 |
26 Feb 07 |
jari |
188 |
} |
2 |
26 Feb 07 |
jari |
189 |
|
2 |
26 Feb 07 |
jari |
190 |
} |
2 |
26 Feb 07 |
jari |
191 |
|
2 |
26 Feb 07 |
jari |
192 |
mostDiverseCell = root; |
2 |
26 Feb 07 |
jari |
193 |
mostVariableCell = root; |
2 |
26 Feb 07 |
jari |
194 |
|
2 |
26 Feb 07 |
jari |
195 |
for(int j = 0; j < numberOfSamples; j++){ |
2 |
26 Feb 07 |
jari |
196 |
root.centroidGene.set(0, j, root.centroidGene.get(0,j)/numberOfValidGenesInSample[j]); //get a mean root centroid |
2 |
26 Feb 07 |
jari |
197 |
root.left.centroidGene.set(0,j, root.centroidGene.get(0,j)); //assign to children |
2 |
26 Feb 07 |
jari |
198 |
root.right.centroidGene.set(0,j, root.centroidGene.get(0,j)); |
2 |
26 Feb 07 |
jari |
199 |
} |
2 |
26 Feb 07 |
jari |
200 |
|
2 |
26 Feb 07 |
jari |
//put first value into diversity vector |
2 |
26 Feb 07 |
jari |
202 |
initDivSum = getNodeDiversitySum(root); |
2 |
26 Feb 07 |
jari |
203 |
cycleDiversity.add( new Float( initDivSum )); |
2 |
26 Feb 07 |
jari |
204 |
root.cellDiversity = initDivSum/numberOfGenes; |
2 |
26 Feb 07 |
jari |
205 |
if(useClusterVariance) |
2 |
26 Feb 07 |
jari |
206 |
root.cellVariance = getNodeVariance(root); |
2 |
26 Feb 07 |
jari |
207 |
|
2 |
26 Feb 07 |
jari |
208 |
if(runToMaxCycles) |
2 |
26 Feb 07 |
jari |
209 |
growSOTUnrestricted(); //make tree w/o regard to diversity |
2 |
26 Feb 07 |
jari |
210 |
else |
2 |
26 Feb 07 |
jari |
211 |
growSOT(); // Construct tree |
2 |
26 Feb 07 |
jari |
212 |
|
2 |
26 Feb 07 |
jari |
//If performing HCL on samples using all genes |
2 |
26 Feb 07 |
jari |
214 |
if(calcFullTreeHCL){ |
2 |
26 Feb 07 |
jari |
215 |
calcFullTreeHCL(); |
2 |
26 Feb 07 |
jari |
216 |
} |
2 |
26 Feb 07 |
jari |
217 |
|
2 |
26 Feb 07 |
jari |
//Code for HCL clustering |
2 |
26 Feb 07 |
jari |
219 |
if(calcClusterHCL){ |
2 |
26 Feb 07 |
jari |
220 |
calculateClusterHCL(); //calculate HCL trees for SOTA clusters |
2 |
26 Feb 07 |
jari |
221 |
} |
2 |
26 Feb 07 |
jari |
222 |
|
2 |
26 Feb 07 |
jari |
223 |
return inData; //inData has results incorporated |
2 |
26 Feb 07 |
jari |
224 |
} |
2 |
26 Feb 07 |
jari |
225 |
|
2 |
26 Feb 07 |
jari |
226 |
/** |
2 |
26 Feb 07 |
jari |
* Deterimins variablity cutoff given a p value |
2 |
26 Feb 07 |
jari |
228 |
*/ |
2 |
26 Feb 07 |
jari |
229 |
private float resampleAndGetNewCutoff(FloatMatrix origMatrix, float p){ |
2 |
26 Feb 07 |
jari |
230 |
FloatMatrix randMatrix = randomizeMatrix(origMatrix); |
2 |
26 Feb 07 |
jari |
231 |
|
2 |
26 Feb 07 |
jari |
232 |
int rows = origMatrix.getRowDimension(); |
2 |
26 Feb 07 |
jari |
233 |
int cols = origMatrix.getColumnDimension(); |
2 |
26 Feb 07 |
jari |
234 |
int NUM_BINS = 500; |
2 |
26 Feb 07 |
jari |
235 |
|
2 |
26 Feb 07 |
jari |
236 |
int numSamplePoints = 0; |
2 |
26 Feb 07 |
jari |
237 |
int cumCutoff; |
2 |
26 Feb 07 |
jari |
238 |
|
2 |
26 Feb 07 |
jari |
239 |
float [][] distances = new float[rows][rows]; |
2 |
26 Feb 07 |
jari |
240 |
|
2 |
26 Feb 07 |
jari |
241 |
for(int i = 0; i < rows - 1; i++){ |
2 |
26 Feb 07 |
jari |
242 |
for(int j = 0; j < i; j++){ |
2 |
26 Feb 07 |
jari |
243 |
distances[i][j] = ExperimentUtil.geneDistance(randMatrix, null, i, j, function, factor, absolute); |
2 |
26 Feb 07 |
jari |
244 |
} |
2 |
26 Feb 07 |
jari |
245 |
} |
2 |
26 Feb 07 |
jari |
246 |
|
2 |
26 Feb 07 |
jari |
//now have all gene to gene distances |
2 |
26 Feb 07 |
jari |
248 |
float [] minAndMax = getMinAndMax(distances, rows); |
2 |
26 Feb 07 |
jari |
249 |
float min = minAndMax[0]; |
2 |
26 Feb 07 |
jari |
250 |
float max = minAndMax[1]; |
2 |
26 Feb 07 |
jari |
251 |
|
2 |
26 Feb 07 |
jari |
252 |
float [] cumDist = new float[NUM_BINS]; |
2 |
26 Feb 07 |
jari |
253 |
|
2 |
26 Feb 07 |
jari |
254 |
for(int i = 0; i < NUM_BINS; i++){ |
2 |
26 Feb 07 |
jari |
255 |
cumDist[i] = 0; |
2 |
26 Feb 07 |
jari |
256 |
} |
2 |
26 Feb 07 |
jari |
257 |
|
2 |
26 Feb 07 |
jari |
258 |
for(int i = 0; i < rows - 1; i++){ |
2 |
26 Feb 07 |
jari |
259 |
for(int j = 0; j < i; j++){ |
2 |
26 Feb 07 |
jari |
260 |
cumDist[ (int)( (float)(NUM_BINS-1) * (distances[i][j]-min)/(max - min)) ]++; |
2 |
26 Feb 07 |
jari |
261 |
numSamplePoints++; |
2 |
26 Feb 07 |
jari |
262 |
} |
2 |
26 Feb 07 |
jari |
263 |
} |
2 |
26 Feb 07 |
jari |
264 |
|
2 |
26 Feb 07 |
jari |
265 |
for(int i = 0; i < NUM_BINS; i++){ |
2 |
26 Feb 07 |
jari |
266 |
cumDist[i] /= (float)numSamplePoints; |
2 |
26 Feb 07 |
jari |
267 |
} |
2 |
26 Feb 07 |
jari |
268 |
|
2 |
26 Feb 07 |
jari |
//now find the bin that has |
2 |
26 Feb 07 |
jari |
270 |
float cumCount = 0; |
2 |
26 Feb 07 |
jari |
271 |
int bin = 0; |
2 |
26 Feb 07 |
jari |
//cumCutoff = (int)(numSamplePoints * p); |
2 |
26 Feb 07 |
jari |
273 |
|
2 |
26 Feb 07 |
jari |
274 |
while(cumCount < p){ |
2 |
26 Feb 07 |
jari |
275 |
cumCount += cumDist[bin]; |
2 |
26 Feb 07 |
jari |
276 |
bin++; |
2 |
26 Feb 07 |
jari |
277 |
} |
2 |
26 Feb 07 |
jari |
278 |
|
2 |
26 Feb 07 |
jari |
279 |
return (((float)bin - (float)0.5)/(float)NUM_BINS ) * (max - min) + min; |
2 |
26 Feb 07 |
jari |
280 |
} |
2 |
26 Feb 07 |
jari |
281 |
|
2 |
26 Feb 07 |
jari |
282 |
/** |
2 |
26 Feb 07 |
jari |
* Utility to get min and max from a triangular matrix |
2 |
26 Feb 07 |
jari |
284 |
*/ |
2 |
26 Feb 07 |
jari |
285 |
private float [] getMinAndMax(float [][] A, int dim){ |
2 |
26 Feb 07 |
jari |
286 |
|
2 |
26 Feb 07 |
jari |
287 |
float [] minAndMax = new float[2]; |
2 |
26 Feb 07 |
jari |
288 |
float currDist; |
2 |
26 Feb 07 |
jari |
289 |
float maxDist = Float.NEGATIVE_INFINITY; |
2 |
26 Feb 07 |
jari |
290 |
float minDist = Float.POSITIVE_INFINITY; |
2 |
26 Feb 07 |
jari |
291 |
|
2 |
26 Feb 07 |
jari |
292 |
for(int i = 0; i < dim - 1; i++){ |
2 |
26 Feb 07 |
jari |
293 |
for(int j = 0; j < i; j++){ |
2 |
26 Feb 07 |
jari |
294 |
currDist = A[i][j]; |
2 |
26 Feb 07 |
jari |
295 |
if(currDist > maxDist) |
2 |
26 Feb 07 |
jari |
296 |
maxDist = currDist; |
2 |
26 Feb 07 |
jari |
297 |
if(currDist < minDist) |
2 |
26 Feb 07 |
jari |
298 |
minDist = currDist; |
2 |
26 Feb 07 |
jari |
299 |
} |
2 |
26 Feb 07 |
jari |
300 |
} |
2 |
26 Feb 07 |
jari |
301 |
minAndMax[0] = minDist; |
2 |
26 Feb 07 |
jari |
302 |
minAndMax[1] = maxDist; |
2 |
26 Feb 07 |
jari |
303 |
return minAndMax; |
2 |
26 Feb 07 |
jari |
304 |
|
2 |
26 Feb 07 |
jari |
305 |
} |
2 |
26 Feb 07 |
jari |
306 |
|
2 |
26 Feb 07 |
jari |
307 |
|
2 |
26 Feb 07 |
jari |
308 |
/** |
2 |
26 Feb 07 |
jari |
* Returns a randomized triangular matrix with genes randomized wrt vector element order |
2 |
26 Feb 07 |
jari |
310 |
*/ |
2 |
26 Feb 07 |
jari |
311 |
private FloatMatrix randomizeMatrix(FloatMatrix origMatrix){ |
2 |
26 Feb 07 |
jari |
312 |
FloatMatrix shuffleMatrix = origMatrix.copy(); |
2 |
26 Feb 07 |
jari |
313 |
int rows = shuffleMatrix.getRowDimension(); |
2 |
26 Feb 07 |
jari |
314 |
int cols = shuffleMatrix.getColumnDimension(); |
2 |
26 Feb 07 |
jari |
315 |
float temp; |
2 |
26 Feb 07 |
jari |
316 |
int swapIndex; |
2 |
26 Feb 07 |
jari |
317 |
int sampleRange = cols - 1; |
2 |
26 Feb 07 |
jari |
318 |
|
2 |
26 Feb 07 |
jari |
319 |
for(int i = 0; i < rows; i++){ |
2 |
26 Feb 07 |
jari |
320 |
for(int j = cols-1; j > 0; j--){ |
2 |
26 Feb 07 |
jari |
321 |
|
2 |
26 Feb 07 |
jari |
322 |
swapIndex = (int)(Math.random()*(j)); |
2 |
26 Feb 07 |
jari |
323 |
temp = shuffleMatrix.get(i,j); |
2 |
26 Feb 07 |
jari |
324 |
shuffleMatrix.set(i,j, shuffleMatrix.get(i, swapIndex)); |
2 |
26 Feb 07 |
jari |
325 |
shuffleMatrix.set(i,swapIndex, temp); |
2 |
26 Feb 07 |
jari |
326 |
sampleRange--; |
2 |
26 Feb 07 |
jari |
327 |
} |
2 |
26 Feb 07 |
jari |
328 |
} |
2 |
26 Feb 07 |
jari |
329 |
return shuffleMatrix; |
2 |
26 Feb 07 |
jari |
330 |
} |
2 |
26 Feb 07 |
jari |
331 |
|
2 |
26 Feb 07 |
jari |
332 |
|
2 |
26 Feb 07 |
jari |
333 |
/** |
2 |
26 Feb 07 |
jari |
* Calculates HCL tree over all genes |
2 |
26 Feb 07 |
jari |
335 |
*/ |
2 |
26 Feb 07 |
jari |
336 |
private void calcFullTreeHCL() throws AlgorithmException{ |
2 |
26 Feb 07 |
jari |
337 |
|
2 |
26 Feb 07 |
jari |
338 |
AlgorithmEvent event = null; |
2 |
26 Feb 07 |
jari |
339 |
event = new AlgorithmEvent(this, AlgorithmEvent.SET_UNITS, 1, "Calculate Hierarchical Tree, Clustering Experiments"); |
2 |
26 Feb 07 |
jari |
340 |
fireValueChanged(event); |
2 |
26 Feb 07 |
jari |
341 |
event.setIntValue(0); |
2 |
26 Feb 07 |
jari |
342 |
event.setId(AlgorithmEvent.PROGRESS_VALUE); |
2 |
26 Feb 07 |
jari |
343 |
fireValueChanged(event); |
2 |
26 Feb 07 |
jari |
344 |
|
2 |
26 Feb 07 |
jari |
345 |
Cluster result_cluster = new Cluster(); |
2 |
26 Feb 07 |
jari |
346 |
NodeList nodeList = result_cluster.getNodeList(); |
2 |
26 Feb 07 |
jari |
347 |
|
2 |
26 Feb 07 |
jari |
348 |
int[] probesIndexes = new int[numberOfGenes]; |
2 |
26 Feb 07 |
jari |
349 |
for(int i = 0; i < numberOfGenes; i++){ |
2 |
26 Feb 07 |
jari |
350 |
probesIndexes[i]= i; |
2 |
26 Feb 07 |
jari |
351 |
} |
2 |
26 Feb 07 |
jari |
352 |
|
2 |
26 Feb 07 |
jari |
353 |
int[] featuresIndexes = new int[numberOfSamples]; |
2 |
26 Feb 07 |
jari |
354 |
for(int i = 0; i < numberOfSamples; i++){ |
2 |
26 Feb 07 |
jari |
355 |
featuresIndexes[i] = i; |
2 |
26 Feb 07 |
jari |
356 |
} |
2 |
26 Feb 07 |
jari |
357 |
|
2 |
26 Feb 07 |
jari |
358 |
if (stop) { |
2 |
26 Feb 07 |
jari |
359 |
throw new AbortException(); |
2 |
26 Feb 07 |
jari |
360 |
} |
2 |
26 Feb 07 |
jari |
361 |
|
2 |
26 Feb 07 |
jari |
362 |
Node node = new Node(featuresIndexes); |
2 |
26 Feb 07 |
jari |
363 |
nodeList.addNode(node); |
2 |
26 Feb 07 |
jari |
364 |
|
2 |
26 Feb 07 |
jari |
365 |
node.setValues(calculateHierarchicalTree(probesIndexes, method, false, true)); |
2 |
26 Feb 07 |
jari |
366 |
event.setIntValue(1); |
2 |
26 Feb 07 |
jari |
367 |
fireValueChanged(event); |
2 |
26 Feb 07 |
jari |
368 |
|
2 |
26 Feb 07 |
jari |
369 |
if(result_cluster != null) |
2 |
26 Feb 07 |
jari |
370 |
inData.addCluster("full-tree-sample-HCL", result_cluster); |
2 |
26 Feb 07 |
jari |
371 |
|
2 |
26 Feb 07 |
jari |
372 |
} |
2 |
26 Feb 07 |
jari |
373 |
|
2 |
26 Feb 07 |
jari |
374 |
/** |
2 |
26 Feb 07 |
jari |
* calculates HCL on SOTA cells (clusters) |
2 |
26 Feb 07 |
jari |
376 |
*/ |
2 |
26 Feb 07 |
jari |
377 |
private void calculateClusterHCL() throws AlgorithmException{ |
2 |
26 Feb 07 |
jari |
378 |
|
2 |
26 Feb 07 |
jari |
379 |
AlgorithmEvent event = null; |
2 |
26 Feb 07 |
jari |
380 |
|
2 |
26 Feb 07 |
jari |
381 |
event = new AlgorithmEvent(this, AlgorithmEvent.SET_UNITS, numberOfClusters, "Calculate Hierarchical Trees SOTA Cluster Members"); |
2 |
26 Feb 07 |
jari |
382 |
fireValueChanged(event); |
2 |
26 Feb 07 |
jari |
383 |
event.setIntValue(0); |
2 |
26 Feb 07 |
jari |
384 |
event.setId(AlgorithmEvent.PROGRESS_VALUE); |
2 |
26 Feb 07 |
jari |
385 |
fireValueChanged(event); |
2 |
26 Feb 07 |
jari |
386 |
|
2 |
26 Feb 07 |
jari |
387 |
Cluster result_cluster = new Cluster(); |
2 |
26 Feb 07 |
jari |
388 |
NodeList nodeList = result_cluster.getNodeList(); |
2 |
26 Feb 07 |
jari |
389 |
|
2 |
26 Feb 07 |
jari |
//clusters are formed after growSOT |
2 |
26 Feb 07 |
jari |
391 |
|
2 |
26 Feb 07 |
jari |
392 |
NodeList resultList = clusters.getNodeList(); |
2 |
26 Feb 07 |
jari |
393 |
Node currNode; |
2 |
26 Feb 07 |
jari |
394 |
|
2 |
26 Feb 07 |
jari |
395 |
int[] probeIndexes; |
2 |
26 Feb 07 |
jari |
396 |
|
2 |
26 Feb 07 |
jari |
397 |
for (int i=0; i<numberOfClusters; i++) { |
2 |
26 Feb 07 |
jari |
398 |
if (stop) { |
2 |
26 Feb 07 |
jari |
399 |
throw new AbortException(); |
2 |
26 Feb 07 |
jari |
400 |
} |
2 |
26 Feb 07 |
jari |
401 |
currNode = resultList.getNode(i); |
2 |
26 Feb 07 |
jari |
402 |
probeIndexes = currNode.getProbesIndexes(); |
2 |
26 Feb 07 |
jari |
403 |
|
2 |
26 Feb 07 |
jari |
404 |
Node node = new Node(probeIndexes); |
2 |
26 Feb 07 |
jari |
405 |
nodeList.addNode(node); |
2 |
26 Feb 07 |
jari |
406 |
|
2 |
26 Feb 07 |
jari |
407 |
node.setValues(calculateHierarchicalTree(probeIndexes, method, calculate_genes, calculate_experiments)); |
2 |
26 Feb 07 |
jari |
408 |
event.setIntValue(i+1); |
2 |
26 Feb 07 |
jari |
409 |
fireValueChanged(event); |
2 |
26 Feb 07 |
jari |
410 |
} |
2 |
26 Feb 07 |
jari |
411 |
|
2 |
26 Feb 07 |
jari |
412 |
if(result_cluster != null) |
2 |
26 Feb 07 |
jari |
413 |
inData.addCluster("hcl-result-clusters", result_cluster); |
2 |
26 Feb 07 |
jari |
414 |
} |
2 |
26 Feb 07 |
jari |
415 |
|
2 |
26 Feb 07 |
jari |
416 |
|
2 |
26 Feb 07 |
jari |
417 |
|
2 |
26 Feb 07 |
jari |
418 |
private NodeValueList calculateHierarchicalTree(int[] features, int method, boolean genes, boolean experiments) throws AlgorithmException { |
2 |
26 Feb 07 |
jari |
419 |
NodeValueList nodeList = new NodeValueList(); |
2 |
26 Feb 07 |
jari |
420 |
AlgorithmData data = new AlgorithmData(); |
2 |
26 Feb 07 |
jari |
421 |
FloatMatrix experiment; |
2 |
26 Feb 07 |
jari |
422 |
if(sotaGenes) |
2 |
26 Feb 07 |
jari |
423 |
experiment = getSubExperiment(this.dataMatrix, features); |
2 |
26 Feb 07 |
jari |
424 |
else |
2 |
26 Feb 07 |
jari |
425 |
experiment = getSubExperimentReducedCols(this.dataMatrix, features); |
2 |
26 Feb 07 |
jari |
426 |
|
2 |
26 Feb 07 |
jari |
427 |
data.addMatrix("experiment", experiment); |
2 |
26 Feb 07 |
jari |
428 |
data.addParam("hcl-distance-function", String.valueOf(this.hcl_function)); |
2 |
26 Feb 07 |
jari |
429 |
data.addParam("hcl-distance-absolute", String.valueOf(this.hcl_absolute)); |
2 |
26 Feb 07 |
jari |
430 |
data.addParam("method-linkage", String.valueOf(method)); |
2 |
26 Feb 07 |
jari |
431 |
HCL hcl = new HCL(); |
2 |
26 Feb 07 |
jari |
432 |
AlgorithmData result; |
2 |
26 Feb 07 |
jari |
433 |
if (genes) { |
2 |
26 Feb 07 |
jari |
434 |
data.addParam("calculate-genes", String.valueOf(true)); |
2 |
26 Feb 07 |
jari |
435 |
result = hcl.execute(data); |
2 |
26 Feb 07 |
jari |
436 |
validate(result); |
2 |
26 Feb 07 |
jari |
437 |
addNodeValues(nodeList, result); |
2 |
26 Feb 07 |
jari |
438 |
} |
2 |
26 Feb 07 |
jari |
439 |
if (experiments) { |
2 |
26 Feb 07 |
jari |
440 |
data.addParam("calculate-genes", String.valueOf(false)); |
2 |
26 Feb 07 |
jari |
441 |
result = hcl.execute(data); |
2 |
26 Feb 07 |
jari |
442 |
validate(result); |
2 |
26 Feb 07 |
jari |
443 |
addNodeValues(nodeList, result); |
2 |
26 Feb 07 |
jari |
444 |
} |
2 |
26 Feb 07 |
jari |
445 |
return nodeList; |
2 |
26 Feb 07 |
jari |
446 |
} |
2 |
26 Feb 07 |
jari |
447 |
|
2 |
26 Feb 07 |
jari |
448 |
|
2 |
26 Feb 07 |
jari |
449 |
|
2 |
26 Feb 07 |
jari |
450 |
private FloatMatrix getSubExperiment(FloatMatrix experiment, int[] features) { |
2 |
26 Feb 07 |
jari |
451 |
FloatMatrix subExperiment = new FloatMatrix(features.length, experiment.getColumnDimension()); |
2 |
26 Feb 07 |
jari |
452 |
for (int i=0; i<features.length; i++) { |
2 |
26 Feb 07 |
jari |
453 |
subExperiment.A[i] = experiment.A[features[i]]; |
2 |
26 Feb 07 |
jari |
454 |
} |
2 |
26 Feb 07 |
jari |
455 |
return subExperiment; |
2 |
26 Feb 07 |
jari |
456 |
} |
2 |
26 Feb 07 |
jari |
457 |
|
2 |
26 Feb 07 |
jari |
458 |
/** |
2 |
26 Feb 07 |
jari |
* Creates a matrix with reduced columns (samples) as during experiment clustering |
2 |
26 Feb 07 |
jari |
460 |
*/ |
2 |
26 Feb 07 |
jari |
461 |
private FloatMatrix getSubExperimentReducedCols(FloatMatrix experiment, int[] features) { |
2 |
26 Feb 07 |
jari |
462 |
FloatMatrix copyMatrix = experiment.copy(); |
2 |
26 Feb 07 |
jari |
463 |
FloatMatrix subExperiment = new FloatMatrix(features.length, copyMatrix.getColumnDimension()); |
2 |
26 Feb 07 |
jari |
464 |
for (int i=0; i<features.length; i++) { |
2 |
26 Feb 07 |
jari |
465 |
subExperiment.A[i] = copyMatrix.A[features[i]]; |
2 |
26 Feb 07 |
jari |
466 |
} |
2 |
26 Feb 07 |
jari |
467 |
subExperiment = subExperiment.transpose(); |
2 |
26 Feb 07 |
jari |
468 |
return subExperiment; |
2 |
26 Feb 07 |
jari |
469 |
} |
2 |
26 Feb 07 |
jari |
470 |
|
2 |
26 Feb 07 |
jari |
471 |
private void addNodeValues(NodeValueList target_list, AlgorithmData source_result) { |
2 |
26 Feb 07 |
jari |
472 |
target_list.addNodeValue(new NodeValue("child-1-array", source_result.getIntArray("child-1-array"))); |
2 |
26 Feb 07 |
jari |
473 |
target_list.addNodeValue(new NodeValue("child-2-array", source_result.getIntArray("child-2-array"))); |
2 |
26 Feb 07 |
jari |
474 |
target_list.addNodeValue(new NodeValue("node-order", source_result.getIntArray("node-order"))); |
2 |
26 Feb 07 |
jari |
475 |
target_list.addNodeValue(new NodeValue("height", source_result.getMatrix("height").getRowPackedCopy())); |
2 |
26 Feb 07 |
jari |
476 |
} |
2 |
26 Feb 07 |
jari |
477 |
|
2 |
26 Feb 07 |
jari |
478 |
/** |
2 |
26 Feb 07 |
jari |
* Returns the Tree root <code>SOTACell</code> |
2 |
26 Feb 07 |
jari |
480 |
*/ |
2 |
26 Feb 07 |
jari |
481 |
public SOTACell getRoot(){ |
2 |
26 Feb 07 |
jari |
482 |
return root; |
2 |
26 Feb 07 |
jari |
483 |
} |
2 |
26 Feb 07 |
jari |
484 |
|
2 |
26 Feb 07 |
jari |
485 |
/** |
2 |
26 Feb 07 |
jari |
* returns the number of cells (leaf nodes) in a subtree (given root <code>SOTACell</code>) |
2 |
26 Feb 07 |
jari |
487 |
*/ |
2 |
26 Feb 07 |
jari |
488 |
public int getPopulation(SOTACell subRoot){ |
2 |
26 Feb 07 |
jari |
489 |
utilCounter = 0; |
2 |
26 Feb 07 |
jari |
490 |
getPop(subRoot); |
2 |
26 Feb 07 |
jari |
491 |
return utilCounter; |
2 |
26 Feb 07 |
jari |
492 |
} |
2 |
26 Feb 07 |
jari |
493 |
|
2 |
26 Feb 07 |
jari |
//gets number of clusters below a node |
2 |
26 Feb 07 |
jari |
495 |
private void getPop(SOTACell curr){ |
2 |
26 Feb 07 |
jari |
496 |
|
2 |
26 Feb 07 |
jari |
497 |
if(curr != null){ |
2 |
26 Feb 07 |
jari |
498 |
if(curr.left == null && curr.right == null) |
2 |
26 Feb 07 |
jari |
499 |
utilCounter++; |
2 |
26 Feb 07 |
jari |
500 |
|
2 |
26 Feb 07 |
jari |
501 |
getPop(curr.left); |
2 |
26 Feb 07 |
jari |
502 |
getPop(curr.right); |
2 |
26 Feb 07 |
jari |
503 |
} |
2 |
26 Feb 07 |
jari |
504 |
} |
2 |
26 Feb 07 |
jari |
505 |
|
2 |
26 Feb 07 |
jari |
506 |
/** |
2 |
26 Feb 07 |
jari |
* grows SOT |
2 |
26 Feb 07 |
jari |
508 |
*/ |
2 |
26 Feb 07 |
jari |
509 |
private void growSOT(){ |
2 |
26 Feb 07 |
jari |
510 |
|
2 |
26 Feb 07 |
jari |
511 |
AlgorithmEvent event = new AlgorithmEvent(this, AlgorithmEvent.SET_UNITS, maxNumCycles); |
2 |
26 Feb 07 |
jari |
512 |
fireValueChanged(event); |
2 |
26 Feb 07 |
jari |
513 |
|
2 |
26 Feb 07 |
jari |
514 |
|
2 |
26 Feb 07 |
jari |
515 |
AlgorithmEvent event1 = new AlgorithmEvent(this, AlgorithmEvent.SET_UNITS, maxNumCycles, "Growing Tree"); |
2 |
26 Feb 07 |
jari |
516 |
fireValueChanged(event1); |
2 |
26 Feb 07 |
jari |
517 |
event1.setIntValue(0); |
2 |
26 Feb 07 |
jari |
518 |
event1.setId(AlgorithmEvent.PROGRESS_VALUE); |
2 |
26 Feb 07 |
jari |
519 |
fireValueChanged(event1); |
2 |
26 Feb 07 |
jari |
520 |
|
2 |
26 Feb 07 |
jari |
521 |
|
2 |
26 Feb 07 |
jari |
522 |
float currDivFraction; |
2 |
26 Feb 07 |
jari |
523 |
boolean stopGrowing = false; |
2 |
26 Feb 07 |
jari |
524 |
float lowerLim; |
2 |
26 Feb 07 |
jari |
525 |
|
2 |
26 Feb 07 |
jari |
526 |
if(!useClusterVariance && mostDiverseCell.cellDiversity < endCriteria) |
2 |
26 Feb 07 |
jari |
527 |
stopGrowing = true; |
2 |
26 Feb 07 |
jari |
528 |
else if( useClusterVariance && mostVariableCell.cellVariance < endCriteria) |
2 |
26 Feb 07 |
jari |
529 |
stopGrowing = true; |
2 |
26 Feb 07 |
jari |
530 |
|
2 |
26 Feb 07 |
jari |
531 |
for( cycleNum = 0; cycleNum < maxNumCycles && !stopGrowing ; cycleNum++){ |
2 |
26 Feb 07 |
jari |
532 |
|
2 |
26 Feb 07 |
jari |
533 |
runCycle(maxNumEpochs, epochCriteria); |
2 |
26 Feb 07 |
jari |
534 |
|
2 |
26 Feb 07 |
jari |
//training has optimized the sub-system centroids, trained node's profiles have been distributed |
2 |
26 Feb 07 |
jari |
//among potentially any cluster (closest chosen) |
2 |
26 Feb 07 |
jari |
537 |
|
2 |
26 Feb 07 |
jari |
//now with all profiles assigned to cells, get the cell's and tree's diversity (Resource) |
2 |
26 Feb 07 |
jari |
539 |
setDiversities(); |
2 |
26 Feb 07 |
jari |
540 |
|
2 |
26 Feb 07 |
jari |
541 |
cycleDiversity.add(new Float(treeDiversity)); |
2 |
26 Feb 07 |
jari |
542 |
|
2 |
26 Feb 07 |
jari |
543 |
if(!useClusterVariance){ |
2 |
26 Feb 07 |
jari |
544 |
if(mostDiverseCell.cellDiversity > endCriteria && cycleNum < maxNumCycles-1) //don't split if last cycle in maxNumCycles |
2 |
26 Feb 07 |
jari |
545 |
divideCell(mostDiverseCell); |
2 |
26 Feb 07 |
jari |
546 |
else |
2 |
26 Feb 07 |
jari |
547 |
stopGrowing = true; |
2 |
26 Feb 07 |
jari |
548 |
} |
2 |
26 Feb 07 |
jari |
549 |
|
2 |
26 Feb 07 |
jari |
550 |
else{ //using variabliity |
2 |
26 Feb 07 |
jari |
551 |
if(mostVariableCell.cellVariance > endCriteria && cycleNum < maxNumCycles-1) //don't split if last cycle in maxNumCycles |
2 |
26 Feb 07 |
jari |
552 |
divideCell(mostVariableCell); |
2 |
26 Feb 07 |
jari |
553 |
else |
2 |
26 Feb 07 |
jari |
554 |
stopGrowing = true; |
2 |
26 Feb 07 |
jari |
555 |
} |
2 |
26 Feb 07 |
jari |
556 |
|
2 |
26 Feb 07 |
jari |
//advance monitor |
2 |
26 Feb 07 |
jari |
558 |
event1.setId(AlgorithmEvent.PROGRESS_VALUE); |
2 |
26 Feb 07 |
jari |
559 |
event1.setIntValue(cycleNum+1); |
2 |
26 Feb 07 |
jari |
560 |
fireValueChanged(event1); |
2 |
26 Feb 07 |
jari |
561 |
|
2 |
26 Feb 07 |
jari |
562 |
|
2 |
26 Feb 07 |
jari |
//calculate and set monitor values |
2 |
26 Feb 07 |
jari |
//event.setId(AlgorithmEvent.MONITOR_VALUE); |
2 |
26 Feb 07 |
jari |
/* DISABLED OPTIONAL DIVERSITY MONITOR |
2 |
26 Feb 07 |
jari |
if(myFactor == 1) |
2 |
26 Feb 07 |
jari |
currDivFraction = (float)(treeDiversity/initDivSum); |
2 |
26 Feb 07 |
jari |
else{ |
2 |
26 Feb 07 |
jari |
lowerLim = numberOfGenes * myFactor; |
2 |
26 Feb 07 |
jari |
currDivFraction = (float)((treeDiversity + Math.abs(lowerLim))/(initDivSum+Math.abs(lowerLim))); |
2 |
26 Feb 07 |
jari |
571 |
} |
2 |
26 Feb 07 |
jari |
if(cycleNum < 245){ //monitor values limit is 245 |
2 |
26 Feb 07 |
jari |
event.setIntValue((int)(100*currDivFraction)); |
2 |
26 Feb 07 |
jari |
fireValueChanged(event); |
2 |
26 Feb 07 |
jari |
575 |
} |
2 |
26 Feb 07 |
jari |
576 |
*/ |
2 |
26 Feb 07 |
jari |
577 |
} |
2 |
26 Feb 07 |
jari |
578 |
trainLeaves(maxNumEpochs, epochCriteria); //reached stopping criteria, now just need to train leaves so centroids migrate to center |
2 |
26 Feb 07 |
jari |
579 |
getResults(); //add results to AlgorithmData (inData) |
2 |
26 Feb 07 |
jari |
580 |
return; |
2 |
26 Feb 07 |
jari |
581 |
} |
2 |
26 Feb 07 |
jari |
582 |
|
2 |
26 Feb 07 |
jari |
583 |
|
2 |
26 Feb 07 |
jari |
584 |
|
2 |
26 Feb 07 |
jari |
/** runs sota until maxCycle - 1 */ |
2 |
26 Feb 07 |
jari |
586 |
private void growSOTUnrestricted(){ |
2 |
26 Feb 07 |
jari |
587 |
|
2 |
26 Feb 07 |
jari |
588 |
boolean stopGrowing = false; |
2 |
26 Feb 07 |
jari |
589 |
|
2 |
26 Feb 07 |
jari |
590 |
for( cycleNum = 0; cycleNum < maxNumCycles && !stopGrowing ; cycleNum++){ |
2 |
26 Feb 07 |
jari |
591 |
|
2 |
26 Feb 07 |
jari |
592 |
runCycle(maxNumEpochs, epochCriteria); |
2 |
26 Feb 07 |
jari |
593 |
|
2 |
26 Feb 07 |
jari |
//training has optimized the sub-system centroids, trained node's profiles have been distributed |
2 |
26 Feb 07 |
jari |
//among potentially any cluster (closest chosen) |
2 |
26 Feb 07 |
jari |
596 |
|
2 |
26 Feb 07 |
jari |
//now with all profiles assigned to cells, get the cell's and tree's diversity (Resource) |
2 |
26 Feb 07 |
jari |
598 |
setDiversities(); |
2 |
26 Feb 07 |
jari |
599 |
|
2 |
26 Feb 07 |
jari |
600 |
cycleDiversity.add(new Float(treeDiversity)); |
2 |
26 Feb 07 |
jari |
601 |
|
2 |
26 Feb 07 |
jari |
602 |
if(cycleNum < maxNumCycles-1) //don't split if last cycle in maxNumCycles |
2 |
26 Feb 07 |
jari |
603 |
divideCell(mostDiverseCell); |
2 |
26 Feb 07 |
jari |
604 |
else |
2 |
26 Feb 07 |
jari |
605 |
stopGrowing = true; |
2 |
26 Feb 07 |
jari |
606 |
} |
2 |
26 Feb 07 |
jari |
//reached stopping criteria, now just need to train leaves so centroids migrate to center |
2 |
26 Feb 07 |
jari |
608 |
trainLeaves(maxNumEpochs, epochCriteria); |
2 |
26 Feb 07 |
jari |
//add results to AlgorithmData (inData) |
2 |
26 Feb 07 |
jari |
610 |
getResults(); |
2 |
26 Feb 07 |
jari |
611 |
|
2 |
26 Feb 07 |
jari |
612 |
return; |
2 |
26 Feb 07 |
jari |
613 |
|
2 |
26 Feb 07 |
jari |
614 |
} |
2 |
26 Feb 07 |
jari |
615 |
|
2 |
26 Feb 07 |
jari |
616 |
/** |
2 |
26 Feb 07 |
jari |
* Returns the nubmer of samples in the tree |
2 |
26 Feb 07 |
jari |
618 |
*/ |
2 |
26 Feb 07 |
jari |
619 |
public int getNumberOfSamples(){ return numberOfSamples; } |
2 |
26 Feb 07 |
jari |
620 |
|
2 |
26 Feb 07 |
jari |
621 |
/** |
2 |
26 Feb 07 |
jari |
* Returns the nubmer of leaf nodes (aka cells or clusters) |
2 |
26 Feb 07 |
jari |
623 |
*/ |
2 |
26 Feb 07 |
jari |
624 |
public int getNumberOfClusters(){ |
2 |
26 Feb 07 |
jari |
625 |
return numberOfClusters; |
2 |
26 Feb 07 |
jari |
626 |
} |
2 |
26 Feb 07 |
jari |
627 |
|
2 |
26 Feb 07 |
jari |
//Utility function to traverse the cells to get populations |
2 |
26 Feb 07 |
jari |
629 |
private int [] getGenesPerCluster(){ |
2 |
26 Feb 07 |
jari |
630 |
|
2 |
26 Feb 07 |
jari |
631 |
int [] genesPerCluster; |
2 |
26 Feb 07 |
jari |
632 |
SOTACell curr = head; |
2 |
26 Feb 07 |
jari |
633 |
int numCells = 0; |
2 |
26 Feb 07 |
jari |
634 |
int i = 0 ; |
2 |
26 Feb 07 |
jari |
635 |
|
2 |
26 Feb 07 |
jari |
636 |
while(curr != null){ |
2 |
26 Feb 07 |
jari |
637 |
numCells++; |
2 |
26 Feb 07 |
jari |
638 |
curr = curr.succ; |
2 |
26 Feb 07 |
jari |
639 |
} |
2 |
26 Feb 07 |
jari |
640 |
|
2 |
26 Feb 07 |
jari |
641 |
genesPerCluster = new int[numCells]; |
2 |
26 Feb 07 |
jari |
642 |
curr = head; |
2 |
26 Feb 07 |
jari |
643 |
|
2 |
26 Feb 07 |
jari |
644 |
while(curr != null){ |
2 |
26 Feb 07 |
jari |
645 |
genesPerCluster[i] = curr.members.size(); |
2 |
26 Feb 07 |
jari |
646 |
i++; |
2 |
26 Feb 07 |
jari |
647 |
curr = curr.succ; |
2 |
26 Feb 07 |
jari |
648 |
} |
2 |
26 Feb 07 |
jari |
649 |
|
2 |
26 Feb 07 |
jari |
650 |
return genesPerCluster; |
2 |
26 Feb 07 |
jari |
651 |
} |
2 |
26 Feb 07 |
jari |
652 |
|
2 |
26 Feb 07 |
jari |
653 |
|
2 |
26 Feb 07 |
jari |
654 |
/** |
2 |
26 Feb 07 |
jari |
* Returns the distance function used to construct the Tree |
2 |
26 Feb 07 |
jari |
656 |
*/ |
2 |
26 Feb 07 |
jari |
657 |
public int getFunction(){ |
2 |
26 Feb 07 |
jari |
658 |
return function; |
2 |
26 Feb 07 |
jari |
659 |
} |
2 |
26 Feb 07 |
jari |
660 |
|
2 |
26 Feb 07 |
jari |
661 |
/** |
2 |
26 Feb 07 |
jari |
* Returns whether or not distance values were absolute values |
2 |
26 Feb 07 |
jari |
663 |
*/ |
2 |
26 Feb 07 |
jari |
664 |
public boolean getAbsolute(){ |
2 |
26 Feb 07 |
jari |
665 |
return absolute; |
2 |
26 Feb 07 |
jari |
666 |
} |
2 |
26 Feb 07 |
jari |
667 |
|
2 |
26 Feb 07 |
jari |
668 |
/** |
2 |
26 Feb 07 |
jari |
* Returns the distance metric polarity factor |
2 |
26 Feb 07 |
jari |
670 |
*/ |
2 |
26 Feb 07 |
jari |
671 |
public float getFactor(){ |
2 |
26 Feb 07 |
jari |
672 |
return myFactor; |
2 |
26 Feb 07 |
jari |
673 |
} |
2 |
26 Feb 07 |
jari |
674 |
|
2 |
26 Feb 07 |
jari |
675 |
|
2 |
26 Feb 07 |
jari |
676 |
/** |
2 |
26 Feb 07 |
jari |
* Performs a singe cycle on appropriate cell |
2 |
26 Feb 07 |
jari |
678 |
*/ |
2 |
26 Feb 07 |
jari |
679 |
private void runCycle(int maxNumEpochs, double epochCriteria){ |
2 |
26 Feb 07 |
jari |
680 |
|
2 |
26 Feb 07 |
jari |
681 |
int negCounter = 0; |
2 |
26 Feb 07 |
jari |
682 |
|
2 |
26 Feb 07 |
jari |
683 |
double lastError = Double.POSITIVE_INFINITY; |
2 |
26 Feb 07 |
jari |
684 |
double currError = Double.POSITIVE_INFINITY; |
2 |
26 Feb 07 |
jari |
685 |
Dimension distribCount; |
2 |
26 Feb 07 |
jari |
686 |
|
2 |
26 Feb 07 |
jari |
687 |
double initialDiversity = treeDiversity; |
2 |
26 Feb 07 |
jari |
688 |
double diversityImprovement = Float.POSITIVE_INFINITY; |
2 |
26 Feb 07 |
jari |
689 |
SOTACell trainingNode; |
2 |
26 Feb 07 |
jari |
690 |
|
2 |
26 Feb 07 |
jari |
691 |
if(!useClusterVariance) |
2 |
26 Feb 07 |
jari |
692 |
trainingNode = mostDiverseCell; |
2 |
26 Feb 07 |
jari |
693 |
else |
2 |
26 Feb 07 |
jari |
694 |
trainingNode = mostVariableCell; |
2 |
26 Feb 07 |
jari |
695 |
|
2 |
26 Feb 07 |
jari |
696 |
boolean stopEpoch = false; |
2 |
26 Feb 07 |
jari |
697 |
|
2 |
26 Feb 07 |
jari |
698 |
for(int epochNum = 0; epochNum < maxNumEpochs && !stopEpoch; epochNum++){ |
2 |
26 Feb 07 |
jari |
699 |
|
2 |
26 Feb 07 |
jari |
700 |
initialDiversity = treeDiversity; |
2 |
26 Feb 07 |
jari |
701 |
|
2 |
26 Feb 07 |
jari |
702 |
distribCount = runNodeEpoch(trainingNode); //Only distribute left and right, keep counter of left and right. |
2 |
26 Feb 07 |
jari |
//return a dimension(left, right) |
2 |
26 Feb 07 |
jari |
//if left or right == 0 then dont stop epoch |
2 |
26 Feb 07 |
jari |
//Don't redistribute, only alter centroids |
2 |
26 Feb 07 |
jari |
706 |
|
2 |
26 Feb 07 |
jari |
707 |
lastError = currError; |
2 |
26 Feb 07 |
jari |
//get error associated with the current input set |
2 |
26 Feb 07 |
jari |
709 |
currError = getInputError(trainingNode); //This is sum of errors of inputs to the new child cells |
2 |
26 Feb 07 |
jari |
710 |
|
2 |
26 Feb 07 |
jari |
711 |
if(lastError != 0) |
2 |
26 Feb 07 |
jari |
712 |
diversityImprovement = Math.abs( (currError - lastError)/lastError ); |
2 |
26 Feb 07 |
jari |
713 |
else |
2 |
26 Feb 07 |
jari |
714 |
diversityImprovement = 0; |
2 |
26 Feb 07 |
jari |
715 |
|
2 |
26 Feb 07 |
jari |
716 |
if(diversityImprovement < epochCriteria && diversityImprovement >= 0 |
2 |
26 Feb 07 |
jari |
717 |
&& distribCount.getHeight() != 0 && distribCount.getWidth() != 0) |
2 |
26 Feb 07 |
jari |
718 |
stopEpoch = true; |
2 |
26 Feb 07 |
jari |
719 |
} |
2 |
26 Feb 07 |
jari |
//genes in the training node are assigned to closest cell, and inserted |
2 |
26 Feb 07 |
jari |
721 |
assignGenesToCells(trainingNode); |
2 |
26 Feb 07 |
jari |
722 |
} |
2 |
26 Feb 07 |
jari |
723 |
|
2 |
26 Feb 07 |
jari |
724 |
|
2 |
26 Feb 07 |
jari |
725 |
/** |
2 |
26 Feb 07 |
jari |
* Following training, assignment of genes to cells |
2 |
26 Feb 07 |
jari |
727 |
*/ |
2 |
26 Feb 07 |
jari |
728 |
private void assignGenesToCells(SOTACell trainingNode){ |
2 |
26 Feb 07 |
jari |
729 |
int geneIndex; |
2 |
26 Feb 07 |
jari |
730 |
int n = trainingNode.members.size(); |
2 |
26 Feb 07 |
jari |
731 |
SOTACell myCell = null; |
2 |
26 Feb 07 |
jari |
732 |
|
2 |
26 Feb 07 |
jari |
733 |
for(int i = 0; i<n ; i++){ |
2 |
26 Feb 07 |
jari |
734 |
geneIndex = ((Integer)(trainingNode.members.elementAt(i))).intValue(); |
2 |
26 Feb 07 |
jari |
735 |
|
2 |
26 Feb 07 |
jari |
736 |
myCell = findMyCellInSubTree(trainingNode, geneIndex, neighborhoodLevel); //finds and adds index to correct cell |
2 |
26 Feb 07 |
jari |
737 |
} |
2 |
26 Feb 07 |
jari |
738 |
} |
2 |
26 Feb 07 |
jari |
739 |
|
2 |
26 Feb 07 |
jari |
740 |
|
2 |
26 Feb 07 |
jari |
//Note that leaves are threaded from left to right. |
2 |
26 Feb 07 |
jari |
//This means that if displayed top to bottom, centroids would be reversed |
2 |
26 Feb 07 |
jari |
//Therefore, accumulate in reverse order into AlgorithmData |
2 |
26 Feb 07 |
jari |
744 |
private void getResults(){ |
2 |
26 Feb 07 |
jari |
745 |
|
2 |
26 Feb 07 |
jari |
746 |
SOTACell curr = head; |
2 |
26 Feb 07 |
jari |
747 |
int numCells = 0; |
2 |
26 Feb 07 |
jari |
748 |
FloatMatrix centroidFM = new FloatMatrix(numberOfClusters, numberOfSamples); |
2 |
26 Feb 07 |
jari |
749 |
FloatMatrix varianceFM = new FloatMatrix(numberOfClusters, numberOfSamples); |
2 |
26 Feb 07 |
jari |
750 |
|
2 |
26 Feb 07 |
jari |
751 |
int [] clusterSize = new int[numberOfClusters]; |
2 |
26 Feb 07 |
jari |
752 |
FloatMatrix clusterDiversity = new FloatMatrix(numberOfClusters,1); |
2 |
26 Feb 07 |
jari |
753 |
int numDiv = cycleDiversity.size(); |
2 |
26 Feb 07 |
jari |
754 |
FloatMatrix cycleDivFM = new FloatMatrix(numDiv, 1); |
2 |
26 Feb 07 |
jari |
755 |
|
2 |
26 Feb 07 |
jari |
756 |
|
2 |
26 Feb 07 |
jari |
757 |
int [] clusterOrder = new int[numberOfClusters]; |
2 |
26 Feb 07 |
jari |
758 |
|
2 |
26 Feb 07 |
jari |
759 |
clusters = new Cluster(); |
2 |
26 Feb 07 |
jari |
760 |
NodeList nodeList = clusters.getNodeList(); |
2 |
26 Feb 07 |
jari |
761 |
Node newNode; |
2 |
26 Feb 07 |
jari |
762 |
int [] clusterMembership; |
2 |
26 Feb 07 |
jari |
763 |
int clusterPop; |
2 |
26 Feb 07 |
jari |
764 |
|
2 |
26 Feb 07 |
jari |
//move to tail |
2 |
26 Feb 07 |
jari |
766 |
while(curr.succ != null) |
2 |
26 Feb 07 |
jari |
767 |
curr = curr.succ; |
2 |
26 Feb 07 |
jari |
768 |
|
2 |
26 Feb 07 |
jari |
//now curr is at the tail |
2 |
26 Feb 07 |
jari |
770 |
while(numCells <= numberOfClusters && curr != null){ |
2 |
26 Feb 07 |
jari |
771 |
|
2 |
26 Feb 07 |
jari |
772 |
for(int i = 0; i < numberOfSamples; i++){ |
2 |
26 Feb 07 |
jari |
773 |
centroidFM.set( numCells, i, curr.centroidGene.get(0,i)); |
2 |
26 Feb 07 |
jari |
774 |
varianceFM.set( numCells, i, curr.getColumnVar(i)); |
2 |
26 Feb 07 |
jari |
775 |
} |
2 |
26 Feb 07 |
jari |
776 |
clusterPop = curr.members.size(); |
2 |
26 Feb 07 |
jari |
777 |
clusterSize[numCells] = clusterPop; |
2 |
26 Feb 07 |
jari |
778 |
clusterDiversity.set(numCells, 0, (float)curr.cellDiversity*(float)myFactor); //alter poloarity by myFactor based on metric |
2 |
26 Feb 07 |
jari |
779 |
clusterOrder[numCells] = numCells; |
2 |
26 Feb 07 |
jari |
780 |
|
2 |
26 Feb 07 |
jari |
//accumulate cluster probe indicies |
2 |
26 Feb 07 |
jari |
782 |
clusterMembership = new int[clusterPop]; |
2 |
26 Feb 07 |
jari |
783 |
for(int i = 0; i < clusterPop; i++){ |
2 |
26 Feb 07 |
jari |
784 |
clusterMembership[i] = ((Integer)(curr.members.elementAt(i))).intValue(); |
2 |
26 Feb 07 |
jari |
785 |
} |
2 |
26 Feb 07 |
jari |
786 |
|
2 |
26 Feb 07 |
jari |
787 |
newNode = new Node(); |
2 |
26 Feb 07 |
jari |
788 |
newNode.setProbesIndexes(clusterMembership); |
2 |
26 Feb 07 |
jari |
789 |
nodeList.addNode(newNode); |
2 |
26 Feb 07 |
jari |
790 |
|
2 |
26 Feb 07 |
jari |
791 |
numCells++; |
2 |
26 Feb 07 |
jari |
792 |
curr = curr.pred; |
2 |
26 Feb 07 |
jari |
793 |
} |
2 |
26 Feb 07 |
jari |
794 |
|
2 |
26 Feb 07 |
jari |
//now accumlate cycle divresity information |
2 |
26 Feb 07 |
jari |
796 |
if(myFactor == 1){ |
2 |
26 Feb 07 |
jari |
797 |
float initDiv = ((Float)(cycleDiversity.elementAt(0))).floatValue(); |
2 |
26 Feb 07 |
jari |
798 |
for(int i = 0; i < numDiv; i++){ |
2 |
26 Feb 07 |
jari |
799 |
cycleDivFM.set(i,0, (((Float)(cycleDiversity.elementAt(i))).floatValue())/initDiv ); |
2 |
26 Feb 07 |
jari |
800 |
} |
2 |
26 Feb 07 |
jari |
801 |
} |
2 |
26 Feb 07 |
jari |
802 |
else{ |
2 |
26 Feb 07 |
jari |
803 |
float lowerLim = numberOfGenes * myFactor; |
2 |
26 Feb 07 |
jari |
804 |
float initDiv = ((Float)(cycleDiversity.elementAt(0))).floatValue() + Math.abs(lowerLim) ; |
2 |
26 Feb 07 |
jari |
805 |
for(int i = 0; i < numDiv; i++){ |
2 |
26 Feb 07 |
jari |
806 |
cycleDivFM.set(i,0, (((Float)(cycleDiversity.elementAt(i))).floatValue()+Math.abs(lowerLim))/initDiv ); |
2 |
26 Feb 07 |
jari |
807 |
} |
2 |
26 Feb 07 |
jari |
808 |
|
2 |
26 Feb 07 |
jari |
809 |
} |
2 |
26 Feb 07 |
jari |
// put all important information into AlgorithmData |
2 |
26 Feb 07 |
jari |
811 |
inData.addParam("cycles", String.valueOf(numberOfClusters)); |
2 |
26 Feb 07 |
jari |
812 |
inData.addCluster("cluster", clusters); |
2 |
26 Feb 07 |
jari |
813 |
inData.addMatrix("centroid-matrix", centroidFM); |
2 |
26 Feb 07 |
jari |
814 |
inData.addMatrix("cluster-variances", varianceFM); |
2 |
26 Feb 07 |
jari |
815 |
inData.addMatrix("cluster-diversity", clusterDiversity); |
2 |
26 Feb 07 |
jari |
816 |
inData.addMatrix("cycle-diversity", cycleDivFM); |
2 |
26 Feb 07 |
jari |
817 |
inData.addIntArray("cluster-population", clusterSize); |
2 |
26 Feb 07 |
jari |
818 |
|
2 |
26 Feb 07 |
jari |
//Additions to AlgorithmData to allow drawing arrays |
2 |
26 Feb 07 |
jari |
820 |
float [] nodeHeight = new float[numberOfClusters*2]; |
2 |
26 Feb 07 |
jari |
821 |
int [] nodePopulation = new int[numberOfClusters*2]; |
2 |
26 Feb 07 |
jari |
822 |
int [] leftChild = new int[nodeHeight.length*2]; |
2 |
26 Feb 07 |
jari |
823 |
int [] rightChild = new int[nodeHeight.length*2]; |
2 |
26 Feb 07 |
jari |
824 |
|
2 |
26 Feb 07 |
jari |
825 |
initializeReturnValues(nodeHeight, nodePopulation, leftChild, rightChild); |
2 |
26 Feb 07 |
jari |
826 |
utilCounter = 0; |
2 |
26 Feb 07 |
jari |
827 |
loadReturnValues(root, 0, nodeHeight, nodePopulation, leftChild, rightChild); |
2 |
26 Feb 07 |
jari |
828 |
inData.addMatrix("node-heights", new FloatMatrix(nodeHeight, nodeHeight.length)); |
2 |
26 Feb 07 |
jari |
829 |
inData.addIntArray("left-child", leftChild); |
2 |
26 Feb 07 |
jari |
830 |
inData.addIntArray("right-child", rightChild); |
2 |
26 Feb 07 |
jari |
831 |
inData.addIntArray("node-population", nodePopulation); |
2 |
26 Feb 07 |
jari |
832 |
|
2 |
26 Feb 07 |
jari |
833 |
|
2 |
26 Feb 07 |
jari |
834 |
if(useClusterVariance) |
2 |
26 Feb 07 |
jari |
835 |
inData.addParam("computed-var-cutoff", String.valueOf(endCriteria)); |
2 |
26 Feb 07 |
jari |
836 |
return; |
2 |
26 Feb 07 |
jari |
837 |
} |
2 |
26 Feb 07 |
jari |
838 |
|
2 |
26 Feb 07 |
jari |
839 |
|
2 |
26 Feb 07 |
jari |
840 |
private void initializeReturnValues(float [] height, int [] pop, int [] left, int [] right){ |
2 |
26 Feb 07 |
jari |
841 |
int i = 0; |
2 |
26 Feb 07 |
jari |
842 |
for(; i < height.length ; i++){ |
2 |
26 Feb 07 |
jari |
843 |
height[i] = -1.0f; |
2 |
26 Feb 07 |
jari |
844 |
pop[i] = left[i] = right[i] = -1; |
2 |
26 Feb 07 |
jari |
845 |
} |
2 |
26 Feb 07 |
jari |
846 |
for(int j = i; j < left.length ; j++){ |
2 |
26 Feb 07 |
jari |
847 |
left[j] = right[j] = -1; |
2 |
26 Feb 07 |
jari |
848 |
} |
2 |
26 Feb 07 |
jari |
849 |
} |
2 |
26 Feb 07 |
jari |
850 |
|
2 |
26 Feb 07 |
jari |
851 |
private void loadReturnValues(SOTACell subRoot, int index, float [] h, int [] pop, int [] left, int [] right){ |
2 |
26 Feb 07 |
jari |
852 |
pop[index] = subRoot.members.size(); |
2 |
26 Feb 07 |
jari |
853 |
if(subRoot == root) |
2 |
26 Feb 07 |
jari |
854 |
h[index] = 0; |
2 |
26 Feb 07 |
jari |
855 |
else |
2 |
26 Feb 07 |
jari |
856 |
h[index] = ExperimentUtil.geneDistance(subRoot.centroidGene, subRoot.parent.centroidGene, 0, 0, function, factor, absolute); |
2 |
26 Feb 07 |
jari |
857 |
|
2 |
26 Feb 07 |
jari |
858 |
if(subRoot.left != null){ |
2 |
26 Feb 07 |
jari |
859 |
left[index] = utilCounter + 1; |
2 |
26 Feb 07 |
jari |
860 |
utilCounter++; |
2 |
26 Feb 07 |
jari |
861 |
loadReturnValues(subRoot.left, utilCounter, h, pop, left, right); |
2 |
26 Feb 07 |
jari |
862 |
right[index] = utilCounter + 1; |
2 |
26 Feb 07 |
jari |
863 |
utilCounter++; |
2 |
26 Feb 07 |
jari |
864 |
loadReturnValues(subRoot.right, utilCounter, h, pop, left, right); |
2 |
26 Feb 07 |
jari |
865 |
} |
2 |
26 Feb 07 |
jari |
866 |
} |
2 |
26 Feb 07 |
jari |
867 |
|
2 |
26 Feb 07 |
jari |
//gets sum of min distance of genes in parentNode to child centroids |
2 |
26 Feb 07 |
jari |
869 |
private double getInputError(SOTACell parentNode){ |
2 |
26 Feb 07 |
jari |
870 |
int n = parentNode.members.size(); |
2 |
26 Feb 07 |
jari |
871 |
double cumErr = 0; |
2 |
26 Feb 07 |
jari |
872 |
double d1; |
2 |
26 Feb 07 |
jari |
873 |
double d2; |
2 |
26 Feb 07 |
jari |
874 |
int geneIndex; |
2 |
26 Feb 07 |
jari |
875 |
|
2 |
26 Feb 07 |
jari |
876 |
for(int i = 0; i < n; i++){ |
2 |
26 Feb 07 |
jari |
877 |
geneIndex = ((Integer)(parentNode.members.elementAt(i))).intValue(); |
2 |
26 Feb 07 |
jari |
878 |
|
2 |
26 Feb 07 |
jari |
879 |
d1 = ExperimentUtil.geneDistance(dataMatrix, parentNode.left.centroidGene, geneIndex, 0, function, factor, absolute); |
2 |
26 Feb 07 |
jari |
880 |
d2 = ExperimentUtil.geneDistance(dataMatrix, parentNode.right.centroidGene, geneIndex, 0, function, factor, absolute); |
2 |
26 Feb 07 |
jari |
881 |
|
2 |
26 Feb 07 |
jari |
882 |
if(d1<=d2) |
2 |
26 Feb 07 |
jari |
883 |
cumErr += d1; |
2 |
26 Feb 07 |
jari |
884 |
else |
2 |
26 Feb 07 |
jari |
885 |
cumErr += d2; |
2 |
26 Feb 07 |
jari |
886 |
} |
2 |
26 Feb 07 |
jari |
887 |
return cumErr/n; |
2 |
26 Feb 07 |
jari |
888 |
} |
2 |
26 Feb 07 |
jari |
889 |
|
2 |
26 Feb 07 |
jari |
//returns the distribution of trainingNode memeber genes among left and right children |
2 |
26 Feb 07 |
jari |
891 |
private Dimension runNodeEpoch(SOTACell trainingNode){ |
2 |
26 Feb 07 |
jari |
892 |
|
2 |
26 Feb 07 |
jari |
893 |
SOTACell myCell = null; |
2 |
26 Feb 07 |
jari |
894 |
SOTACell sisterCell = null; |
2 |
26 Feb 07 |
jari |
895 |
int rightCnt = 0; |
2 |
26 Feb 07 |
jari |
896 |
int leftCnt = 0; |
2 |
26 Feb 07 |
jari |
897 |
int memberGene = 0; |
2 |
26 Feb 07 |
jari |
898 |
|
2 |
26 Feb 07 |
jari |
//for all genes in the training node, find closest child, migrate child |
2 |
26 Feb 07 |
jari |
900 |
for(int geneNum = 0; geneNum < trainingNode.members.size() ; geneNum++){ |
2 |
26 Feb 07 |
jari |
901 |
|
2 |
26 Feb 07 |
jari |
902 |
memberGene = ((Integer)trainingNode.members.elementAt(geneNum)).intValue(); |
2 |
26 Feb 07 |
jari |
903 |
|
2 |
26 Feb 07 |
jari |
904 |
myCell = findMyDaughterCell(trainingNode, memberGene); //only look among children |
2 |
26 Feb 07 |
jari |
//dont add to membership |
2 |
26 Feb 07 |
jari |
906 |
|
2 |
26 Feb 07 |
jari |
//later make sure that left and right membership set is not null |
2 |
26 Feb 07 |
jari |
908 |
if(myCell == trainingNode.left) |
2 |
26 Feb 07 |
jari |
909 |
leftCnt++; |
2 |
26 Feb 07 |
jari |
910 |
else |
2 |
26 Feb 07 |
jari |
911 |
rightCnt++; |
2 |
26 Feb 07 |
jari |
912 |
|
2 |
26 Feb 07 |
jari |
913 |
myCell.migrateCentroid(memberGene, migW); |
2 |
26 Feb 07 |
jari |
914 |
|
2 |
26 Feb 07 |
jari |
915 |
sisterCell = findSister(myCell); |
2 |
26 Feb 07 |
jari |
916 |
|
2 |
26 Feb 07 |
jari |
//if sister has no offspring then migrate parent and sister |
2 |
26 Feb 07 |
jari |
918 |
if(sisterCell.left == null && sisterCell.right == null){ |
2 |
26 Feb 07 |
jari |
919 |
myCell.parent.migrateCentroid(memberGene, migP); |
2 |
26 Feb 07 |
jari |
920 |
sisterCell.migrateCentroid(memberGene, migS); |
2 |
26 Feb 07 |
jari |
921 |
} |
2 |
26 Feb 07 |
jari |
922 |
} |
2 |
26 Feb 07 |
jari |
923 |
return new Dimension(leftCnt, rightCnt); |
2 |
26 Feb 07 |
jari |
924 |
} |
2 |
26 Feb 07 |
jari |
925 |
|
2 |
26 Feb 07 |
jari |
926 |
|
2 |
26 Feb 07 |
jari |
927 |
/** |
2 |
26 Feb 07 |
jari |
* Finds closest daughter cell |
2 |
26 Feb 07 |
jari |
929 |
*/ |
2 |
26 Feb 07 |
jari |
930 |
private SOTACell findMyDaughterCell(SOTACell parentNode, int geneIndex){ |
2 |
26 Feb 07 |
jari |
931 |
|
2 |
26 Feb 07 |
jari |
932 |
float dist1 = ExperimentUtil.geneDistance(dataMatrix, parentNode.left.centroidGene, geneIndex, 0, function, factor, absolute); |
2 |
26 Feb 07 |
jari |
933 |
float dist2 = ExperimentUtil.geneDistance(dataMatrix, parentNode.right.centroidGene, geneIndex, 0, function, factor, absolute); |
2 |
26 Feb 07 |
jari |
934 |
|
2 |
26 Feb 07 |
jari |
935 |
if(dist1 <= dist2) |
2 |
26 Feb 07 |
jari |
936 |
return parentNode.left; |
2 |
26 Feb 07 |
jari |
937 |
else |
2 |
26 Feb 07 |
jari |
938 |
return parentNode.right; |
2 |
26 Feb 07 |
jari |
939 |
} |
2 |
26 Feb 07 |
jari |
940 |
|
2 |
26 Feb 07 |
jari |
941 |
|
2 |
26 Feb 07 |
jari |
//sets cell diversities, and variances (if required) |
2 |
26 Feb 07 |
jari |
943 |
private void setDiversities(){ |
2 |
26 Feb 07 |
jari |
944 |
SOTACell curr = head; |
2 |
26 Feb 07 |
jari |
945 |
double cellSum = 0; |
2 |
26 Feb 07 |
jari |
946 |
double cellVar = 0; |
2 |
26 Feb 07 |
jari |
947 |
double treeSum = 0; |
2 |
26 Feb 07 |
jari |
948 |
double maxCellDiv = -1; |
2 |
26 Feb 07 |
jari |
949 |
double maxCellVar = -1; |
2 |
26 Feb 07 |
jari |
950 |
int numberOfCells = 0; |
2 |
26 Feb 07 |
jari |
951 |
double currDist = 0; |
2 |
26 Feb 07 |
jari |
952 |
|
2 |
26 Feb 07 |
jari |
953 |
mostDiverseCell = head; |
2 |
26 Feb 07 |
jari |
954 |
mostVariableCell = head; |
2 |
26 Feb 07 |
jari |
955 |
|
2 |
26 Feb 07 |
jari |
956 |
while(curr != null){ |
2 |
26 Feb 07 |
jari |
957 |
|
2 |
26 Feb 07 |
jari |
958 |
numberOfCells++; |
2 |
26 Feb 07 |
jari |
959 |
cellSum = 0; |
2 |
26 Feb 07 |
jari |
960 |
|
2 |
26 Feb 07 |
jari |
//for all members of the node get distance to set cell resource (diversity) |
2 |
26 Feb 07 |
jari |
962 |
for(int i = 0; i < curr.members.size() ; i++){ |
2 |
26 Feb 07 |
jari |
963 |
cellSum += ExperimentUtil.geneDistance(dataMatrix, curr.centroidGene, ((Integer)(curr.members.elementAt(i))).intValue(), |
2 |
26 Feb 07 |
jari |
964 |
0, function, factor, absolute); |
2 |
26 Feb 07 |
jari |
965 |
} |
2 |
26 Feb 07 |
jari |
966 |
|
2 |
26 Feb 07 |
jari |
967 |
curr.cellDiversity = (cellSum/curr.members.size()); |
2 |
26 Feb 07 |
jari |
968 |
|
2 |
26 Feb 07 |
jari |
969 |
if(curr.cellDiversity > maxCellDiv && curr.members.size() > 1){ |
2 |
26 Feb 07 |
jari |
970 |
maxCellDiv = curr.cellDiversity; |
2 |
26 Feb 07 |
jari |
971 |
mostDiverseCell = curr; |
2 |
26 Feb 07 |
jari |
972 |
} |
2 |
26 Feb 07 |
jari |
973 |
|
2 |
26 Feb 07 |
jari |
974 |
treeSum += cellSum; |
2 |
26 Feb 07 |
jari |
975 |
|
2 |
26 Feb 07 |
jari |
976 |
if(useClusterVariance){ //using cell variance, need to find mostVariable cell |
2 |
26 Feb 07 |
jari |
977 |
cellVar = 0; |
2 |
26 Feb 07 |
jari |
978 |
currDist = 0; |
2 |
26 Feb 07 |
jari |
979 |
|
2 |
26 Feb 07 |
jari |
//get cell varience |
2 |
26 Feb 07 |
jari |
//if new members have been added |
2 |
26 Feb 07 |
jari |
982 |
if(curr.changedMembership){ |
2 |
26 Feb 07 |
jari |
//use max gene to gene distance |
2 |
26 Feb 07 |
jari |
984 |
for(int i = 0; i < curr.members.size(); i++){ |
2 |
26 Feb 07 |
jari |
985 |
for(int j = 0; j < curr.members.size(); j++){ |
2 |
26 Feb 07 |
jari |
986 |
|
2 |
26 Feb 07 |
jari |
987 |
currDist = ExperimentUtil.geneDistance(dataMatrix, null, ((Integer)(curr.members.elementAt(i))).intValue(), |
2 |
26 Feb 07 |
jari |
988 |
((Integer)(curr.members.elementAt(j))).intValue(), function, factor, absolute); |
2 |
26 Feb 07 |
jari |
989 |
|
2 |
26 Feb 07 |
jari |
//get max dist. to be cellVar |
2 |
26 Feb 07 |
jari |
991 |
if(currDist > cellVar){ |
2 |
26 Feb 07 |
jari |
992 |
cellVar = currDist; |
2 |
26 Feb 07 |
jari |
993 |
} |
2 |
26 Feb 07 |
jari |
994 |
} |
2 |
26 Feb 07 |
jari |
995 |
} |
2 |
26 Feb 07 |
jari |
996 |
curr.cellVariance = cellVar; |
2 |
26 Feb 07 |
jari |
997 |
} |
2 |
26 Feb 07 |
jari |
998 |
else //no change to membership so we dont hve to recalculate variance |
2 |
26 Feb 07 |
jari |
999 |
cellVar = curr.cellVariance; |
2 |
26 Feb 07 |
jari |
1000 |
|
2 |
26 Feb 07 |
jari |
1001 |
if(cellVar > maxCellVar && curr.members.size() > 1){ |
2 |
26 Feb 07 |
jari |
1002 |
maxCellVar = cellVar; |
2 |
26 Feb 07 |
jari |
1003 |
mostVariableCell = curr; |
2 |
26 Feb 07 |
jari |
1004 |
} |
2 |
26 Feb 07 |
jari |
1005 |
} |
2 |
26 Feb 07 |
jari |
1006 |
curr.changedMembership = false; //variance already set for current population |
2 |
26 Feb 07 |
jari |
1007 |
curr = curr.succ; |
2 |
26 Feb 07 |
jari |
1008 |
} |
2 |
26 Feb 07 |
jari |
1009 |
treeDiversity = treeSum; |
2 |
26 Feb 07 |
jari |
1010 |
} |
2 |
26 Feb 07 |
jari |
1011 |
|
2 |
26 Feb 07 |
jari |
1012 |
|
2 |
26 Feb 07 |
jari |
1013 |
private float getNodeDiversitySum(SOTACell curr){ |
2 |
26 Feb 07 |
jari |
1014 |
|
2 |
26 Feb 07 |
jari |
1015 |
float div = 0; |
2 |
26 Feb 07 |
jari |
1016 |
int n = curr.members.size(); |
2 |
26 Feb 07 |
jari |
1017 |
int geneIndex; |
2 |
26 Feb 07 |
jari |
1018 |
float currDist; |
2 |
26 Feb 07 |
jari |
1019 |
float sum = 0; |
2 |
26 Feb 07 |
jari |
1020 |
|
2 |
26 Feb 07 |
jari |
1021 |
for(int i = 0; i < n; i++){ |
2 |
26 Feb 07 |
jari |
1022 |
geneIndex = ( (Integer)(curr.members.elementAt(i)) ).intValue(); |
2 |
26 Feb 07 |
jari |
1023 |
currDist = ExperimentUtil.geneDistance(dataMatrix, curr.centroidGene, geneIndex, 0, function, factor, absolute); |
2 |
26 Feb 07 |
jari |
1024 |
if(!Float.isNaN(currDist)){ |
2 |
26 Feb 07 |
jari |
1025 |
sum += currDist; |
2 |
26 Feb 07 |
jari |
1026 |
} |
2 |
26 Feb 07 |
jari |
1027 |
} |
2 |
26 Feb 07 |
jari |
1028 |
return sum; |
2 |
26 Feb 07 |
jari |
1029 |
} |
2 |
26 Feb 07 |
jari |
1030 |
|
2 |
26 Feb 07 |
jari |
1031 |
|
2 |
26 Feb 07 |
jari |
1032 |
|
2 |
26 Feb 07 |
jari |
1033 |
private float getNodeDiversity(SOTACell curr){ |
2 |
26 Feb 07 |
jari |
1034 |
float div = 0; |
2 |
26 Feb 07 |
jari |
1035 |
int n = curr.members.size(); |
2 |
26 Feb 07 |
jari |
1036 |
int geneIndex; |
2 |
26 Feb 07 |
jari |
1037 |
float currDist; |
2 |
26 Feb 07 |
jari |
1038 |
float sum = 0; |
2 |
26 Feb 07 |
jari |
1039 |
int numGoodRatios = 0; |
2 |
26 Feb 07 |
jari |
1040 |
|
2 |
26 Feb 07 |
jari |
1041 |
for(int i = 0; i < n; i++){ |
2 |
26 Feb 07 |
jari |
1042 |
geneIndex = ( (Integer)(curr.members.elementAt(i)) ).intValue(); |
2 |
26 Feb 07 |
jari |
1043 |
currDist = ExperimentUtil.geneDistance(dataMatrix, curr.centroidGene, geneIndex, 0, function, factor, absolute); |
2 |
26 Feb 07 |
jari |
1044 |
if(!Float.isNaN(currDist)){ |
2 |
26 Feb 07 |
jari |
1045 |
sum += currDist; |
2 |
26 Feb 07 |
jari |
1046 |
numGoodRatios++; |
2 |
26 Feb 07 |
jari |
1047 |
} |
2 |
26 Feb 07 |
jari |
1048 |
} |
2 |
26 Feb 07 |
jari |
1049 |
return (float)(sum/numGoodRatios); |
2 |
26 Feb 07 |
jari |
1050 |
} |
2 |
26 Feb 07 |
jari |
1051 |
|
2 |
26 Feb 07 |
jari |
1052 |
|
2 |
26 Feb 07 |
jari |
1053 |
|
2 |
26 Feb 07 |
jari |
1054 |
private float getNodeVariance(SOTACell curr){ |
2 |
26 Feb 07 |
jari |
1055 |
float div = 0; |
2 |
26 Feb 07 |
jari |
1056 |
int n = curr.members.size(); |
2 |
26 Feb 07 |
jari |
1057 |
int geneIndex1, geneIndex2; |
2 |
26 Feb 07 |
jari |
1058 |
float currDist; |
2 |
26 Feb 07 |
jari |
1059 |
float maxDist = 0; |
2 |
26 Feb 07 |
jari |
1060 |
int numGoodRatios = 0; |
2 |
26 Feb 07 |
jari |
1061 |
|
2 |
26 Feb 07 |
jari |
1062 |
for(int i = 0; i < n; i++){ |
2 |
26 Feb 07 |
jari |
1063 |
for(int j = i+1; j < n; j++){ |
2 |
26 Feb 07 |
jari |
1064 |
|
2 |
26 Feb 07 |
jari |
1065 |
geneIndex1 = ( (Integer)(curr.members.elementAt(i)) ).intValue(); |
2 |
26 Feb 07 |
jari |
1066 |
geneIndex2 = ( (Integer)(curr.members.elementAt(j)) ).intValue(); |
2 |
26 Feb 07 |
jari |
1067 |
|
2 |
26 Feb 07 |
jari |
1068 |
currDist = ExperimentUtil.geneDistance(dataMatrix, null, geneIndex1, geneIndex2, function, factor, absolute); |
2 |
26 Feb 07 |
jari |
1069 |
|
2 |
26 Feb 07 |
jari |
1070 |
if(!Float.isNaN(currDist) && currDist > maxDist){ |
2 |
26 Feb 07 |
jari |
1071 |
maxDist = currDist; |
2 |
26 Feb 07 |
jari |
1072 |
} |
2 |
26 Feb 07 |
jari |
1073 |
} |
2 |
26 Feb 07 |
jari |
1074 |
} |
2 |
26 Feb 07 |
jari |
1075 |
return maxDist; |
2 |
26 Feb 07 |
jari |
1076 |
} |
2 |
26 Feb 07 |
jari |
1077 |
|
2 |
26 Feb 07 |
jari |
1078 |
|
2 |
26 Feb 07 |
jari |
//Final adjustment of cell centroids |
2 |
26 Feb 07 |
jari |
1080 |
private void trainLeaves(int maxNumEpochs, double epochCriteria){ |
2 |
26 Feb 07 |
jari |
1081 |
|
2 |
26 Feb 07 |
jari |
1082 |
double initialDiversity = treeDiversity; |
2 |
26 Feb 07 |
jari |
1083 |
double diversityImprovement = Double.POSITIVE_INFINITY; |
2 |
26 Feb 07 |
jari |
1084 |
SOTACell trainingNode = head; |
2 |
26 Feb 07 |
jari |
1085 |
|
2 |
26 Feb 07 |
jari |
1086 |
while(trainingNode != null){ |
2 |
26 Feb 07 |
jari |
1087 |
diversityImprovement = Float.POSITIVE_INFINITY; |
2 |
26 Feb 07 |
jari |
1088 |
|
2 |
26 Feb 07 |
jari |
1089 |
for(int epochNum = 0; epochNum < maxNumEpochs && diversityImprovement > epochCriteria; epochNum++){ |
2 |
26 Feb 07 |
jari |
1090 |
|
2 |
26 Feb 07 |
jari |
1091 |
initialDiversity = getNodeDiversity(trainingNode); |
2 |
26 Feb 07 |
jari |
1092 |
runLeafEpoch(trainingNode); |
2 |
26 Feb 07 |
jari |
1093 |
|
2 |
26 Feb 07 |
jari |
1094 |
diversityImprovement = Math.abs((getNodeDiversity(trainingNode) - initialDiversity)/initialDiversity); |
2 |
26 Feb 07 |
jari |
1095 |
} |
2 |
26 Feb 07 |
jari |
1096 |
trainingNode = trainingNode.succ; |
2 |
26 Feb 07 |
jari |
1097 |
} |
2 |
26 Feb 07 |
jari |
//calculate current cell diversities and tree diversity |
2 |
26 Feb 07 |
jari |
1099 |
setDiversities(); |
2 |
26 Feb 07 |
jari |
1100 |
} |
2 |
26 Feb 07 |
jari |
1101 |
|
2 |
26 Feb 07 |
jari |
1102 |
|
2 |
26 Feb 07 |
jari |
1103 |
|
2 |
26 Feb 07 |
jari |
1104 |
private void runLeafEpoch(SOTACell trainingNode){ |
2 |
26 Feb 07 |
jari |
1105 |
|
2 |
26 Feb 07 |
jari |
1106 |
SOTACell myCell = null; |
2 |
26 Feb 07 |
jari |
1107 |
SOTACell sisterCell = null; |
2 |
26 Feb 07 |
jari |
1108 |
|
2 |
26 Feb 07 |
jari |
1109 |
int memberGene = 0; |
2 |
26 Feb 07 |
jari |
1110 |
|
2 |
26 Feb 07 |
jari |
1111 |
for(int geneNum = 0; geneNum < trainingNode.members.size() ; geneNum++){ |
2 |
26 Feb 07 |
jari |
1112 |
memberGene = ((Integer)trainingNode.members.elementAt(geneNum)).intValue(); |
2 |
26 Feb 07 |
jari |
//leaf training is just migration of centroids |
2 |
26 Feb 07 |
jari |
1114 |
(myNucleus[memberGene]).migrateCentroid(memberGene, migW); |
2 |
26 Feb 07 |
jari |
1115 |
} |
2 |
26 Feb 07 |
jari |
1116 |
} |
2 |
26 Feb 07 |
jari |
1117 |
|
2 |
26 Feb 07 |
jari |
1118 |
|
2 |
26 Feb 07 |
jari |
1119 |
private SOTACell findMyCellInSubTree(SOTACell trainingCell, int geneNum, int level){ |
2 |
26 Feb 07 |
jari |
1120 |
|
2 |
26 Feb 07 |
jari |
1121 |
SOTACell currCell = trainingCell; |
2 |
26 Feb 07 |
jari |
1122 |
SOTACell myCell = trainingCell; |
2 |
26 Feb 07 |
jari |
1123 |
int levelIndex = 0; |
2 |
26 Feb 07 |
jari |
1124 |
|
2 |
26 Feb 07 |
jari |
1125 |
while(currCell.parent != null && levelIndex < level){ |
2 |
26 Feb 07 |
jari |
1126 |
currCell = currCell.parent; |
2 |
26 Feb 07 |
jari |
1127 |
levelIndex++; |
2 |
26 Feb 07 |
jari |
1128 |
} |
2 |
26 Feb 07 |
jari |
//now currNode is at root, or 'level' number of nodes above the training node |
2 |
26 Feb 07 |
jari |
1130 |
|
2 |
26 Feb 07 |
jari |
1131 |
Vector cellList = new Vector(); |
2 |
26 Feb 07 |
jari |
1132 |
|
2 |
26 Feb 07 |
jari |
1133 |
getCellsBelow(cellList, currCell); |
2 |
26 Feb 07 |
jari |
1134 |
|
2 |
26 Feb 07 |
jari |
1135 |
float minDist = Float.POSITIVE_INFINITY; |
2 |
26 Feb 07 |
jari |
1136 |
float currDist; |
2 |
26 Feb 07 |
jari |
1137 |
|
2 |
26 Feb 07 |
jari |
1138 |
for(int i = 0; i < cellList.size() ; i++){ |
2 |
26 Feb 07 |
jari |
1139 |
currCell = (SOTACell)(cellList.elementAt(i)); |
2 |
26 Feb 07 |
jari |
1140 |
currDist = ExperimentUtil.geneDistance(dataMatrix, currCell.centroidGene, geneNum, 0, function, factor, absolute); |
2 |
26 Feb 07 |
jari |
1141 |
if(currDist < minDist){ |
2 |
26 Feb 07 |
jari |
1142 |
minDist = currDist; |
2 |
26 Feb 07 |
jari |
1143 |
myCell = currCell; |
2 |
26 Feb 07 |
jari |
1144 |
} |
2 |
26 Feb 07 |
jari |
1145 |
} |
2 |
26 Feb 07 |
jari |
1146 |
|
2 |
26 Feb 07 |
jari |
1147 |
if(myNucleus[geneNum] != myCell){ |
2 |
26 Feb 07 |
jari |
1148 |
|
2 |
26 Feb 07 |
jari |
1149 |
myNucleus[geneNum] = myCell; |
2 |
26 Feb 07 |
jari |
1150 |
myCell.addMember(geneNum); |
2 |
26 Feb 07 |
jari |
1151 |
} |
2 |
26 Feb 07 |
jari |
1152 |
return myCell; |
2 |
26 Feb 07 |
jari |
1153 |
} |
2 |
26 Feb 07 |
jari |
1154 |
|
2 |
26 Feb 07 |
jari |
1155 |
|
2 |
26 Feb 07 |
jari |
1156 |
private void getCellsBelow(Vector cellList, SOTACell subRoot){ |
2 |
26 Feb 07 |
jari |
1157 |
|
2 |
26 Feb 07 |
jari |
1158 |
if(subRoot == null) return; |
2 |
26 Feb 07 |
jari |
1159 |
|
2 |
26 Feb 07 |
jari |
1160 |
if(subRoot.left == null && subRoot.right == null){ |
2 |
26 Feb 07 |
jari |
1161 |
cellList.add(subRoot); |
2 |
26 Feb 07 |
jari |
1162 |
} |
2 |
26 Feb 07 |
jari |
1163 |
|
2 |
26 Feb 07 |
jari |
1164 |
else{ |
2 |
26 Feb 07 |
jari |
1165 |
getCellsBelow(cellList, subRoot.right); |
2 |
26 Feb 07 |
jari |
1166 |
getCellsBelow(cellList, subRoot.left); |
2 |
26 Feb 07 |
jari |
1167 |
} |
2 |
26 Feb 07 |
jari |
1168 |
} |
2 |
26 Feb 07 |
jari |
1169 |
|
2 |
26 Feb 07 |
jari |
1170 |
|
2 |
26 Feb 07 |
jari |
1171 |
|
2 |
26 Feb 07 |
jari |
1172 |
|
2 |
26 Feb 07 |
jari |
1173 |
private SOTACell findMyCell(int geneNum){ |
2 |
26 Feb 07 |
jari |
1174 |
SOTACell curr = head; |
2 |
26 Feb 07 |
jari |
1175 |
|
2 |
26 Feb 07 |
jari |
1176 |
SOTACell myClosestCell = head; |
2 |
26 Feb 07 |
jari |
1177 |
double keyDist = Float.POSITIVE_INFINITY; |
2 |
26 Feb 07 |
jari |
1178 |
double currDist = 0; |
2 |
26 Feb 07 |
jari |
1179 |
|
2 |
26 Feb 07 |
jari |
1180 |
|
2 |
26 Feb 07 |
jari |
1181 |
while(curr != null){ |
2 |
26 Feb 07 |
jari |
1182 |
|
2 |
26 Feb 07 |
jari |
1183 |
currDist = ExperimentUtil.geneDistance(dataMatrix, curr.centroidGene, geneNum, 0, function, factor, absolute); |
2 |
26 Feb 07 |
jari |
1184 |
|
2 |
26 Feb 07 |
jari |
1185 |
if(currDist <= keyDist){ |
2 |
26 Feb 07 |
jari |
1186 |
keyDist = currDist; |
2 |
26 Feb 07 |
jari |
1187 |
myClosestCell = curr; |
2 |
26 Feb 07 |
jari |
1188 |
} |
2 |
26 Feb 07 |
jari |
1189 |
curr = curr.succ; |
2 |
26 Feb 07 |
jari |
1190 |
} |
2 |
26 Feb 07 |
jari |
1191 |
|
2 |
26 Feb 07 |
jari |
1192 |
if(myNucleus[geneNum] != myClosestCell){ |
2 |
26 Feb 07 |
jari |
1193 |
myNucleus[geneNum] = myClosestCell; |
2 |
26 Feb 07 |
jari |
1194 |
myClosestCell.addMember(geneNum); |
2 |
26 Feb 07 |
jari |
1195 |
} |
2 |
26 Feb 07 |
jari |
1196 |
|
2 |
26 Feb 07 |
jari |
1197 |
return myClosestCell; |
2 |
26 Feb 07 |
jari |
1198 |
} |
2 |
26 Feb 07 |
jari |
1199 |
|
2 |
26 Feb 07 |
jari |
1200 |
|
2 |
26 Feb 07 |
jari |
1201 |
private SOTACell findSister(SOTACell curr){ |
2 |
26 Feb 07 |
jari |
1202 |
|
2 |
26 Feb 07 |
jari |
1203 |
if(curr != null){ |
2 |
26 Feb 07 |
jari |
1204 |
if(curr.parent.left == curr) |
2 |
26 Feb 07 |
jari |
1205 |
return curr.parent.right; |
2 |
26 Feb 07 |
jari |
1206 |
else |
2 |
26 Feb 07 |
jari |
1207 |
return curr.parent.left; |
2 |
26 Feb 07 |
jari |
1208 |
} |
2 |
26 Feb 07 |
jari |
1209 |
else |
2 |
26 Feb 07 |
jari |
1210 |
return null; |
2 |
26 Feb 07 |
jari |
1211 |
} |
2 |
26 Feb 07 |
jari |
1212 |
|
2 |
26 Feb 07 |
jari |
1213 |
|
2 |
26 Feb 07 |
jari |
1214 |
|
2 |
26 Feb 07 |
jari |
1215 |
private void divideCell(SOTACell cellToDivide){ |
2 |
26 Feb 07 |
jari |
1216 |
|
2 |
26 Feb 07 |
jari |
1217 |
float [] parentCentroid; |
2 |
26 Feb 07 |
jari |
1218 |
|
2 |
26 Feb 07 |
jari |
1219 |
cellToDivide.left = new SOTACell(numberOfSamples, dataMatrix); |
2 |
26 Feb 07 |
jari |
1220 |
cellToDivide.right = new SOTACell(numberOfSamples, dataMatrix); |
2 |
26 Feb 07 |
jari |
1221 |
|
2 |
26 Feb 07 |
jari |
1222 |
numberOfClusters++; |
2 |
26 Feb 07 |
jari |
1223 |
|
2 |
26 Feb 07 |
jari |
1224 |
cellToDivide.left.parent = cellToDivide; |
2 |
26 Feb 07 |
jari |
1225 |
cellToDivide.right.parent = cellToDivide; |
2 |
26 Feb 07 |
jari |
1226 |
|
2 |
26 Feb 07 |
jari |
1227 |
cellToDivide.right.pred = cellToDivide.left; |
2 |
26 Feb 07 |
jari |
1228 |
cellToDivide.left.succ = cellToDivide.right; |
2 |
26 Feb 07 |
jari |
1229 |
|
2 |
26 Feb 07 |
jari |
1230 |
if(cellToDivide.pred != null){ |
2 |
26 Feb 07 |
jari |
1231 |
cellToDivide.left.pred = cellToDivide.pred; |
2 |
26 Feb 07 |
jari |
1232 |
cellToDivide.left.pred.succ = cellToDivide.left; |
2 |
26 Feb 07 |
jari |
1233 |
} |
2 |
26 Feb 07 |
jari |
1234 |
else |
2 |
26 Feb 07 |
jari |
1235 |
cellToDivide.left.pred = null; |
2 |
26 Feb 07 |
jari |
1236 |
|
2 |
26 Feb 07 |
jari |
1237 |
if(cellToDivide.succ != null){ |
2 |
26 Feb 07 |
jari |
1238 |
cellToDivide.right.succ = cellToDivide.succ; |
2 |
26 Feb 07 |
jari |
1239 |
cellToDivide.right.succ.pred = cellToDivide.right; |
2 |
26 Feb 07 |
jari |
1240 |
} |
2 |
26 Feb 07 |
jari |
1241 |
else |
2 |
26 Feb 07 |
jari |
1242 |
cellToDivide.right.succ = null; |
2 |
26 Feb 07 |
jari |
1243 |
|
2 |
26 Feb 07 |
jari |
1244 |
if(cellToDivide == head) |
2 |
26 Feb 07 |
jari |
1245 |
head = cellToDivide.left; |
2 |
26 Feb 07 |
jari |
1246 |
|
2 |
26 Feb 07 |
jari |
1247 |
cellToDivide.succ = null; |
2 |
26 Feb 07 |
jari |
1248 |
cellToDivide.pred = null; |
2 |
26 Feb 07 |
jari |
1249 |
|
2 |
26 Feb 07 |
jari |
1250 |
for(int i = 0; i < numberOfSamples; i++){ |
2 |
26 Feb 07 |
jari |
1251 |
cellToDivide.left.centroidGene.set(0, i, cellToDivide.centroidGene.get(0, i)); |
2 |
26 Feb 07 |
jari |
1252 |
cellToDivide.right.centroidGene.set(0, i, cellToDivide.centroidGene.get(0,i)); |
2 |
26 Feb 07 |
jari |
1253 |
} |
2 |
26 Feb 07 |
jari |
1254 |
} |
2 |
26 Feb 07 |
jari |
1255 |
|
2 |
26 Feb 07 |
jari |
1256 |
|
2 |
26 Feb 07 |
jari |
1257 |
|
2 |
26 Feb 07 |
jari |
1258 |
public double getMaxLeafToRootPath(){ |
2 |
26 Feb 07 |
jari |
1259 |
|
2 |
26 Feb 07 |
jari |
1260 |
SOTACell curr = head; |
2 |
26 Feb 07 |
jari |
1261 |
SOTACell traveler = curr; |
2 |
26 Feb 07 |
jari |
1262 |
|
2 |
26 Feb 07 |
jari |
1263 |
double cumDist; |
2 |
26 Feb 07 |
jari |
1264 |
double maxDist = -1; |
2 |
26 Feb 07 |
jari |
1265 |
|
2 |
26 Feb 07 |
jari |
1266 |
while(curr != null){ |
2 |
26 Feb 07 |
jari |
1267 |
traveler = curr; |
2 |
26 Feb 07 |
jari |
1268 |
cumDist = 0; |
2 |
26 Feb 07 |
jari |
1269 |
|
2 |
26 Feb 07 |
jari |
1270 |
while(traveler != null){ |
2 |
26 Feb 07 |
jari |
1271 |
|
2 |
26 Feb 07 |
jari |
1272 |
if(traveler.parent != null) |
2 |
26 Feb 07 |
jari |
1273 |
cumDist += ExperimentUtil.geneDistance(traveler.parent.centroidGene, traveler.centroidGene, 0, 0, function, factor, absolute); |
2 |
26 Feb 07 |
jari |
1274 |
|
2 |
26 Feb 07 |
jari |
1275 |
traveler = traveler.parent; |
2 |
26 Feb 07 |
jari |
1276 |
} |
2 |
26 Feb 07 |
jari |
1277 |
|
2 |
26 Feb 07 |
jari |
1278 |
if(cumDist < maxDist) |
2 |
26 Feb 07 |
jari |
1279 |
maxDist = cumDist; |
2 |
26 Feb 07 |
jari |
1280 |
|
2 |
26 Feb 07 |
jari |
1281 |
curr = curr.succ; |
2 |
26 Feb 07 |
jari |
1282 |
} |
2 |
26 Feb 07 |
jari |
1283 |
|
2 |
26 Feb 07 |
jari |
1284 |
return maxDist; |
2 |
26 Feb 07 |
jari |
1285 |
|
2 |
26 Feb 07 |
jari |
1286 |
} |
2 |
26 Feb 07 |
jari |
1287 |
|
2 |
26 Feb 07 |
jari |
1288 |
|
2 |
26 Feb 07 |
jari |
1289 |
/** |
2 |
26 Feb 07 |
jari |
* Checking the result of hcl algorithm calculation. |
2 |
26 Feb 07 |
jari |
* @throws AlgorithmException, if the result is incorrect. |
2 |
26 Feb 07 |
jari |
1292 |
*/ |
2 |
26 Feb 07 |
jari |
1293 |
private void validate(AlgorithmData result) throws AlgorithmException { |
2 |
26 Feb 07 |
jari |
1294 |
if (result.getIntArray("child-1-array") == null) { |
2 |
26 Feb 07 |
jari |
1295 |
throw new AlgorithmException("parameter 'child-1-array' is null"); |
2 |
26 Feb 07 |
jari |
1296 |
} |
2 |
26 Feb 07 |
jari |
1297 |
if (result.getIntArray("child-2-array") == null) { |
2 |
26 Feb 07 |
jari |
1298 |
throw new AlgorithmException("parameter 'child-2-array' is null"); |
2 |
26 Feb 07 |
jari |
1299 |
} |
2 |
26 Feb 07 |
jari |
1300 |
if (result.getIntArray("node-order") == null) { |
2 |
26 Feb 07 |
jari |
1301 |
throw new AlgorithmException("parameter 'node-order' is null"); |
2 |
26 Feb 07 |
jari |
1302 |
} |
2 |
26 Feb 07 |
jari |
1303 |
if (result.getMatrix("height") == null) { |
2 |
26 Feb 07 |
jari |
1304 |
throw new AlgorithmException("parameter 'height' is null"); |
2 |
26 Feb 07 |
jari |
1305 |
} |
2 |
26 Feb 07 |
jari |
1306 |
} |
2 |
26 Feb 07 |
jari |
1307 |
|
2 |
26 Feb 07 |
jari |
1308 |
} |
2 |
26 Feb 07 |
jari |
1309 |
|
2 |
26 Feb 07 |
jari |
1310 |
|
2 |
26 Feb 07 |
jari |
1311 |
|
2 |
26 Feb 07 |
jari |
1312 |
|
2 |
26 Feb 07 |
jari |
1313 |
|