2 |
26 Feb 07 |
jari |
1 |
/* |
2 |
26 Feb 07 |
jari |
Copyright @ 1999-2004, The Institute for Genomic Research (TIGR). |
2 |
26 Feb 07 |
jari |
All rights reserved. |
2 |
26 Feb 07 |
jari |
4 |
*/ |
2 |
26 Feb 07 |
jari |
5 |
/* |
2 |
26 Feb 07 |
jari |
* ScriptDataTransformer.java |
2 |
26 Feb 07 |
jari |
7 |
* |
2 |
26 Feb 07 |
jari |
* Created on March 26, 2004, 10:24 AM |
2 |
26 Feb 07 |
jari |
9 |
*/ |
2 |
26 Feb 07 |
jari |
10 |
|
2 |
26 Feb 07 |
jari |
11 |
package org.tigr.microarray.mev.script.util; |
2 |
26 Feb 07 |
jari |
12 |
|
2 |
26 Feb 07 |
jari |
13 |
import java.util.Arrays; |
2 |
26 Feb 07 |
jari |
14 |
import java.util.Vector; |
2 |
26 Feb 07 |
jari |
15 |
|
2 |
26 Feb 07 |
jari |
16 |
import org.tigr.microarray.mev.DetectionFilter; |
2 |
26 Feb 07 |
jari |
17 |
import org.tigr.microarray.mev.FoldFilter; |
2 |
26 Feb 07 |
jari |
18 |
import org.tigr.microarray.mev.ISlideData; |
2 |
26 Feb 07 |
jari |
19 |
import org.tigr.microarray.mev.cluster.algorithm.AlgorithmData; |
2 |
26 Feb 07 |
jari |
20 |
import org.tigr.microarray.mev.cluster.algorithm.AlgorithmParameters; |
2 |
26 Feb 07 |
jari |
21 |
import org.tigr.microarray.mev.cluster.algorithm.impl.ExperimentUtil; |
2 |
26 Feb 07 |
jari |
22 |
import org.tigr.microarray.mev.cluster.gui.Experiment; |
2 |
26 Feb 07 |
jari |
23 |
import org.tigr.microarray.mev.cluster.gui.IData; |
2 |
26 Feb 07 |
jari |
24 |
import org.tigr.microarray.mev.cluster.gui.IFramework; |
2 |
26 Feb 07 |
jari |
25 |
import org.tigr.microarray.util.Adjustment; |
2 |
26 Feb 07 |
jari |
26 |
import org.tigr.util.FloatMatrix; |
2 |
26 Feb 07 |
jari |
27 |
import org.tigr.util.QSort; |
2 |
26 Feb 07 |
jari |
28 |
|
2 |
26 Feb 07 |
jari |
29 |
|
2 |
26 Feb 07 |
jari |
/** The ScriptDataTransformer class supports script execution by providing |
2 |
26 Feb 07 |
jari |
* methods to produce new Experiment objects with trimmed gene or experiment |
2 |
26 Feb 07 |
jari |
* lists representing results from scripting algorithms. The class also handles |
2 |
26 Feb 07 |
jari |
* the execution of all Adjustment class algorithms and all of the Cluster Selection |
2 |
26 Feb 07 |
jari |
* class algorithms. |
2 |
26 Feb 07 |
jari |
* @author braisted |
2 |
26 Feb 07 |
jari |
36 |
*/ |
2 |
26 Feb 07 |
jari |
37 |
public class ScriptDataTransformer { |
2 |
26 Feb 07 |
jari |
38 |
|
2 |
26 Feb 07 |
jari |
/** The base Experiment object to be modified by the transformer. |
2 |
26 Feb 07 |
jari |
40 |
*/ |
2 |
26 Feb 07 |
jari |
41 |
private Experiment experiment; |
2 |
26 Feb 07 |
jari |
/** MeV's framework object. |
2 |
26 Feb 07 |
jari |
43 |
*/ |
2 |
26 Feb 07 |
jari |
44 |
private IFramework framework; |
2 |
26 Feb 07 |
jari |
45 |
|
2 |
26 Feb 07 |
jari |
/** Creates a new instance of ScriptDataTransformer |
2 |
26 Feb 07 |
jari |
* @param experiment Experiment object, target of transformation |
2 |
26 Feb 07 |
jari |
* @param framework MeV IFramework |
2 |
26 Feb 07 |
jari |
49 |
*/ |
2 |
26 Feb 07 |
jari |
50 |
public ScriptDataTransformer(Experiment experiment, IFramework framework) { |
2 |
26 Feb 07 |
jari |
51 |
this.experiment = experiment.copy(); |
2 |
26 Feb 07 |
jari |
52 |
this.framework = framework; |
2 |
26 Feb 07 |
jari |
53 |
} |
2 |
26 Feb 07 |
jari |
54 |
|
2 |
26 Feb 07 |
jari |
/** Transforms the Experiment object according to the AlgorithmData's parameter |
2 |
26 Feb 07 |
jari |
* set. These transformations include processing of Adjustment class algorithms. |
2 |
26 Feb 07 |
jari |
* @param data AlgorithmData holding the parameters used of a transformation. |
2 |
26 Feb 07 |
jari |
* @return |
2 |
26 Feb 07 |
jari |
59 |
*/ |
2 |
26 Feb 07 |
jari |
60 |
public Experiment transformData(AlgorithmData data) { |
2 |
26 Feb 07 |
jari |
61 |
AlgorithmParameters params = data.getParams(); |
2 |
26 Feb 07 |
jari |
62 |
|
2 |
26 Feb 07 |
jari |
63 |
String algName = params.getString("name"); |
2 |
26 Feb 07 |
jari |
64 |
|
2 |
26 Feb 07 |
jari |
65 |
if(algName == null) |
2 |
26 Feb 07 |
jari |
66 |
return null; |
2 |
26 Feb 07 |
jari |
67 |
|
2 |
26 Feb 07 |
jari |
68 |
if(algName.equals("Percentage Cutoff")) { |
2 |
26 Feb 07 |
jari |
69 |
float percent = params.getFloat("percent-cutoff"); |
2 |
26 Feb 07 |
jari |
70 |
experiment = createPercentCutoffExperiment(percent); |
2 |
26 Feb 07 |
jari |
71 |
} else if (algName.equals("Lower Cutoff")) { |
2 |
26 Feb 07 |
jari |
72 |
float cy3Cutoff = params.getFloat("cy3-lower-cutoff"); |
2 |
26 Feb 07 |
jari |
73 |
float cy5Cutoff = params.getFloat("cy5-lower-cutoff"); |
2 |
26 Feb 07 |
jari |
//create gene list where all must excede limits |
2 |
26 Feb 07 |
jari |
75 |
|
2 |
26 Feb 07 |
jari |
76 |
} else if(algName.equals("Affy Detection Filter")) { |
2 |
26 Feb 07 |
jari |
77 |
experiment = applyAffyDetectionFilter(data); |
2 |
26 Feb 07 |
jari |
78 |
} else if(algName.equals("Affy Fold Filter")) { |
2 |
26 Feb 07 |
jari |
79 |
experiment = applyAffyFoldFilter(data); |
2 |
26 Feb 07 |
jari |
80 |
} else if(algName.equals("Normalize Spots")) { |
2 |
26 Feb 07 |
jari |
81 |
Adjustment.normalizeSpots(experiment.getMatrix()); |
2 |
26 Feb 07 |
jari |
82 |
} else if(algName.equals("Divide Spots by RMS")) { |
2 |
26 Feb 07 |
jari |
83 |
Adjustment.divideSpotsRMS(experiment.getMatrix()); |
2 |
26 Feb 07 |
jari |
84 |
} else if(algName.equals("Divide Spots by SD")) { |
2 |
26 Feb 07 |
jari |
85 |
Adjustment.divideSpotsSD(experiment.getMatrix()); |
2 |
26 Feb 07 |
jari |
86 |
} else if(algName.equals("Mean Center Spots")) { |
2 |
26 Feb 07 |
jari |
87 |
Adjustment.meanCenterSpots(experiment.getMatrix()); |
2 |
26 Feb 07 |
jari |
88 |
} else if(algName.equals("Median Center Spots")) { |
2 |
26 Feb 07 |
jari |
89 |
Adjustment.medianCenterSpots(experiment.getMatrix()); |
2 |
26 Feb 07 |
jari |
90 |
} else if(algName.equals("Digital Spots")) { |
2 |
26 Feb 07 |
jari |
91 |
Adjustment.digitalSpots(experiment.getMatrix()); |
2 |
26 Feb 07 |
jari |
92 |
} else if(algName.equals("Normalize Experiments")) { |
2 |
26 Feb 07 |
jari |
93 |
Adjustment.normalizeExperiments(experiment.getMatrix()); |
2 |
26 Feb 07 |
jari |
94 |
} else if(algName.equals("Divide Experiments by RMS")) { |
2 |
26 Feb 07 |
jari |
95 |
Adjustment.divideExperimentsRMS(experiment.getMatrix()); |
2 |
26 Feb 07 |
jari |
96 |
} else if(algName.equals("Divide Experiments by SD")) { |
2 |
26 Feb 07 |
jari |
97 |
Adjustment.divideExperimentsSD(experiment.getMatrix()); |
2 |
26 Feb 07 |
jari |
98 |
} else if(algName.equals("Mean Center Experiments")) { |
2 |
26 Feb 07 |
jari |
99 |
Adjustment.meanCenterExperiments(experiment.getMatrix()); |
2 |
26 Feb 07 |
jari |
100 |
} else if(algName.equals("Median Center Experiments")) { |
2 |
26 Feb 07 |
jari |
101 |
Adjustment.medianCenterExperiments(experiment.getMatrix()); |
2 |
26 Feb 07 |
jari |
102 |
} else if(algName.equals("Digital Experiments")) { |
2 |
26 Feb 07 |
jari |
103 |
Adjustment.digitalExperiments(experiment.getMatrix()); |
2 |
26 Feb 07 |
jari |
104 |
} |
2 |
26 Feb 07 |
jari |
105 |
return experiment; |
2 |
26 Feb 07 |
jari |
106 |
} |
2 |
26 Feb 07 |
jari |
107 |
|
2 |
26 Feb 07 |
jari |
108 |
|
2 |
26 Feb 07 |
jari |
/** Creates a new experiment based on cut-off criteria (%). Genes are retained |
2 |
26 Feb 07 |
jari |
* if they have more than the criteria % of valid expression values over the |
2 |
26 Feb 07 |
jari |
* loaded samples. |
2 |
26 Feb 07 |
jari |
* @param percent Percentage criteria. |
2 |
26 Feb 07 |
jari |
* @return |
2 |
26 Feb 07 |
jari |
114 |
*/ |
2 |
26 Feb 07 |
jari |
115 |
private Experiment createPercentCutoffExperiment(float percent) { |
2 |
26 Feb 07 |
jari |
116 |
|
2 |
26 Feb 07 |
jari |
117 |
FloatMatrix fm = experiment.getMatrix(); |
2 |
26 Feb 07 |
jari |
118 |
int [] origRowMap = experiment.getRowMappingArrayCopy(); |
2 |
26 Feb 07 |
jari |
119 |
int colCount = fm.getColumnDimension(); |
2 |
26 Feb 07 |
jari |
120 |
int validExperimentCount = (int) (colCount * (percent/100f)); |
2 |
26 Feb 07 |
jari |
121 |
boolean [] isValid = new boolean[fm.getRowDimension()]; |
2 |
26 Feb 07 |
jari |
122 |
int cnt; |
2 |
26 Feb 07 |
jari |
123 |
int validCount = 0; |
2 |
26 Feb 07 |
jari |
124 |
|
2 |
26 Feb 07 |
jari |
//validate genes |
2 |
26 Feb 07 |
jari |
126 |
for(int i = 0; i < isValid.length; i++) { |
2 |
26 Feb 07 |
jari |
127 |
cnt = 0; |
2 |
26 Feb 07 |
jari |
128 |
for(int j = 0; j < colCount; j++) { |
2 |
26 Feb 07 |
jari |
129 |
if(!Float.isNaN(fm.A[i][j])) |
2 |
26 Feb 07 |
jari |
130 |
cnt++; |
2 |
26 Feb 07 |
jari |
131 |
if(cnt > validExperimentCount) { |
2 |
26 Feb 07 |
jari |
132 |
isValid[i] = true; |
2 |
26 Feb 07 |
jari |
133 |
validCount++; |
2 |
26 Feb 07 |
jari |
134 |
break; |
2 |
26 Feb 07 |
jari |
135 |
} |
2 |
26 Feb 07 |
jari |
136 |
} |
2 |
26 Feb 07 |
jari |
137 |
} |
2 |
26 Feb 07 |
jari |
138 |
|
2 |
26 Feb 07 |
jari |
139 |
float [][] matrix = new float[validCount][colCount]; |
2 |
26 Feb 07 |
jari |
140 |
int [] newRowMap = new int[validCount]; |
2 |
26 Feb 07 |
jari |
141 |
int currRow = 0; |
2 |
26 Feb 07 |
jari |
142 |
|
2 |
26 Feb 07 |
jari |
143 |
for(int i = 0; i < fm.A.length; i++) { |
2 |
26 Feb 07 |
jari |
144 |
if(isValid[i]) { |
2 |
26 Feb 07 |
jari |
145 |
newRowMap[currRow] = origRowMap[i]; |
2 |
26 Feb 07 |
jari |
146 |
for(int j = 0; j < colCount; j++) { |
2 |
26 Feb 07 |
jari |
147 |
matrix[currRow][j] = fm.A[i][j]; |
2 |
26 Feb 07 |
jari |
148 |
} |
2 |
26 Feb 07 |
jari |
149 |
currRow++; |
2 |
26 Feb 07 |
jari |
150 |
} |
2 |
26 Feb 07 |
jari |
151 |
} |
2 |
26 Feb 07 |
jari |
152 |
|
2 |
26 Feb 07 |
jari |
153 |
return new Experiment(new FloatMatrix(matrix), experiment.getColumnIndicesCopy(), newRowMap); |
2 |
26 Feb 07 |
jari |
154 |
} |
2 |
26 Feb 07 |
jari |
155 |
|
2 |
26 Feb 07 |
jari |
156 |
|
2 |
26 Feb 07 |
jari |
/** Trims the experiment based on index list. Boolean value indicates if |
2 |
26 Feb 07 |
jari |
* its a gene or experment trim. |
2 |
26 Feb 07 |
jari |
* @param indices Element indices to retain |
2 |
26 Feb 07 |
jari |
* @param geneCut if true genes will be trimmed, else experiments will be trimmed |
2 |
26 Feb 07 |
jari |
* @return |
2 |
26 Feb 07 |
jari |
162 |
*/ |
2 |
26 Feb 07 |
jari |
163 |
public Experiment getTrimmedExperiment(int [] indices, boolean geneCut) { |
2 |
26 Feb 07 |
jari |
164 |
if(geneCut) |
2 |
26 Feb 07 |
jari |
165 |
return getReducedExperiment_GeneReduction(indices); |
2 |
26 Feb 07 |
jari |
166 |
return getReducedExperiment_ExperimentReduction(indices); |
2 |
26 Feb 07 |
jari |
167 |
} |
2 |
26 Feb 07 |
jari |
168 |
|
2 |
26 Feb 07 |
jari |
169 |
|
2 |
26 Feb 07 |
jari |
/** Specifically trims genes based on passed indices which are to be |
2 |
26 Feb 07 |
jari |
* retained. |
2 |
26 Feb 07 |
jari |
* @param indices Gene indices to retain |
2 |
26 Feb 07 |
jari |
* @return |
2 |
26 Feb 07 |
jari |
174 |
*/ |
2 |
26 Feb 07 |
jari |
175 |
private Experiment getReducedExperiment_GeneReduction(int [] indices) { |
2 |
26 Feb 07 |
jari |
176 |
FloatMatrix fm = experiment.getMatrix(); |
2 |
26 Feb 07 |
jari |
177 |
int [] origRowMap = experiment.getRowMappingArrayCopy(); |
2 |
26 Feb 07 |
jari |
178 |
int colCount = fm.getColumnDimension(); |
2 |
26 Feb 07 |
jari |
179 |
float [][] matrix = new float[indices.length][colCount]; |
2 |
26 Feb 07 |
jari |
180 |
int [] newRowMap = new int[indices.length]; |
2 |
26 Feb 07 |
jari |
181 |
int currRow = 0; |
2 |
26 Feb 07 |
jari |
182 |
|
2 |
26 Feb 07 |
jari |
183 |
int dataRow = 0; |
2 |
26 Feb 07 |
jari |
184 |
for(int i = 0; i < indices.length; i++) { |
2 |
26 Feb 07 |
jari |
185 |
dataRow = origRowMap[indices[i]]; |
2 |
26 Feb 07 |
jari |
186 |
newRowMap[i] = dataRow; |
2 |
26 Feb 07 |
jari |
187 |
for(int j = 0; j < colCount; j++) { |
2 |
26 Feb 07 |
jari |
188 |
matrix[i][j] = fm.A[indices[i]][j]; |
2 |
26 Feb 07 |
jari |
189 |
} |
2 |
26 Feb 07 |
jari |
190 |
} |
2 |
26 Feb 07 |
jari |
191 |
return new Experiment(new FloatMatrix(matrix), experiment.getColumnIndicesCopy(), newRowMap); |
2 |
26 Feb 07 |
jari |
192 |
} |
2 |
26 Feb 07 |
jari |
193 |
|
2 |
26 Feb 07 |
jari |
/** Trims experiments based on indices to retain |
2 |
26 Feb 07 |
jari |
* @param colIndices Experiment indicies to retain |
2 |
26 Feb 07 |
jari |
* @return |
2 |
26 Feb 07 |
jari |
197 |
*/ |
2 |
26 Feb 07 |
jari |
198 |
private Experiment getReducedExperiment_ExperimentReduction(int [] colIndices) { |
2 |
26 Feb 07 |
jari |
199 |
FloatMatrix fm = experiment.getMatrix(); |
2 |
26 Feb 07 |
jari |
200 |
int rowCount = fm.getRowDimension(); |
2 |
26 Feb 07 |
jari |
201 |
float [][] matrix = new float[rowCount][colIndices.length]; |
2 |
26 Feb 07 |
jari |
202 |
|
2 |
26 Feb 07 |
jari |
203 |
for(int i = 0; i < rowCount; i++) { |
2 |
26 Feb 07 |
jari |
204 |
for(int j = 0; j < colIndices.length; j++) { |
2 |
26 Feb 07 |
jari |
205 |
matrix[i][j] = fm.A[i][colIndices[j]]; |
2 |
26 Feb 07 |
jari |
206 |
} |
2 |
26 Feb 07 |
jari |
207 |
} |
2 |
26 Feb 07 |
jari |
208 |
return new Experiment(new FloatMatrix(matrix), colIndices, experiment.getRowMappingArrayCopy()); |
2 |
26 Feb 07 |
jari |
209 |
} |
2 |
26 Feb 07 |
jari |
210 |
|
2 |
26 Feb 07 |
jari |
/** This is the main support for cluster selection class algorithms. |
2 |
26 Feb 07 |
jari |
* The resulting indices represent the selected clusters and the |
2 |
26 Feb 07 |
jari |
* AlgorithmData passed in is augmented with values upon which the |
2 |
26 Feb 07 |
jari |
* selection was based, e.g. calculated cluter diversities or |
2 |
26 Feb 07 |
jari |
* centroid varibilities. Note tht the input AlgorithmData indicates |
2 |
26 Feb 07 |
jari |
* the selection algorithm and parameters as well as the critical boolean |
2 |
26 Feb 07 |
jari |
* indicator to direct the selection of gene vs. experiment clusters. |
2 |
26 Feb 07 |
jari |
* @param algData AlgorithmData containing the parameters. |
2 |
26 Feb 07 |
jari |
* @param clusters Cluster indices |
2 |
26 Feb 07 |
jari |
* @return |
2 |
26 Feb 07 |
jari |
221 |
*/ |
2 |
26 Feb 07 |
jari |
222 |
public int [][] selectClusters(AlgorithmData algData, int [][] clusters) { |
2 |
26 Feb 07 |
jari |
223 |
AlgorithmParameters params = algData.getParams(); |
2 |
26 Feb 07 |
jari |
224 |
int numOfDesiredClusters = params.getInt("desired-cluster-count"); |
2 |
26 Feb 07 |
jari |
225 |
int minClusterSize = params.getInt("minimum-cluster-size"); |
2 |
26 Feb 07 |
jari |
226 |
boolean areGeneClusters = params.getBoolean("process-gene-clusters"); |
2 |
26 Feb 07 |
jari |
227 |
String algName = params.getString("name"); |
2 |
26 Feb 07 |
jari |
228 |
int [][] selectedClusters; |
2 |
26 Feb 07 |
jari |
229 |
int [][] orderedClusters; |
2 |
26 Feb 07 |
jari |
230 |
|
2 |
26 Feb 07 |
jari |
231 |
if(algName.equals("Diversity Ranking Cluster Selection")) { |
2 |
26 Feb 07 |
jari |
232 |
int function = params.getInt("distance-function"); |
2 |
26 Feb 07 |
jari |
233 |
boolean useAbsolute = params.getBoolean("use-absolute"); |
2 |
26 Feb 07 |
jari |
234 |
boolean useCentroidBasedDiversity = params.getBoolean("use-centroid-based-variability"); |
2 |
26 Feb 07 |
jari |
235 |
orderedClusters = getClustersBasedOnDiversityRank(algData, experiment.getMatrix(), clusters, areGeneClusters, useCentroidBasedDiversity, function, useAbsolute); |
2 |
26 Feb 07 |
jari |
236 |
|
2 |
26 Feb 07 |
jari |
237 |
int clusterCount = 0; |
2 |
26 Feb 07 |
jari |
238 |
Vector clusterVector = new Vector(); |
2 |
26 Feb 07 |
jari |
239 |
for(int i = 0; i < orderedClusters.length && clusterCount < numOfDesiredClusters; i++) { |
2 |
26 Feb 07 |
jari |
240 |
if(orderedClusters[i].length >= minClusterSize) { |
2 |
26 Feb 07 |
jari |
241 |
clusterVector.add(orderedClusters[i]); |
2 |
26 Feb 07 |
jari |
242 |
clusterCount++; |
2 |
26 Feb 07 |
jari |
243 |
} |
2 |
26 Feb 07 |
jari |
244 |
} |
2 |
26 Feb 07 |
jari |
245 |
|
2 |
26 Feb 07 |
jari |
246 |
selectedClusters = new int[clusterVector.size()][]; |
2 |
26 Feb 07 |
jari |
247 |
|
2 |
26 Feb 07 |
jari |
248 |
for(int i = 0; i < selectedClusters.length; i++) |
2 |
26 Feb 07 |
jari |
249 |
selectedClusters[i] = (int [])(clusterVector.elementAt(i)); |
2 |
26 Feb 07 |
jari |
250 |
|
2 |
26 Feb 07 |
jari |
251 |
} else { // (if other algorithms are added --> if(algName.equals("Centroid Entropy/Variance Ranking Cluster Selection")) { |
2 |
26 Feb 07 |
jari |
252 |
FloatMatrix matrix = this.experiment.getMatrix(); |
2 |
26 Feb 07 |
jari |
253 |
|
2 |
26 Feb 07 |
jari |
254 |
if(!areGeneClusters) |
2 |
26 Feb 07 |
jari |
255 |
matrix = matrix.transpose(); |
2 |
26 Feb 07 |
jari |
256 |
|
2 |
26 Feb 07 |
jari |
257 |
FloatMatrix means = getMeans(matrix, clusters); |
2 |
26 Feb 07 |
jari |
258 |
|
2 |
26 Feb 07 |
jari |
//restore matrix |
2 |
26 Feb 07 |
jari |
260 |
if(!areGeneClusters) |
2 |
26 Feb 07 |
jari |
261 |
matrix = matrix.transpose(); |
2 |
26 Feb 07 |
jari |
262 |
|
2 |
26 Feb 07 |
jari |
263 |
boolean useCentroidVariance = params.getBoolean("use-centroid-variance"); |
2 |
26 Feb 07 |
jari |
264 |
if(useCentroidVariance) |
2 |
26 Feb 07 |
jari |
265 |
orderedClusters = getClustersBasedOnVarianceRank(algData, means, clusters, areGeneClusters); |
2 |
26 Feb 07 |
jari |
266 |
else |
2 |
26 Feb 07 |
jari |
267 |
orderedClusters = getClustersBasedOnEntropyRank(algData, means, clusters, areGeneClusters); |
2 |
26 Feb 07 |
jari |
268 |
|
2 |
26 Feb 07 |
jari |
269 |
int clusterCount = 0; |
2 |
26 Feb 07 |
jari |
270 |
Vector clusterVector = new Vector(); |
2 |
26 Feb 07 |
jari |
271 |
for(int i = 0; i < orderedClusters.length && clusterCount < numOfDesiredClusters; i++) { |
2 |
26 Feb 07 |
jari |
272 |
if(orderedClusters[i].length >= minClusterSize) { |
2 |
26 Feb 07 |
jari |
273 |
clusterVector.add(orderedClusters[i]); |
2 |
26 Feb 07 |
jari |
274 |
clusterCount++; |
2 |
26 Feb 07 |
jari |
275 |
} |
2 |
26 Feb 07 |
jari |
276 |
} |
2 |
26 Feb 07 |
jari |
277 |
|
2 |
26 Feb 07 |
jari |
278 |
selectedClusters = new int[clusterVector.size()][]; |
2 |
26 Feb 07 |
jari |
279 |
|
2 |
26 Feb 07 |
jari |
280 |
for(int i = 0; i < selectedClusters.length; i++) |
2 |
26 Feb 07 |
jari |
281 |
selectedClusters[i] = (int [])(clusterVector.elementAt(i)); |
2 |
26 Feb 07 |
jari |
282 |
} |
2 |
26 Feb 07 |
jari |
283 |
return selectedClusters; |
2 |
26 Feb 07 |
jari |
284 |
} |
2 |
26 Feb 07 |
jari |
285 |
|
2 |
26 Feb 07 |
jari |
286 |
|
2 |
26 Feb 07 |
jari |
/** Applys the diveristy rank cluster selection. |
2 |
26 Feb 07 |
jari |
* @param algData parameters |
2 |
26 Feb 07 |
jari |
* @param data Input data |
2 |
26 Feb 07 |
jari |
* @param inputClusters clusters |
2 |
26 Feb 07 |
jari |
* @param geneClusters indicates nature of input clusters |
2 |
26 Feb 07 |
jari |
* @param useCentroids indicates if centroids should be used or if diversity should |
2 |
26 Feb 07 |
jari |
* be intra-gene distances. |
2 |
26 Feb 07 |
jari |
* @param function distance function |
2 |
26 Feb 07 |
jari |
* @param absolute is distance absolute |
2 |
26 Feb 07 |
jari |
* @return |
2 |
26 Feb 07 |
jari |
297 |
*/ |
2 |
26 Feb 07 |
jari |
298 |
private int [][] getClustersBasedOnDiversityRank(AlgorithmData algData, FloatMatrix data, int [][] inputClusters, boolean geneClusters, boolean useCentroids, int function, boolean absolute) { |
2 |
26 Feb 07 |
jari |
299 |
FloatMatrix means; |
2 |
26 Feb 07 |
jari |
300 |
float [] diversities; |
2 |
26 Feb 07 |
jari |
301 |
int [][] newClusters = new int[inputClusters.length][]; |
2 |
26 Feb 07 |
jari |
302 |
|
2 |
26 Feb 07 |
jari |
//insures that means are correct |
2 |
26 Feb 07 |
jari |
304 |
if(!geneClusters) |
2 |
26 Feb 07 |
jari |
305 |
data = data.transpose(); |
2 |
26 Feb 07 |
jari |
306 |
|
2 |
26 Feb 07 |
jari |
307 |
if(useCentroids) { |
2 |
26 Feb 07 |
jari |
308 |
means = getMeans(data, inputClusters); |
2 |
26 Feb 07 |
jari |
309 |
diversities = getCentroidBasedDiversities(data, means, inputClusters, function, absolute); |
2 |
26 Feb 07 |
jari |
310 |
} else { |
2 |
26 Feb 07 |
jari |
311 |
diversities = getGeneBasedDiversities(data, inputClusters, function, absolute); |
2 |
26 Feb 07 |
jari |
312 |
} |
2 |
26 Feb 07 |
jari |
313 |
|
2 |
26 Feb 07 |
jari |
314 |
QSort sort = new QSort(diversities); |
2 |
26 Feb 07 |
jari |
315 |
diversities = sort.getSorted(); |
2 |
26 Feb 07 |
jari |
316 |
|
2 |
26 Feb 07 |
jari |
317 |
int [] origOrder = sort.getOrigIndx(); |
2 |
26 Feb 07 |
jari |
318 |
|
2 |
26 Feb 07 |
jari |
319 |
for(int i = 0; i < newClusters.length; i++) |
2 |
26 Feb 07 |
jari |
320 |
newClusters[i] = inputClusters[origOrder[i]]; |
2 |
26 Feb 07 |
jari |
321 |
|
2 |
26 Feb 07 |
jari |
//store results |
2 |
26 Feb 07 |
jari |
323 |
String [] diversityArray = new String[diversities.length]; |
2 |
26 Feb 07 |
jari |
324 |
String [] clusterPop = new String[diversities.length]; |
2 |
26 Feb 07 |
jari |
325 |
for(int i = 0; i < diversities.length; i++) { |
2 |
26 Feb 07 |
jari |
326 |
diversityArray[i] = String.valueOf(diversities[i]); |
2 |
26 Feb 07 |
jari |
327 |
clusterPop[i] = String.valueOf(newClusters[i].length); |
2 |
26 Feb 07 |
jari |
328 |
} |
2 |
26 Feb 07 |
jari |
329 |
algData.addStringArray("diversity-value-array", diversityArray); |
2 |
26 Feb 07 |
jari |
330 |
algData.addStringArray("cluster-population-array", clusterPop); |
2 |
26 Feb 07 |
jari |
331 |
|
2 |
26 Feb 07 |
jari |
332 |
|
2 |
26 Feb 07 |
jari |
//transpose to restore original ordering |
2 |
26 Feb 07 |
jari |
334 |
if(!geneClusters) |
2 |
26 Feb 07 |
jari |
335 |
data = data.transpose(); |
2 |
26 Feb 07 |
jari |
336 |
|
2 |
26 Feb 07 |
jari |
337 |
return newClusters; |
2 |
26 Feb 07 |
jari |
338 |
} |
2 |
26 Feb 07 |
jari |
339 |
|
2 |
26 Feb 07 |
jari |
340 |
|
2 |
26 Feb 07 |
jari |
/** Calculates means for the clusters |
2 |
26 Feb 07 |
jari |
* @param data Expression matrix |
2 |
26 Feb 07 |
jari |
* @param clusters cluster indices |
2 |
26 Feb 07 |
jari |
* @return |
2 |
26 Feb 07 |
jari |
345 |
*/ |
2 |
26 Feb 07 |
jari |
346 |
private FloatMatrix getMeans(FloatMatrix data, int [][] clusters){ |
2 |
26 Feb 07 |
jari |
347 |
FloatMatrix means = new FloatMatrix(clusters.length, data.getColumnDimension()); |
2 |
26 Feb 07 |
jari |
348 |
for(int i = 0; i < clusters.length; i++){ |
2 |
26 Feb 07 |
jari |
349 |
means.A[i] = getMeans(data, clusters[i]); |
2 |
26 Feb 07 |
jari |
350 |
} |
2 |
26 Feb 07 |
jari |
351 |
return means; |
2 |
26 Feb 07 |
jari |
352 |
} |
2 |
26 Feb 07 |
jari |
353 |
|
2 |
26 Feb 07 |
jari |
354 |
|
2 |
26 Feb 07 |
jari |
/** Returns a set of means for an element |
2 |
26 Feb 07 |
jari |
* @return |
2 |
26 Feb 07 |
jari |
* @param data input data |
2 |
26 Feb 07 |
jari |
* @param indices indices to use */ |
2 |
26 Feb 07 |
jari |
359 |
private float [] getMeans(FloatMatrix data, int [] indices){ |
2 |
26 Feb 07 |
jari |
360 |
int nSamples = data.getColumnDimension(); |
2 |
26 Feb 07 |
jari |
361 |
float [] means = new float[nSamples]; |
2 |
26 Feb 07 |
jari |
362 |
float sum = 0; |
2 |
26 Feb 07 |
jari |
363 |
float n = 0; |
2 |
26 Feb 07 |
jari |
364 |
float value; |
2 |
26 Feb 07 |
jari |
365 |
for(int i = 0; i < nSamples; i++){ |
2 |
26 Feb 07 |
jari |
366 |
n = 0; |
2 |
26 Feb 07 |
jari |
367 |
sum = 0; |
2 |
26 Feb 07 |
jari |
368 |
for(int j = 0; j < indices.length; j++){ |
2 |
26 Feb 07 |
jari |
369 |
value = data.get(indices[j],i); |
2 |
26 Feb 07 |
jari |
370 |
if(!Float.isNaN(value)){ |
2 |
26 Feb 07 |
jari |
371 |
sum += value; |
2 |
26 Feb 07 |
jari |
372 |
n++; |
2 |
26 Feb 07 |
jari |
373 |
} |
2 |
26 Feb 07 |
jari |
374 |
} |
2 |
26 Feb 07 |
jari |
375 |
if(n > 0) |
2 |
26 Feb 07 |
jari |
376 |
means[i] = sum/n; |
2 |
26 Feb 07 |
jari |
377 |
else |
2 |
26 Feb 07 |
jari |
378 |
means[i] = Float.NaN; |
2 |
26 Feb 07 |
jari |
379 |
} |
2 |
26 Feb 07 |
jari |
380 |
return means; |
2 |
26 Feb 07 |
jari |
381 |
} |
2 |
26 Feb 07 |
jari |
382 |
|
2 |
26 Feb 07 |
jari |
/** Gest the centroid based diversities |
2 |
26 Feb 07 |
jari |
* @return |
2 |
26 Feb 07 |
jari |
* @param data Input data |
2 |
26 Feb 07 |
jari |
* @param means Centroids (means patterns) |
2 |
26 Feb 07 |
jari |
* @param clusters cluster indicies |
2 |
26 Feb 07 |
jari |
* @param function distance function |
2 |
26 Feb 07 |
jari |
* @param absolute use absolute distance? */ |
2 |
26 Feb 07 |
jari |
390 |
private float [] getCentroidBasedDiversities(FloatMatrix data, FloatMatrix means, int [][] clusters, int function, boolean absolute) { |
2 |
26 Feb 07 |
jari |
391 |
float [] div = new float[clusters.length]; |
2 |
26 Feb 07 |
jari |
392 |
|
2 |
26 Feb 07 |
jari |
393 |
for(int i = 0; i < div.length; i++) { |
2 |
26 Feb 07 |
jari |
394 |
div[i] = 0; |
2 |
26 Feb 07 |
jari |
395 |
for(int j = 0; j < clusters[i].length; j++) { |
2 |
26 Feb 07 |
jari |
396 |
div[i] += ExperimentUtil.geneDistance(means, data, i, clusters[i][j], function, 1.0f, absolute); |
2 |
26 Feb 07 |
jari |
397 |
} |
2 |
26 Feb 07 |
jari |
398 |
div[i] /= clusters[i].length; |
2 |
26 Feb 07 |
jari |
399 |
} |
2 |
26 Feb 07 |
jari |
400 |
return div; |
2 |
26 Feb 07 |
jari |
401 |
} |
2 |
26 Feb 07 |
jari |
402 |
|
2 |
26 Feb 07 |
jari |
/** get diversity based on intra gene diatance. |
2 |
26 Feb 07 |
jari |
* @param data |
2 |
26 Feb 07 |
jari |
* @param clusters |
2 |
26 Feb 07 |
jari |
* @param function |
2 |
26 Feb 07 |
jari |
* @param absolute |
2 |
26 Feb 07 |
jari |
* @return |
2 |
26 Feb 07 |
jari |
409 |
*/ |
2 |
26 Feb 07 |
jari |
410 |
private float [] getGeneBasedDiversities(FloatMatrix data, int [][] clusters, int function, boolean absolute) { |
2 |
26 Feb 07 |
jari |
411 |
float [] div = new float[clusters.length]; |
2 |
26 Feb 07 |
jari |
412 |
|
2 |
26 Feb 07 |
jari |
//for each cluser |
2 |
26 Feb 07 |
jari |
414 |
for(int i = 0; i < div.length; i++) { |
2 |
26 Feb 07 |
jari |
415 |
div[i] = 0; |
2 |
26 Feb 07 |
jari |
416 |
for(int j = 0; j < clusters[i].length; j++) { |
2 |
26 Feb 07 |
jari |
417 |
for(int k = 0; k < clusters[i].length/2; k++) { |
2 |
26 Feb 07 |
jari |
418 |
div[i] += ExperimentUtil.geneDistance(data, data, clusters[i][j], clusters[i][k], function, 1.0f, absolute); |
2 |
26 Feb 07 |
jari |
419 |
} |
2 |
26 Feb 07 |
jari |
420 |
} |
2 |
26 Feb 07 |
jari |
421 |
div[i] /= (Math.pow(clusters[i].length, 2.0))/2-(clusters[i].length)/2; |
2 |
26 Feb 07 |
jari |
422 |
} |
2 |
26 Feb 07 |
jari |
423 |
return div; |
2 |
26 Feb 07 |
jari |
424 |
} |
2 |
26 Feb 07 |
jari |
425 |
|
2 |
26 Feb 07 |
jari |
/** Select clusters based on variance ranking |
2 |
26 Feb 07 |
jari |
* @param algData |
2 |
26 Feb 07 |
jari |
* @param means |
2 |
26 Feb 07 |
jari |
* @param clusters |
2 |
26 Feb 07 |
jari |
* @param areGeneClusters |
2 |
26 Feb 07 |
jari |
* @return */ |
2 |
26 Feb 07 |
jari |
432 |
private int [][] getClustersBasedOnVarianceRank(AlgorithmData algData, FloatMatrix means, int [][] clusters, boolean areGeneClusters) { |
2 |
26 Feb 07 |
jari |
433 |
float [] variances = getCentroidVariances(means); |
2 |
26 Feb 07 |
jari |
434 |
int [][] newClusters = new int[clusters.length][]; |
2 |
26 Feb 07 |
jari |
435 |
|
2 |
26 Feb 07 |
jari |
436 |
QSort sort = new QSort(variances, QSort.DESCENDING); |
2 |
26 Feb 07 |
jari |
437 |
variances = sort.getSorted(); |
2 |
26 Feb 07 |
jari |
438 |
|
2 |
26 Feb 07 |
jari |
439 |
int [] origOrder = sort.getOrigIndx(); |
2 |
26 Feb 07 |
jari |
440 |
|
2 |
26 Feb 07 |
jari |
441 |
for(int i = 0; i < newClusters.length; i++) |
2 |
26 Feb 07 |
jari |
442 |
newClusters[i] = clusters[origOrder[i]]; |
2 |
26 Feb 07 |
jari |
443 |
|
2 |
26 Feb 07 |
jari |
//store results |
2 |
26 Feb 07 |
jari |
445 |
String [] varianceArray = new String[variances.length]; |
2 |
26 Feb 07 |
jari |
446 |
String [] clusterPop = new String[variances.length]; |
2 |
26 Feb 07 |
jari |
447 |
for(int i = 0; i < variances.length; i++) { |
2 |
26 Feb 07 |
jari |
448 |
varianceArray[i] = String.valueOf(variances[i]); |
2 |
26 Feb 07 |
jari |
449 |
clusterPop[i] = String.valueOf(newClusters[i].length); |
2 |
26 Feb 07 |
jari |
450 |
} |
2 |
26 Feb 07 |
jari |
451 |
algData.addStringArray("diversity-value-array", varianceArray); |
2 |
26 Feb 07 |
jari |
452 |
algData.addStringArray("cluster-population-array", clusterPop); |
2 |
26 Feb 07 |
jari |
453 |
|
2 |
26 Feb 07 |
jari |
454 |
return newClusters; |
2 |
26 Feb 07 |
jari |
455 |
} |
2 |
26 Feb 07 |
jari |
456 |
|
2 |
26 Feb 07 |
jari |
/** returns centroid based variances |
2 |
26 Feb 07 |
jari |
458 |
*/ |
2 |
26 Feb 07 |
jari |
459 |
private float [] getCentroidVariances(FloatMatrix means) { |
2 |
26 Feb 07 |
jari |
460 |
float [] vars = new float[means.getRowDimension()]; |
2 |
26 Feb 07 |
jari |
461 |
int cols = means.getColumnDimension(); |
2 |
26 Feb 07 |
jari |
462 |
float sos; |
2 |
26 Feb 07 |
jari |
463 |
float mean; |
2 |
26 Feb 07 |
jari |
464 |
|
2 |
26 Feb 07 |
jari |
465 |
for(int i = 0; i < vars.length; i++) { |
2 |
26 Feb 07 |
jari |
466 |
sos = 0; |
2 |
26 Feb 07 |
jari |
467 |
mean = getMean(means.A[i]); |
2 |
26 Feb 07 |
jari |
468 |
for(int j = 0; j < cols ; j++) { |
2 |
26 Feb 07 |
jari |
469 |
sos += Math.pow((means.A[i][j]-mean),2); |
2 |
26 Feb 07 |
jari |
470 |
} |
2 |
26 Feb 07 |
jari |
471 |
vars[i] = sos; |
2 |
26 Feb 07 |
jari |
472 |
} |
2 |
26 Feb 07 |
jari |
473 |
return vars; |
2 |
26 Feb 07 |
jari |
474 |
} |
2 |
26 Feb 07 |
jari |
475 |
|
2 |
26 Feb 07 |
jari |
/** returns a mean value for input |
2 |
26 Feb 07 |
jari |
477 |
*/ |
2 |
26 Feb 07 |
jari |
478 |
private float getMean(float [] vals) { |
2 |
26 Feb 07 |
jari |
479 |
float mean = 0; |
2 |
26 Feb 07 |
jari |
480 |
int n = 0; |
2 |
26 Feb 07 |
jari |
481 |
for(int i = 0; i < vals.length; i++) { |
2 |
26 Feb 07 |
jari |
482 |
if(!Float.isNaN(vals[i])) { |
2 |
26 Feb 07 |
jari |
483 |
n++; |
2 |
26 Feb 07 |
jari |
484 |
mean += vals[i]; |
2 |
26 Feb 07 |
jari |
485 |
} |
2 |
26 Feb 07 |
jari |
486 |
} |
2 |
26 Feb 07 |
jari |
487 |
return (n > 0 ? ((float)(mean/n)) : 0f); |
2 |
26 Feb 07 |
jari |
488 |
} |
2 |
26 Feb 07 |
jari |
489 |
|
2 |
26 Feb 07 |
jari |
/** returns clusters based on entropy ranking. |
2 |
26 Feb 07 |
jari |
491 |
*/ |
2 |
26 Feb 07 |
jari |
492 |
private int [][] getClustersBasedOnEntropyRank(AlgorithmData algData, FloatMatrix means, int [][] clusters, boolean areGeneClusters) { |
2 |
26 Feb 07 |
jari |
493 |
float [] entropies = new float[means.getRowDimension()]; |
2 |
26 Feb 07 |
jari |
494 |
|
2 |
26 Feb 07 |
jari |
495 |
for(int i = 0; i < entropies.length; i++) { |
2 |
26 Feb 07 |
jari |
496 |
entropies[i] = (float)(getEntropy(means.A[i])); |
2 |
26 Feb 07 |
jari |
497 |
} |
2 |
26 Feb 07 |
jari |
498 |
|
2 |
26 Feb 07 |
jari |
499 |
int [][] newClusters = new int[clusters.length][]; |
2 |
26 Feb 07 |
jari |
500 |
|
2 |
26 Feb 07 |
jari |
501 |
QSort sort = new QSort(entropies, QSort.DESCENDING); |
2 |
26 Feb 07 |
jari |
502 |
entropies = sort.getSorted(); |
2 |
26 Feb 07 |
jari |
503 |
|
2 |
26 Feb 07 |
jari |
504 |
int [] origOrder = sort.getOrigIndx(); |
2 |
26 Feb 07 |
jari |
505 |
|
2 |
26 Feb 07 |
jari |
506 |
for(int i = 0; i < newClusters.length; i++) |
2 |
26 Feb 07 |
jari |
507 |
newClusters[i] = clusters[origOrder[i]]; |
2 |
26 Feb 07 |
jari |
508 |
|
2 |
26 Feb 07 |
jari |
//store results |
2 |
26 Feb 07 |
jari |
510 |
String [] varianceArray = new String[entropies.length]; |
2 |
26 Feb 07 |
jari |
511 |
String [] clusterPop = new String[entropies.length]; |
2 |
26 Feb 07 |
jari |
512 |
for(int i = 0; i < entropies.length; i++) { |
2 |
26 Feb 07 |
jari |
513 |
varianceArray[i] = String.valueOf(entropies[i]); |
2 |
26 Feb 07 |
jari |
514 |
clusterPop[i] = String.valueOf(newClusters[i].length); |
2 |
26 Feb 07 |
jari |
515 |
} |
2 |
26 Feb 07 |
jari |
516 |
algData.addStringArray("diversity-value-array", varianceArray); |
2 |
26 Feb 07 |
jari |
517 |
algData.addStringArray("cluster-population-array", clusterPop); |
2 |
26 Feb 07 |
jari |
518 |
|
2 |
26 Feb 07 |
jari |
519 |
return newClusters; |
2 |
26 Feb 07 |
jari |
520 |
} |
2 |
26 Feb 07 |
jari |
521 |
|
2 |
26 Feb 07 |
jari |
522 |
|
2 |
26 Feb 07 |
jari |
/** returns the entropy of a set of values, entropy method extracted from |
2 |
26 Feb 07 |
jari |
* RN.java. |
2 |
26 Feb 07 |
jari |
525 |
*/ |
2 |
26 Feb 07 |
jari |
526 |
private double getEntropy(float[] pVector) { |
2 |
26 Feb 07 |
jari |
527 |
int c_DecileCount = 10; |
2 |
26 Feb 07 |
jari |
528 |
double fltMin = Double.MAX_VALUE; |
2 |
26 Feb 07 |
jari |
529 |
double fltMax = -Double.MAX_VALUE; |
2 |
26 Feb 07 |
jari |
530 |
int i=0; |
2 |
26 Feb 07 |
jari |
531 |
int[] arrDeciles = new int[c_DecileCount]; |
2 |
26 Feb 07 |
jari |
532 |
|
2 |
26 Feb 07 |
jari |
533 |
final int iSize = pVector.length; |
2 |
26 Feb 07 |
jari |
534 |
int iValCount = 0; |
2 |
26 Feb 07 |
jari |
535 |
for (i=0; i<iSize; i++) { |
2 |
26 Feb 07 |
jari |
536 |
if (Double.isNaN(pVector[i])) |
2 |
26 Feb 07 |
jari |
537 |
continue; |
2 |
26 Feb 07 |
jari |
538 |
fltMin = Math.min(fltMin, pVector[i]); |
2 |
26 Feb 07 |
jari |
539 |
fltMax = Math.max(fltMax, pVector[i]); |
2 |
26 Feb 07 |
jari |
540 |
iValCount++; |
2 |
26 Feb 07 |
jari |
541 |
} |
2 |
26 Feb 07 |
jari |
542 |
|
2 |
26 Feb 07 |
jari |
543 |
double fltStep = (fltMax-fltMin)/(c_DecileCount); |
2 |
26 Feb 07 |
jari |
544 |
if (fltStep == 0d) { |
2 |
26 Feb 07 |
jari |
545 |
return -1.0*Math.log(1.0)/(Math.log(2.0)); |
2 |
26 Feb 07 |
jari |
546 |
} |
2 |
26 Feb 07 |
jari |
547 |
|
2 |
26 Feb 07 |
jari |
548 |
if (fltMin == Double.MAX_VALUE) |
2 |
26 Feb 07 |
jari |
549 |
return 0d; |
2 |
26 Feb 07 |
jari |
550 |
|
2 |
26 Feb 07 |
jari |
551 |
Arrays.fill(arrDeciles, 0); |
2 |
26 Feb 07 |
jari |
552 |
for (i=0; i<iSize; i++) { |
2 |
26 Feb 07 |
jari |
553 |
if (Double.isNaN(pVector[i])) |
2 |
26 Feb 07 |
jari |
554 |
continue; |
2 |
26 Feb 07 |
jari |
555 |
int iDecileInd = (int)Math.ceil((pVector[i]-fltMin)/fltStep)-1; |
2 |
26 Feb 07 |
jari |
556 |
if (iDecileInd < 0) { |
2 |
26 Feb 07 |
jari |
557 |
iDecileInd = 0; |
2 |
26 Feb 07 |
jari |
558 |
} |
2 |
26 Feb 07 |
jari |
559 |
arrDeciles[iDecileInd]++; |
2 |
26 Feb 07 |
jari |
560 |
} |
2 |
26 Feb 07 |
jari |
561 |
if (iValCount == 0) |
2 |
26 Feb 07 |
jari |
562 |
return 0d; |
2 |
26 Feb 07 |
jari |
563 |
|
2 |
26 Feb 07 |
jari |
// finally, calculate entropy |
2 |
26 Feb 07 |
jari |
565 |
double dblEntropy=0; |
2 |
26 Feb 07 |
jari |
566 |
|
2 |
26 Feb 07 |
jari |
567 |
for (i=0; i<c_DecileCount; i++) { |
2 |
26 Feb 07 |
jari |
568 |
if (arrDeciles[i] == 0) { |
2 |
26 Feb 07 |
jari |
569 |
continue; |
2 |
26 Feb 07 |
jari |
570 |
} |
2 |
26 Feb 07 |
jari |
571 |
double dblPx=((double)arrDeciles[i])/iValCount; |
2 |
26 Feb 07 |
jari |
572 |
dblEntropy += dblPx*Math.log(dblPx)/(Math.log(2.0)); // log2(x)==log(x)/log(2) |
2 |
26 Feb 07 |
jari |
573 |
} |
2 |
26 Feb 07 |
jari |
574 |
return -dblEntropy; |
2 |
26 Feb 07 |
jari |
575 |
} |
2 |
26 Feb 07 |
jari |
576 |
|
2 |
26 Feb 07 |
jari |
/** Applies the detection filter trim |
2 |
26 Feb 07 |
jari |
578 |
*/ |
2 |
26 Feb 07 |
jari |
579 |
private Experiment applyAffyDetectionFilter(AlgorithmData algData) { |
2 |
26 Feb 07 |
jari |
580 |
|
2 |
26 Feb 07 |
jari |
//parameters |
2 |
26 Feb 07 |
jari |
582 |
AlgorithmParameters params = algData.getParams(); |
2 |
26 Feb 07 |
jari |
583 |
|
2 |
26 Feb 07 |
jari |
//get IData |
2 |
26 Feb 07 |
jari |
585 |
IData data = framework.getData(); |
2 |
26 Feb 07 |
jari |
586 |
|
2 |
26 Feb 07 |
jari |
//extract indices in Experiment to check |
2 |
26 Feb 07 |
jari |
588 |
int [] rows = experiment.getRowMappingArrayCopy(); |
2 |
26 Feb 07 |
jari |
589 |
int [] cols = experiment.getColumnIndicesCopy(); |
2 |
26 Feb 07 |
jari |
590 |
int numGenes = experiment.getNumberOfGenes(); |
2 |
26 Feb 07 |
jari |
591 |
int expCount = experiment.getNumberOfSamples(); |
2 |
26 Feb 07 |
jari |
592 |
|
2 |
26 Feb 07 |
jari |
593 |
|
2 |
26 Feb 07 |
jari |
//construct DetectionFilter and set properties |
2 |
26 Feb 07 |
jari |
595 |
String [] expNames = new String[expCount]; |
2 |
26 Feb 07 |
jari |
596 |
for(int i = 0; i < expNames.length; i++) { |
2 |
26 Feb 07 |
jari |
597 |
expNames[i] = data.getFullSampleName(cols[i]); |
2 |
26 Feb 07 |
jari |
598 |
} |
2 |
26 Feb 07 |
jari |
599 |
DetectionFilter filter = new DetectionFilter(expNames); |
2 |
26 Feb 07 |
jari |
600 |
filter.set_both(params.getBoolean("is-required-in-both-groups")); |
2 |
26 Feb 07 |
jari |
601 |
|
2 |
26 Feb 07 |
jari |
602 |
int [] numReq = algData.getIntArray("number-required"); |
2 |
26 Feb 07 |
jari |
// filter.set_num_required(algData.getIntArray("number-required")); |
2 |
26 Feb 07 |
jari |
//this takes a group number, and a number required |
2 |
26 Feb 07 |
jari |
605 |
for(int i = 0; i < numReq.length; i++) { |
2 |
26 Feb 07 |
jari |
606 |
filter.set_num_required(i, numReq[i]); |
2 |
26 Feb 07 |
jari |
607 |
} |
2 |
26 Feb 07 |
jari |
608 |
|
2 |
26 Feb 07 |
jari |
609 |
int [] groupMembership = algData.getIntArray("group-memberships"); |
2 |
26 Feb 07 |
jari |
//filter.set_group_membership(); |
2 |
26 Feb 07 |
jari |
//takes a group index and the file (col) index? |
2 |
26 Feb 07 |
jari |
612 |
for(int i = 0; i < groupMembership.length; i++) { |
2 |
26 Feb 07 |
jari |
613 |
filter.set_group_membership(groupMembership[i], i); |
2 |
26 Feb 07 |
jari |
614 |
} |
2 |
26 Feb 07 |
jari |
615 |
|
2 |
26 Feb 07 |
jari |
616 |
String [] detectionCalls = new String[expCount]; |
2 |
26 Feb 07 |
jari |
617 |
String detection; |
2 |
26 Feb 07 |
jari |
618 |
ISlideData slideData; //supports getDetection(int row) |
2 |
26 Feb 07 |
jari |
619 |
boolean [] isPresent = new boolean[numGenes]; |
2 |
26 Feb 07 |
jari |
620 |
int numPresent = 0; |
2 |
26 Feb 07 |
jari |
621 |
boolean present; |
2 |
26 Feb 07 |
jari |
622 |
|
2 |
26 Feb 07 |
jari |
//tally present genes |
2 |
26 Feb 07 |
jari |
624 |
for(int probe = 0; probe < numGenes; probe++) { |
2 |
26 Feb 07 |
jari |
625 |
for(int exp = 0; exp < expCount; exp++) { |
2 |
26 Feb 07 |
jari |
626 |
detectionCalls[exp] = data.getFeature(cols[exp]).getDetection(rows[probe]); |
2 |
26 Feb 07 |
jari |
627 |
} |
2 |
26 Feb 07 |
jari |
628 |
present = filter.keep_gene(detectionCalls); |
2 |
26 Feb 07 |
jari |
629 |
if(present) { |
2 |
26 Feb 07 |
jari |
630 |
isPresent[probe] = true; |
2 |
26 Feb 07 |
jari |
631 |
numPresent++; |
2 |
26 Feb 07 |
jari |
632 |
} |
2 |
26 Feb 07 |
jari |
633 |
} |
2 |
26 Feb 07 |
jari |
634 |
|
2 |
26 Feb 07 |
jari |
//construct Experiment |
2 |
26 Feb 07 |
jari |
636 |
FloatMatrix matrix = experiment.getMatrix(); |
2 |
26 Feb 07 |
jari |
637 |
float [][] values = new float[numPresent][expCount]; |
2 |
26 Feb 07 |
jari |
638 |
int [] rowMap = new int[numPresent]; |
2 |
26 Feb 07 |
jari |
639 |
int cnt = 0; |
2 |
26 Feb 07 |
jari |
640 |
for(int i = 0; i < numGenes; i++) { |
2 |
26 Feb 07 |
jari |
641 |
if(isPresent[i]) { |
2 |
26 Feb 07 |
jari |
642 |
rowMap[cnt] = rows[i]; |
2 |
26 Feb 07 |
jari |
643 |
values[cnt] = matrix.A[i]; |
2 |
26 Feb 07 |
jari |
644 |
cnt++; |
2 |
26 Feb 07 |
jari |
645 |
} |
2 |
26 Feb 07 |
jari |
646 |
} |
2 |
26 Feb 07 |
jari |
647 |
|
2 |
26 Feb 07 |
jari |
648 |
return (new Experiment(new FloatMatrix(values), cols, rowMap)); |
2 |
26 Feb 07 |
jari |
649 |
} |
2 |
26 Feb 07 |
jari |
650 |
|
2 |
26 Feb 07 |
jari |
651 |
|
2 |
26 Feb 07 |
jari |
652 |
|
2 |
26 Feb 07 |
jari |
/** applies the affy fold filter trim |
2 |
26 Feb 07 |
jari |
* @param algData |
2 |
26 Feb 07 |
jari |
* @return */ |
2 |
26 Feb 07 |
jari |
656 |
private Experiment applyAffyFoldFilter(AlgorithmData algData) { |
2 |
26 Feb 07 |
jari |
657 |
|
2 |
26 Feb 07 |
jari |
//parameters |
2 |
26 Feb 07 |
jari |
659 |
AlgorithmParameters params = algData.getParams(); |
2 |
26 Feb 07 |
jari |
660 |
|
2 |
26 Feb 07 |
jari |
//get IData |
2 |
26 Feb 07 |
jari |
662 |
IData data = framework.getData(); |
2 |
26 Feb 07 |
jari |
663 |
FloatMatrix matrix = experiment.getMatrix(); |
2 |
26 Feb 07 |
jari |
664 |
|
2 |
26 Feb 07 |
jari |
//extract indices in Experiment to check |
2 |
26 Feb 07 |
jari |
666 |
int [] rows = experiment.getRowMappingArrayCopy(); |
2 |
26 Feb 07 |
jari |
667 |
int [] cols = experiment.getColumnIndicesCopy(); |
2 |
26 Feb 07 |
jari |
668 |
int numGenes = experiment.getNumberOfGenes(); |
2 |
26 Feb 07 |
jari |
669 |
int expCount = experiment.getNumberOfSamples(); |
2 |
26 Feb 07 |
jari |
670 |
|
2 |
26 Feb 07 |
jari |
671 |
|
2 |
26 Feb 07 |
jari |
//construct DetectionFilter and set properties |
2 |
26 Feb 07 |
jari |
673 |
String [] expNames = new String[expCount]; |
2 |
26 Feb 07 |
jari |
674 |
for(int i = 0; i < expNames.length; i++) { |
2 |
26 Feb 07 |
jari |
675 |
expNames[i] = data.getFullSampleName(cols[i]); |
2 |
26 Feb 07 |
jari |
676 |
} |
2 |
26 Feb 07 |
jari |
677 |
FoldFilter filter = new FoldFilter(expNames); |
2 |
26 Feb 07 |
jari |
678 |
float foldChange = params.getFloat("fold-change"); |
2 |
26 Feb 07 |
jari |
679 |
int [] numReq = algData.getIntArray("number-of-members"); |
2 |
26 Feb 07 |
jari |
680 |
String divider = params.getString("divider-string"); |
2 |
26 Feb 07 |
jari |
681 |
|
2 |
26 Feb 07 |
jari |
682 |
filter.set_fold_change(foldChange); |
2 |
26 Feb 07 |
jari |
683 |
filter.set_divider(divider); |
2 |
26 Feb 07 |
jari |
684 |
|
2 |
26 Feb 07 |
jari |
685 |
int [] groupMembership = algData.getIntArray("group-memberships"); |
2 |
26 Feb 07 |
jari |
686 |
|
2 |
26 Feb 07 |
jari |
687 |
for(int i = 0; i < groupMembership.length; i++) { |
2 |
26 Feb 07 |
jari |
688 |
filter.set_group_membership(groupMembership[i], i); |
2 |
26 Feb 07 |
jari |
689 |
} |
2 |
26 Feb 07 |
jari |
690 |
|
2 |
26 Feb 07 |
jari |
691 |
float [] foldHits = new float[expCount]; |
2 |
26 Feb 07 |
jari |
692 |
String detection; |
2 |
26 Feb 07 |
jari |
693 |
ISlideData slideData; //supports getDetection(int row) |
2 |
26 Feb 07 |
jari |
694 |
boolean [] isPresent = new boolean[numGenes]; |
2 |
26 Feb 07 |
jari |
695 |
int numPresent = 0; |
2 |
26 Feb 07 |
jari |
696 |
boolean present; |
2 |
26 Feb 07 |
jari |
697 |
float [] vals = new float[numGenes]; |
2 |
26 Feb 07 |
jari |
//tally present genes |
2 |
26 Feb 07 |
jari |
699 |
for(int probe = 0; probe < numGenes; probe++) { |
2 |
26 Feb 07 |
jari |
//for(int exp = 0; exp < expCount; exp++) { |
2 |
26 Feb 07 |
jari |
// foldHits[exp] = data.getFeature(cols[exp]).get_fold_change(rows[probe]); |
2 |
26 Feb 07 |
jari |
702 |
//} |
2 |
26 Feb 07 |
jari |
703 |
present = filter.keep_gene(matrix.A[probe]); |
2 |
26 Feb 07 |
jari |
704 |
if(present) { |
2 |
26 Feb 07 |
jari |
705 |
isPresent[probe] = true; |
2 |
26 Feb 07 |
jari |
706 |
numPresent++; |
2 |
26 Feb 07 |
jari |
707 |
} |
2 |
26 Feb 07 |
jari |
708 |
} |
2 |
26 Feb 07 |
jari |
709 |
|
2 |
26 Feb 07 |
jari |
//construct Experiment |
2 |
26 Feb 07 |
jari |
711 |
float [][] values = new float[numPresent][expCount]; |
2 |
26 Feb 07 |
jari |
712 |
int [] rowMap = new int[numPresent]; |
2 |
26 Feb 07 |
jari |
713 |
int cnt = 0; |
2 |
26 Feb 07 |
jari |
714 |
for(int i = 0; i < numGenes; i++) { |
2 |
26 Feb 07 |
jari |
715 |
if(isPresent[i]) { |
2 |
26 Feb 07 |
jari |
716 |
rowMap[cnt] = rows[i]; |
2 |
26 Feb 07 |
jari |
717 |
values[cnt] = matrix.A[i]; |
2 |
26 Feb 07 |
jari |
718 |
cnt++; |
2 |
26 Feb 07 |
jari |
719 |
} |
2 |
26 Feb 07 |
jari |
720 |
} |
2 |
26 Feb 07 |
jari |
721 |
|
2 |
26 Feb 07 |
jari |
722 |
return (new Experiment(new FloatMatrix(values), cols, rowMap)); |
2 |
26 Feb 07 |
jari |
723 |
|
2 |
26 Feb 07 |
jari |
724 |
} |
2 |
26 Feb 07 |
jari |
725 |
|
2 |
26 Feb 07 |
jari |
726 |
|
2 |
26 Feb 07 |
jari |
727 |
|
2 |
26 Feb 07 |
jari |
728 |
} |