149 |
10 Aug 06 |
enell |
1 |
/* |
783 |
18 Sep 08 |
jari |
$Id$ |
783 |
18 Sep 08 |
jari |
3 |
|
783 |
18 Sep 08 |
jari |
Copyright (C) 2006 Johan Enell |
783 |
18 Sep 08 |
jari |
5 |
|
783 |
18 Sep 08 |
jari |
This file is part of the se.lu.onk.MergeBioAssay plug-in for |
783 |
18 Sep 08 |
jari |
BASE. Available at http://baseplugins.thep.lu.se/ and BASE web |
783 |
18 Sep 08 |
jari |
site is http://base.thep.lu.se |
783 |
18 Sep 08 |
jari |
9 |
|
783 |
18 Sep 08 |
jari |
This is free software; you can redistribute it and/or modify it |
783 |
18 Sep 08 |
jari |
under the terms of the GNU General Public License as published by |
783 |
18 Sep 08 |
jari |
the Free Software Foundation; either version 3 of the License, or |
783 |
18 Sep 08 |
jari |
(at your option) any later version. |
783 |
18 Sep 08 |
jari |
14 |
|
783 |
18 Sep 08 |
jari |
The software is distributed in the hope that it will be useful, but |
783 |
18 Sep 08 |
jari |
WITHOUT ANY WARRANTY; without even the implied warranty of |
783 |
18 Sep 08 |
jari |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
783 |
18 Sep 08 |
jari |
General Public License for more details. |
783 |
18 Sep 08 |
jari |
19 |
|
783 |
18 Sep 08 |
jari |
You should have received a copy of the GNU General Public License |
783 |
18 Sep 08 |
jari |
along with BASE. If not, see <http://www.gnu.org/licenses/>. |
783 |
18 Sep 08 |
jari |
22 |
*/ |
149 |
10 Aug 06 |
enell |
23 |
package mergebioassay.mergers; |
149 |
10 Aug 06 |
enell |
24 |
|
149 |
10 Aug 06 |
enell |
25 |
import java.util.HashMap; |
149 |
10 Aug 06 |
enell |
26 |
import java.util.List; |
149 |
10 Aug 06 |
enell |
27 |
|
149 |
10 Aug 06 |
enell |
28 |
import basefile.BASEFileReader; |
149 |
10 Aug 06 |
enell |
29 |
import basefile.BASEFileSection; |
149 |
10 Aug 06 |
enell |
30 |
|
149 |
10 Aug 06 |
enell |
31 |
|
149 |
10 Aug 06 |
enell |
32 |
/**
 * @author Johan Enell
 *
 * Abstract class for merging BioAssays. The only method that the subclass must
 * implement is the merge method.
 */
149 |
10 Aug 06 |
enell |
38 |
public abstract class Merger |
149 |
10 Aug 06 |
enell |
39 |
{ |
149 |
10 Aug 06 |
enell |
40 |
protected int missingValues = 0; |
149 |
10 Aug 06 |
enell |
41 |
|
149 |
10 Aug 06 |
enell |
42 |
protected int nbrOfColumns = -1; |
149 |
10 Aug 06 |
enell |
43 |
|
149 |
10 Aug 06 |
enell |
44 |
protected int positionPos = -1; |
149 |
10 Aug 06 |
enell |
45 |
|
149 |
10 Aug 06 |
enell |
46 |
protected int assayDataPos = -1; |
149 |
10 Aug 06 |
enell |
47 |
|
149 |
10 Aug 06 |
enell |
48 |
protected int reporterPos = -1; |
149 |
10 Aug 06 |
enell |
49 |
|
149 |
10 Aug 06 |
enell |
50 |
protected List<String> assayFieldsMap; |
149 |
10 Aug 06 |
enell |
51 |
|
149 |
10 Aug 06 |
enell |
52 |
protected List<String> columnsMap; |
149 |
10 Aug 06 |
enell |
53 |
|
149 |
10 Aug 06 |
enell |
54 |
protected List<String> assaysMap; |
149 |
10 Aug 06 |
enell |
55 |
|
149 |
10 Aug 06 |
enell |
56 |
/** |
149 |
10 Aug 06 |
enell |
* Will merge the bioassays in the groups and print the result on <code>System.out</code>. |
149 |
10 Aug 06 |
enell |
58 |
* |
149 |
10 Aug 06 |
enell |
* @param section - the spot section with the parameters |
149 |
10 Aug 06 |
enell |
* @param reader - a baseFileReader to read the data |
149 |
10 Aug 06 |
enell |
* @param assayGroups - the group to be merged |
149 |
10 Aug 06 |
enell |
62 |
*/ |
149 |
10 Aug 06 |
enell |
63 |
public void merge(BASEFileSection section, BASEFileReader reader, HashMap<String, List<Merge_assay>> assayGroups) |
149 |
10 Aug 06 |
enell |
64 |
{} |
149 |
10 Aug 06 |
enell |
65 |
|
149 |
10 Aug 06 |
enell |
66 |
/** |
149 |
10 Aug 06 |
enell |
* Will initiate the merger by reading the parameters used in the merge |
149 |
10 Aug 06 |
enell |
* method. Usally called in the start of the merge method. |
149 |
10 Aug 06 |
enell |
69 |
* |
149 |
10 Aug 06 |
enell |
* @param section - the spot section with the parameters |
149 |
10 Aug 06 |
enell |
* @param reader - a baseFileReader to read the data |
149 |
10 Aug 06 |
enell |
72 |
*/ |
149 |
10 Aug 06 |
enell |
73 |
public void init(BASEFileSection section, BASEFileReader reader) |
149 |
10 Aug 06 |
enell |
74 |
{ |
149 |
10 Aug 06 |
enell |
75 |
assaysMap = section.findFieldList("assays"); |
149 |
10 Aug 06 |
enell |
76 |
if (assaysMap == null) |
149 |
10 Aug 06 |
enell |
77 |
{ |
149 |
10 Aug 06 |
enell |
78 |
System.err.print("assaysMap row in spots section not found\n"); |
149 |
10 Aug 06 |
enell |
79 |
System.exit(0); |
149 |
10 Aug 06 |
enell |
80 |
} |
149 |
10 Aug 06 |
enell |
81 |
|
149 |
10 Aug 06 |
enell |
82 |
columnsMap = section.findFieldList("columns"); |
149 |
10 Aug 06 |
enell |
83 |
if (columnsMap == null) |
149 |
10 Aug 06 |
enell |
84 |
{ |
149 |
10 Aug 06 |
enell |
85 |
System.err.print("Columns row in spots section not found\n"); |
149 |
10 Aug 06 |
enell |
86 |
System.exit(0); |
149 |
10 Aug 06 |
enell |
87 |
} |
149 |
10 Aug 06 |
enell |
88 |
|
149 |
10 Aug 06 |
enell |
89 |
assayFieldsMap = section.findFieldList("assayFields"); |
149 |
10 Aug 06 |
enell |
90 |
if (assayFieldsMap == null) |
149 |
10 Aug 06 |
enell |
91 |
{ |
149 |
10 Aug 06 |
enell |
92 |
System.err.print("assayFields row in spots section not found\n"); |
149 |
10 Aug 06 |
enell |
93 |
System.exit(0); |
149 |
10 Aug 06 |
enell |
94 |
} |
149 |
10 Aug 06 |
enell |
95 |
nbrOfColumns = (columnsMap.size() - 1) + (assayFieldsMap.size() * assaysMap.size()); |
149 |
10 Aug 06 |
enell |
96 |
|
149 |
10 Aug 06 |
enell |
97 |
positionPos = columnsMap.indexOf("position"); |
149 |
10 Aug 06 |
enell |
98 |
if (positionPos == -1) |
149 |
10 Aug 06 |
enell |
99 |
{ |
149 |
10 Aug 06 |
enell |
100 |
System.err.print("position column in columns row missing\n"); |
149 |
10 Aug 06 |
enell |
101 |
System.exit(0); |
149 |
10 Aug 06 |
enell |
102 |
} |
149 |
10 Aug 06 |
enell |
103 |
|
149 |
10 Aug 06 |
enell |
104 |
reporterPos = columnsMap.indexOf("reporter"); |
149 |
10 Aug 06 |
enell |
105 |
if (reporterPos == -1) |
149 |
10 Aug 06 |
enell |
106 |
{ |
149 |
10 Aug 06 |
enell |
107 |
System.err.print("reporter column in columns row missing\n"); |
149 |
10 Aug 06 |
enell |
108 |
System.exit(0); |
149 |
10 Aug 06 |
enell |
109 |
} |
149 |
10 Aug 06 |
enell |
110 |
|
149 |
10 Aug 06 |
enell |
111 |
assayDataPos = columnsMap.indexOf("assayData"); |
149 |
10 Aug 06 |
enell |
112 |
if (assayDataPos == -1) |
149 |
10 Aug 06 |
enell |
113 |
{ |
149 |
10 Aug 06 |
enell |
114 |
System.err.print("assayData column in columns row missing\n"); |
149 |
10 Aug 06 |
enell |
115 |
System.exit(0); |
149 |
10 Aug 06 |
enell |
116 |
} |
149 |
10 Aug 06 |
enell |
117 |
} |
149 |
10 Aug 06 |
enell |
118 |
|
149 |
10 Aug 06 |
enell |
119 |
/** |
149 |
10 Aug 06 |
enell |
* Used to calculate the geometric mean. |
149 |
10 Aug 06 |
enell |
121 |
* |
149 |
10 Aug 06 |
enell |
* @param values - the values to calculate mean |
149 |
10 Aug 06 |
enell |
* @return the geometrc mean |
149 |
10 Aug 06 |
enell |
124 |
*/ |
149 |
10 Aug 06 |
enell |
125 |
protected static double geometricMean(double[] values) |
149 |
10 Aug 06 |
enell |
126 |
{ |
149 |
10 Aug 06 |
enell |
127 |
double ret = values[0]; |
149 |
10 Aug 06 |
enell |
128 |
|
149 |
10 Aug 06 |
enell |
129 |
for (int i = 1; i < values.length; i++) |
149 |
10 Aug 06 |
enell |
130 |
ret *= values[i]; |
149 |
10 Aug 06 |
enell |
131 |
|
149 |
10 Aug 06 |
enell |
132 |
ret = (Math.pow(ret, (1.0 / values.length))); |
149 |
10 Aug 06 |
enell |
133 |
return ret; |
149 |
10 Aug 06 |
enell |
134 |
} |
149 |
10 Aug 06 |
enell |
135 |
|
149 |
10 Aug 06 |
enell |
136 |
/** |
149 |
10 Aug 06 |
enell |
* Used to calculate the arithmetic mean. |
149 |
10 Aug 06 |
enell |
138 |
* |
149 |
10 Aug 06 |
enell |
* @param values - the values to calculate mean |
149 |
10 Aug 06 |
enell |
* @return the geometrc mean |
149 |
10 Aug 06 |
enell |
141 |
*/ |
149 |
10 Aug 06 |
enell |
142 |
protected static double arithmeticMean(double[] values) |
149 |
10 Aug 06 |
enell |
143 |
{ |
149 |
10 Aug 06 |
enell |
144 |
double ret = values[0]; |
149 |
10 Aug 06 |
enell |
145 |
|
149 |
10 Aug 06 |
enell |
146 |
for (int i = 1; i < values.length; i++) |
149 |
10 Aug 06 |
enell |
147 |
ret += values[i]; |
149 |
10 Aug 06 |
enell |
148 |
|
149 |
10 Aug 06 |
enell |
149 |
ret /= values.length; |
149 |
10 Aug 06 |
enell |
150 |
return ret; |
149 |
10 Aug 06 |
enell |
151 |
} |
149 |
10 Aug 06 |
enell |
152 |
|
149 |
10 Aug 06 |
enell |
153 |
/** |
149 |
10 Aug 06 |
enell |
* Return the missing values. |
149 |
10 Aug 06 |
enell |
155 |
* |
149 |
10 Aug 06 |
enell |
* @return the missing values |
149 |
10 Aug 06 |
enell |
157 |
*/ |
149 |
10 Aug 06 |
enell |
158 |
public int getMissingValues() |
149 |
10 Aug 06 |
enell |
159 |
{ |
149 |
10 Aug 06 |
enell |
160 |
return missingValues; |
149 |
10 Aug 06 |
enell |
161 |
} |
149 |
10 Aug 06 |
enell |
162 |
|
149 |
10 Aug 06 |
enell |
163 |
/** |
149 |
10 Aug 06 |
enell |
* An internal class used to hold double values. Works basically as a Vector |
149 |
10 Aug 06 |
enell |
* but is specialiced to hold the primitiv type <code>double</code>. |
149 |
10 Aug 06 |
enell |
166 |
* |
149 |
10 Aug 06 |
enell |
* @author Johan Enell |
149 |
10 Aug 06 |
enell |
168 |
*/ |
149 |
10 Aug 06 |
enell |
169 |
protected class DoubleVector |
149 |
10 Aug 06 |
enell |
170 |
{ |
149 |
10 Aug 06 |
enell |
171 |
private double[] elementData; |
149 |
10 Aug 06 |
enell |
172 |
|
149 |
10 Aug 06 |
enell |
173 |
private int elementCount; |
149 |
10 Aug 06 |
enell |
174 |
|
149 |
10 Aug 06 |
enell |
175 |
private int capacityIncrement; |
149 |
10 Aug 06 |
enell |
176 |
|
149 |
10 Aug 06 |
enell |
177 |
/** |
149 |
10 Aug 06 |
enell |
* Constructs a new DoubleVector. |
149 |
10 Aug 06 |
enell |
179 |
*/ |
149 |
10 Aug 06 |
enell |
180 |
public DoubleVector() |
149 |
10 Aug 06 |
enell |
181 |
{ |
149 |
10 Aug 06 |
enell |
182 |
elementData = new double[10]; |
149 |
10 Aug 06 |
enell |
183 |
elementCount = 0; |
149 |
10 Aug 06 |
enell |
184 |
capacityIncrement = 2; |
149 |
10 Aug 06 |
enell |
185 |
} |
149 |
10 Aug 06 |
enell |
186 |
|
149 |
10 Aug 06 |
enell |
187 |
/** |
149 |
10 Aug 06 |
enell |
* Appends the specified element to the end of this vector. |
149 |
10 Aug 06 |
enell |
189 |
* |
149 |
10 Aug 06 |
enell |
* @param element - element to be added to this Vector |
149 |
10 Aug 06 |
enell |
191 |
*/ |
149 |
10 Aug 06 |
enell |
192 |
public void add(double element) |
149 |
10 Aug 06 |
enell |
193 |
{ |
149 |
10 Aug 06 |
enell |
194 |
if (elementCount < elementData.length) |
149 |
10 Aug 06 |
enell |
195 |
{ |
149 |
10 Aug 06 |
enell |
196 |
elementData[elementCount] = element; |
149 |
10 Aug 06 |
enell |
197 |
elementCount++; |
149 |
10 Aug 06 |
enell |
198 |
} |
149 |
10 Aug 06 |
enell |
199 |
else |
149 |
10 Aug 06 |
enell |
200 |
{ |
149 |
10 Aug 06 |
enell |
201 |
increaseCapacity(); |
149 |
10 Aug 06 |
enell |
202 |
add(element); |
149 |
10 Aug 06 |
enell |
203 |
} |
149 |
10 Aug 06 |
enell |
204 |
} |
149 |
10 Aug 06 |
enell |
205 |
|
149 |
10 Aug 06 |
enell |
206 |
/** |
149 |
10 Aug 06 |
enell |
* Returns an double array containing all of the elements in this list in |
149 |
10 Aug 06 |
enell |
* proper sequence. |
149 |
10 Aug 06 |
enell |
209 |
* |
149 |
10 Aug 06 |
enell |
* @return an array containing all of the elements in this list in proper |
149 |
10 Aug 06 |
enell |
* sequence. |
149 |
10 Aug 06 |
enell |
212 |
*/ |
149 |
10 Aug 06 |
enell |
213 |
public double[] toArray() |
149 |
10 Aug 06 |
enell |
214 |
{ |
149 |
10 Aug 06 |
enell |
215 |
double[] ret = new double[elementCount]; |
149 |
10 Aug 06 |
enell |
216 |
for (int i = 0; i < elementCount; i++) |
149 |
10 Aug 06 |
enell |
217 |
ret[i] = elementData[i]; |
149 |
10 Aug 06 |
enell |
218 |
return ret; |
149 |
10 Aug 06 |
enell |
219 |
} |
149 |
10 Aug 06 |
enell |
220 |
|
149 |
10 Aug 06 |
enell |
221 |
/** |
149 |
10 Aug 06 |
enell |
* Returns the number of elements in this list. |
149 |
10 Aug 06 |
enell |
223 |
* |
149 |
10 Aug 06 |
enell |
* @return the number of elements of this list. |
149 |
10 Aug 06 |
enell |
225 |
*/ |
149 |
10 Aug 06 |
enell |
226 |
public int size() |
149 |
10 Aug 06 |
enell |
227 |
{ |
149 |
10 Aug 06 |
enell |
228 |
return elementCount; |
149 |
10 Aug 06 |
enell |
229 |
} |
149 |
10 Aug 06 |
enell |
230 |
|
149 |
10 Aug 06 |
enell |
231 |
/** |
149 |
10 Aug 06 |
enell |
* Will increase the capacity. |
149 |
10 Aug 06 |
enell |
233 |
* |
149 |
10 Aug 06 |
enell |
234 |
*/ |
149 |
10 Aug 06 |
enell |
235 |
private void increaseCapacity() |
149 |
10 Aug 06 |
enell |
236 |
{ |
149 |
10 Aug 06 |
enell |
237 |
double[] tmp = new double[elementData.length * capacityIncrement]; |
149 |
10 Aug 06 |
enell |
238 |
for (int i = 0; i < elementCount; i++) |
149 |
10 Aug 06 |
enell |
239 |
tmp[i] = elementData[i]; |
149 |
10 Aug 06 |
enell |
240 |
elementData = tmp; |
149 |
10 Aug 06 |
enell |
241 |
} |
149 |
10 Aug 06 |
enell |
242 |
} |
149 |
10 Aug 06 |
enell |
243 |
} |