139 |
10 Aug 06 |
enell |
1 |
package smooth; |
139 |
10 Aug 06 |
enell |
2 |
|
222 |
22 Dec 06 |
enell |
3 |
import org.jfree.data.statistics.BoxAndWhiskerCalculator; |
222 |
22 Dec 06 |
enell |
4 |
import org.jfree.data.statistics.BoxAndWhiskerItem; |
222 |
22 Dec 06 |
enell |
5 |
|
139 |
10 Aug 06 |
enell |
6 |
import basefile.BASEFileSpotSection; |
139 |
10 Aug 06 |
enell |
7 |
|
139 |
10 Aug 06 |
enell |
8 |
import java.util.Arrays; |
222 |
22 Dec 06 |
enell |
9 |
import java.util.Collections; |
139 |
10 Aug 06 |
enell |
10 |
import java.util.Comparator; |
222 |
22 Dec 06 |
enell |
11 |
import java.util.HashMap; |
139 |
10 Aug 06 |
enell |
12 |
import java.util.Iterator; |
139 |
10 Aug 06 |
enell |
13 |
import java.util.LinkedList; |
139 |
10 Aug 06 |
enell |
14 |
import java.util.List; |
222 |
22 Dec 06 |
enell |
15 |
import java.util.Map; |
139 |
10 Aug 06 |
enell |
16 |
|
139 |
10 Aug 06 |
enell |
17 |
/** |
139 |
10 Aug 06 |
enell |
* This class computes a smooth M for every spot using a sliding window across |
139 |
10 Aug 06 |
enell |
* the chromosomes. The window size can either be defined as number of spots or |
139 |
10 Aug 06 |
enell |
* number of basepairs. I spots are choosen then the window will have a constant |
139 |
10 Aug 06 |
enell |
* size but the distance on the chromosome between the fist and last spot can |
139 |
10 Aug 06 |
enell |
* differ a lot. On the other hand, if basepairs are choosen the number of spots |
139 |
10 Aug 06 |
enell |
* can differ but the distance on the chromosome will never be greater than the |
139 |
10 Aug 06 |
enell |
* window size value. The spot can aither be smoothed against a mean or a median |
139 |
10 Aug 06 |
enell |
* and the impact of the smooth can be adjusted using the weight parameter. A |
139 |
10 Aug 06 |
enell |
* weight value of 0 means that the spot wont be smoothed at all and a value of |
139 |
10 Aug 06 |
enell |
* 1 means the spot will be smoothed the while distance. Calculation: |
139 |
10 Aug 06 |
enell |
28 |
* |
139 |
10 Aug 06 |
enell |
* <pre> |
139 |
10 Aug 06 |
enell |
* M' = M + w * (a - M) |
139 |
10 Aug 06 |
enell |
* </pre> |
139 |
10 Aug 06 |
enell |
32 |
* |
139 |
10 Aug 06 |
enell |
* Where M' is the new M and M is the old, w is the weight and a is the average |
139 |
10 Aug 06 |
enell |
* of the window. |
139 |
10 Aug 06 |
enell |
35 |
* |
139 |
10 Aug 06 |
enell |
* @author Johan Enell, johan.enell@med.lu.se, Dept Oncology, Lund University |
139 |
10 Aug 06 |
enell |
37 |
*/ |
139 |
10 Aug 06 |
enell |
38 |
class Smooth |
139 |
10 Aug 06 |
enell |
39 |
{ |
139 |
10 Aug 06 |
enell |
40 |
private String winType; |
139 |
10 Aug 06 |
enell |
41 |
|
139 |
10 Aug 06 |
enell |
42 |
private String average; |
139 |
10 Aug 06 |
enell |
43 |
|
139 |
10 Aug 06 |
enell |
44 |
private int winSize; |
139 |
10 Aug 06 |
enell |
45 |
|
139 |
10 Aug 06 |
enell |
46 |
private float w; |
222 |
22 Dec 06 |
enell |
47 |
|
222 |
22 Dec 06 |
enell |
48 |
private Map<Integer, BoxAndWhiskerItem> boxAndWhiskerItems; |
139 |
10 Aug 06 |
enell |
49 |
|
139 |
10 Aug 06 |
enell |
50 |
/** |
139 |
10 Aug 06 |
enell |
* Constructs a new smoother with a set of parameters. |
139 |
10 Aug 06 |
enell |
52 |
* |
139 |
10 Aug 06 |
enell |
* @param winType |
139 |
10 Aug 06 |
enell |
* the type of window. Can either be "cl" for clones or "bp" |
139 |
10 Aug 06 |
enell |
* basepairs. |
139 |
10 Aug 06 |
enell |
* @param average |
139 |
10 Aug 06 |
enell |
* the average method to use. Can either be "median" or "mean". |
139 |
10 Aug 06 |
enell |
* @param winSize |
139 |
10 Aug 06 |
enell |
* the size of the window. The interpretation is determined of the |
139 |
10 Aug 06 |
enell |
* window type. |
139 |
10 Aug 06 |
enell |
* @param w |
139 |
10 Aug 06 |
enell |
* the weight |
139 |
10 Aug 06 |
enell |
63 |
*/ |
139 |
10 Aug 06 |
enell |
64 |
Smooth(String winType, String average, int winSize, float w) |
139 |
10 Aug 06 |
enell |
65 |
{ |
139 |
10 Aug 06 |
enell |
66 |
this.winType = winType; |
139 |
10 Aug 06 |
enell |
67 |
this.average = average; |
139 |
10 Aug 06 |
enell |
68 |
this.winSize = winSize; |
139 |
10 Aug 06 |
enell |
69 |
this.w = w; |
222 |
22 Dec 06 |
enell |
70 |
this.boxAndWhiskerItems = new HashMap<Integer, BoxAndWhiskerItem>(); |
139 |
10 Aug 06 |
enell |
71 |
} |
139 |
10 Aug 06 |
enell |
72 |
|
139 |
10 Aug 06 |
enell |
73 |
/** |
139 |
10 Aug 06 |
enell |
* This will smooth the spots in the section and store the smoothed value in |
139 |
10 Aug 06 |
enell |
* {@link Spot#setMprim(float)}. The method will start by sorting the |
139 |
10 Aug 06 |
enell |
* reporters in the section by chromosome and each chromosome by chromosome |
139 |
10 Aug 06 |
enell |
* position. Then it will iterate over the chromosome and slide a window |
139 |
10 Aug 06 |
enell |
* over each. |
139 |
10 Aug 06 |
enell |
79 |
* |
139 |
10 Aug 06 |
enell |
* @param bfss |
139 |
10 Aug 06 |
enell |
* the spot section that is going to be smoothed. |
139 |
10 Aug 06 |
enell |
82 |
*/ |
139 |
10 Aug 06 |
enell |
83 |
void compute(BASEFileSpotSection<Reporter, Spot> bfss) |
139 |
10 Aug 06 |
enell |
84 |
{ |
139 |
10 Aug 06 |
enell |
85 |
ChromosomeIterator chromosomeIterator = new ChromosomeIterator(bfss); |
139 |
10 Aug 06 |
enell |
86 |
bfss.sortReporter(new Comparator<Reporter>() |
139 |
10 Aug 06 |
enell |
87 |
{ |
139 |
10 Aug 06 |
enell |
88 |
public int compare(Reporter r1, Reporter r2) |
139 |
10 Aug 06 |
enell |
89 |
{ |
139 |
10 Aug 06 |
enell |
90 |
if (r1.getChromosome() < r2.getChromosome()) return -1; |
139 |
10 Aug 06 |
enell |
91 |
else if (r1.getChromosome() == r2.getChromosome()) |
139 |
10 Aug 06 |
enell |
92 |
{ |
139 |
10 Aug 06 |
enell |
93 |
if (r1.getChrPosition() < r2.getChrPosition()) return -1; |
139 |
10 Aug 06 |
enell |
94 |
else if (r1.getChrPosition() == r2.getChrPosition()) return 0; |
139 |
10 Aug 06 |
enell |
95 |
else if (r1.getChrPosition() > r2.getChrPosition()) return 1; |
139 |
10 Aug 06 |
enell |
96 |
} |
139 |
10 Aug 06 |
enell |
97 |
return 1; |
139 |
10 Aug 06 |
enell |
98 |
} |
139 |
10 Aug 06 |
enell |
99 |
}); |
139 |
10 Aug 06 |
enell |
100 |
|
222 |
22 Dec 06 |
enell |
101 |
List<Integer> winSizes = new LinkedList<Integer>(); |
222 |
22 Dec 06 |
enell |
102 |
|
139 |
10 Aug 06 |
enell |
103 |
while (chromosomeIterator.hasNext()) |
139 |
10 Aug 06 |
enell |
104 |
{ |
139 |
10 Aug 06 |
enell |
105 |
List<Reporter> reporters = chromosomeIterator.next(); |
139 |
10 Aug 06 |
enell |
106 |
LinkedList<Float> window = new LinkedList<Float>(); |
139 |
10 Aug 06 |
enell |
107 |
|
139 |
10 Aug 06 |
enell |
// Start and stop index for window include, exclude |
139 |
10 Aug 06 |
enell |
// [start, stop) |
139 |
10 Aug 06 |
enell |
110 |
int start = 0; |
139 |
10 Aug 06 |
enell |
111 |
int stop = 0; |
139 |
10 Aug 06 |
enell |
112 |
|
139 |
10 Aug 06 |
enell |
113 |
for (Reporter r : reporters) |
139 |
10 Aug 06 |
enell |
114 |
{ |
139 |
10 Aug 06 |
enell |
115 |
if (winType.equals("cl")) |
139 |
10 Aug 06 |
enell |
116 |
{ |
139 |
10 Aug 06 |
enell |
117 |
if (stop < reporters.size()) |
139 |
10 Aug 06 |
enell |
118 |
{ |
227 |
17 Jan 07 |
enell |
119 |
winSizes.add(reporters.get(stop).getChrPosition() - reporters.get(start).getChrPosition()); |
139 |
10 Aug 06 |
enell |
120 |
if (!window.isEmpty()) |
139 |
10 Aug 06 |
enell |
121 |
{ |
139 |
10 Aug 06 |
enell |
122 |
window.removeLast(); |
139 |
10 Aug 06 |
enell |
123 |
start++; |
139 |
10 Aug 06 |
enell |
124 |
} |
139 |
10 Aug 06 |
enell |
125 |
while (stop < reporters.size() && window.size() < winSize) |
139 |
10 Aug 06 |
enell |
126 |
{ |
139 |
10 Aug 06 |
enell |
127 |
window.addFirst(reporters.get(stop).getSpot().getM()); |
139 |
10 Aug 06 |
enell |
128 |
stop++; |
139 |
10 Aug 06 |
enell |
129 |
} |
139 |
10 Aug 06 |
enell |
130 |
} |
139 |
10 Aug 06 |
enell |
131 |
} |
139 |
10 Aug 06 |
enell |
132 |
else if (winType.equals("bp")) |
139 |
10 Aug 06 |
enell |
133 |
{ |
139 |
10 Aug 06 |
enell |
134 |
if (stop < reporters.size()) |
139 |
10 Aug 06 |
enell |
135 |
{ |
227 |
17 Jan 07 |
enell |
136 |
winSizes.add(stop - start); |
139 |
10 Aug 06 |
enell |
137 |
while (!window.isEmpty() && r.getChrPosition() - reporters.get(start).getChrPosition() > winSize / 2) |
139 |
10 Aug 06 |
enell |
138 |
{ |
139 |
10 Aug 06 |
enell |
139 |
window.removeFirst(); |
139 |
10 Aug 06 |
enell |
140 |
start++; |
139 |
10 Aug 06 |
enell |
141 |
} |
139 |
10 Aug 06 |
enell |
142 |
while (stop < reporters.size() && reporters.get(stop).getChrPosition() - reporters.get(start).getChrPosition() < winSize) |
139 |
10 Aug 06 |
enell |
143 |
{ |
139 |
10 Aug 06 |
enell |
144 |
window.addFirst(reporters.get(stop).getSpot().getM()); |
139 |
10 Aug 06 |
enell |
145 |
stop++; |
139 |
10 Aug 06 |
enell |
146 |
} |
139 |
10 Aug 06 |
enell |
147 |
} |
139 |
10 Aug 06 |
enell |
148 |
} |
139 |
10 Aug 06 |
enell |
149 |
float mean = Float.NaN; |
139 |
10 Aug 06 |
enell |
150 |
if (average.equals("median")) |
139 |
10 Aug 06 |
enell |
151 |
{ |
139 |
10 Aug 06 |
enell |
152 |
mean = median(window.toArray(new Float[window.size()])); |
139 |
10 Aug 06 |
enell |
153 |
} |
139 |
10 Aug 06 |
enell |
154 |
else if (average.equals("mean")) |
139 |
10 Aug 06 |
enell |
155 |
{ |
139 |
10 Aug 06 |
enell |
156 |
mean = mean(window.toArray(new Float[window.size()])); |
139 |
10 Aug 06 |
enell |
157 |
} |
139 |
10 Aug 06 |
enell |
158 |
|
139 |
10 Aug 06 |
enell |
159 |
if (!Float.isNaN(mean)) |
139 |
10 Aug 06 |
enell |
160 |
{ |
139 |
10 Aug 06 |
enell |
161 |
float fit = w * (mean - r.getSpot().getM()); |
139 |
10 Aug 06 |
enell |
162 |
r.getSpot().setMprim(r.getSpot().getM() + fit); |
139 |
10 Aug 06 |
enell |
163 |
} |
139 |
10 Aug 06 |
enell |
164 |
} |
139 |
10 Aug 06 |
enell |
165 |
} |
222 |
22 Dec 06 |
enell |
166 |
boxAndWhiskerItems.put(bfss.findIntOpt("assays"), BoxAndWhiskerCalculator.calculateBoxAndWhiskerStatistics(winSizes)); |
139 |
10 Aug 06 |
enell |
167 |
} |
139 |
10 Aug 06 |
enell |
168 |
|
222 |
22 Dec 06 |
enell |
169 |
public BoxAndWhiskerItem getBoxAndWhiskerItem(Integer assayId) |
222 |
22 Dec 06 |
enell |
170 |
{ |
222 |
22 Dec 06 |
enell |
171 |
return boxAndWhiskerItems.get(assayId); |
222 |
22 Dec 06 |
enell |
172 |
} |
222 |
22 Dec 06 |
enell |
173 |
|
139 |
10 Aug 06 |
enell |
174 |
/** |
139 |
10 Aug 06 |
enell |
* This will calculare the mean of an array. |
139 |
10 Aug 06 |
enell |
176 |
* |
139 |
10 Aug 06 |
enell |
* @param floats |
139 |
10 Aug 06 |
enell |
* the array to calculate the mean |
139 |
10 Aug 06 |
enell |
* @return the mean or Float.NaN if the mean cant be calculated |
139 |
10 Aug 06 |
enell |
180 |
*/ |
139 |
10 Aug 06 |
enell |
181 |
private float mean(Float[] floats) |
139 |
10 Aug 06 |
enell |
182 |
{ |
139 |
10 Aug 06 |
enell |
183 |
float sum = 0; |
139 |
10 Aug 06 |
enell |
184 |
if (floats.length == 0) |
139 |
10 Aug 06 |
enell |
185 |
{ |
139 |
10 Aug 06 |
enell |
186 |
return Float.NaN; |
139 |
10 Aug 06 |
enell |
187 |
} |
139 |
10 Aug 06 |
enell |
188 |
else |
139 |
10 Aug 06 |
enell |
189 |
{ |
139 |
10 Aug 06 |
enell |
190 |
for (int i = 0; i < floats.length; i++) |
139 |
10 Aug 06 |
enell |
191 |
{ |
139 |
10 Aug 06 |
enell |
192 |
sum += floats[i]; |
139 |
10 Aug 06 |
enell |
193 |
} |
139 |
10 Aug 06 |
enell |
194 |
} |
139 |
10 Aug 06 |
enell |
195 |
return sum / floats.length; |
139 |
10 Aug 06 |
enell |
196 |
} |
139 |
10 Aug 06 |
enell |
197 |
|
139 |
10 Aug 06 |
enell |
198 |
/** |
139 |
10 Aug 06 |
enell |
* This will calculate the median of an array. |
139 |
10 Aug 06 |
enell |
200 |
* |
139 |
10 Aug 06 |
enell |
* @param floats |
139 |
10 Aug 06 |
enell |
* the array to calculate the median |
139 |
10 Aug 06 |
enell |
* @return the median or Float.NaN if the median cant be calculated |
139 |
10 Aug 06 |
enell |
204 |
*/ |
139 |
10 Aug 06 |
enell |
205 |
private float median(Float[] floats) |
139 |
10 Aug 06 |
enell |
206 |
{ |
139 |
10 Aug 06 |
enell |
207 |
if (floats.length < 3) |
139 |
10 Aug 06 |
enell |
208 |
{ |
139 |
10 Aug 06 |
enell |
209 |
return mean(floats); |
139 |
10 Aug 06 |
enell |
210 |
} |
139 |
10 Aug 06 |
enell |
211 |
else |
139 |
10 Aug 06 |
enell |
212 |
{ |
139 |
10 Aug 06 |
enell |
213 |
Arrays.sort(floats); |
139 |
10 Aug 06 |
enell |
214 |
int i = (int) Math.ceil((floats.length - 1) / 2); |
139 |
10 Aug 06 |
enell |
215 |
if (floats.length % 2 == 1) |
139 |
10 Aug 06 |
enell |
216 |
{ |
139 |
10 Aug 06 |
enell |
217 |
return floats[i]; |
139 |
10 Aug 06 |
enell |
218 |
} |
139 |
10 Aug 06 |
enell |
219 |
return floats[i] + floats[i + 1] / 2; |
139 |
10 Aug 06 |
enell |
220 |
} |
139 |
10 Aug 06 |
enell |
221 |
} |
139 |
10 Aug 06 |
enell |
222 |
|
139 |
10 Aug 06 |
enell |
223 |
/** |
139 |
10 Aug 06 |
enell |
* This iterator will over chromosome in a spot section. The reporters must |
139 |
10 Aug 06 |
enell |
* be sorted by chromosome for this iterator to work correct. |
139 |
10 Aug 06 |
enell |
226 |
* |
139 |
10 Aug 06 |
enell |
* @author Johan Enell, johan.enell@med.lu.se, Dept Oncology, Lund |
139 |
10 Aug 06 |
enell |
* University |
139 |
10 Aug 06 |
enell |
229 |
*/ |
139 |
10 Aug 06 |
enell |
230 |
private class ChromosomeIterator implements Iterator<List<Reporter>> |
139 |
10 Aug 06 |
enell |
231 |
{ |
139 |
10 Aug 06 |
enell |
232 |
private int start = 0; |
139 |
10 Aug 06 |
enell |
233 |
|
139 |
10 Aug 06 |
enell |
234 |
private int end = 0; |
139 |
10 Aug 06 |
enell |
235 |
|
139 |
10 Aug 06 |
enell |
236 |
private int currentChr = -1; |
139 |
10 Aug 06 |
enell |
237 |
|
139 |
10 Aug 06 |
enell |
238 |
private final BASEFileSpotSection<Reporter, Spot> bfss; |
139 |
10 Aug 06 |
enell |
239 |
|
139 |
10 Aug 06 |
enell |
240 |
public ChromosomeIterator(BASEFileSpotSection<Reporter, Spot> bfss) |
139 |
10 Aug 06 |
enell |
241 |
{ |
139 |
10 Aug 06 |
enell |
242 |
this.bfss = bfss; |
139 |
10 Aug 06 |
enell |
243 |
} |
139 |
10 Aug 06 |
enell |
244 |
|
139 |
10 Aug 06 |
enell |
245 |
public boolean hasNext() |
139 |
10 Aug 06 |
enell |
246 |
{ |
139 |
10 Aug 06 |
enell |
247 |
start = end; |
139 |
10 Aug 06 |
enell |
248 |
if (start >= bfss.getReporterSize()) |
139 |
10 Aug 06 |
enell |
249 |
{ |
139 |
10 Aug 06 |
enell |
250 |
return false; |
139 |
10 Aug 06 |
enell |
251 |
} |
139 |
10 Aug 06 |
enell |
252 |
currentChr = bfss.getReporter(start).getChromosome(); |
139 |
10 Aug 06 |
enell |
253 |
while (end < bfss.getReporterSize()) |
139 |
10 Aug 06 |
enell |
254 |
{ |
139 |
10 Aug 06 |
enell |
255 |
Reporter r = bfss.getReporter(end); |
139 |
10 Aug 06 |
enell |
256 |
if (r.getChromosome() != currentChr) |
139 |
10 Aug 06 |
enell |
257 |
{ |
139 |
10 Aug 06 |
enell |
258 |
break; |
139 |
10 Aug 06 |
enell |
259 |
} |
139 |
10 Aug 06 |
enell |
260 |
end++; |
139 |
10 Aug 06 |
enell |
261 |
} |
139 |
10 Aug 06 |
enell |
262 |
return true; |
139 |
10 Aug 06 |
enell |
263 |
} |
139 |
10 Aug 06 |
enell |
264 |
|
139 |
10 Aug 06 |
enell |
265 |
public List<Reporter> next() |
139 |
10 Aug 06 |
enell |
266 |
{ |
139 |
10 Aug 06 |
enell |
267 |
if (start >= bfss.getHeight()) |
139 |
10 Aug 06 |
enell |
268 |
{ |
139 |
10 Aug 06 |
enell |
269 |
return null; |
139 |
10 Aug 06 |
enell |
270 |
} |
139 |
10 Aug 06 |
enell |
271 |
return bfss.reporterSubList(start, end); |
139 |
10 Aug 06 |
enell |
272 |
} |
139 |
10 Aug 06 |
enell |
273 |
|
139 |
10 Aug 06 |
enell |
274 |
public void remove() |
139 |
10 Aug 06 |
enell |
275 |
{ |
139 |
10 Aug 06 |
enell |
276 |
throw new UnsupportedOperationException(); |
139 |
10 Aug 06 |
enell |
277 |
} |
139 |
10 Aug 06 |
enell |
278 |
|
139 |
10 Aug 06 |
enell |
279 |
public final int getCurrentChr() |
139 |
10 Aug 06 |
enell |
280 |
{ |
139 |
10 Aug 06 |
enell |
281 |
return currentChr; |
139 |
10 Aug 06 |
enell |
282 |
} |
139 |
10 Aug 06 |
enell |
283 |
|
139 |
10 Aug 06 |
enell |
284 |
} |
139 |
10 Aug 06 |
enell |
285 |
} |