5045 |
22 Oct 18 |
nicklas |
1 |
package net.sf.basedb.reggie.baf; |
5045 |
22 Oct 18 |
nicklas |
2 |
|
5045 |
22 Oct 18 |
nicklas |
3 |
import java.util.ArrayList; |
5045 |
22 Oct 18 |
nicklas |
4 |
import java.util.List; |
5045 |
22 Oct 18 |
nicklas |
5 |
import java.util.Locale; |
5045 |
22 Oct 18 |
nicklas |
6 |
|
5045 |
22 Oct 18 |
nicklas |
7 |
import org.apache.commons.math3.distribution.NormalDistribution; |
5045 |
22 Oct 18 |
nicklas |
8 |
|
5045 |
22 Oct 18 |
nicklas |
9 |
import net.sf.basedb.reggie.vcf.GtData; |
5045 |
22 Oct 18 |
nicklas |
10 |
|
5045 |
22 Oct 18 |
nicklas |
11 |
/** |
5045 |
22 Oct 18 |
nicklas |
Class for collection mBAF statistics from SNP:s within |
5045 |
22 Oct 18 |
nicklas |
a region. |
5045 |
22 Oct 18 |
nicklas |
14 |
|
5045 |
22 Oct 18 |
nicklas |
@author nicklas |
5045 |
22 Oct 18 |
nicklas |
@since 4.20 |
5045 |
22 Oct 18 |
nicklas |
17 |
*/ |
5045 |
22 Oct 18 |
nicklas |
18 |
public class RegionStat |
5045 |
22 Oct 18 |
nicklas |
19 |
{ |
5045 |
22 Oct 18 |
nicklas |
20 |
|
5045 |
22 Oct 18 |
nicklas |
21 |
private static final NormalDistribution norm = new NormalDistribution(); |
5045 |
22 Oct 18 |
nicklas |
22 |
|
5045 |
22 Oct 18 |
nicklas |
23 |
/** |
5045 |
22 Oct 18 |
nicklas |
Create statistics collectors for the given regions. |
5045 |
22 Oct 18 |
nicklas |
25 |
*/ |
5045 |
22 Oct 18 |
nicklas |
26 |
public static final List<RegionStat> create(List<Region> regions) |
5045 |
22 Oct 18 |
nicklas |
27 |
{ |
5045 |
22 Oct 18 |
nicklas |
28 |
List<RegionStat> stat = new ArrayList<RegionStat>(regions.size()); |
5045 |
22 Oct 18 |
nicklas |
29 |
for (Region r : regions) |
5045 |
22 Oct 18 |
nicklas |
30 |
{ |
5045 |
22 Oct 18 |
nicklas |
31 |
stat.add(new RegionStat(r)); |
5045 |
22 Oct 18 |
nicklas |
32 |
} |
5045 |
22 Oct 18 |
nicklas |
33 |
return stat; |
5045 |
22 Oct 18 |
nicklas |
34 |
} |
5045 |
22 Oct 18 |
nicklas |
35 |
|
5045 |
22 Oct 18 |
nicklas |
36 |
private final Region region; |
5045 |
22 Oct 18 |
nicklas |
37 |
|
5045 |
22 Oct 18 |
nicklas |
38 |
private int count; |
5045 |
22 Oct 18 |
nicklas |
39 |
private double sumMBaf; |
5045 |
22 Oct 18 |
nicklas |
40 |
|
5045 |
22 Oct 18 |
nicklas |
41 |
public RegionStat(Region region) |
5045 |
22 Oct 18 |
nicklas |
42 |
{ |
5045 |
22 Oct 18 |
nicklas |
43 |
this.region = region; |
5045 |
22 Oct 18 |
nicklas |
44 |
} |
5045 |
22 Oct 18 |
nicklas |
45 |
|
5045 |
22 Oct 18 |
nicklas |
46 |
/** |
5045 |
22 Oct 18 |
nicklas |
Get the region. |
5045 |
22 Oct 18 |
nicklas |
48 |
*/ |
5045 |
22 Oct 18 |
nicklas |
49 |
public Region getRegion() |
5045 |
22 Oct 18 |
nicklas |
50 |
{ |
5045 |
22 Oct 18 |
nicklas |
51 |
return region; |
5045 |
22 Oct 18 |
nicklas |
52 |
} |
5045 |
22 Oct 18 |
nicklas |
53 |
|
5045 |
22 Oct 18 |
nicklas |
54 |
/** |
5045 |
22 Oct 18 |
nicklas |
Get the number of SNPs that was found in this region. |
5045 |
22 Oct 18 |
nicklas |
56 |
*/ |
5051 |
24 Oct 18 |
nicklas |
57 |
public int getSnpCount() |
5045 |
22 Oct 18 |
nicklas |
58 |
{ |
5045 |
22 Oct 18 |
nicklas |
59 |
return count; |
5045 |
22 Oct 18 |
nicklas |
60 |
} |
5045 |
22 Oct 18 |
nicklas |
61 |
|
5045 |
22 Oct 18 |
nicklas |
62 |
/** |
5045 |
22 Oct 18 |
nicklas |
Get the average mBAF value. |
5045 |
22 Oct 18 |
nicklas |
64 |
*/ |
5045 |
22 Oct 18 |
nicklas |
65 |
public double getAvgMBaf() |
5045 |
22 Oct 18 |
nicklas |
66 |
{ |
5045 |
22 Oct 18 |
nicklas |
67 |
return (sumMBaf / count); |
5045 |
22 Oct 18 |
nicklas |
68 |
} |
5045 |
22 Oct 18 |
nicklas |
69 |
|
5045 |
22 Oct 18 |
nicklas |
70 |
/** |
5045 |
22 Oct 18 |
nicklas |
Get the Z-score for the average mBAF of the SNPs in this |
5045 |
22 Oct 18 |
nicklas |
region when compared to the normal reference. |
5045 |
22 Oct 18 |
nicklas |
73 |
|
5045 |
22 Oct 18 |
nicklas |
Z = (mBAF - mBAFRef) / sdRef |
5045 |
22 Oct 18 |
nicklas |
75 |
*/ |
5045 |
22 Oct 18 |
nicklas |
76 |
public double getZScore() |
5045 |
22 Oct 18 |
nicklas |
77 |
{ |
5045 |
22 Oct 18 |
nicklas |
78 |
return (getAvgMBaf()-region.getAvgBafRef()) / region.getSdBafRef(); |
5045 |
22 Oct 18 |
nicklas |
79 |
} |
5045 |
22 Oct 18 |
nicklas |
80 |
|
5045 |
22 Oct 18 |
nicklas |
81 |
/** |
5045 |
22 Oct 18 |
nicklas |
Get the P-value for the average mBAF of the SNPs in this |
5045 |
22 Oct 18 |
nicklas |
region when compared to the normal reference. |
5045 |
22 Oct 18 |
nicklas |
84 |
|
5045 |
22 Oct 18 |
nicklas |
p = pnorm(-Z) |
5045 |
22 Oct 18 |
nicklas |
86 |
*/ |
5045 |
22 Oct 18 |
nicklas |
87 |
public double getPValue() |
5045 |
22 Oct 18 |
nicklas |
88 |
{ |
5045 |
22 Oct 18 |
nicklas |
89 |
return norm.cumulativeProbability(-getZScore()); |
5045 |
22 Oct 18 |
nicklas |
90 |
} |
5045 |
22 Oct 18 |
nicklas |
91 |
|
5045 |
22 Oct 18 |
nicklas |
92 |
/** |
5045 |
22 Oct 18 |
nicklas |
Add a new SNP to this region. |
5045 |
22 Oct 18 |
nicklas |
94 |
*/ |
5045 |
22 Oct 18 |
nicklas |
95 |
void addToStat(GtData gt) |
5045 |
22 Oct 18 |
nicklas |
96 |
{ |
5045 |
22 Oct 18 |
nicklas |
97 |
count++; |
5045 |
22 Oct 18 |
nicklas |
98 |
sumMBaf+= gt.getMBaf(); |
5045 |
22 Oct 18 |
nicklas |
99 |
} |
5045 |
22 Oct 18 |
nicklas |
100 |
|
5045 |
22 Oct 18 |
nicklas |
101 |
@Override |
5045 |
22 Oct 18 |
nicklas |
102 |
public String toString() |
5045 |
22 Oct 18 |
nicklas |
103 |
{ |
5051 |
24 Oct 18 |
nicklas |
104 |
return region.toString() + String.format(Locale.ENGLISH, "[%d,%.3f,%.3f,%.3e]", getSnpCount(), getAvgMBaf(), getZScore(), getPValue()); |
5045 |
22 Oct 18 |
nicklas |
105 |
} |
5045 |
22 Oct 18 |
nicklas |
106 |
|
5045 |
22 Oct 18 |
nicklas |
107 |
|
5045 |
22 Oct 18 |
nicklas |
108 |
|
5045 |
22 Oct 18 |
nicklas |
109 |
} |