5040 |
19 Oct 18 |
nicklas |
1 |
package net.sf.basedb.reggie.baf; |
5040 |
19 Oct 18 |
nicklas |
2 |
|
5040 |
19 Oct 18 |
nicklas |
3 |
import net.sf.basedb.reggie.vcf.GtData; |
5040 |
19 Oct 18 |
nicklas |
4 |
import net.sf.basedb.reggie.vcf.SnpData; |
5040 |
19 Oct 18 |
nicklas |
5 |
import net.sf.basedb.reggie.vcf.VcfDataFilter; |
5051 |
24 Oct 18 |
nicklas |
6 |
import net.sf.basedb.util.filter.Filter; |
5040 |
19 Oct 18 |
nicklas |
7 |
|
5040 |
19 Oct 18 |
nicklas |
8 |
/** |
5040 |
19 Oct 18 |
nicklas |
Options for extracting mBAF data from VCF files. |
5040 |
19 Oct 18 |
nicklas |
10 |
|
5040 |
19 Oct 18 |
nicklas |
11 |
|
5040 |
19 Oct 18 |
nicklas |
12 |
|
5040 |
19 Oct 18 |
nicklas |
@author nicklas |
5040 |
19 Oct 18 |
nicklas |
@since 4.20 |
5040 |
19 Oct 18 |
nicklas |
15 |
*/ |
5040 |
19 Oct 18 |
nicklas |
16 |
public class MBafOptions |
5040 |
19 Oct 18 |
nicklas |
17 |
{ |
5040 |
19 Oct 18 |
nicklas |
18 |
|
5040 |
19 Oct 18 |
nicklas |
19 |
public static final int DEFAULT_MIN_DP = 20; |
5040 |
19 Oct 18 |
nicklas |
20 |
public static final float DEFAULT_MAX_MBAF = 0.9f; |
5050 |
24 Oct 18 |
nicklas |
21 |
public static final float DEFAULT_SIGNIFICANT_PVAL = 0.01f; |
5051 |
24 Oct 18 |
nicklas |
22 |
public static final int DEFAULT_MIN_SNPCOUNT_FOR_REGION = 20; |
5040 |
19 Oct 18 |
nicklas |
23 |
|
5040 |
19 Oct 18 |
nicklas |
24 |
private int minDP = DEFAULT_MIN_DP; |
5040 |
19 Oct 18 |
nicklas |
25 |
private float maxMBAF = DEFAULT_MAX_MBAF; |
5050 |
24 Oct 18 |
nicklas |
26 |
private float significantPVal = DEFAULT_SIGNIFICANT_PVAL; |
5051 |
24 Oct 18 |
nicklas |
27 |
private int minSnpCountForRegion = DEFAULT_MIN_SNPCOUNT_FOR_REGION; |
5040 |
19 Oct 18 |
nicklas |
28 |
|
5040 |
19 Oct 18 |
nicklas |
29 |
public MBafOptions() |
5040 |
19 Oct 18 |
nicklas |
30 |
{} |
5040 |
19 Oct 18 |
nicklas |
31 |
|
5040 |
19 Oct 18 |
nicklas |
32 |
/** |
5040 |
19 Oct 18 |
nicklas |
Get the minimal value for DP that is accepted for a SNP |
5040 |
19 Oct 18 |
nicklas |
in order to use it for mBAF statistics. Default value is 20. |
5040 |
19 Oct 18 |
nicklas |
@see GtData#getDP() |
5040 |
19 Oct 18 |
nicklas |
36 |
*/ |
5040 |
19 Oct 18 |
nicklas |
37 |
public int getMinDP() |
5040 |
19 Oct 18 |
nicklas |
38 |
{ |
5040 |
19 Oct 18 |
nicklas |
39 |
return minDP; |
5040 |
19 Oct 18 |
nicklas |
40 |
} |
5040 |
19 Oct 18 |
nicklas |
41 |
public void setMinDP(int minDP) |
5040 |
19 Oct 18 |
nicklas |
42 |
{ |
5040 |
19 Oct 18 |
nicklas |
43 |
this.minDP = minDP; |
5040 |
19 Oct 18 |
nicklas |
44 |
} |
5040 |
19 Oct 18 |
nicklas |
45 |
|
5040 |
19 Oct 18 |
nicklas |
46 |
/** |
5050 |
24 Oct 18 |
nicklas |
Get the maximum value for mBAF that is accepted for a SNP |
5040 |
19 Oct 18 |
nicklas |
in order to use it for mBAF statistics. Default value is 0.9. |
5040 |
19 Oct 18 |
nicklas |
@see GtData#getMBaf() |
5040 |
19 Oct 18 |
nicklas |
50 |
*/ |
5040 |
19 Oct 18 |
nicklas |
51 |
public float getMaxMBaf() |
5040 |
19 Oct 18 |
nicklas |
52 |
{ |
5040 |
19 Oct 18 |
nicklas |
53 |
return maxMBAF; |
5040 |
19 Oct 18 |
nicklas |
54 |
} |
5051 |
24 Oct 18 |
nicklas |
55 |
public void setMaxMBaf(float maxMBAF) |
5040 |
19 Oct 18 |
nicklas |
56 |
{ |
5040 |
19 Oct 18 |
nicklas |
57 |
this.maxMBAF = maxMBAF; |
5040 |
19 Oct 18 |
nicklas |
58 |
} |
5040 |
19 Oct 18 |
nicklas |
59 |
|
5040 |
19 Oct 18 |
nicklas |
60 |
/** |
5050 |
24 Oct 18 |
nicklas |
Get the p-value to use when checking if the average mBAF for |
5050 |
24 Oct 18 |
nicklas |
a region is significantly different from the average mBAF of |
5050 |
24 Oct 18 |
nicklas |
normal samples. |
5050 |
24 Oct 18 |
nicklas |
64 |
*/ |
5050 |
24 Oct 18 |
nicklas |
65 |
public float getSignificantPVal() |
5050 |
24 Oct 18 |
nicklas |
66 |
{ |
5050 |
24 Oct 18 |
nicklas |
67 |
return significantPVal; |
5050 |
24 Oct 18 |
nicklas |
68 |
} |
5051 |
24 Oct 18 |
nicklas |
69 |
public void setSignificantPVal(float significantPVal) |
5050 |
24 Oct 18 |
nicklas |
70 |
{ |
5050 |
24 Oct 18 |
nicklas |
71 |
this.significantPVal = significantPVal; |
5050 |
24 Oct 18 |
nicklas |
72 |
} |
5050 |
24 Oct 18 |
nicklas |
73 |
|
5050 |
24 Oct 18 |
nicklas |
74 |
/** |
5051 |
24 Oct 18 |
nicklas |
Get the p-value to use when checking if the average mBAF for |
5051 |
24 Oct 18 |
nicklas |
a region is significantly different from the average mBAF of |
5051 |
24 Oct 18 |
nicklas |
normal samples. |
5051 |
24 Oct 18 |
nicklas |
78 |
*/ |
5051 |
24 Oct 18 |
nicklas |
79 |
public int getMinSnpCountForRegion() |
5051 |
24 Oct 18 |
nicklas |
80 |
{ |
5051 |
24 Oct 18 |
nicklas |
81 |
return minSnpCountForRegion; |
5051 |
24 Oct 18 |
nicklas |
82 |
} |
5051 |
24 Oct 18 |
nicklas |
83 |
public void setMinSnpCountForRegion(int minSnpCountForRegion) |
5051 |
24 Oct 18 |
nicklas |
84 |
{ |
5051 |
24 Oct 18 |
nicklas |
85 |
this.minSnpCountForRegion = minSnpCountForRegion; |
5051 |
24 Oct 18 |
nicklas |
86 |
} |
5051 |
24 Oct 18 |
nicklas |
87 |
|
5051 |
24 Oct 18 |
nicklas |
88 |
/** |
5040 |
19 Oct 18 |
nicklas |
Returns a filter implementation for accepting GtData |
5040 |
19 Oct 18 |
nicklas |
instances that should be used in mBAF statistics. |
5051 |
24 Oct 18 |
nicklas |
The default implemenation will accept SNPs with at |
5051 |
24 Oct 18 |
nicklas |
least {@link #getMinDP()} reads and with mBAF value lower |
5051 |
24 Oct 18 |
nicklas |
than {@link #getMaxMBaf()}. |
5040 |
19 Oct 18 |
nicklas |
94 |
*/ |
5051 |
24 Oct 18 |
nicklas |
95 |
public VcfDataFilter getVcfFilter() |
5040 |
19 Oct 18 |
nicklas |
96 |
{ |
5040 |
19 Oct 18 |
nicklas |
97 |
return new VcfDataFilter() |
5040 |
19 Oct 18 |
nicklas |
98 |
{ |
5040 |
19 Oct 18 |
nicklas |
99 |
@Override |
5040 |
19 Oct 18 |
nicklas |
100 |
public boolean accept(GtData gt, SnpData snp) |
5040 |
19 Oct 18 |
nicklas |
101 |
{ |
5040 |
19 Oct 18 |
nicklas |
102 |
return gt.getDP() >= minDP && gt.getMBaf() <= maxMBAF; |
5040 |
19 Oct 18 |
nicklas |
103 |
} |
5040 |
19 Oct 18 |
nicklas |
104 |
}; |
5040 |
19 Oct 18 |
nicklas |
105 |
} |
5040 |
19 Oct 18 |
nicklas |
106 |
|
5051 |
24 Oct 18 |
nicklas |
107 |
/** |
5051 |
24 Oct 18 |
nicklas |
Returns a filter implementation for accepting RegionStat |
5051 |
24 Oct 18 |
nicklas |
instances that should be used in mBAF statistics. The default |
5051 |
24 Oct 18 |
nicklas |
implementation will accept regions with at least |
5051 |
24 Oct 18 |
nicklas |
{@link #getMinSnpCountForRegion()} SNPs. |
5051 |
24 Oct 18 |
nicklas |
112 |
*/ |
5051 |
24 Oct 18 |
nicklas |
113 |
public Filter<RegionStat> getRegionFilter() |
5051 |
24 Oct 18 |
nicklas |
114 |
{ |
5051 |
24 Oct 18 |
nicklas |
115 |
return new Filter<RegionStat>() |
5051 |
24 Oct 18 |
nicklas |
116 |
{ |
5051 |
24 Oct 18 |
nicklas |
117 |
@Override |
5051 |
24 Oct 18 |
nicklas |
118 |
public boolean evaluate(RegionStat rs) |
5051 |
24 Oct 18 |
nicklas |
119 |
{ |
5051 |
24 Oct 18 |
nicklas |
120 |
return rs.getSnpCount() >= minSnpCountForRegion; |
5051 |
24 Oct 18 |
nicklas |
121 |
} |
5051 |
24 Oct 18 |
nicklas |
122 |
}; |
5051 |
24 Oct 18 |
nicklas |
123 |
} |
5051 |
24 Oct 18 |
nicklas |
124 |
|
5040 |
19 Oct 18 |
nicklas |
125 |
} |