4620 |
16 Nov 17 |
nicklas |
1 |
package net.sf.basedb.reggie.vcf; |
4620 |
16 Nov 17 |
nicklas |
2 |
|
4646 |
14 Dec 17 |
nicklas |
3 |
import java.io.Serializable; |
4646 |
14 Dec 17 |
nicklas |
4 |
|
4620 |
16 Nov 17 |
nicklas |
5 |
import org.json.simple.JSONObject; |
4620 |
16 Nov 17 |
nicklas |
6 |
|
5039 |
19 Oct 18 |
nicklas |
7 |
import net.sf.basedb.util.Values; |
5039 |
19 Oct 18 |
nicklas |
8 |
|
4620 |
16 Nov 17 |
nicklas |
9 |
/** |
4620 |
16 Nov 17 |
nicklas |
Holds genotype data for a single SNP and sample. |
4620 |
16 Nov 17 |
nicklas |
This is for data from the 'sample' column in the VCF file |
4620 |
16 Nov 17 |
nicklas |
and we provide support for the following properties |
4620 |
16 Nov 17 |
nicklas |
(specified in the FORMAT column): |
4620 |
16 Nov 17 |
nicklas |
14 |
|
4620 |
16 Nov 17 |
nicklas |
GT, DP, GQ, AD |
4620 |
16 Nov 17 |
nicklas |
16 |
|
4620 |
16 Nov 17 |
nicklas |
@author nicklas |
4620 |
16 Nov 17 |
nicklas |
@since 4.13 |
4620 |
16 Nov 17 |
nicklas |
19 |
*/ |
4620 |
16 Nov 17 |
nicklas |
20 |
public class GtData |
4646 |
14 Dec 17 |
nicklas |
21 |
implements Serializable |
4620 |
16 Nov 17 |
nicklas |
22 |
{ |
4646 |
14 Dec 17 |
nicklas |
23 |
|
4646 |
14 Dec 17 |
nicklas |
24 |
private static final long serialVersionUID = 6467840150789906833L; |
4646 |
14 Dec 17 |
nicklas |
25 |
|
4620 |
16 Nov 17 |
nicklas |
26 |
private final String id; |
7413 |
10 Nov 23 |
nicklas |
27 |
|
7413 |
10 Nov 23 |
nicklas |
28 |
private String filter; |
4620 |
16 Nov 17 |
nicklas |
29 |
private String gt; |
4620 |
16 Nov 17 |
nicklas |
30 |
private GenoType genoType; |
7414 |
10 Nov 23 |
nicklas |
31 |
private boolean phased; |
7414 |
10 Nov 23 |
nicklas |
32 |
private int ps; |
4620 |
16 Nov 17 |
nicklas |
33 |
|
4620 |
16 Nov 17 |
nicklas |
34 |
private int dp; |
6513 |
07 Dec 21 |
nicklas |
35 |
private float gq; |
4620 |
16 Nov 17 |
nicklas |
36 |
private String ad; |
6450 |
21 Oct 21 |
nicklas |
37 |
private float af; |
4620 |
16 Nov 17 |
nicklas |
38 |
|
5039 |
19 Oct 18 |
nicklas |
39 |
private int adRef; |
5039 |
19 Oct 18 |
nicklas |
40 |
private int adAlt; |
5039 |
19 Oct 18 |
nicklas |
41 |
|
4620 |
16 Nov 17 |
nicklas |
42 |
/** |
4620 |
16 Nov 17 |
nicklas |
Create a new data instance for the SNP with the given rsId. |
4620 |
16 Nov 17 |
nicklas |
44 |
*/ |
4620 |
16 Nov 17 |
nicklas |
45 |
public GtData(String id) |
4620 |
16 Nov 17 |
nicklas |
46 |
{ |
4620 |
16 Nov 17 |
nicklas |
47 |
this.id = id; |
4620 |
16 Nov 17 |
nicklas |
48 |
} |
4620 |
16 Nov 17 |
nicklas |
49 |
|
4620 |
16 Nov 17 |
nicklas |
50 |
/** |
4620 |
16 Nov 17 |
nicklas |
Get the rsId for the genotype data. |
4620 |
16 Nov 17 |
nicklas |
52 |
*/ |
4620 |
16 Nov 17 |
nicklas |
53 |
public String getId() |
4620 |
16 Nov 17 |
nicklas |
54 |
{ |
4620 |
16 Nov 17 |
nicklas |
55 |
return id; |
4620 |
16 Nov 17 |
nicklas |
56 |
} |
4620 |
16 Nov 17 |
nicklas |
57 |
|
7413 |
10 Nov 23 |
nicklas |
58 |
|
4620 |
16 Nov 17 |
nicklas |
59 |
/** |
7413 |
10 Nov 23 |
nicklas |
Get the filter flag. |
7413 |
10 Nov 23 |
nicklas |
@since 4.50 |
7413 |
10 Nov 23 |
nicklas |
62 |
*/ |
7413 |
10 Nov 23 |
nicklas |
63 |
public String getFilter() |
7413 |
10 Nov 23 |
nicklas |
64 |
{ |
7413 |
10 Nov 23 |
nicklas |
65 |
return filter; |
7413 |
10 Nov 23 |
nicklas |
66 |
} |
7413 |
10 Nov 23 |
nicklas |
67 |
|
7413 |
10 Nov 23 |
nicklas |
68 |
/** |
7413 |
10 Nov 23 |
nicklas |
Set the filter flag. |
7413 |
10 Nov 23 |
nicklas |
@since 4.50 |
7413 |
10 Nov 23 |
nicklas |
71 |
*/ |
7413 |
10 Nov 23 |
nicklas |
72 |
public void setFilter(String filter) |
7413 |
10 Nov 23 |
nicklas |
73 |
{ |
7413 |
10 Nov 23 |
nicklas |
74 |
this.filter = filter; |
7413 |
10 Nov 23 |
nicklas |
75 |
} |
7413 |
10 Nov 23 |
nicklas |
76 |
|
7413 |
10 Nov 23 |
nicklas |
77 |
/** |
4620 |
16 Nov 17 |
nicklas |
Set the genotype string. It will be parsed by the |
4620 |
16 Nov 17 |
nicklas |
{@link GenoType#fromString(String)} method. |
4620 |
16 Nov 17 |
nicklas |
If {@link #getGenoType()} return null, the GT string |
4620 |
16 Nov 17 |
nicklas |
is not a valid genotype. |
4620 |
16 Nov 17 |
nicklas |
82 |
*/ |
4620 |
16 Nov 17 |
nicklas |
83 |
public void setGT(String gt) |
4620 |
16 Nov 17 |
nicklas |
84 |
{ |
4620 |
16 Nov 17 |
nicklas |
85 |
this.gt = gt; |
4620 |
16 Nov 17 |
nicklas |
86 |
this.genoType = GenoType.fromString(gt); |
7414 |
10 Nov 23 |
nicklas |
87 |
this.phased = gt != null && gt.contains("|"); |
4620 |
16 Nov 17 |
nicklas |
88 |
} |
4620 |
16 Nov 17 |
nicklas |
89 |
|
4620 |
16 Nov 17 |
nicklas |
90 |
/** |
4620 |
16 Nov 17 |
nicklas |
Get the genotype string. |
4620 |
16 Nov 17 |
nicklas |
92 |
*/ |
4620 |
16 Nov 17 |
nicklas |
93 |
public String getGT() |
4620 |
16 Nov 17 |
nicklas |
94 |
{ |
4620 |
16 Nov 17 |
nicklas |
95 |
return gt; |
4620 |
16 Nov 17 |
nicklas |
96 |
} |
4620 |
16 Nov 17 |
nicklas |
97 |
|
4620 |
16 Nov 17 |
nicklas |
98 |
/** |
4620 |
16 Nov 17 |
nicklas |
Get the genotype as parsed from the GT string. |
4620 |
16 Nov 17 |
nicklas |
100 |
*/ |
4620 |
16 Nov 17 |
nicklas |
101 |
public GenoType getGenoType() |
4620 |
16 Nov 17 |
nicklas |
102 |
{ |
4620 |
16 Nov 17 |
nicklas |
103 |
return genoType; |
4620 |
16 Nov 17 |
nicklas |
104 |
} |
4620 |
16 Nov 17 |
nicklas |
105 |
|
4620 |
16 Nov 17 |
nicklas |
106 |
/** |
7414 |
10 Nov 23 |
nicklas |
Is the genotype phased or not? |
7414 |
10 Nov 23 |
nicklas |
@since 4.50 |
7414 |
10 Nov 23 |
nicklas |
109 |
*/ |
7414 |
10 Nov 23 |
nicklas |
110 |
public boolean isPhased() |
7414 |
10 Nov 23 |
nicklas |
111 |
{ |
7414 |
10 Nov 23 |
nicklas |
112 |
return phased; |
7414 |
10 Nov 23 |
nicklas |
113 |
} |
7414 |
10 Nov 23 |
nicklas |
114 |
|
7414 |
10 Nov 23 |
nicklas |
115 |
/** |
7414 |
10 Nov 23 |
nicklas |
Set the phase set identifier. -1 if the genotype is not phased. |
7414 |
10 Nov 23 |
nicklas |
@since 4.50 |
7414 |
10 Nov 23 |
nicklas |
118 |
*/ |
7414 |
10 Nov 23 |
nicklas |
119 |
public void setPS(int ps) |
7414 |
10 Nov 23 |
nicklas |
120 |
{ |
7414 |
10 Nov 23 |
nicklas |
121 |
this.ps = ps; |
7414 |
10 Nov 23 |
nicklas |
122 |
phased = ps > 0; |
7414 |
10 Nov 23 |
nicklas |
123 |
} |
7414 |
10 Nov 23 |
nicklas |
124 |
|
7414 |
10 Nov 23 |
nicklas |
125 |
/** |
7414 |
10 Nov 23 |
nicklas |
Get the phase set identifier. -1 if the genotype is not phased. |
7414 |
10 Nov 23 |
nicklas |
@since 4.50 |
7414 |
10 Nov 23 |
nicklas |
128 |
*/ |
7414 |
10 Nov 23 |
nicklas |
129 |
public int getPS() |
7414 |
10 Nov 23 |
nicklas |
130 |
{ |
7414 |
10 Nov 23 |
nicklas |
131 |
return ps; |
7414 |
10 Nov 23 |
nicklas |
132 |
} |
7414 |
10 Nov 23 |
nicklas |
133 |
|
7414 |
10 Nov 23 |
nicklas |
134 |
/** |
4620 |
16 Nov 17 |
nicklas |
Set the "Genotype quality" value. |
4620 |
16 Nov 17 |
nicklas |
136 |
*/ |
6513 |
07 Dec 21 |
nicklas |
137 |
public void setGQ(float gq) |
4620 |
16 Nov 17 |
nicklas |
138 |
{ |
4620 |
16 Nov 17 |
nicklas |
139 |
this.gq = gq; |
4620 |
16 Nov 17 |
nicklas |
140 |
} |
4620 |
16 Nov 17 |
nicklas |
141 |
|
4620 |
16 Nov 17 |
nicklas |
142 |
/** |
4620 |
16 Nov 17 |
nicklas |
Get the "Genotype quality" value. |
4620 |
16 Nov 17 |
nicklas |
144 |
*/ |
6513 |
07 Dec 21 |
nicklas |
145 |
public float getGQ() |
4620 |
16 Nov 17 |
nicklas |
146 |
{ |
4620 |
16 Nov 17 |
nicklas |
147 |
return gq; |
4620 |
16 Nov 17 |
nicklas |
148 |
} |
4620 |
16 Nov 17 |
nicklas |
149 |
|
4620 |
16 Nov 17 |
nicklas |
150 |
/** |
4620 |
16 Nov 17 |
nicklas |
Set the read depth value. |
4620 |
16 Nov 17 |
nicklas |
152 |
*/ |
4620 |
16 Nov 17 |
nicklas |
153 |
public void setDP(int dp) |
4620 |
16 Nov 17 |
nicklas |
154 |
{ |
4620 |
16 Nov 17 |
nicklas |
155 |
this.dp = dp; |
4620 |
16 Nov 17 |
nicklas |
156 |
} |
4620 |
16 Nov 17 |
nicklas |
157 |
|
4620 |
16 Nov 17 |
nicklas |
158 |
/** |
4620 |
16 Nov 17 |
nicklas |
Get the read depth value. |
4620 |
16 Nov 17 |
nicklas |
160 |
*/ |
4620 |
16 Nov 17 |
nicklas |
161 |
public int getDP() |
4620 |
16 Nov 17 |
nicklas |
162 |
{ |
4620 |
16 Nov 17 |
nicklas |
163 |
return dp; |
4620 |
16 Nov 17 |
nicklas |
164 |
} |
4620 |
16 Nov 17 |
nicklas |
165 |
|
4620 |
16 Nov 17 |
nicklas |
166 |
/** |
4620 |
16 Nov 17 |
nicklas |
Set the allelic depths for the ref and alt alleles. |
4620 |
16 Nov 17 |
nicklas |
168 |
*/ |
4620 |
16 Nov 17 |
nicklas |
169 |
public void setAD(String ad) |
4620 |
16 Nov 17 |
nicklas |
170 |
{ |
4620 |
16 Nov 17 |
nicklas |
171 |
this.ad = ad; |
5039 |
19 Oct 18 |
nicklas |
172 |
String[] tmp = ad.split(","); |
5039 |
19 Oct 18 |
nicklas |
173 |
if (tmp.length == 2) |
5039 |
19 Oct 18 |
nicklas |
174 |
{ |
5039 |
19 Oct 18 |
nicklas |
175 |
this.adRef = Values.getInt(tmp[0]); |
5039 |
19 Oct 18 |
nicklas |
176 |
this.adAlt = Values.getInt(tmp[1]); |
5039 |
19 Oct 18 |
nicklas |
177 |
} |
4620 |
16 Nov 17 |
nicklas |
178 |
} |
4620 |
16 Nov 17 |
nicklas |
179 |
|
4620 |
16 Nov 17 |
nicklas |
180 |
/** |
4620 |
16 Nov 17 |
nicklas |
Get the allelic depths for the ref and alt alleles. |
4620 |
16 Nov 17 |
nicklas |
182 |
*/ |
4620 |
16 Nov 17 |
nicklas |
183 |
public String getAD() |
4620 |
16 Nov 17 |
nicklas |
184 |
{ |
4620 |
16 Nov 17 |
nicklas |
185 |
return ad; |
4620 |
16 Nov 17 |
nicklas |
186 |
} |
4620 |
16 Nov 17 |
nicklas |
187 |
|
4620 |
16 Nov 17 |
nicklas |
188 |
/** |
5039 |
19 Oct 18 |
nicklas |
Get the number of reads for the reference allelle. |
5039 |
19 Oct 18 |
nicklas |
@since 4.20 |
5039 |
19 Oct 18 |
nicklas |
191 |
*/ |
5039 |
19 Oct 18 |
nicklas |
192 |
public int getADRef() |
5039 |
19 Oct 18 |
nicklas |
193 |
{ |
5039 |
19 Oct 18 |
nicklas |
194 |
return adRef; |
5039 |
19 Oct 18 |
nicklas |
195 |
} |
5039 |
19 Oct 18 |
nicklas |
196 |
|
5039 |
19 Oct 18 |
nicklas |
197 |
/** |
5039 |
19 Oct 18 |
nicklas |
Get the number of reads for the alternate allelle. |
5039 |
19 Oct 18 |
nicklas |
@since 4.20 |
5039 |
19 Oct 18 |
nicklas |
200 |
*/ |
5039 |
19 Oct 18 |
nicklas |
201 |
public int getADAlt() |
5039 |
19 Oct 18 |
nicklas |
202 |
{ |
5039 |
19 Oct 18 |
nicklas |
203 |
return adAlt; |
5039 |
19 Oct 18 |
nicklas |
204 |
} |
5039 |
19 Oct 18 |
nicklas |
205 |
|
5039 |
19 Oct 18 |
nicklas |
206 |
/** |
6450 |
21 Oct 21 |
nicklas |
Set the allele frequency of the alternate allele. |
6450 |
21 Oct 21 |
nicklas |
@since 4.34 |
6450 |
21 Oct 21 |
nicklas |
209 |
*/ |
6450 |
21 Oct 21 |
nicklas |
210 |
public void setAF(float af) |
6450 |
21 Oct 21 |
nicklas |
211 |
{ |
6450 |
21 Oct 21 |
nicklas |
212 |
this.af = af; |
6450 |
21 Oct 21 |
nicklas |
213 |
} |
6450 |
21 Oct 21 |
nicklas |
214 |
|
6450 |
21 Oct 21 |
nicklas |
215 |
/** |
6450 |
21 Oct 21 |
nicklas |
Get the allele frequency of the alternate allele. |
6450 |
21 Oct 21 |
nicklas |
@since 4.34 |
6450 |
21 Oct 21 |
nicklas |
218 |
*/ |
6450 |
21 Oct 21 |
nicklas |
219 |
public float getAF() |
6450 |
21 Oct 21 |
nicklas |
220 |
{ |
6450 |
21 Oct 21 |
nicklas |
221 |
return af; |
6450 |
21 Oct 21 |
nicklas |
222 |
} |
6450 |
21 Oct 21 |
nicklas |
223 |
|
6450 |
21 Oct 21 |
nicklas |
224 |
/** |
5039 |
19 Oct 18 |
nicklas |
Get the mBAF value: max(ADRef, ADAlt) / DP |
5039 |
19 Oct 18 |
nicklas |
@since 4.20 |
5039 |
19 Oct 18 |
nicklas |
227 |
*/ |
5039 |
19 Oct 18 |
nicklas |
228 |
public float getMBaf() |
5039 |
19 Oct 18 |
nicklas |
229 |
{ |
5039 |
19 Oct 18 |
nicklas |
230 |
return dp > 0 ? (float)Math.max(adRef, adAlt) / (float)dp : Float.NaN; |
5039 |
19 Oct 18 |
nicklas |
231 |
} |
5039 |
19 Oct 18 |
nicklas |
232 |
|
5039 |
19 Oct 18 |
nicklas |
233 |
/** |
4620 |
16 Nov 17 |
nicklas |
Get all information as a JSON object. A new JSON object |
4620 |
16 Nov 17 |
nicklas |
is created each time this method is called. |
4620 |
16 Nov 17 |
nicklas |
236 |
*/ |
6513 |
07 Dec 21 |
nicklas |
237 |
public JSONObject asJSONObject(GenotypeQualityModel qualityModel) |
4620 |
16 Nov 17 |
nicklas |
238 |
{ |
4620 |
16 Nov 17 |
nicklas |
239 |
JSONObject json = new JSONObject(); |
4620 |
16 Nov 17 |
nicklas |
240 |
json.put("id", getId()); |
4620 |
16 Nov 17 |
nicklas |
241 |
json.put("GT", getGT()); |
4620 |
16 Nov 17 |
nicklas |
242 |
json.put("genoType", getGenoType().name()); |
7414 |
10 Nov 23 |
nicklas |
243 |
if (isPhased()) json.put("PS", getPS()); |
4620 |
16 Nov 17 |
nicklas |
244 |
json.put("GQ", getGQ()); |
4620 |
16 Nov 17 |
nicklas |
245 |
json.put("DP", getDP()); |
4620 |
16 Nov 17 |
nicklas |
246 |
json.put("AD", getAD()); |
6450 |
21 Oct 21 |
nicklas |
247 |
json.put("AF", getAF()); |
6513 |
07 Dec 21 |
nicklas |
248 |
if (qualityModel != null) |
6513 |
07 Dec 21 |
nicklas |
249 |
{ |
6513 |
07 Dec 21 |
nicklas |
250 |
if (qualityModel.isHigh(getGQ())) |
6513 |
07 Dec 21 |
nicklas |
251 |
{ |
6513 |
07 Dec 21 |
nicklas |
252 |
json.put("isHighQuality", 1); |
6513 |
07 Dec 21 |
nicklas |
253 |
} |
6513 |
07 Dec 21 |
nicklas |
254 |
else if (qualityModel.isLow(getGQ())) |
6513 |
07 Dec 21 |
nicklas |
255 |
{ |
6513 |
07 Dec 21 |
nicklas |
256 |
json.put("isLowQuality", 1); |
6513 |
07 Dec 21 |
nicklas |
257 |
} |
6513 |
07 Dec 21 |
nicklas |
258 |
} |
4620 |
16 Nov 17 |
nicklas |
259 |
return json; |
4620 |
16 Nov 17 |
nicklas |
260 |
} |
4620 |
16 Nov 17 |
nicklas |
261 |
|
4620 |
16 Nov 17 |
nicklas |
262 |
} |