4620 |
16 Nov 17 |
nicklas |
1 |
package net.sf.basedb.reggie.vcf; |
4620 |
16 Nov 17 |
nicklas |
2 |
|
4620 |
16 Nov 17 |
nicklas |
3 |
import java.util.HashMap; |
4620 |
16 Nov 17 |
nicklas |
4 |
import java.util.Map; |
4620 |
16 Nov 17 |
nicklas |
5 |
|
4620 |
16 Nov 17 |
nicklas |
6 |
import net.sf.basedb.util.Values; |
4620 |
16 Nov 17 |
nicklas |
7 |
|
4620 |
16 Nov 17 |
nicklas |
8 |
|
4620 |
16 Nov 17 |
nicklas |
9 |
/** |
4620 |
16 Nov 17 |
nicklas |
A helper class for extracting information from the |
4620 |
16 Nov 17 |
nicklas |
sample column with a known given FORMAT value. |
4620 |
16 Nov 17 |
nicklas |
We provide support for the following FORMAT properties: |
4620 |
16 Nov 17 |
nicklas |
13 |
|
4620 |
16 Nov 17 |
nicklas |
GT, DP, GQ, AD |
4620 |
16 Nov 17 |
nicklas |
15 |
|
4620 |
16 Nov 17 |
nicklas |
@author nicklas |
4620 |
16 Nov 17 |
nicklas |
@since 4.13 |
4620 |
16 Nov 17 |
nicklas |
18 |
*/ |
4620 |
16 Nov 17 |
nicklas |
19 |
public class FormatFactory |
4620 |
16 Nov 17 |
nicklas |
20 |
{ |
4620 |
16 Nov 17 |
nicklas |
21 |
|
4620 |
16 Nov 17 |
nicklas |
22 |
private final Map<String, Format> formats; |
4620 |
16 Nov 17 |
nicklas |
23 |
|
4620 |
16 Nov 17 |
nicklas |
24 |
/** |
4620 |
16 Nov 17 |
nicklas |
Create a new format factory instance. |
4620 |
16 Nov 17 |
nicklas |
Use the {@link #getFormat(String)} method to |
4620 |
16 Nov 17 |
nicklas |
get actual format instances. |
4620 |
16 Nov 17 |
nicklas |
28 |
*/ |
4620 |
16 Nov 17 |
nicklas |
29 |
public FormatFactory() |
4620 |
16 Nov 17 |
nicklas |
30 |
{ |
4620 |
16 Nov 17 |
nicklas |
31 |
this.formats = new HashMap<>(); |
4620 |
16 Nov 17 |
nicklas |
32 |
} |
4620 |
16 Nov 17 |
nicklas |
33 |
|
4620 |
16 Nov 17 |
nicklas |
34 |
/** |
4620 |
16 Nov 17 |
nicklas |
Get a Format instance for the given format. |
4620 |
16 Nov 17 |
nicklas |
If an existing format exists it is re-used, |
4620 |
16 Nov 17 |
nicklas |
otherwise a new instance is created. |
4620 |
16 Nov 17 |
nicklas |
@param format A string with colon-separated format codes |
4620 |
16 Nov 17 |
nicklas |
39 |
*/ |
6455 |
25 Oct 21 |
nicklas |
40 |
public Format getFormat(String format, boolean swapRefAlt) |
4620 |
16 Nov 17 |
nicklas |
41 |
{ |
6455 |
25 Oct 21 |
nicklas |
42 |
Format f = formats.get(format+":"+swapRefAlt); |
4620 |
16 Nov 17 |
nicklas |
43 |
if (f == null) |
4620 |
16 Nov 17 |
nicklas |
44 |
{ |
6455 |
25 Oct 21 |
nicklas |
45 |
f = new Format(format, swapRefAlt); |
6455 |
25 Oct 21 |
nicklas |
46 |
formats.put(format+":"+swapRefAlt, f); |
4620 |
16 Nov 17 |
nicklas |
47 |
} |
4620 |
16 Nov 17 |
nicklas |
48 |
return f; |
4620 |
16 Nov 17 |
nicklas |
49 |
} |
4620 |
16 Nov 17 |
nicklas |
50 |
|
4620 |
16 Nov 17 |
nicklas |
51 |
public static class Format |
4620 |
16 Nov 17 |
nicklas |
52 |
{ |
4620 |
16 Nov 17 |
nicklas |
// Number of data columns |
6455 |
25 Oct 21 |
nicklas |
54 |
private boolean swapRefAlt; |
4620 |
16 Nov 17 |
nicklas |
55 |
private int size; |
4620 |
16 Nov 17 |
nicklas |
// Column indexes |
6450 |
21 Oct 21 |
nicklas |
57 |
private int idxGT = -1; |
7414 |
10 Nov 23 |
nicklas |
58 |
private int idxPS = -1; |
6450 |
21 Oct 21 |
nicklas |
59 |
private int idxDP = -1; |
6450 |
21 Oct 21 |
nicklas |
60 |
private int idxGQ = -1; |
6450 |
21 Oct 21 |
nicklas |
61 |
private int idxAD = -1; |
6450 |
21 Oct 21 |
nicklas |
62 |
private int idxBAF = -1; |
4620 |
16 Nov 17 |
nicklas |
63 |
|
6455 |
25 Oct 21 |
nicklas |
64 |
Format(String format, boolean swapRefAlt) |
4620 |
16 Nov 17 |
nicklas |
65 |
{ |
6455 |
25 Oct 21 |
nicklas |
66 |
this.swapRefAlt = swapRefAlt; |
4620 |
16 Nov 17 |
nicklas |
67 |
String[] tmp = format.split(":"); |
6455 |
25 Oct 21 |
nicklas |
68 |
this.size = tmp.length; |
4620 |
16 Nov 17 |
nicklas |
69 |
|
4620 |
16 Nov 17 |
nicklas |
70 |
for (int i = 0; i < tmp.length; i++) |
4620 |
16 Nov 17 |
nicklas |
71 |
{ |
4620 |
16 Nov 17 |
nicklas |
72 |
String fmt = tmp[i]; |
4620 |
16 Nov 17 |
nicklas |
73 |
if ("GT".equals(fmt)) |
4620 |
16 Nov 17 |
nicklas |
74 |
{ |
4620 |
16 Nov 17 |
nicklas |
75 |
idxGT = i; |
4620 |
16 Nov 17 |
nicklas |
76 |
} |
4620 |
16 Nov 17 |
nicklas |
77 |
else if ("DP".equals(fmt)) |
4620 |
16 Nov 17 |
nicklas |
78 |
{ |
4620 |
16 Nov 17 |
nicklas |
79 |
idxDP = i; |
4620 |
16 Nov 17 |
nicklas |
80 |
} |
4620 |
16 Nov 17 |
nicklas |
81 |
else if ("GQ".equals(fmt)) |
4620 |
16 Nov 17 |
nicklas |
82 |
{ |
4620 |
16 Nov 17 |
nicklas |
83 |
idxGQ = i; |
4620 |
16 Nov 17 |
nicklas |
84 |
} |
4620 |
16 Nov 17 |
nicklas |
85 |
else if ("AD".equals(fmt)) |
4620 |
16 Nov 17 |
nicklas |
86 |
{ |
4620 |
16 Nov 17 |
nicklas |
87 |
idxAD = i; |
4620 |
16 Nov 17 |
nicklas |
88 |
} |
6450 |
21 Oct 21 |
nicklas |
89 |
else if ("BAF".equals(fmt)) |
6450 |
21 Oct 21 |
nicklas |
90 |
{ |
6450 |
21 Oct 21 |
nicklas |
91 |
idxBAF = i; |
6450 |
21 Oct 21 |
nicklas |
92 |
} |
7414 |
10 Nov 23 |
nicklas |
93 |
else if ("PS".equals(fmt)) |
7414 |
10 Nov 23 |
nicklas |
94 |
{ |
7414 |
10 Nov 23 |
nicklas |
95 |
idxPS = i; |
7414 |
10 Nov 23 |
nicklas |
96 |
} |
4620 |
16 Nov 17 |
nicklas |
97 |
} |
4620 |
16 Nov 17 |
nicklas |
98 |
} |
4620 |
16 Nov 17 |
nicklas |
99 |
|
4620 |
16 Nov 17 |
nicklas |
100 |
/** |
4620 |
16 Nov 17 |
nicklas |
Extracts data from the 'data' string to |
4620 |
16 Nov 17 |
nicklas |
the 'GtData' instance. |
4620 |
16 Nov 17 |
nicklas |
@returns The 'gt' instance |
4620 |
16 Nov 17 |
nicklas |
104 |
*/ |
4620 |
16 Nov 17 |
nicklas |
105 |
public GtData extractTo(String data, GtData gt) |
4620 |
16 Nov 17 |
nicklas |
106 |
{ |
4620 |
16 Nov 17 |
nicklas |
107 |
if (data != null) |
4620 |
16 Nov 17 |
nicklas |
108 |
{ |
4620 |
16 Nov 17 |
nicklas |
109 |
String[] tmp = data.split(":"); |
4620 |
16 Nov 17 |
nicklas |
110 |
if (tmp.length == size) |
4620 |
16 Nov 17 |
nicklas |
111 |
{ |
6455 |
25 Oct 21 |
nicklas |
112 |
gt.setGT(idxGT == -1 ? "./." : swapRefAlt ? swapGT(tmp[idxGT]) : tmp[idxGT]); |
7414 |
10 Nov 23 |
nicklas |
113 |
gt.setPS(idxPS == -1 ? -1 : Values.getInt(tmp[idxPS])); |
6450 |
21 Oct 21 |
nicklas |
114 |
gt.setDP(idxDP == -1 ? -1 : Values.getInt(tmp[idxDP])); |
6513 |
07 Dec 21 |
nicklas |
115 |
gt.setGQ(idxGQ == -1 ? Float.NaN : Values.getFloat(tmp[idxGQ])); |
6455 |
25 Oct 21 |
nicklas |
116 |
gt.setAD(idxAD == -1 ? "" : swapRefAlt ? swapAD(tmp[idxAD]) : tmp[idxAD]); |
6455 |
25 Oct 21 |
nicklas |
117 |
gt.setAF(idxBAF == -1 ? Float.NaN : swapRefAlt ? 1-Values.getFloat(tmp[idxBAF]) : Values.getFloat(tmp[idxBAF])); |
4620 |
16 Nov 17 |
nicklas |
118 |
} |
4620 |
16 Nov 17 |
nicklas |
119 |
} |
4620 |
16 Nov 17 |
nicklas |
120 |
return gt; |
4620 |
16 Nov 17 |
nicklas |
121 |
} |
4620 |
16 Nov 17 |
nicklas |
122 |
|
6455 |
25 Oct 21 |
nicklas |
123 |
private String swapGT(String val) |
6455 |
25 Oct 21 |
nicklas |
124 |
{ |
6455 |
25 Oct 21 |
nicklas |
125 |
if ("0/0".equals(val)) |
6455 |
25 Oct 21 |
nicklas |
126 |
{ |
6455 |
25 Oct 21 |
nicklas |
127 |
val = "1/1"; |
6455 |
25 Oct 21 |
nicklas |
128 |
} |
6455 |
25 Oct 21 |
nicklas |
129 |
else if ("1/1".equals(val)) |
6455 |
25 Oct 21 |
nicklas |
130 |
{ |
6455 |
25 Oct 21 |
nicklas |
131 |
val = "0/0"; |
6455 |
25 Oct 21 |
nicklas |
132 |
} |
6455 |
25 Oct 21 |
nicklas |
133 |
return val; |
6455 |
25 Oct 21 |
nicklas |
134 |
} |
6455 |
25 Oct 21 |
nicklas |
135 |
|
6455 |
25 Oct 21 |
nicklas |
136 |
private String swapAD(String val) |
6455 |
25 Oct 21 |
nicklas |
137 |
{ |
6455 |
25 Oct 21 |
nicklas |
138 |
int i = val.indexOf(','); |
6455 |
25 Oct 21 |
nicklas |
139 |
return val.substring(i+1)+','+val.substring(0, i); |
6455 |
25 Oct 21 |
nicklas |
140 |
} |
4620 |
16 Nov 17 |
nicklas |
141 |
} |
4620 |
16 Nov 17 |
nicklas |
142 |
} |