6170 |
17 Mar 21 |
nicklas |
1 |
package net.sf.basedb.varsearch.vcf; |
6170 |
17 Mar 21 |
nicklas |
2 |
|
6170 |
17 Mar 21 |
nicklas |
3 |
import java.io.BufferedReader; |
6170 |
17 Mar 21 |
nicklas |
4 |
import java.io.Closeable; |
6170 |
17 Mar 21 |
nicklas |
5 |
import java.io.IOException; |
6170 |
17 Mar 21 |
nicklas |
6 |
import java.io.InputStreamReader; |
6376 |
07 Sep 21 |
nicklas |
7 |
import java.util.ArrayList; |
6376 |
07 Sep 21 |
nicklas |
8 |
import java.util.List; |
6170 |
17 Mar 21 |
nicklas |
9 |
import java.util.regex.Matcher; |
6170 |
17 Mar 21 |
nicklas |
10 |
import java.util.regex.Pattern; |
6170 |
17 Mar 21 |
nicklas |
11 |
|
7303 |
28 Aug 23 |
nicklas |
12 |
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; |
7303 |
28 Aug 23 |
nicklas |
13 |
|
6170 |
17 Mar 21 |
nicklas |
14 |
import net.sf.basedb.core.File; |
6170 |
17 Mar 21 |
nicklas |
15 |
import net.sf.basedb.util.FileUtil; |
6556 |
28 Jan 22 |
nicklas |
16 |
import net.sf.basedb.util.InputStreamTracker; |
6376 |
07 Sep 21 |
nicklas |
17 |
import net.sf.basedb.util.Values; |
6551 |
26 Jan 22 |
nicklas |
18 |
import net.sf.basedb.util.filter.Filter; |
6170 |
17 Mar 21 |
nicklas |
19 |
|
6170 |
17 Mar 21 |
nicklas |
20 |
/** |
6170 |
17 Mar 21 |
nicklas |
Parser for VCF files. |
6170 |
17 Mar 21 |
nicklas |
22 |
*/ |
6170 |
17 Mar 21 |
nicklas |
23 |
public class VcfParser |
6170 |
17 Mar 21 |
nicklas |
24 |
implements Closeable |
6170 |
17 Mar 21 |
nicklas |
25 |
{ |
6170 |
17 Mar 21 |
nicklas |
26 |
|
7303 |
28 Aug 23 |
nicklas |
27 |
private final InputStreamTracker vcfIn; |
7303 |
28 Aug 23 |
nicklas |
28 |
private final BufferedReader vcfReader; |
6170 |
17 Mar 21 |
nicklas |
29 |
|
6170 |
17 Mar 21 |
nicklas |
30 |
private VcfHeader header; |
6170 |
17 Mar 21 |
nicklas |
31 |
private int lineNo; |
6170 |
17 Mar 21 |
nicklas |
32 |
private int infoCol; |
6170 |
17 Mar 21 |
nicklas |
33 |
private int formatCol; |
6170 |
17 Mar 21 |
nicklas |
34 |
|
6170 |
17 Mar 21 |
nicklas |
35 |
public VcfParser(File vcf) |
6170 |
17 Mar 21 |
nicklas |
36 |
throws IOException |
6170 |
17 Mar 21 |
nicklas |
37 |
{ |
6556 |
28 Jan 22 |
nicklas |
38 |
vcfIn = new InputStreamTracker(vcf.getDownloadStream(0)); |
6556 |
28 Jan 22 |
nicklas |
39 |
vcfReader = new BufferedReader(new InputStreamReader( |
7303 |
28 Aug 23 |
nicklas |
40 |
vcf.getName().endsWith(".gz") ? new GzipCompressorInputStream(vcfIn, true) : vcfIn)); |
6170 |
17 Mar 21 |
nicklas |
41 |
} |
6170 |
17 Mar 21 |
nicklas |
42 |
|
6556 |
28 Jan 22 |
nicklas |
43 |
/** |
6556 |
28 Jan 22 |
nicklas |
Get the number of bytes that has been parsed from the VCF file so far. |
6556 |
28 Jan 22 |
nicklas |
@since 1.5 |
6556 |
28 Jan 22 |
nicklas |
46 |
*/ |
6556 |
28 Jan 22 |
nicklas |
47 |
public long getParsedBytes() |
6556 |
28 Jan 22 |
nicklas |
48 |
{ |
6556 |
28 Jan 22 |
nicklas |
49 |
return vcfIn.getNumRead(); |
6556 |
28 Jan 22 |
nicklas |
50 |
} |
6556 |
28 Jan 22 |
nicklas |
51 |
|
6170 |
17 Mar 21 |
nicklas |
52 |
@Override |
6170 |
17 Mar 21 |
nicklas |
53 |
public void close() |
6170 |
17 Mar 21 |
nicklas |
54 |
{ |
6170 |
17 Mar 21 |
nicklas |
55 |
FileUtil.close(vcfReader); |
6170 |
17 Mar 21 |
nicklas |
56 |
FileUtil.close(vcfIn); |
6170 |
17 Mar 21 |
nicklas |
57 |
} |
6170 |
17 Mar 21 |
nicklas |
58 |
|
6170 |
17 Mar 21 |
nicklas |
59 |
/** |
6170 |
17 Mar 21 |
nicklas |
Parse the file until the header line is found. |
6170 |
17 Mar 21 |
nicklas |
@return A VcfHeader instance or null if not header is found |
6170 |
17 Mar 21 |
nicklas |
62 |
*/ |
6170 |
17 Mar 21 |
nicklas |
63 |
public VcfHeader parseHeaders() |
6170 |
17 Mar 21 |
nicklas |
64 |
throws IOException |
6170 |
17 Mar 21 |
nicklas |
65 |
{ |
6170 |
17 Mar 21 |
nicklas |
// Read until we find #CHROM |
6170 |
17 Mar 21 |
nicklas |
67 |
String line; |
6170 |
17 Mar 21 |
nicklas |
68 |
String annHeader = null; |
6170 |
17 Mar 21 |
nicklas |
69 |
do |
6170 |
17 Mar 21 |
nicklas |
70 |
{ |
6170 |
17 Mar 21 |
nicklas |
71 |
line = vcfReader.readLine(); |
6170 |
17 Mar 21 |
nicklas |
72 |
if (line != null && line.contains("ID=ANN")) annHeader = line; |
6170 |
17 Mar 21 |
nicklas |
73 |
} while (line != null && !line.startsWith("#CHROM")); |
6170 |
17 Mar 21 |
nicklas |
74 |
|
6170 |
17 Mar 21 |
nicklas |
75 |
if (line != null) |
6170 |
17 Mar 21 |
nicklas |
76 |
{ |
6170 |
17 Mar 21 |
nicklas |
77 |
header = new VcfHeader(line, annHeader); |
6170 |
17 Mar 21 |
nicklas |
78 |
lineNo = 0; |
6170 |
17 Mar 21 |
nicklas |
79 |
infoCol = header.indexOf("INFO"); |
6170 |
17 Mar 21 |
nicklas |
80 |
formatCol = header.indexOf("FORMAT"); |
6170 |
17 Mar 21 |
nicklas |
81 |
} |
6170 |
17 Mar 21 |
nicklas |
82 |
|
6170 |
17 Mar 21 |
nicklas |
83 |
return header; |
6170 |
17 Mar 21 |
nicklas |
84 |
} |
6170 |
17 Mar 21 |
nicklas |
85 |
|
6170 |
17 Mar 21 |
nicklas |
86 |
/** |
6170 |
17 Mar 21 |
nicklas |
Parse the next line of data. |
6170 |
17 Mar 21 |
nicklas |
@return A VcfLine instance or null if there are no more data |
6170 |
17 Mar 21 |
nicklas |
89 |
*/ |
6170 |
17 Mar 21 |
nicklas |
90 |
public VcfLine nextLine() |
6170 |
17 Mar 21 |
nicklas |
91 |
throws IOException |
6170 |
17 Mar 21 |
nicklas |
92 |
{ |
6170 |
17 Mar 21 |
nicklas |
93 |
|
6170 |
17 Mar 21 |
nicklas |
94 |
String line = vcfReader.readLine(); |
6170 |
17 Mar 21 |
nicklas |
95 |
if (line == null) return null; |
6170 |
17 Mar 21 |
nicklas |
96 |
|
6170 |
17 Mar 21 |
nicklas |
97 |
lineNo++; |
6170 |
17 Mar 21 |
nicklas |
98 |
VcfLine variant = new VcfLine(lineNo, line, infoCol, formatCol); |
6170 |
17 Mar 21 |
nicklas |
99 |
|
6170 |
17 Mar 21 |
nicklas |
100 |
return variant; |
6170 |
17 Mar 21 |
nicklas |
101 |
} |
6170 |
17 Mar 21 |
nicklas |
102 |
|
6170 |
17 Mar 21 |
nicklas |
103 |
/** |
6551 |
26 Jan 22 |
nicklas |
Parse the VCF file until a line that is matching the filter is found. |
6170 |
17 Mar 21 |
nicklas |
@return A VcfLine instance or null if the line doesn't exists |
6170 |
17 Mar 21 |
nicklas |
106 |
*/ |
6551 |
26 Jan 22 |
nicklas |
107 |
public VcfLine parseToLine(Filter<VcfLine> toLine) |
6170 |
17 Mar 21 |
nicklas |
108 |
throws IOException |
6170 |
17 Mar 21 |
nicklas |
109 |
{ |
6170 |
17 Mar 21 |
nicklas |
110 |
VcfLine line = null; |
6170 |
17 Mar 21 |
nicklas |
111 |
do |
6170 |
17 Mar 21 |
nicklas |
112 |
{ |
6170 |
17 Mar 21 |
nicklas |
113 |
line = nextLine(); |
6551 |
26 Jan 22 |
nicklas |
114 |
} while (line != null && !toLine.evaluate(line)); |
6170 |
17 Mar 21 |
nicklas |
115 |
return line; |
6170 |
17 Mar 21 |
nicklas |
116 |
} |
6170 |
17 Mar 21 |
nicklas |
117 |
|
6170 |
17 Mar 21 |
nicklas |
118 |
private static final Info[] EMPTY_INFO = new Info[0]; |
6170 |
17 Mar 21 |
nicklas |
119 |
private static final String[] EMPTY = new String[0]; |
6170 |
17 Mar 21 |
nicklas |
120 |
|
6170 |
17 Mar 21 |
nicklas |
121 |
/** |
6170 |
17 Mar 21 |
nicklas |
Hold information about the VCF header |
6170 |
17 Mar 21 |
nicklas |
123 |
*/ |
6170 |
17 Mar 21 |
nicklas |
124 |
public static class VcfHeader |
6170 |
17 Mar 21 |
nicklas |
125 |
{ |
6170 |
17 Mar 21 |
nicklas |
126 |
|
6170 |
17 Mar 21 |
nicklas |
127 |
private final String[] cols; |
6170 |
17 Mar 21 |
nicklas |
128 |
private String[] ann = EMPTY; |
6170 |
17 Mar 21 |
nicklas |
129 |
|
6170 |
17 Mar 21 |
nicklas |
130 |
/** |
6170 |
17 Mar 21 |
nicklas |
Create header information for the VCF. |
6170 |
17 Mar 21 |
nicklas |
@param header The header line in the VCF (starting with #CHROM) |
6170 |
17 Mar 21 |
nicklas |
@param annHeader The ANN header is needed to parse columns in the ANN annotation |
6170 |
17 Mar 21 |
nicklas |
134 |
*/ |
6170 |
17 Mar 21 |
nicklas |
135 |
VcfHeader(String header, String annHeader) |
6170 |
17 Mar 21 |
nicklas |
136 |
{ |
6170 |
17 Mar 21 |
nicklas |
137 |
this.cols = header.split("\\t"); |
6170 |
17 Mar 21 |
nicklas |
138 |
if (annHeader != null) |
6170 |
17 Mar 21 |
nicklas |
139 |
{ |
6170 |
17 Mar 21 |
nicklas |
140 |
Matcher m = Pattern.compile("Description=.*\\'(.*)\\'").matcher(annHeader); |
6170 |
17 Mar 21 |
nicklas |
141 |
if (m.find()) this.ann = m.group(1).split("\\s\\|\\s"); |
6170 |
17 Mar 21 |
nicklas |
142 |
} |
6170 |
17 Mar 21 |
nicklas |
143 |
} |
6170 |
17 Mar 21 |
nicklas |
144 |
|
6170 |
17 Mar 21 |
nicklas |
145 |
/** |
6174 |
19 Mar 21 |
nicklas |
Get all column headers. |
6174 |
19 Mar 21 |
nicklas |
147 |
*/ |
6174 |
19 Mar 21 |
nicklas |
148 |
public String[] headers() |
6174 |
19 Mar 21 |
nicklas |
149 |
{ |
6174 |
19 Mar 21 |
nicklas |
150 |
return cols; |
6174 |
19 Mar 21 |
nicklas |
151 |
} |
6174 |
19 Mar 21 |
nicklas |
152 |
|
6174 |
19 Mar 21 |
nicklas |
153 |
/** |
6170 |
17 Mar 21 |
nicklas |
Find the column index of the given header. |
6170 |
17 Mar 21 |
nicklas |
@return -1 if the header is not found |
6170 |
17 Mar 21 |
nicklas |
156 |
*/ |
6170 |
17 Mar 21 |
nicklas |
157 |
public int indexOf(String header) |
6170 |
17 Mar 21 |
nicklas |
158 |
{ |
6170 |
17 Mar 21 |
nicklas |
159 |
int idx = 0; |
6170 |
17 Mar 21 |
nicklas |
160 |
while (idx < cols.length) |
6170 |
17 Mar 21 |
nicklas |
161 |
{ |
6170 |
17 Mar 21 |
nicklas |
162 |
if (header.equals(cols[idx])) return idx; |
6170 |
17 Mar 21 |
nicklas |
163 |
idx++; |
6170 |
17 Mar 21 |
nicklas |
164 |
} |
6170 |
17 Mar 21 |
nicklas |
165 |
return -1; |
6170 |
17 Mar 21 |
nicklas |
166 |
} |
6170 |
17 Mar 21 |
nicklas |
167 |
|
6170 |
17 Mar 21 |
nicklas |
168 |
/** |
6174 |
19 Mar 21 |
nicklas |
Get all ANN column headers. |
6174 |
19 Mar 21 |
nicklas |
170 |
*/ |
6174 |
19 Mar 21 |
nicklas |
171 |
public String[] annHeaders() |
6174 |
19 Mar 21 |
nicklas |
172 |
{ |
6174 |
19 Mar 21 |
nicklas |
173 |
return ann; |
6174 |
19 Mar 21 |
nicklas |
174 |
} |
6174 |
19 Mar 21 |
nicklas |
175 |
|
6174 |
19 Mar 21 |
nicklas |
176 |
/** |
6170 |
17 Mar 21 |
nicklas |
Find the column index of the given ANN header. |
6170 |
17 Mar 21 |
nicklas |
@return -1 if the header is not found |
6170 |
17 Mar 21 |
nicklas |
179 |
*/ |
6170 |
17 Mar 21 |
nicklas |
180 |
public int annIndexOf(String header) |
6170 |
17 Mar 21 |
nicklas |
181 |
{ |
6170 |
17 Mar 21 |
nicklas |
182 |
int idx = 0; |
6170 |
17 Mar 21 |
nicklas |
183 |
while (idx < ann.length) |
6170 |
17 Mar 21 |
nicklas |
184 |
{ |
6170 |
17 Mar 21 |
nicklas |
185 |
if (header.equals(ann[idx])) return idx; |
6170 |
17 Mar 21 |
nicklas |
186 |
idx++; |
6170 |
17 Mar 21 |
nicklas |
187 |
} |
6170 |
17 Mar 21 |
nicklas |
188 |
return -1; |
6170 |
17 Mar 21 |
nicklas |
189 |
} |
6170 |
17 Mar 21 |
nicklas |
190 |
} |
6170 |
17 Mar 21 |
nicklas |
191 |
|
6170 |
17 Mar 21 |
nicklas |
192 |
/** |
6170 |
17 Mar 21 |
nicklas |
Hold information about a VCF data line |
6170 |
17 Mar 21 |
nicklas |
194 |
*/ |
6170 |
17 Mar 21 |
nicklas |
195 |
public static class VcfLine |
6170 |
17 Mar 21 |
nicklas |
196 |
{ |
6170 |
17 Mar 21 |
nicklas |
197 |
private final String line; |
6170 |
17 Mar 21 |
nicklas |
198 |
private final int lineNo; |
6170 |
17 Mar 21 |
nicklas |
199 |
private final String[] cols; |
6170 |
17 Mar 21 |
nicklas |
200 |
private Info[] info = EMPTY_INFO; |
6170 |
17 Mar 21 |
nicklas |
201 |
private String[] ann = EMPTY; |
6170 |
17 Mar 21 |
nicklas |
202 |
private Info[] format = EMPTY_INFO; |
6170 |
17 Mar 21 |
nicklas |
203 |
|
6170 |
17 Mar 21 |
nicklas |
204 |
/** |
6170 |
17 Mar 21 |
nicklas |
Create line information. |
6170 |
17 Mar 21 |
nicklas |
206 |
|
6170 |
17 Mar 21 |
nicklas |
@param line The text line |
6170 |
17 Mar 21 |
nicklas |
@param infoCol Column index for the INFO column |
6170 |
17 Mar 21 |
nicklas |
209 |
*/ |
6170 |
17 Mar 21 |
nicklas |
210 |
VcfLine(int lineNo, String line, int infoCol, int formatCol) |
6170 |
17 Mar 21 |
nicklas |
211 |
{ |
6170 |
17 Mar 21 |
nicklas |
212 |
this.lineNo = lineNo; |
6170 |
17 Mar 21 |
nicklas |
213 |
this.line = line; |
6170 |
17 Mar 21 |
nicklas |
214 |
this.cols = line.split("\\t"); |
6170 |
17 Mar 21 |
nicklas |
215 |
if (0 <= infoCol && infoCol < cols.length) parseInfo(cols[infoCol]); |
6170 |
17 Mar 21 |
nicklas |
216 |
if (0 <= formatCol && formatCol < cols.length-1) parseFormat(cols[formatCol], cols[formatCol+1]); |
6170 |
17 Mar 21 |
nicklas |
217 |
} |
6170 |
17 Mar 21 |
nicklas |
218 |
|
6170 |
17 Mar 21 |
nicklas |
219 |
|
6170 |
17 Mar 21 |
nicklas |
220 |
/** |
6170 |
17 Mar 21 |
nicklas |
The INFO string is formatted as key-value pairs |
6170 |
17 Mar 21 |
nicklas |
separated with semi-colon, or flags with only a key: |
6170 |
17 Mar 21 |
nicklas |
223 |
|
6170 |
17 Mar 21 |
nicklas |
KEY1=VALUE1;KEY2=VALUE2;FLAG1;KEY4=VALUE4;FLAG2;.... |
6170 |
17 Mar 21 |
nicklas |
225 |
*/ |
6170 |
17 Mar 21 |
nicklas |
226 |
private void parseInfo(String info) |
6170 |
17 Mar 21 |
nicklas |
227 |
{ |
6170 |
17 Mar 21 |
nicklas |
228 |
|
6170 |
17 Mar 21 |
nicklas |
229 |
String[] tmp = info.split("\\;"); |
6170 |
17 Mar 21 |
nicklas |
230 |
Info[] result = new Info[tmp.length]; |
6170 |
17 Mar 21 |
nicklas |
231 |
|
6170 |
17 Mar 21 |
nicklas |
232 |
int r = 0; |
6170 |
17 Mar 21 |
nicklas |
233 |
for (String t : tmp) |
6170 |
17 Mar 21 |
nicklas |
234 |
{ |
6170 |
17 Mar 21 |
nicklas |
235 |
String[] kv = t.split("\\=", 2); |
6170 |
17 Mar 21 |
nicklas |
236 |
if (kv.length == 1 || kv[1].length() == 0) continue; // We don't care about flags (so far) |
6170 |
17 Mar 21 |
nicklas |
237 |
|
6170 |
17 Mar 21 |
nicklas |
238 |
if ("ANN".equals(kv[0])) |
6170 |
17 Mar 21 |
nicklas |
239 |
{ |
6170 |
17 Mar 21 |
nicklas |
// The ANN field is a table with rows separated by comma (,) |
6383 |
15 Sep 21 |
nicklas |
// and columns separated by pipe (|). We get the FIRST ROW |
6383 |
15 Sep 21 |
nicklas |
// that is not one of (unless there are no other rows): |
6383 |
15 Sep 21 |
nicklas |
// * structural_interaction_variant. |
6383 |
15 Sep 21 |
nicklas |
// * protein_protein_contact |
6383 |
15 Sep 21 |
nicklas |
245 |
String[] annRows = kv[1].split("\\,"); |
6383 |
15 Sep 21 |
nicklas |
246 |
String annTmp = annRows[0]; |
6383 |
15 Sep 21 |
nicklas |
247 |
for (String annRow : annRows) |
6383 |
15 Sep 21 |
nicklas |
248 |
{ |
6383 |
15 Sep 21 |
nicklas |
// TODO -- if we find more effects that should be ignored a better solution is needed |
6383 |
15 Sep 21 |
nicklas |
250 |
if (annRow.contains("structural_interaction") || annRow.contains("protein_protein_contact")) |
6383 |
15 Sep 21 |
nicklas |
251 |
{ |
6383 |
15 Sep 21 |
nicklas |
252 |
continue; |
6383 |
15 Sep 21 |
nicklas |
253 |
} |
6383 |
15 Sep 21 |
nicklas |
254 |
annTmp = annRow; |
6383 |
15 Sep 21 |
nicklas |
255 |
break; |
6383 |
15 Sep 21 |
nicklas |
256 |
} |
6383 |
15 Sep 21 |
nicklas |
257 |
ann = annTmp.split("\\|", -1); |
6170 |
17 Mar 21 |
nicklas |
258 |
} |
6170 |
17 Mar 21 |
nicklas |
259 |
else |
6170 |
17 Mar 21 |
nicklas |
260 |
{ |
6170 |
17 Mar 21 |
nicklas |
261 |
result[r] = new Info(kv[0], kv[1]); |
6170 |
17 Mar 21 |
nicklas |
262 |
r++; |
6170 |
17 Mar 21 |
nicklas |
263 |
} |
6170 |
17 Mar 21 |
nicklas |
264 |
} |
6170 |
17 Mar 21 |
nicklas |
265 |
|
6170 |
17 Mar 21 |
nicklas |
266 |
if (r < result.length) |
6170 |
17 Mar 21 |
nicklas |
267 |
{ |
6170 |
17 Mar 21 |
nicklas |
268 |
Info[] result2 = new Info[r]; |
6170 |
17 Mar 21 |
nicklas |
269 |
System.arraycopy(result, 0, result2, 0, r); |
6170 |
17 Mar 21 |
nicklas |
270 |
result = result2; |
6170 |
17 Mar 21 |
nicklas |
271 |
} |
6170 |
17 Mar 21 |
nicklas |
272 |
this.info = result; |
6170 |
17 Mar 21 |
nicklas |
273 |
} |
6170 |
17 Mar 21 |
nicklas |
274 |
|
6170 |
17 Mar 21 |
nicklas |
275 |
/** |
6170 |
17 Mar 21 |
nicklas |
The FORMAT string is formatted as list with keys |
6170 |
17 Mar 21 |
nicklas |
separated with colon. VALUES is a separate string |
6170 |
17 Mar 21 |
nicklas |
with values also separated with colon. The two |
6170 |
17 Mar 21 |
nicklas |
strings should have the same number of elements. |
6170 |
17 Mar 21 |
nicklas |
280 |
|
6170 |
17 Mar 21 |
nicklas |
KEY1:KEY2:KEY3 VALUE1:VALUE2:VALUE3 |
6170 |
17 Mar 21 |
nicklas |
282 |
*/ |
6170 |
17 Mar 21 |
nicklas |
283 |
private void parseFormat(String format, String values) |
6170 |
17 Mar 21 |
nicklas |
284 |
{ |
6170 |
17 Mar 21 |
nicklas |
285 |
String[] keys = format.split("\\:"); |
6170 |
17 Mar 21 |
nicklas |
286 |
String[] vals = values.split("\\:"); |
6376 |
07 Sep 21 |
nicklas |
287 |
List<Info> result = new ArrayList<>(); |
6376 |
07 Sep 21 |
nicklas |
288 |
String ad = null; |
6376 |
07 Sep 21 |
nicklas |
289 |
String af = null; |
6376 |
07 Sep 21 |
nicklas |
290 |
String vd = null; |
6376 |
07 Sep 21 |
nicklas |
291 |
for (int i = 0; i < Math.min(keys.length, vals.length); i++) |
6170 |
17 Mar 21 |
nicklas |
292 |
{ |
6376 |
07 Sep 21 |
nicklas |
293 |
result.add(new Info(keys[i], vals[i])); |
6376 |
07 Sep 21 |
nicklas |
294 |
if ("AD".equals(keys[i])) ad = vals[i]; |
6376 |
07 Sep 21 |
nicklas |
295 |
if ("AF".equals(keys[i])) af = vals[i]; |
6376 |
07 Sep 21 |
nicklas |
296 |
if ("VD".equals(keys[i])) vd = vals[i]; |
6170 |
17 Mar 21 |
nicklas |
297 |
} |
6376 |
07 Sep 21 |
nicklas |
// If the AF and VD fields are missing calculate them from the AD field |
6376 |
07 Sep 21 |
nicklas |
299 |
if (ad != null) |
6376 |
07 Sep 21 |
nicklas |
300 |
{ |
6376 |
07 Sep 21 |
nicklas |
301 |
String[] tmp = ad.split(","); |
6404 |
17 Sep 21 |
nicklas |
302 |
if (af == null && tmp.length >= 2) |
6376 |
07 Sep 21 |
nicklas |
303 |
{ |
6376 |
07 Sep 21 |
nicklas |
304 |
float ref = Values.getFloat(tmp[0]); |
6376 |
07 Sep 21 |
nicklas |
305 |
float alt = Values.getFloat(tmp[1]); |
6376 |
07 Sep 21 |
nicklas |
306 |
if (ref > 0 || alt > 0) |
6376 |
07 Sep 21 |
nicklas |
307 |
{ |
6376 |
07 Sep 21 |
nicklas |
308 |
result.add(new Info("AF", Values.formatNumber(alt / (ref+alt), 3))); |
6376 |
07 Sep 21 |
nicklas |
309 |
} |
6376 |
07 Sep 21 |
nicklas |
310 |
} |
6404 |
17 Sep 21 |
nicklas |
311 |
if (vd == null && tmp.length >= 2) |
6376 |
07 Sep 21 |
nicklas |
312 |
{ |
6376 |
07 Sep 21 |
nicklas |
313 |
result.add(new Info("VD", tmp[1])); |
6376 |
07 Sep 21 |
nicklas |
314 |
} |
6376 |
07 Sep 21 |
nicklas |
315 |
} |
6376 |
07 Sep 21 |
nicklas |
316 |
this.format = result.toArray(new Info[result.size()]); |
6170 |
17 Mar 21 |
nicklas |
317 |
} |
6170 |
17 Mar 21 |
nicklas |
318 |
|
6170 |
17 Mar 21 |
nicklas |
319 |
/** |
6170 |
17 Mar 21 |
nicklas |
The raw line data. |
6170 |
17 Mar 21 |
nicklas |
321 |
*/ |
6170 |
17 Mar 21 |
nicklas |
322 |
public String line() |
6170 |
17 Mar 21 |
nicklas |
323 |
{ |
6170 |
17 Mar 21 |
nicklas |
324 |
return line; |
6170 |
17 Mar 21 |
nicklas |
325 |
} |
6170 |
17 Mar 21 |
nicklas |
326 |
|
6170 |
17 Mar 21 |
nicklas |
327 |
/** |
6170 |
17 Mar 21 |
nicklas |
Line number in the VCF file. |
6170 |
17 Mar 21 |
nicklas |
329 |
*/ |
6170 |
17 Mar 21 |
nicklas |
330 |
public int lineNo() |
6170 |
17 Mar 21 |
nicklas |
331 |
{ |
6170 |
17 Mar 21 |
nicklas |
332 |
return lineNo; |
6170 |
17 Mar 21 |
nicklas |
333 |
} |
6170 |
17 Mar 21 |
nicklas |
334 |
|
6170 |
17 Mar 21 |
nicklas |
335 |
/** |
6170 |
17 Mar 21 |
nicklas |
Get the value from the given column. |
6170 |
17 Mar 21 |
nicklas |
@return null if the column doesn't exists |
6170 |
17 Mar 21 |
nicklas |
338 |
*/ |
6170 |
17 Mar 21 |
nicklas |
339 |
public String col(int index) |
6170 |
17 Mar 21 |
nicklas |
340 |
{ |
6170 |
17 Mar 21 |
nicklas |
341 |
return 0 <= index && index < cols.length ? cols[index] : null; |
6170 |
17 Mar 21 |
nicklas |
342 |
} |
6170 |
17 Mar 21 |
nicklas |
343 |
|
6170 |
17 Mar 21 |
nicklas |
344 |
/** |
6170 |
17 Mar 21 |
nicklas |
Get the value from the given column as a numeric Long value. |
6170 |
17 Mar 21 |
nicklas |
@return null if the column doesn't exists or if the value can't be parsed as a number |
6170 |
17 Mar 21 |
nicklas |
347 |
*/ |
6170 |
17 Mar 21 |
nicklas |
348 |
public Long longValue(int index) |
6170 |
17 Mar 21 |
nicklas |
349 |
{ |
6170 |
17 Mar 21 |
nicklas |
350 |
String val = col(index); |
6170 |
17 Mar 21 |
nicklas |
351 |
if (val == null) return null; |
6170 |
17 Mar 21 |
nicklas |
352 |
try |
6170 |
17 Mar 21 |
nicklas |
353 |
{ |
6170 |
17 Mar 21 |
nicklas |
354 |
return Long.parseLong(val); |
6170 |
17 Mar 21 |
nicklas |
355 |
} |
6170 |
17 Mar 21 |
nicklas |
356 |
catch (NumberFormatException ex) |
6170 |
17 Mar 21 |
nicklas |
357 |
{} |
6170 |
17 Mar 21 |
nicklas |
358 |
return null; |
6170 |
17 Mar 21 |
nicklas |
359 |
} |
6170 |
17 Mar 21 |
nicklas |
360 |
|
6170 |
17 Mar 21 |
nicklas |
361 |
/** |
6170 |
17 Mar 21 |
nicklas |
Get the value from the given ANN column. |
6170 |
17 Mar 21 |
nicklas |
@return null if the column doesn't exists |
6170 |
17 Mar 21 |
nicklas |
364 |
*/ |
6170 |
17 Mar 21 |
nicklas |
365 |
public String ann(int index) |
6170 |
17 Mar 21 |
nicklas |
366 |
{ |
6170 |
17 Mar 21 |
nicklas |
367 |
return 0 <= index && index < ann.length ? ann[index] : null; |
6170 |
17 Mar 21 |
nicklas |
368 |
} |
6170 |
17 Mar 21 |
nicklas |
369 |
|
6170 |
17 Mar 21 |
nicklas |
370 |
/** |
6170 |
17 Mar 21 |
nicklas |
Get all INFO annotations. |
6170 |
17 Mar 21 |
nicklas |
372 |
*/ |
6170 |
17 Mar 21 |
nicklas |
373 |
public Info[] info() |
6170 |
17 Mar 21 |
nicklas |
374 |
{ |
6170 |
17 Mar 21 |
nicklas |
375 |
return info; |
6170 |
17 Mar 21 |
nicklas |
376 |
} |
6170 |
17 Mar 21 |
nicklas |
377 |
|
6170 |
17 Mar 21 |
nicklas |
378 |
/** |
6170 |
17 Mar 21 |
nicklas |
Get the INFO annotation with the given key. |
6170 |
17 Mar 21 |
nicklas |
@return null if the column doesn't exists |
6170 |
17 Mar 21 |
nicklas |
381 |
*/ |
6170 |
17 Mar 21 |
nicklas |
382 |
public String info(String key) |
6170 |
17 Mar 21 |
nicklas |
383 |
{ |
6170 |
17 Mar 21 |
nicklas |
384 |
for (Info i : info) |
6170 |
17 Mar 21 |
nicklas |
385 |
{ |
6170 |
17 Mar 21 |
nicklas |
386 |
if (i.key.equals(key)) return i.value; |
6170 |
17 Mar 21 |
nicklas |
387 |
} |
6170 |
17 Mar 21 |
nicklas |
388 |
return null; |
6170 |
17 Mar 21 |
nicklas |
389 |
} |
6170 |
17 Mar 21 |
nicklas |
390 |
|
6170 |
17 Mar 21 |
nicklas |
391 |
/** |
6170 |
17 Mar 21 |
nicklas |
Get all FORMAT annotations. |
6170 |
17 Mar 21 |
nicklas |
393 |
*/ |
6170 |
17 Mar 21 |
nicklas |
394 |
public Info[] format() |
6170 |
17 Mar 21 |
nicklas |
395 |
{ |
6170 |
17 Mar 21 |
nicklas |
396 |
return format; |
6170 |
17 Mar 21 |
nicklas |
397 |
} |
6170 |
17 Mar 21 |
nicklas |
398 |
|
6170 |
17 Mar 21 |
nicklas |
399 |
/** |
6170 |
17 Mar 21 |
nicklas |
Get the FORMAT annotation with the given key. |
6170 |
17 Mar 21 |
nicklas |
@return null if the column doesn't exists |
6170 |
17 Mar 21 |
nicklas |
402 |
*/ |
6170 |
17 Mar 21 |
nicklas |
403 |
public String format(String key) |
6170 |
17 Mar 21 |
nicklas |
404 |
{ |
6170 |
17 Mar 21 |
nicklas |
405 |
for (Info i : format) |
6170 |
17 Mar 21 |
nicklas |
406 |
{ |
6170 |
17 Mar 21 |
nicklas |
407 |
if (i.key.equals(key)) return i.value; |
6170 |
17 Mar 21 |
nicklas |
408 |
} |
6170 |
17 Mar 21 |
nicklas |
409 |
return null; |
6170 |
17 Mar 21 |
nicklas |
410 |
} |
6170 |
17 Mar 21 |
nicklas |
411 |
|
6170 |
17 Mar 21 |
nicklas |
412 |
} |
6170 |
17 Mar 21 |
nicklas |
413 |
|
6170 |
17 Mar 21 |
nicklas |
414 |
/** |
6170 |
17 Mar 21 |
nicklas |
Holds key/value pairs for an INFO and FORMAT annotation. |
6170 |
17 Mar 21 |
nicklas |
416 |
*/ |
6170 |
17 Mar 21 |
nicklas |
417 |
public static class Info |
6170 |
17 Mar 21 |
nicklas |
418 |
{ |
6170 |
17 Mar 21 |
nicklas |
419 |
public final String key; |
6170 |
17 Mar 21 |
nicklas |
420 |
public final String value; |
6170 |
17 Mar 21 |
nicklas |
421 |
|
6170 |
17 Mar 21 |
nicklas |
422 |
Info(String key, String value) |
6170 |
17 Mar 21 |
nicklas |
423 |
{ |
6170 |
17 Mar 21 |
nicklas |
424 |
this.key = key; |
6170 |
17 Mar 21 |
nicklas |
425 |
this.value = value; |
6170 |
17 Mar 21 |
nicklas |
426 |
} |
6171 |
18 Mar 21 |
nicklas |
427 |
|
6171 |
18 Mar 21 |
nicklas |
428 |
/** |
6171 |
18 Mar 21 |
nicklas |
Get the value as an integer. |
6171 |
18 Mar 21 |
nicklas |
@return A numeric value or null if the value is not a number |
6171 |
18 Mar 21 |
nicklas |
431 |
*/ |
6171 |
18 Mar 21 |
nicklas |
432 |
public Integer intValue() |
6171 |
18 Mar 21 |
nicklas |
433 |
{ |
6171 |
18 Mar 21 |
nicklas |
434 |
if (value == null) return null; |
6171 |
18 Mar 21 |
nicklas |
435 |
try |
6171 |
18 Mar 21 |
nicklas |
436 |
{ |
6171 |
18 Mar 21 |
nicklas |
437 |
return Integer.parseInt(value); |
6171 |
18 Mar 21 |
nicklas |
438 |
} |
6171 |
18 Mar 21 |
nicklas |
439 |
catch (NumberFormatException ex) |
6171 |
18 Mar 21 |
nicklas |
440 |
{} |
6171 |
18 Mar 21 |
nicklas |
441 |
return null; |
6171 |
18 Mar 21 |
nicklas |
442 |
} |
6171 |
18 Mar 21 |
nicklas |
443 |
|
6171 |
18 Mar 21 |
nicklas |
444 |
/** |
6171 |
18 Mar 21 |
nicklas |
Get the value as a long. |
6171 |
18 Mar 21 |
nicklas |
@return A numeric value or null if the value is not a number |
6171 |
18 Mar 21 |
nicklas |
447 |
*/ |
6171 |
18 Mar 21 |
nicklas |
448 |
public Long longValue() |
6171 |
18 Mar 21 |
nicklas |
449 |
{ |
6171 |
18 Mar 21 |
nicklas |
450 |
if (value == null) return null; |
6171 |
18 Mar 21 |
nicklas |
451 |
try |
6171 |
18 Mar 21 |
nicklas |
452 |
{ |
6171 |
18 Mar 21 |
nicklas |
453 |
return Long.parseLong(value); |
6171 |
18 Mar 21 |
nicklas |
454 |
} |
6171 |
18 Mar 21 |
nicklas |
455 |
catch (NumberFormatException ex) |
6171 |
18 Mar 21 |
nicklas |
456 |
{} |
6171 |
18 Mar 21 |
nicklas |
457 |
return null; |
6171 |
18 Mar 21 |
nicklas |
458 |
} |
6171 |
18 Mar 21 |
nicklas |
459 |
|
6171 |
18 Mar 21 |
nicklas |
460 |
/** |
6171 |
18 Mar 21 |
nicklas |
Get the value as a float. |
6171 |
18 Mar 21 |
nicklas |
@return A numeric value or null if the value is not a number |
6171 |
18 Mar 21 |
nicklas |
463 |
*/ |
6171 |
18 Mar 21 |
nicklas |
464 |
public Float floatValue() |
6171 |
18 Mar 21 |
nicklas |
465 |
{ |
6171 |
18 Mar 21 |
nicklas |
466 |
if (value == null) return null; |
6171 |
18 Mar 21 |
nicklas |
467 |
try |
6171 |
18 Mar 21 |
nicklas |
468 |
{ |
6171 |
18 Mar 21 |
nicklas |
469 |
return Float.parseFloat(value); |
6171 |
18 Mar 21 |
nicklas |
470 |
} |
6171 |
18 Mar 21 |
nicklas |
471 |
catch (NumberFormatException ex) |
6171 |
18 Mar 21 |
nicklas |
472 |
{} |
6171 |
18 Mar 21 |
nicklas |
473 |
return null; |
6171 |
18 Mar 21 |
nicklas |
474 |
} |
6171 |
18 Mar 21 |
nicklas |
475 |
|
6170 |
17 Mar 21 |
nicklas |
476 |
} |
6551 |
26 Jan 22 |
nicklas |
477 |
|
6551 |
26 Jan 22 |
nicklas |
478 |
/** |
6551 |
26 Jan 22 |
nicklas |
A filter implementation that matches a given line number. |
6551 |
26 Jan 22 |
nicklas |
480 |
*/ |
6551 |
26 Jan 22 |
nicklas |
481 |
public static class LineNoFilter |
6551 |
26 Jan 22 |
nicklas |
482 |
implements Filter<VcfLine> |
6551 |
26 Jan 22 |
nicklas |
483 |
{ |
6551 |
26 Jan 22 |
nicklas |
484 |
private final int lineNo; |
6551 |
26 Jan 22 |
nicklas |
485 |
public LineNoFilter(int lineNo) |
6551 |
26 Jan 22 |
nicklas |
486 |
{ |
6551 |
26 Jan 22 |
nicklas |
487 |
this.lineNo = lineNo; |
6551 |
26 Jan 22 |
nicklas |
488 |
} |
6551 |
26 Jan 22 |
nicklas |
489 |
@Override |
6551 |
26 Jan 22 |
nicklas |
490 |
public boolean evaluate(VcfLine line) |
6551 |
26 Jan 22 |
nicklas |
491 |
{ |
6551 |
26 Jan 22 |
nicklas |
492 |
return line.lineNo() == lineNo; |
6551 |
26 Jan 22 |
nicklas |
493 |
} |
6551 |
26 Jan 22 |
nicklas |
494 |
} |
6170 |
17 Mar 21 |
nicklas |
495 |
|
6551 |
26 Jan 22 |
nicklas |
496 |
/** |
6551 |
26 Jan 22 |
nicklas |
A filter implementation that matches the ID value. |
6551 |
26 Jan 22 |
nicklas |
498 |
*/ |
6551 |
26 Jan 22 |
nicklas |
499 |
public static class IdFilter |
6551 |
26 Jan 22 |
nicklas |
500 |
implements Filter<VcfLine> |
6551 |
26 Jan 22 |
nicklas |
501 |
{ |
6551 |
26 Jan 22 |
nicklas |
502 |
private final String snpId; |
6551 |
26 Jan 22 |
nicklas |
503 |
private final int idCol; |
6551 |
26 Jan 22 |
nicklas |
504 |
|
6551 |
26 Jan 22 |
nicklas |
505 |
public IdFilter(String snpId, int idCol) |
6551 |
26 Jan 22 |
nicklas |
506 |
{ |
6551 |
26 Jan 22 |
nicklas |
507 |
this.snpId = snpId; |
6551 |
26 Jan 22 |
nicklas |
508 |
this.idCol = idCol; |
6551 |
26 Jan 22 |
nicklas |
509 |
} |
6551 |
26 Jan 22 |
nicklas |
510 |
@Override |
6551 |
26 Jan 22 |
nicklas |
511 |
public boolean evaluate(VcfLine line) |
6551 |
26 Jan 22 |
nicklas |
512 |
{ |
6551 |
26 Jan 22 |
nicklas |
513 |
return snpId.equals(line.col(idCol)); |
6551 |
26 Jan 22 |
nicklas |
514 |
} |
6551 |
26 Jan 22 |
nicklas |
515 |
} |
6551 |
26 Jan 22 |
nicklas |
516 |
|
6170 |
17 Mar 21 |
nicklas |
517 |
} |