6547 |
24 Jan 22 |
nicklas |
1 |
package net.sf.basedb.varsearch.query; |
6547 |
24 Jan 22 |
nicklas |
2 |
|
6547 |
24 Jan 22 |
nicklas |
3 |
import java.io.IOException; |
6547 |
24 Jan 22 |
nicklas |
4 |
import java.util.Collection; |
6548 |
24 Jan 22 |
nicklas |
5 |
import java.util.HashMap; |
6548 |
24 Jan 22 |
nicklas |
6 |
import java.util.Map; |
6547 |
24 Jan 22 |
nicklas |
7 |
|
6547 |
24 Jan 22 |
nicklas |
8 |
import org.apache.lucene.document.Document; |
6551 |
26 Jan 22 |
nicklas |
9 |
import org.apache.lucene.document.StoredField; |
6550 |
25 Jan 22 |
nicklas |
10 |
import org.apache.lucene.index.LeafReader; |
6547 |
24 Jan 22 |
nicklas |
11 |
import org.apache.lucene.index.LeafReaderContext; |
6559 |
01 Feb 22 |
nicklas |
12 |
import org.apache.lucene.search.BooleanClause.Occur; |
6547 |
24 Jan 22 |
nicklas |
13 |
import org.apache.lucene.search.ScoreMode; |
6547 |
24 Jan 22 |
nicklas |
14 |
import org.apache.lucene.search.SimpleCollector; |
6547 |
24 Jan 22 |
nicklas |
15 |
|
6547 |
24 Jan 22 |
nicklas |
16 |
/** |
6547 |
24 Jan 22 |
nicklas |
Collect SNP ID values and all matching documents from a query. |
6547 |
24 Jan 22 |
nicklas |
18 |
*/ |
6547 |
24 Jan 22 |
nicklas |
19 |
public class SnpCollector |
6547 |
24 Jan 22 |
nicklas |
20 |
extends SimpleCollector |
6547 |
24 Jan 22 |
nicklas |
21 |
{ |
6547 |
24 Jan 22 |
nicklas |
22 |
|
6550 |
25 Jan 22 |
nicklas |
23 |
private final Map<String, SnpDocument> snpDocuments; |
6549 |
25 Jan 22 |
nicklas |
24 |
private final int maxHits; |
6555 |
27 Jan 22 |
nicklas |
25 |
private final String requestedGt; |
6559 |
01 Feb 22 |
nicklas |
26 |
private final Occur requestedGtOccur; |
6549 |
25 Jan 22 |
nicklas |
27 |
private int totalHits = 0; |
6550 |
25 Jan 22 |
nicklas |
28 |
private LeafReader reader; |
6547 |
24 Jan 22 |
nicklas |
29 |
|
6559 |
01 Feb 22 |
nicklas |
30 |
public SnpCollector(int maxHits, String requestedGt, Occur requestedGtOccur) |
6547 |
24 Jan 22 |
nicklas |
31 |
{ |
6549 |
25 Jan 22 |
nicklas |
32 |
this.maxHits = maxHits; |
6555 |
27 Jan 22 |
nicklas |
33 |
this.requestedGt = requestedGt; |
6559 |
01 Feb 22 |
nicklas |
34 |
this.requestedGtOccur = requestedGtOccur; |
6548 |
24 Jan 22 |
nicklas |
35 |
this.snpDocuments = new HashMap<>(); |
6547 |
24 Jan 22 |
nicklas |
36 |
} |
6547 |
24 Jan 22 |
nicklas |
37 |
|
6547 |
24 Jan 22 |
nicklas |
38 |
/** |
6555 |
27 Jan 22 |
nicklas |
Get the total number of matching SNPs. |
6555 |
27 Jan 22 |
nicklas |
40 |
*/ |
6555 |
27 Jan 22 |
nicklas |
41 |
public int getNumSnps() |
6548 |
24 Jan 22 |
nicklas |
42 |
{ |
6555 |
27 Jan 22 |
nicklas |
43 |
return totalHits; |
6548 |
24 Jan 22 |
nicklas |
44 |
} |
6548 |
24 Jan 22 |
nicklas |
45 |
|
6548 |
24 Jan 22 |
nicklas |
46 |
/** |
6555 |
27 Jan 22 |
nicklas |
Get the genotype that the query requested for all hits. |
6555 |
27 Jan 22 |
nicklas |
If null, the hits should have either 0/1 or 1/1. |
6547 |
24 Jan 22 |
nicklas |
49 |
*/ |
6555 |
27 Jan 22 |
nicklas |
50 |
public String getRequstedGt() |
6547 |
24 Jan 22 |
nicklas |
51 |
{ |
6555 |
27 Jan 22 |
nicklas |
52 |
return requestedGt; |
6547 |
24 Jan 22 |
nicklas |
53 |
} |
6547 |
24 Jan 22 |
nicklas |
54 |
|
6547 |
24 Jan 22 |
nicklas |
55 |
/** |
6559 |
01 Feb 22 |
nicklas |
MUST or MUST_NOT depening on if the requested genotype should |
6559 |
01 Feb 22 |
nicklas |
match or not. |
6559 |
01 Feb 22 |
nicklas |
58 |
*/ |
6559 |
01 Feb 22 |
nicklas |
59 |
public Occur getRequstedGtOccur() |
6559 |
01 Feb 22 |
nicklas |
60 |
{ |
6559 |
01 Feb 22 |
nicklas |
61 |
return requestedGtOccur; |
6559 |
01 Feb 22 |
nicklas |
62 |
} |
6559 |
01 Feb 22 |
nicklas |
63 |
|
6559 |
01 Feb 22 |
nicklas |
64 |
/** |
6547 |
24 Jan 22 |
nicklas |
Get all matching SNP ID values. |
6547 |
24 Jan 22 |
nicklas |
66 |
*/ |
6547 |
24 Jan 22 |
nicklas |
67 |
public Collection<String> getSnpList() |
6547 |
24 Jan 22 |
nicklas |
68 |
{ |
6548 |
24 Jan 22 |
nicklas |
69 |
return snpDocuments.keySet(); |
6547 |
24 Jan 22 |
nicklas |
70 |
} |
6547 |
24 Jan 22 |
nicklas |
71 |
/** |
6547 |
24 Jan 22 |
nicklas |
Get all matching documents. |
6547 |
24 Jan 22 |
nicklas |
73 |
*/ |
6550 |
25 Jan 22 |
nicklas |
74 |
public Collection<SnpDocument> getDocuments() |
6547 |
24 Jan 22 |
nicklas |
75 |
{ |
6548 |
24 Jan 22 |
nicklas |
76 |
return snpDocuments.values(); |
6547 |
24 Jan 22 |
nicklas |
77 |
} |
6547 |
24 Jan 22 |
nicklas |
78 |
|
6547 |
24 Jan 22 |
nicklas |
79 |
@Override |
6547 |
24 Jan 22 |
nicklas |
80 |
public void collect(int docId) |
6547 |
24 Jan 22 |
nicklas |
81 |
throws IOException |
6547 |
24 Jan 22 |
nicklas |
82 |
{ |
6549 |
25 Jan 22 |
nicklas |
83 |
if (totalHits < maxHits) |
6549 |
25 Jan 22 |
nicklas |
84 |
{ |
6550 |
25 Jan 22 |
nicklas |
85 |
Document doc = reader.document(docId); |
6549 |
25 Jan 22 |
nicklas |
86 |
String val = doc.get("snpId"); |
6550 |
25 Jan 22 |
nicklas |
87 |
if (val != null) snpDocuments.put(val, new SnpDocument(val, doc)); |
6549 |
25 Jan 22 |
nicklas |
88 |
} |
6549 |
25 Jan 22 |
nicklas |
89 |
totalHits++; |
6547 |
24 Jan 22 |
nicklas |
90 |
} |
6547 |
24 Jan 22 |
nicklas |
91 |
@Override |
6547 |
24 Jan 22 |
nicklas |
92 |
protected void doSetNextReader(LeafReaderContext context) |
6547 |
24 Jan 22 |
nicklas |
93 |
throws IOException |
6547 |
24 Jan 22 |
nicklas |
94 |
{ |
6550 |
25 Jan 22 |
nicklas |
95 |
reader = context.reader(); |
6547 |
24 Jan 22 |
nicklas |
96 |
} |
6547 |
24 Jan 22 |
nicklas |
97 |
@Override |
6547 |
24 Jan 22 |
nicklas |
98 |
public ScoreMode scoreMode() |
6547 |
24 Jan 22 |
nicklas |
99 |
{ |
6547 |
24 Jan 22 |
nicklas |
100 |
return ScoreMode.COMPLETE_NO_SCORES; |
6547 |
24 Jan 22 |
nicklas |
101 |
} |
6547 |
24 Jan 22 |
nicklas |
102 |
|
6550 |
25 Jan 22 |
nicklas |
103 |
/** |
6550 |
25 Jan 22 |
nicklas |
Wrapper around a Document instance that also holds |
6553 |
27 Jan 22 |
nicklas |
a shortcut to the 'gt' and 'file2' fields so that we |
6553 |
27 Jan 22 |
nicklas |
can set it when displaying hits for raw bioassays. |
6550 |
25 Jan 22 |
nicklas |
107 |
*/ |
6550 |
25 Jan 22 |
nicklas |
108 |
public static class SnpDocument |
6550 |
25 Jan 22 |
nicklas |
109 |
{ |
6550 |
25 Jan 22 |
nicklas |
110 |
public final Document document; |
6550 |
25 Jan 22 |
nicklas |
111 |
public final String snpId; |
6553 |
27 Jan 22 |
nicklas |
112 |
public final StoredField gt; |
6550 |
25 Jan 22 |
nicklas |
113 |
|
6550 |
25 Jan 22 |
nicklas |
114 |
public SnpDocument(String snpId, Document doc) |
6550 |
25 Jan 22 |
nicklas |
115 |
{ |
6550 |
25 Jan 22 |
nicklas |
116 |
this.snpId = snpId; |
6550 |
25 Jan 22 |
nicklas |
117 |
this.document = doc; |
6553 |
27 Jan 22 |
nicklas |
118 |
this.gt = new StoredField("gt", ""); |
6553 |
27 Jan 22 |
nicklas |
119 |
document.add(gt); |
6550 |
25 Jan 22 |
nicklas |
120 |
} |
6550 |
25 Jan 22 |
nicklas |
121 |
|
6550 |
25 Jan 22 |
nicklas |
122 |
/** |
6550 |
25 Jan 22 |
nicklas |
Get the 'gt' value. |
6550 |
25 Jan 22 |
nicklas |
124 |
*/ |
6550 |
25 Jan 22 |
nicklas |
125 |
public String gt() |
6550 |
25 Jan 22 |
nicklas |
126 |
{ |
6553 |
27 Jan 22 |
nicklas |
127 |
return gt.stringValue(); |
6550 |
25 Jan 22 |
nicklas |
128 |
} |
6550 |
25 Jan 22 |
nicklas |
129 |
} |
6547 |
24 Jan 22 |
nicklas |
130 |
} |