6133 |
16 Feb 21 |
nicklas |
1 |
package net.sf.basedb.varsearch.query; |
6133 |
16 Feb 21 |
nicklas |
2 |
|
6133 |
16 Feb 21 |
nicklas |
3 |
import java.io.IOException; |
6233 |
17 May 21 |
nicklas |
4 |
import java.util.HashSet; |
6133 |
16 Feb 21 |
nicklas |
5 |
import java.util.Set; |
6133 |
16 Feb 21 |
nicklas |
6 |
|
6248 |
25 May 21 |
nicklas |
7 |
import org.apache.lucene.index.DocValues; |
6133 |
16 Feb 21 |
nicklas |
8 |
import org.apache.lucene.index.LeafReaderContext; |
6248 |
25 May 21 |
nicklas |
9 |
import org.apache.lucene.index.NumericDocValues; |
6133 |
16 Feb 21 |
nicklas |
10 |
import org.apache.lucene.search.ScoreMode; |
6133 |
16 Feb 21 |
nicklas |
11 |
import org.apache.lucene.search.SimpleCollector; |
6260 |
27 May 21 |
nicklas |
12 |
import org.slf4j.LoggerFactory; |
6133 |
16 Feb 21 |
nicklas |
13 |
|
7074 |
24 Mar 23 |
nicklas |
14 |
import net.sf.basedb.util.extensions.logging.ExtensionsLog; |
7074 |
24 Mar 23 |
nicklas |
15 |
import net.sf.basedb.util.extensions.logging.ExtensionsLogger; |
7074 |
24 Mar 23 |
nicklas |
16 |
|
6133 |
16 Feb 21 |
nicklas |
17 |
/** |
6133 |
16 Feb 21 |
nicklas |
Collect all RawBioAssay ID from the search results. The searcher |
6133 |
16 Feb 21 |
nicklas |
will call {@link #collect(int)} for each matched document and we |
6133 |
16 Feb 21 |
nicklas |
load it and get the ID value. |
6133 |
16 Feb 21 |
nicklas |
21 |
*/ |
6133 |
16 Feb 21 |
nicklas |
22 |
public class RawBioAssayIdCollector |
6133 |
16 Feb 21 |
nicklas |
23 |
extends SimpleCollector |
6133 |
16 Feb 21 |
nicklas |
24 |
{ |
6133 |
16 Feb 21 |
nicklas |
25 |
|
7074 |
24 Mar 23 |
nicklas |
26 |
private static final ExtensionsLogger logger = |
7074 |
24 Mar 23 |
nicklas |
27 |
ExtensionsLog.getLogger(LuceneQueryFactory.ID, true).wrap(LoggerFactory.getLogger(RawBioAssayIdCollector.class)); |
6260 |
27 May 21 |
nicklas |
28 |
|
6141 |
19 Feb 21 |
nicklas |
29 |
private final String idField; |
6133 |
16 Feb 21 |
nicklas |
30 |
private final Set<Integer> rbaIds; |
6241 |
21 May 21 |
nicklas |
31 |
private final long timeLimit; |
6248 |
25 May 21 |
nicklas |
32 |
|
6241 |
21 May 21 |
nicklas |
33 |
private int totalCollect; |
6248 |
25 May 21 |
nicklas |
34 |
private int collectAfterTimeout; |
6248 |
25 May 21 |
nicklas |
35 |
private NumericDocValues idValues; |
6133 |
16 Feb 21 |
nicklas |
36 |
|
6553 |
27 Jan 22 |
nicklas |
37 |
public RawBioAssayIdCollector(String idField, long timeLimit) |
6133 |
16 Feb 21 |
nicklas |
38 |
{ |
6141 |
19 Feb 21 |
nicklas |
39 |
this.idField = idField; |
6233 |
17 May 21 |
nicklas |
40 |
this.rbaIds = new HashSet<>(); |
6248 |
25 May 21 |
nicklas |
41 |
this.timeLimit = timeLimit <= 0 ? Long.MAX_VALUE : timeLimit; |
6133 |
16 Feb 21 |
nicklas |
42 |
} |
6133 |
16 Feb 21 |
nicklas |
43 |
|
6133 |
16 Feb 21 |
nicklas |
44 |
@Override |
6133 |
16 Feb 21 |
nicklas |
45 |
public void collect(int docId) |
6133 |
16 Feb 21 |
nicklas |
46 |
throws IOException |
6133 |
16 Feb 21 |
nicklas |
47 |
{ |
6241 |
21 May 21 |
nicklas |
48 |
totalCollect++; |
6241 |
21 May 21 |
nicklas |
49 |
if (System.currentTimeMillis() < timeLimit) |
6241 |
21 May 21 |
nicklas |
50 |
{ |
6248 |
25 May 21 |
nicklas |
51 |
if (idValues.advanceExact(docId)) |
6248 |
25 May 21 |
nicklas |
52 |
{ |
6248 |
25 May 21 |
nicklas |
53 |
rbaIds.add((int)idValues.longValue()); |
6248 |
25 May 21 |
nicklas |
54 |
} |
6248 |
25 May 21 |
nicklas |
55 |
else |
6248 |
25 May 21 |
nicklas |
56 |
{ |
6260 |
27 May 21 |
nicklas |
57 |
logger.warn("Could not advance while collecting search results: "+docId); |
6248 |
25 May 21 |
nicklas |
58 |
} |
6241 |
21 May 21 |
nicklas |
59 |
} |
6241 |
21 May 21 |
nicklas |
60 |
else |
6241 |
21 May 21 |
nicklas |
61 |
{ |
6241 |
21 May 21 |
nicklas |
62 |
collectAfterTimeout++; |
6241 |
21 May 21 |
nicklas |
63 |
} |
6133 |
16 Feb 21 |
nicklas |
64 |
} |
6133 |
16 Feb 21 |
nicklas |
65 |
@Override |
6133 |
16 Feb 21 |
nicklas |
66 |
protected void doSetNextReader(LeafReaderContext context) |
6133 |
16 Feb 21 |
nicklas |
67 |
throws IOException |
6133 |
16 Feb 21 |
nicklas |
68 |
{ |
6248 |
25 May 21 |
nicklas |
69 |
idValues = DocValues.getNumeric(context.reader(), idField); |
6133 |
16 Feb 21 |
nicklas |
70 |
} |
6133 |
16 Feb 21 |
nicklas |
71 |
@Override |
6133 |
16 Feb 21 |
nicklas |
72 |
public ScoreMode scoreMode() |
6133 |
16 Feb 21 |
nicklas |
73 |
{ |
6133 |
16 Feb 21 |
nicklas |
74 |
return ScoreMode.COMPLETE_NO_SCORES; |
6133 |
16 Feb 21 |
nicklas |
75 |
} |
6133 |
16 Feb 21 |
nicklas |
76 |
|
6233 |
17 May 21 |
nicklas |
77 |
/** |
6233 |
17 May 21 |
nicklas |
Get all collected raw bioassay ids. |
6233 |
17 May 21 |
nicklas |
79 |
*/ |
6233 |
17 May 21 |
nicklas |
80 |
public Set<Integer> getRbaIds() |
6233 |
17 May 21 |
nicklas |
81 |
{ |
6233 |
17 May 21 |
nicklas |
82 |
return rbaIds; |
6233 |
17 May 21 |
nicklas |
83 |
} |
6233 |
17 May 21 |
nicklas |
84 |
|
6233 |
17 May 21 |
nicklas |
85 |
/** |
6241 |
21 May 21 |
nicklas |
Get the total number of calls to collect. |
6241 |
21 May 21 |
nicklas |
87 |
*/ |
6241 |
21 May 21 |
nicklas |
88 |
public int getNumTotalCollect() |
6241 |
21 May 21 |
nicklas |
89 |
{ |
6241 |
21 May 21 |
nicklas |
90 |
return totalCollect; |
6241 |
21 May 21 |
nicklas |
91 |
} |
6241 |
21 May 21 |
nicklas |
92 |
|
6241 |
21 May 21 |
nicklas |
93 |
/** |
6241 |
21 May 21 |
nicklas |
Get the number of calls to collect after timeout. |
6241 |
21 May 21 |
nicklas |
95 |
*/ |
6241 |
21 May 21 |
nicklas |
96 |
public int getNumCollectAfterTimeout() |
6241 |
21 May 21 |
nicklas |
97 |
{ |
6241 |
21 May 21 |
nicklas |
98 |
return collectAfterTimeout; |
6241 |
21 May 21 |
nicklas |
99 |
} |
6233 |
17 May 21 |
nicklas |
100 |
|
6133 |
16 Feb 21 |
nicklas |
101 |
} |