6132 |
16 Feb 21 |
nicklas |
1 |
package net.sf.basedb.varsearch.index; |
6111 |
29 Jan 21 |
nicklas |
2 |
|
6111 |
29 Jan 21 |
nicklas |
3 |
import java.io.Closeable; |
6111 |
29 Jan 21 |
nicklas |
4 |
import java.io.IOException; |
6241 |
21 May 21 |
nicklas |
5 |
import java.util.ArrayList; |
6112 |
02 Feb 21 |
nicklas |
6 |
import java.util.Collection; |
6241 |
21 May 21 |
nicklas |
7 |
import java.util.Collections; |
6163 |
05 Mar 21 |
nicklas |
8 |
import java.util.List; |
6163 |
05 Mar 21 |
nicklas |
9 |
import java.util.Set; |
6241 |
21 May 21 |
nicklas |
10 |
import java.util.concurrent.CountDownLatch; |
6112 |
02 Feb 21 |
nicklas |
11 |
import java.util.concurrent.ExecutorCompletionService; |
6111 |
29 Jan 21 |
nicklas |
12 |
import java.util.concurrent.ExecutorService; |
6112 |
02 Feb 21 |
nicklas |
13 |
import java.util.concurrent.Executors; |
6112 |
02 Feb 21 |
nicklas |
14 |
import java.util.concurrent.Future; |
6168 |
12 Mar 21 |
nicklas |
15 |
import java.util.concurrent.ThreadFactory; |
6241 |
21 May 21 |
nicklas |
16 |
import java.util.concurrent.TimeUnit; |
6135 |
17 Feb 21 |
nicklas |
17 |
import java.util.concurrent.locks.ReentrantReadWriteLock; |
6111 |
29 Jan 21 |
nicklas |
18 |
|
6114 |
04 Feb 21 |
nicklas |
19 |
import org.apache.lucene.analysis.Analyzer; |
6541 |
17 Jan 22 |
nicklas |
20 |
import org.apache.lucene.document.Document; |
6112 |
02 Feb 21 |
nicklas |
21 |
import org.apache.lucene.document.IntPoint; |
6241 |
21 May 21 |
nicklas |
22 |
import org.apache.lucene.index.ConcurrentMergeScheduler; |
6111 |
29 Jan 21 |
nicklas |
23 |
import org.apache.lucene.index.DirectoryReader; |
6111 |
29 Jan 21 |
nicklas |
24 |
import org.apache.lucene.index.IndexReader; |
6112 |
02 Feb 21 |
nicklas |
25 |
import org.apache.lucene.index.IndexWriter; |
6112 |
02 Feb 21 |
nicklas |
26 |
import org.apache.lucene.index.IndexWriterConfig; |
6112 |
02 Feb 21 |
nicklas |
27 |
import org.apache.lucene.index.IndexWriterConfig.OpenMode; |
6241 |
21 May 21 |
nicklas |
28 |
import org.apache.lucene.index.TieredMergePolicy; |
6119 |
05 Feb 21 |
nicklas |
29 |
import org.apache.lucene.queryparser.classic.QueryParser; |
6111 |
29 Jan 21 |
nicklas |
30 |
import org.apache.lucene.search.IndexSearcher; |
6112 |
02 Feb 21 |
nicklas |
31 |
import org.apache.lucene.search.Query; |
6111 |
29 Jan 21 |
nicklas |
32 |
import org.apache.lucene.store.Directory; |
6111 |
29 Jan 21 |
nicklas |
33 |
import org.apache.lucene.store.NIOFSDirectory; |
6112 |
02 Feb 21 |
nicklas |
34 |
import org.slf4j.LoggerFactory; |
6111 |
29 Jan 21 |
nicklas |
35 |
|
6241 |
21 May 21 |
nicklas |
36 |
import net.sf.basedb.core.Application; |
6241 |
21 May 21 |
nicklas |
37 |
import net.sf.basedb.core.Application.Pinger; |
6112 |
02 Feb 21 |
nicklas |
38 |
import net.sf.basedb.core.DbControl; |
6163 |
05 Mar 21 |
nicklas |
39 |
import net.sf.basedb.core.Include; |
6163 |
05 Mar 21 |
nicklas |
40 |
import net.sf.basedb.core.ItemQuery; |
6164 |
05 Mar 21 |
nicklas |
41 |
import net.sf.basedb.core.ProgressReporter; |
6112 |
02 Feb 21 |
nicklas |
42 |
import net.sf.basedb.core.RawBioAssay; |
6164 |
05 Mar 21 |
nicklas |
43 |
import net.sf.basedb.core.SimpleProgressReporter; |
6163 |
05 Mar 21 |
nicklas |
44 |
import net.sf.basedb.core.query.IdListRestriction; |
6546 |
24 Jan 22 |
nicklas |
45 |
import net.sf.basedb.core.query.QueryFilterAction; |
6111 |
29 Jan 21 |
nicklas |
46 |
import net.sf.basedb.util.FileUtil; |
6541 |
17 Jan 22 |
nicklas |
47 |
import net.sf.basedb.util.Values; |
6546 |
24 Jan 22 |
nicklas |
48 |
import net.sf.basedb.util.extensions.InvokationContext; |
7074 |
24 Mar 23 |
nicklas |
49 |
import net.sf.basedb.util.extensions.logging.ExtensionsLog; |
7074 |
24 Mar 23 |
nicklas |
50 |
import net.sf.basedb.util.extensions.logging.ExtensionsLogger; |
6552 |
26 Jan 22 |
nicklas |
51 |
import net.sf.basedb.varsearch.LuceneColumnFactory.LuceneColumnAction; |
6140 |
19 Feb 21 |
nicklas |
52 |
import net.sf.basedb.varsearch.dao.Itemlist; |
6541 |
17 Jan 22 |
nicklas |
53 |
import net.sf.basedb.varsearch.query.AllDocsCollector; |
6546 |
24 Jan 22 |
nicklas |
54 |
import net.sf.basedb.varsearch.query.LuceneQueryFactory.LuceneFilterAction; |
6137 |
18 Feb 21 |
nicklas |
55 |
import net.sf.basedb.varsearch.query.QueryCache; |
6246 |
24 May 21 |
nicklas |
56 |
import net.sf.basedb.varsearch.query.RawBioAssayIdCollector; |
6168 |
12 Mar 21 |
nicklas |
57 |
import net.sf.basedb.varsearch.service.VarSearchService; |
6111 |
29 Jan 21 |
nicklas |
58 |
|
6111 |
29 Jan 21 |
nicklas |
59 |
/** |
6541 |
17 Jan 22 |
nicklas |
Represents a Lucene index database. Subclasses are required to implement |
6541 |
17 Jan 22 |
nicklas |
some methods. |
6541 |
17 Jan 22 |
nicklas |
62 |
|
6111 |
29 Jan 21 |
nicklas |
@author nicklas |
6111 |
29 Jan 21 |
nicklas |
64 |
*/ |
6541 |
17 Jan 22 |
nicklas |
65 |
public abstract class LuceneIndex |
6111 |
29 Jan 21 |
nicklas |
66 |
implements Closeable |
6111 |
29 Jan 21 |
nicklas |
67 |
{ |
6112 |
02 Feb 21 |
nicklas |
68 |
|
7074 |
24 Mar 23 |
nicklas |
69 |
private static final ExtensionsLogger logger = |
7074 |
24 Mar 23 |
nicklas |
70 |
ExtensionsLog.getLogger(VarSearchService.ID, true).wrap(LoggerFactory.getLogger(LuceneIndex.class)); |
6111 |
29 Jan 21 |
nicklas |
71 |
|
6111 |
29 Jan 21 |
nicklas |
72 |
private final String id; |
6111 |
29 Jan 21 |
nicklas |
73 |
private String name; |
6140 |
19 Feb 21 |
nicklas |
74 |
private Itemlist itemList; |
6111 |
29 Jan 21 |
nicklas |
75 |
|
6377 |
08 Sep 21 |
nicklas |
76 |
private boolean indexAllGenotypes; |
6374 |
07 Sep 21 |
nicklas |
77 |
private VcfFileLocator vcfLocator; |
6140 |
19 Feb 21 |
nicklas |
78 |
private long extendedWaitUntil; |
6141 |
19 Feb 21 |
nicklas |
79 |
private int maxToIndex; |
6525 |
20 Dec 21 |
nicklas |
80 |
private boolean viewAllVariantsEnabled; |
6544 |
19 Jan 22 |
nicklas |
81 |
private Status queryStatus; |
6164 |
05 Mar 21 |
nicklas |
82 |
private Status status; |
6167 |
09 Mar 21 |
nicklas |
83 |
private AutoUpdate autoUpdateAction; |
6544 |
19 Jan 22 |
nicklas |
84 |
private String customUpdateAction; |
6164 |
05 Mar 21 |
nicklas |
85 |
private SimpleProgressReporter progress; |
6241 |
21 May 21 |
nicklas |
86 |
private volatile CountDownLatch abort; |
6111 |
29 Jan 21 |
nicklas |
87 |
|
6135 |
17 Feb 21 |
nicklas |
88 |
private final ReentrantReadWriteLock rwLock; |
6168 |
12 Mar 21 |
nicklas |
89 |
private java.io.File pathPrefix; |
6163 |
05 Mar 21 |
nicklas |
90 |
private java.io.File path; |
6111 |
29 Jan 21 |
nicklas |
91 |
private Directory directory; |
6111 |
29 Jan 21 |
nicklas |
92 |
private IndexReader reader; |
6111 |
29 Jan 21 |
nicklas |
93 |
private IndexSearcher searcher; |
6111 |
29 Jan 21 |
nicklas |
94 |
private ExecutorService executor; |
6114 |
04 Feb 21 |
nicklas |
95 |
private Analyzer analyzer; |
6137 |
18 Feb 21 |
nicklas |
96 |
private QueryCache cache; |
6111 |
29 Jan 21 |
nicklas |
97 |
|
6556 |
28 Jan 22 |
nicklas |
98 |
private Throwable error; |
6556 |
28 Jan 22 |
nicklas |
99 |
|
6167 |
09 Mar 21 |
nicklas |
// Set to TRUE to let the code wait here and there for easier |
6167 |
09 Mar 21 |
nicklas |
// testing of progress reporting in the GUI |
6167 |
09 Mar 21 |
nicklas |
102 |
public static boolean SLOW_MODE = false; |
6167 |
09 Mar 21 |
nicklas |
103 |
|
6132 |
16 Feb 21 |
nicklas |
104 |
public LuceneIndex(String id) |
6111 |
29 Jan 21 |
nicklas |
105 |
{ |
6111 |
29 Jan 21 |
nicklas |
106 |
this.id = id; |
6141 |
19 Feb 21 |
nicklas |
107 |
this.maxToIndex = 500; |
6525 |
20 Dec 21 |
nicklas |
108 |
this.viewAllVariantsEnabled = true; |
6164 |
05 Mar 21 |
nicklas |
109 |
this.status = Status.DISABLED; |
6544 |
19 Jan 22 |
nicklas |
110 |
this.queryStatus = Status.DISABLED; |
6167 |
09 Mar 21 |
nicklas |
111 |
this.autoUpdateAction = AutoUpdate.DISABLED; |
6135 |
17 Feb 21 |
nicklas |
112 |
this.rwLock = new ReentrantReadWriteLock(); |
6111 |
29 Jan 21 |
nicklas |
113 |
} |
6111 |
29 Jan 21 |
nicklas |
114 |
|
6541 |
17 Jan 22 |
nicklas |
115 |
/** |
6541 |
17 Jan 22 |
nicklas |
Get the ID of the index. |
6541 |
17 Jan 22 |
nicklas |
117 |
*/ |
6111 |
29 Jan 21 |
nicklas |
118 |
public String getId() |
6111 |
29 Jan 21 |
nicklas |
119 |
{ |
6111 |
29 Jan 21 |
nicklas |
120 |
return id; |
6111 |
29 Jan 21 |
nicklas |
121 |
} |
6111 |
29 Jan 21 |
nicklas |
122 |
|
6132 |
16 Feb 21 |
nicklas |
123 |
/** |
6132 |
16 Feb 21 |
nicklas |
Display name of the database. |
6132 |
16 Feb 21 |
nicklas |
125 |
*/ |
6111 |
29 Jan 21 |
nicklas |
126 |
public String getName() |
6111 |
29 Jan 21 |
nicklas |
127 |
{ |
6111 |
29 Jan 21 |
nicklas |
128 |
return name; |
6111 |
29 Jan 21 |
nicklas |
129 |
} |
6111 |
29 Jan 21 |
nicklas |
130 |
|
6132 |
16 Feb 21 |
nicklas |
131 |
public void setName(String name) |
6111 |
29 Jan 21 |
nicklas |
132 |
{ |
6111 |
29 Jan 21 |
nicklas |
133 |
this.name = name; |
6111 |
29 Jan 21 |
nicklas |
134 |
} |
6111 |
29 Jan 21 |
nicklas |
135 |
|
6132 |
16 Feb 21 |
nicklas |
136 |
/** |
6140 |
19 Feb 21 |
nicklas |
Item list that contains items that should be indexed. |
6140 |
19 Feb 21 |
nicklas |
138 |
*/ |
6140 |
19 Feb 21 |
nicklas |
139 |
public void setItemList(Itemlist itemList) |
6140 |
19 Feb 21 |
nicklas |
140 |
{ |
6140 |
19 Feb 21 |
nicklas |
141 |
this.itemList = itemList; |
6140 |
19 Feb 21 |
nicklas |
142 |
} |
6140 |
19 Feb 21 |
nicklas |
143 |
|
6140 |
19 Feb 21 |
nicklas |
144 |
public Itemlist getItemList() |
6140 |
19 Feb 21 |
nicklas |
145 |
{ |
6140 |
19 Feb 21 |
nicklas |
146 |
return itemList; |
6140 |
19 Feb 21 |
nicklas |
147 |
} |
6140 |
19 Feb 21 |
nicklas |
148 |
|
6140 |
19 Feb 21 |
nicklas |
149 |
/** |
6141 |
19 Feb 21 |
nicklas |
Maximum number of raw bioassays to index in a single |
6141 |
19 Feb 21 |
nicklas |
transaction. The default value is 500. |
6141 |
19 Feb 21 |
nicklas |
152 |
*/ |
6141 |
19 Feb 21 |
nicklas |
153 |
public void setMaxToIndex(int maxToIndex) |
6141 |
19 Feb 21 |
nicklas |
154 |
{ |
6141 |
19 Feb 21 |
nicklas |
155 |
this.maxToIndex = maxToIndex; |
6141 |
19 Feb 21 |
nicklas |
156 |
} |
6141 |
19 Feb 21 |
nicklas |
157 |
|
6141 |
19 Feb 21 |
nicklas |
158 |
public int getMaxToIndex() |
6141 |
19 Feb 21 |
nicklas |
159 |
{ |
6141 |
19 Feb 21 |
nicklas |
160 |
return maxToIndex; |
6141 |
19 Feb 21 |
nicklas |
161 |
} |
6141 |
19 Feb 21 |
nicklas |
162 |
|
6141 |
19 Feb 21 |
nicklas |
163 |
/** |
6525 |
20 Dec 21 |
nicklas |
Set a flag that controls if the GUI should display a link |
6525 |
20 Dec 21 |
nicklas |
for viewing all variants for a single raw bioassay. The default |
6525 |
20 Dec 21 |
nicklas |
is TRUE. |
6525 |
20 Dec 21 |
nicklas |
@since 1.4 |
6525 |
20 Dec 21 |
nicklas |
168 |
*/ |
6525 |
20 Dec 21 |
nicklas |
169 |
public void setViewAllVariantsEnabled(boolean enabled) |
6525 |
20 Dec 21 |
nicklas |
170 |
{ |
6525 |
20 Dec 21 |
nicklas |
171 |
this.viewAllVariantsEnabled = enabled; |
6525 |
20 Dec 21 |
nicklas |
172 |
} |
6525 |
20 Dec 21 |
nicklas |
173 |
|
6525 |
20 Dec 21 |
nicklas |
174 |
/** |
6525 |
20 Dec 21 |
nicklas |
@since 1.4 |
6525 |
20 Dec 21 |
nicklas |
176 |
*/ |
6525 |
20 Dec 21 |
nicklas |
177 |
public boolean viewAllVariantsEnabled() |
6525 |
20 Dec 21 |
nicklas |
178 |
{ |
6525 |
20 Dec 21 |
nicklas |
179 |
return viewAllVariantsEnabled; |
6525 |
20 Dec 21 |
nicklas |
180 |
} |
6525 |
20 Dec 21 |
nicklas |
181 |
|
6525 |
20 Dec 21 |
nicklas |
182 |
/** |
6374 |
07 Sep 21 |
nicklas |
Set the implementation that locates the VCF files that should be indexed. |
6374 |
07 Sep 21 |
nicklas |
@since 1.2 |
6132 |
16 Feb 21 |
nicklas |
185 |
*/ |
6374 |
07 Sep 21 |
nicklas |
186 |
public void setVcfFileLocator(VcfFileLocator vcfLocator) |
6112 |
02 Feb 21 |
nicklas |
187 |
{ |
6374 |
07 Sep 21 |
nicklas |
188 |
this.vcfLocator = vcfLocator; |
6112 |
02 Feb 21 |
nicklas |
189 |
} |
6112 |
02 Feb 21 |
nicklas |
190 |
|
6132 |
16 Feb 21 |
nicklas |
191 |
/** |
6553 |
27 Jan 22 |
nicklas |
Get the VCF file locator implementation. |
6553 |
27 Jan 22 |
nicklas |
@since 1.5 |
6553 |
27 Jan 22 |
nicklas |
194 |
*/ |
6553 |
27 Jan 22 |
nicklas |
195 |
public VcfFileLocator getVcfFileLocator() |
6553 |
27 Jan 22 |
nicklas |
196 |
{ |
6553 |
27 Jan 22 |
nicklas |
197 |
return vcfLocator; |
6553 |
27 Jan 22 |
nicklas |
198 |
} |
6553 |
27 Jan 22 |
nicklas |
199 |
|
6553 |
27 Jan 22 |
nicklas |
200 |
/** |
6377 |
08 Sep 21 |
nicklas |
A flag indicating if all genotypes should be indexed or not. |
6377 |
08 Sep 21 |
nicklas |
If not set (the default), variants with a 0/0 genotype are excluded |
6377 |
08 Sep 21 |
nicklas |
from the index. |
6377 |
08 Sep 21 |
nicklas |
@since 1.2 |
6377 |
08 Sep 21 |
nicklas |
205 |
*/ |
6377 |
08 Sep 21 |
nicklas |
206 |
public void setIndexAllGenotypes(boolean indexAllGenotypes) |
6377 |
08 Sep 21 |
nicklas |
207 |
{ |
6377 |
08 Sep 21 |
nicklas |
208 |
this.indexAllGenotypes = indexAllGenotypes; |
6377 |
08 Sep 21 |
nicklas |
209 |
} |
6377 |
08 Sep 21 |
nicklas |
210 |
|
6377 |
08 Sep 21 |
nicklas |
211 |
/** |
6377 |
08 Sep 21 |
nicklas |
@since 1.2 |
6377 |
08 Sep 21 |
nicklas |
213 |
*/ |
6377 |
08 Sep 21 |
nicklas |
214 |
public boolean getIndexAllGenotypes() |
6377 |
08 Sep 21 |
nicklas |
215 |
{ |
6377 |
08 Sep 21 |
nicklas |
216 |
return indexAllGenotypes; |
6377 |
08 Sep 21 |
nicklas |
217 |
} |
6377 |
08 Sep 21 |
nicklas |
218 |
|
6377 |
08 Sep 21 |
nicklas |
219 |
/** |
6164 |
05 Mar 21 |
nicklas |
Get status of the index. |
6164 |
05 Mar 21 |
nicklas |
221 |
*/ |
6164 |
05 Mar 21 |
nicklas |
222 |
public Status getWorkingStatus() |
6164 |
05 Mar 21 |
nicklas |
223 |
{ |
6164 |
05 Mar 21 |
nicklas |
224 |
return status; |
6164 |
05 Mar 21 |
nicklas |
225 |
} |
6164 |
05 Mar 21 |
nicklas |
226 |
|
6164 |
05 Mar 21 |
nicklas |
227 |
/** |
6544 |
19 Jan 22 |
nicklas |
Change the status of the index. |
6544 |
19 Jan 22 |
nicklas |
@since 1.5 |
6544 |
19 Jan 22 |
nicklas |
230 |
*/ |
6544 |
19 Jan 22 |
nicklas |
231 |
protected void setWorkingStatus(Status status) |
6544 |
19 Jan 22 |
nicklas |
232 |
{ |
6544 |
19 Jan 22 |
nicklas |
233 |
this.status = status; |
6544 |
19 Jan 22 |
nicklas |
234 |
} |
6544 |
19 Jan 22 |
nicklas |
235 |
|
6544 |
19 Jan 22 |
nicklas |
236 |
/** |
6544 |
19 Jan 22 |
nicklas |
Get the status of the index for using it in a query. |
6544 |
19 Jan 22 |
nicklas |
The query status is typically ENABLED or DISABLED. |
6544 |
19 Jan 22 |
nicklas |
@since 1.5 |
6544 |
19 Jan 22 |
nicklas |
240 |
*/ |
6544 |
19 Jan 22 |
nicklas |
241 |
public Status getQueryStatus() |
6544 |
19 Jan 22 |
nicklas |
242 |
{ |
6544 |
19 Jan 22 |
nicklas |
243 |
return queryStatus; |
6544 |
19 Jan 22 |
nicklas |
244 |
} |
6544 |
19 Jan 22 |
nicklas |
245 |
|
6544 |
19 Jan 22 |
nicklas |
246 |
/** |
6544 |
19 Jan 22 |
nicklas |
Change the query status of the index. |
6544 |
19 Jan 22 |
nicklas |
@since 1.5 |
6544 |
19 Jan 22 |
nicklas |
249 |
*/ |
6605 |
23 Feb 22 |
nicklas |
250 |
public void setQueryStatus(Status queryStatus) |
6544 |
19 Jan 22 |
nicklas |
251 |
{ |
6544 |
19 Jan 22 |
nicklas |
252 |
this.queryStatus = queryStatus; |
6544 |
19 Jan 22 |
nicklas |
253 |
} |
6544 |
19 Jan 22 |
nicklas |
254 |
|
6544 |
19 Jan 22 |
nicklas |
255 |
/** |
6605 |
23 Feb 22 |
nicklas |
Automatically set the query status for the index. The default implementation |
6605 |
23 Feb 22 |
nicklas |
will ENABLE it if there is at least one document in the index and DISABLE it |
6605 |
23 Feb 22 |
nicklas |
otherwise. |
6605 |
23 Feb 22 |
nicklas |
@return The current query status |
6605 |
23 Feb 22 |
nicklas |
@since 1.6 |
6605 |
23 Feb 22 |
nicklas |
261 |
*/ |
6605 |
23 Feb 22 |
nicklas |
262 |
public Status autoSetQueryStatus() |
6605 |
23 Feb 22 |
nicklas |
263 |
{ |
6605 |
23 Feb 22 |
nicklas |
264 |
queryStatus = reader != null && reader.numDocs() > 0 ? Status.ENABLED : Status.DISABLED; |
6605 |
23 Feb 22 |
nicklas |
265 |
return queryStatus; |
6605 |
23 Feb 22 |
nicklas |
266 |
} |
6605 |
23 Feb 22 |
nicklas |
267 |
|
6605 |
23 Feb 22 |
nicklas |
268 |
/** |
6167 |
09 Mar 21 |
nicklas |
Get the next auto-update action. |
6167 |
09 Mar 21 |
nicklas |
270 |
*/ |
6167 |
09 Mar 21 |
nicklas |
271 |
public AutoUpdate getNextAutoUpdateAction() |
6167 |
09 Mar 21 |
nicklas |
272 |
{ |
6167 |
09 Mar 21 |
nicklas |
273 |
return autoUpdateAction; |
6167 |
09 Mar 21 |
nicklas |
274 |
} |
6167 |
09 Mar 21 |
nicklas |
275 |
|
6544 |
19 Jan 22 |
nicklas |
276 |
public String getCustomUpdateAction() |
6167 |
09 Mar 21 |
nicklas |
277 |
{ |
6544 |
19 Jan 22 |
nicklas |
278 |
return customUpdateAction; |
6544 |
19 Jan 22 |
nicklas |
279 |
} |
6544 |
19 Jan 22 |
nicklas |
280 |
|
6544 |
19 Jan 22 |
nicklas |
281 |
public void setNextAutoUpdateAction(AutoUpdate action, String customAction) |
6544 |
19 Jan 22 |
nicklas |
282 |
{ |
6167 |
09 Mar 21 |
nicklas |
283 |
this.autoUpdateAction = action; |
6544 |
19 Jan 22 |
nicklas |
284 |
this.customUpdateAction = customAction; |
6167 |
09 Mar 21 |
nicklas |
285 |
} |
6167 |
09 Mar 21 |
nicklas |
286 |
|
6167 |
09 Mar 21 |
nicklas |
287 |
/** |
6556 |
28 Jan 22 |
nicklas |
Get the last error. |
6556 |
28 Jan 22 |
nicklas |
@since 1.5 |
6556 |
28 Jan 22 |
nicklas |
290 |
*/ |
6556 |
28 Jan 22 |
nicklas |
291 |
public Throwable getError() |
6556 |
28 Jan 22 |
nicklas |
292 |
{ |
6556 |
28 Jan 22 |
nicklas |
293 |
return error; |
6556 |
28 Jan 22 |
nicklas |
294 |
} |
6556 |
28 Jan 22 |
nicklas |
295 |
|
6556 |
28 Jan 22 |
nicklas |
296 |
/** |
6556 |
28 Jan 22 |
nicklas |
Save an error so that it can be displayed for the admin. |
6556 |
28 Jan 22 |
nicklas |
@since 1.5 |
6556 |
28 Jan 22 |
nicklas |
299 |
*/ |
6556 |
28 Jan 22 |
nicklas |
300 |
public void setError(Throwable error) |
6556 |
28 Jan 22 |
nicklas |
301 |
{ |
6556 |
28 Jan 22 |
nicklas |
302 |
this.error = error; |
6556 |
28 Jan 22 |
nicklas |
303 |
} |
6556 |
28 Jan 22 |
nicklas |
304 |
|
6556 |
28 Jan 22 |
nicklas |
305 |
/** |
6132 |
16 Feb 21 |
nicklas |
Opens the index for searching. If the specified directory |
6132 |
16 Feb 21 |
nicklas |
doesn't exists it is created and an empty index database is |
6132 |
16 Feb 21 |
nicklas |
generated. |
6132 |
16 Feb 21 |
nicklas |
@param path Full path to directory with the index |
6132 |
16 Feb 21 |
nicklas |
@param executor Optional executor that can be used to execute queries in multiple threads |
6132 |
16 Feb 21 |
nicklas |
311 |
*/ |
6168 |
12 Mar 21 |
nicklas |
312 |
public void open(java.io.File pathPrefix, ExecutorService executor) |
6111 |
29 Jan 21 |
nicklas |
313 |
throws IOException |
6111 |
29 Jan 21 |
nicklas |
314 |
{ |
6138 |
18 Feb 21 |
nicklas |
315 |
logger.debug("Opening index: " + name + " (" + path + ")"); |
6138 |
18 Feb 21 |
nicklas |
316 |
|
6111 |
29 Jan 21 |
nicklas |
317 |
this.executor = executor; |
6168 |
12 Mar 21 |
nicklas |
318 |
this.pathPrefix = pathPrefix; |
6168 |
12 Mar 21 |
nicklas |
319 |
this.path = getExistingOrNewPath(pathPrefix); |
6169 |
15 Mar 21 |
nicklas |
320 |
this.directory = createIndexIfNeeded(path); |
6541 |
17 Jan 22 |
nicklas |
321 |
this.reader = createIndexReader(directory); |
6541 |
17 Jan 22 |
nicklas |
322 |
this.searcher = createIndexSearcher(reader); |
6541 |
17 Jan 22 |
nicklas |
323 |
this.analyzer = createAnalyzer(); |
6541 |
17 Jan 22 |
nicklas |
324 |
this.cache = createQueryCache(); |
6164 |
05 Mar 21 |
nicklas |
325 |
this.status = Status.IDLE; |
6605 |
23 Feb 22 |
nicklas |
326 |
autoSetQueryStatus(); |
6167 |
09 Mar 21 |
nicklas |
327 |
this.autoUpdateAction = AutoUpdate.DEFAULT; |
6111 |
29 Jan 21 |
nicklas |
328 |
} |
6111 |
29 Jan 21 |
nicklas |
329 |
|
6541 |
17 Jan 22 |
nicklas |
330 |
/** |
6545 |
21 Jan 22 |
nicklas |
Re-open the current index with new instances of IndexReader, IndexSearcher, etc. |
6545 |
21 Jan 22 |
nicklas |
This is required after the index has been updated with new documents or if |
6545 |
21 Jan 22 |
nicklas |
documents have been deleted. |
6545 |
21 Jan 22 |
nicklas |
334 |
*/ |
6545 |
21 Jan 22 |
nicklas |
335 |
protected void reOpen() |
6545 |
21 Jan 22 |
nicklas |
336 |
throws IOException |
6545 |
21 Jan 22 |
nicklas |
337 |
{ |
6545 |
21 Jan 22 |
nicklas |
338 |
if (isClosing()) return; |
6545 |
21 Jan 22 |
nicklas |
339 |
rwLock.writeLock().lock(); |
6545 |
21 Jan 22 |
nicklas |
340 |
try |
6545 |
21 Jan 22 |
nicklas |
341 |
{ |
6545 |
21 Jan 22 |
nicklas |
342 |
cache = createQueryCache(); |
6545 |
21 Jan 22 |
nicklas |
343 |
reader = createIndexReader(directory); |
6545 |
21 Jan 22 |
nicklas |
344 |
searcher = createIndexSearcher(reader); |
6545 |
21 Jan 22 |
nicklas |
345 |
} |
6545 |
21 Jan 22 |
nicklas |
346 |
finally |
6545 |
21 Jan 22 |
nicklas |
347 |
{ |
6545 |
21 Jan 22 |
nicklas |
348 |
rwLock.writeLock().unlock(); |
6545 |
21 Jan 22 |
nicklas |
349 |
} |
6545 |
21 Jan 22 |
nicklas |
350 |
} |
6545 |
21 Jan 22 |
nicklas |
351 |
|
6545 |
21 Jan 22 |
nicklas |
352 |
/** |
6541 |
17 Jan 22 |
nicklas |
Create a reader for reading documents and information from |
6541 |
17 Jan 22 |
nicklas |
the given index directory. |
6541 |
17 Jan 22 |
nicklas |
355 |
*/ |
6541 |
17 Jan 22 |
nicklas |
356 |
protected IndexReader createIndexReader(Directory directory) |
6541 |
17 Jan 22 |
nicklas |
357 |
throws IOException |
6541 |
17 Jan 22 |
nicklas |
358 |
{ |
6541 |
17 Jan 22 |
nicklas |
359 |
return DirectoryReader.open(directory); |
6541 |
17 Jan 22 |
nicklas |
360 |
} |
6541 |
17 Jan 22 |
nicklas |
361 |
|
6541 |
17 Jan 22 |
nicklas |
362 |
/** |
6541 |
17 Jan 22 |
nicklas |
Create a searcher for executing queries against the given index. |
6541 |
17 Jan 22 |
nicklas |
364 |
*/ |
6541 |
17 Jan 22 |
nicklas |
365 |
protected IndexSearcher createIndexSearcher(IndexReader reader) |
6541 |
17 Jan 22 |
nicklas |
366 |
{ |
6541 |
17 Jan 22 |
nicklas |
367 |
return new IndexSearcher(reader, executor); |
6541 |
17 Jan 22 |
nicklas |
368 |
} |
6541 |
17 Jan 22 |
nicklas |
369 |
|
6541 |
17 Jan 22 |
nicklas |
370 |
/** |
6545 |
21 Jan 22 |
nicklas |
Create a writer for adding documents to the index. |
6545 |
21 Jan 22 |
nicklas |
372 |
*/ |
6545 |
21 Jan 22 |
nicklas |
373 |
protected IndexWriter createIndexWriter(IndexWriterConfig config) |
6545 |
21 Jan 22 |
nicklas |
374 |
throws IOException |
6545 |
21 Jan 22 |
nicklas |
375 |
{ |
6545 |
21 Jan 22 |
nicklas |
376 |
return new IndexWriter(directory, config); |
6545 |
21 Jan 22 |
nicklas |
377 |
} |
6545 |
21 Jan 22 |
nicklas |
378 |
|
6545 |
21 Jan 22 |
nicklas |
379 |
/** |
6541 |
17 Jan 22 |
nicklas |
Create an Analyzer implementation that is used for analyzing |
6541 |
17 Jan 22 |
nicklas |
text that is going into the index. The analyzer is also used for |
6541 |
17 Jan 22 |
nicklas |
parsing query strings. |
6541 |
17 Jan 22 |
nicklas |
383 |
*/ |
6541 |
17 Jan 22 |
nicklas |
384 |
protected abstract Analyzer createAnalyzer(); |
6541 |
17 Jan 22 |
nicklas |
385 |
|
6541 |
17 Jan 22 |
nicklas |
386 |
/** |
6541 |
17 Jan 22 |
nicklas |
Create a cache for storing results from queries that takes |
6541 |
17 Jan 22 |
nicklas |
a long time to execute. |
6541 |
17 Jan 22 |
nicklas |
389 |
*/ |
6541 |
17 Jan 22 |
nicklas |
390 |
protected QueryCache createQueryCache() |
6541 |
17 Jan 22 |
nicklas |
391 |
{ |
6541 |
17 Jan 22 |
nicklas |
// TODO -- add support for implementations that return null |
6541 |
17 Jan 22 |
nicklas |
// and implement a setting for what is considered a long time |
6541 |
17 Jan 22 |
nicklas |
394 |
return new QueryCache(60); // 1 hour |
6541 |
17 Jan 22 |
nicklas |
395 |
} |
6541 |
17 Jan 22 |
nicklas |
396 |
|
6111 |
29 Jan 21 |
nicklas |
397 |
@Override |
6111 |
29 Jan 21 |
nicklas |
398 |
public void close() |
6111 |
29 Jan 21 |
nicklas |
399 |
{ |
6138 |
18 Feb 21 |
nicklas |
400 |
logger.debug("Closing index: " + name); |
6241 |
21 May 21 |
nicklas |
401 |
if (status != Status.IDLE && status != Status.DISABLED) |
6241 |
21 May 21 |
nicklas |
402 |
{ |
6241 |
21 May 21 |
nicklas |
// There are other threads updating things |
6241 |
21 May 21 |
nicklas |
// we need to signal them to abort |
6241 |
21 May 21 |
nicklas |
405 |
logger.debug("Need to wait before closing: Status="+status); |
6241 |
21 May 21 |
nicklas |
406 |
abort = new CountDownLatch(1); |
6241 |
21 May 21 |
nicklas |
407 |
boolean aborted = false; |
6241 |
21 May 21 |
nicklas |
408 |
try |
6241 |
21 May 21 |
nicklas |
409 |
{ |
6241 |
21 May 21 |
nicklas |
// Wait at most 60 seconds, returns TRUE if aborted successfully, FALSE if timeout |
6241 |
21 May 21 |
nicklas |
411 |
aborted = abort.await(60, TimeUnit.SECONDS); |
6241 |
21 May 21 |
nicklas |
412 |
} |
6241 |
21 May 21 |
nicklas |
413 |
catch (InterruptedException ex) |
6241 |
21 May 21 |
nicklas |
414 |
{} |
6241 |
21 May 21 |
nicklas |
415 |
logger.debug("Proceeding with close: Status="+status+"; aborted="+aborted); |
6241 |
21 May 21 |
nicklas |
416 |
abort = null; |
6241 |
21 May 21 |
nicklas |
417 |
} |
6111 |
29 Jan 21 |
nicklas |
418 |
if (executor != null) |
6111 |
29 Jan 21 |
nicklas |
419 |
{ |
6111 |
29 Jan 21 |
nicklas |
420 |
executor.shutdown(); |
6111 |
29 Jan 21 |
nicklas |
421 |
executor = null; |
6111 |
29 Jan 21 |
nicklas |
422 |
} |
6241 |
21 May 21 |
nicklas |
423 |
status = Status.DISABLED; |
6544 |
19 Jan 22 |
nicklas |
424 |
queryStatus = Status.DISABLED; |
6111 |
29 Jan 21 |
nicklas |
425 |
FileUtil.close(reader); |
6111 |
29 Jan 21 |
nicklas |
426 |
FileUtil.close(directory); |
6111 |
29 Jan 21 |
nicklas |
427 |
searcher = null; |
6111 |
29 Jan 21 |
nicklas |
428 |
reader = null; |
6114 |
04 Feb 21 |
nicklas |
429 |
analyzer = null; |
6137 |
18 Feb 21 |
nicklas |
430 |
cache = null; |
6111 |
29 Jan 21 |
nicklas |
431 |
} |
6111 |
29 Jan 21 |
nicklas |
432 |
|
6132 |
16 Feb 21 |
nicklas |
433 |
/** |
6241 |
21 May 21 |
nicklas |
Check if this index is in the process of being closed. |
6241 |
21 May 21 |
nicklas |
435 |
*/ |
6241 |
21 May 21 |
nicklas |
436 |
public boolean isClosing() |
6241 |
21 May 21 |
nicklas |
437 |
{ |
6241 |
21 May 21 |
nicklas |
438 |
return abort != null; |
6241 |
21 May 21 |
nicklas |
439 |
} |
6241 |
21 May 21 |
nicklas |
440 |
|
6241 |
21 May 21 |
nicklas |
441 |
/** |
6241 |
21 May 21 |
nicklas |
Release the lock that is preventing the close() method from |
6241 |
21 May 21 |
nicklas |
shutting down. |
6241 |
21 May 21 |
nicklas |
444 |
*/ |
6241 |
21 May 21 |
nicklas |
445 |
public void releaseCloseWait() |
6241 |
21 May 21 |
nicklas |
446 |
{ |
6241 |
21 May 21 |
nicklas |
447 |
if (abort != null) |
6241 |
21 May 21 |
nicklas |
448 |
{ |
6241 |
21 May 21 |
nicklas |
449 |
logger.debug("Releasing close wait latch"); |
6241 |
21 May 21 |
nicklas |
450 |
abort.countDown(); |
6241 |
21 May 21 |
nicklas |
451 |
} |
6241 |
21 May 21 |
nicklas |
452 |
} |
6241 |
21 May 21 |
nicklas |
453 |
|
6241 |
21 May 21 |
nicklas |
454 |
/** |
6168 |
12 Mar 21 |
nicklas |
Search for a directory that starts with the given prefix. |
6168 |
12 Mar 21 |
nicklas |
If no directory exists, a new one is created with '.1' |
6168 |
12 Mar 21 |
nicklas |
added to the prefix. |
6168 |
12 Mar 21 |
nicklas |
458 |
*/ |
6545 |
21 Jan 22 |
nicklas |
459 |
protected java.io.File getExistingOrNewPath(java.io.File pathPrefix) |
6168 |
12 Mar 21 |
nicklas |
460 |
throws IOException |
6168 |
12 Mar 21 |
nicklas |
461 |
{ |
6168 |
12 Mar 21 |
nicklas |
462 |
java.io.File dir = pathPrefix.getParentFile(); |
6168 |
12 Mar 21 |
nicklas |
463 |
String prefix = pathPrefix.getName(); |
6168 |
12 Mar 21 |
nicklas |
464 |
if (dir.exists()) |
6168 |
12 Mar 21 |
nicklas |
465 |
{ |
6168 |
12 Mar 21 |
nicklas |
466 |
for (java.io.File subDir : dir.listFiles()) |
6168 |
12 Mar 21 |
nicklas |
467 |
{ |
6168 |
12 Mar 21 |
nicklas |
468 |
if (subDir.isDirectory() && subDir.getName().startsWith(prefix)) |
6168 |
12 Mar 21 |
nicklas |
469 |
{ |
6168 |
12 Mar 21 |
nicklas |
470 |
return subDir; |
6168 |
12 Mar 21 |
nicklas |
471 |
} |
6168 |
12 Mar 21 |
nicklas |
472 |
} |
6168 |
12 Mar 21 |
nicklas |
473 |
} |
6241 |
21 May 21 |
nicklas |
474 |
java.io.File subDir = new java.io.File(dir, prefix+".1"); |
6545 |
21 Jan 22 |
nicklas |
475 |
if (subDir.mkdirs() && !subDir.exists()) |
6168 |
12 Mar 21 |
nicklas |
476 |
{ |
6241 |
21 May 21 |
nicklas |
477 |
throw new IOException("Could not create: "+ subDir.getAbsolutePath()); |
6168 |
12 Mar 21 |
nicklas |
478 |
} |
6241 |
21 May 21 |
nicklas |
479 |
return subDir; |
6168 |
12 Mar 21 |
nicklas |
480 |
} |
6168 |
12 Mar 21 |
nicklas |
481 |
|
6168 |
12 Mar 21 |
nicklas |
482 |
/** |
6168 |
12 Mar 21 |
nicklas |
Create a new path with the given prefix. A numeric suffix |
6168 |
12 Mar 21 |
nicklas |
is added ('.1', '.2', and so on) until a non-existing path |
6168 |
12 Mar 21 |
nicklas |
is found, in which case the directory is created. |
6168 |
12 Mar 21 |
nicklas |
486 |
*/ |
6545 |
21 Jan 22 |
nicklas |
487 |
protected java.io.File getNewPath(java.io.File pathPrefix) |
6168 |
12 Mar 21 |
nicklas |
488 |
throws IOException |
6168 |
12 Mar 21 |
nicklas |
489 |
{ |
6168 |
12 Mar 21 |
nicklas |
490 |
int suffix = 1; |
6168 |
12 Mar 21 |
nicklas |
491 |
java.io.File dir = pathPrefix.getParentFile(); |
6168 |
12 Mar 21 |
nicklas |
492 |
String prefix = pathPrefix.getName() + "."; |
6168 |
12 Mar 21 |
nicklas |
493 |
while (true && suffix < 100) |
6168 |
12 Mar 21 |
nicklas |
494 |
{ |
6168 |
12 Mar 21 |
nicklas |
495 |
java.io.File path = new java.io.File(dir, prefix+suffix); |
6168 |
12 Mar 21 |
nicklas |
496 |
if (!path.exists()) |
6168 |
12 Mar 21 |
nicklas |
497 |
{ |
6168 |
12 Mar 21 |
nicklas |
498 |
if (!path.mkdirs()) |
6168 |
12 Mar 21 |
nicklas |
499 |
{ |
6168 |
12 Mar 21 |
nicklas |
500 |
throw new IOException("Could not create: "+ path.getAbsolutePath()); |
6168 |
12 Mar 21 |
nicklas |
501 |
} |
6168 |
12 Mar 21 |
nicklas |
502 |
return path; |
6168 |
12 Mar 21 |
nicklas |
503 |
} |
6168 |
12 Mar 21 |
nicklas |
504 |
suffix++; |
6168 |
12 Mar 21 |
nicklas |
505 |
} |
6168 |
12 Mar 21 |
nicklas |
// Should not really happen! |
6168 |
12 Mar 21 |
nicklas |
507 |
throw new IOException("Could not create new path for prefix: "+ pathPrefix); |
6168 |
12 Mar 21 |
nicklas |
508 |
} |
6168 |
12 Mar 21 |
nicklas |
509 |
|
6169 |
15 Mar 21 |
nicklas |
510 |
/** |
6169 |
15 Mar 21 |
nicklas |
Create an new index database in the given directory unless an |
6169 |
15 Mar 21 |
nicklas |
index exists already. |
6169 |
15 Mar 21 |
nicklas |
513 |
*/ |
6545 |
21 Jan 22 |
nicklas |
514 |
protected Directory createIndexIfNeeded(java.io.File indexDir) |
6169 |
15 Mar 21 |
nicklas |
515 |
throws IOException |
6169 |
15 Mar 21 |
nicklas |
516 |
{ |
6169 |
15 Mar 21 |
nicklas |
517 |
if (!indexDir.isDirectory()) |
6169 |
15 Mar 21 |
nicklas |
518 |
{ |
6169 |
15 Mar 21 |
nicklas |
519 |
throw new IOException("Directory doesn't exists: " + indexDir); |
6169 |
15 Mar 21 |
nicklas |
520 |
} |
6169 |
15 Mar 21 |
nicklas |
521 |
Directory dir = new NIOFSDirectory(indexDir.toPath()); |
6169 |
15 Mar 21 |
nicklas |
522 |
if (!DirectoryReader.indexExists(dir)) |
6169 |
15 Mar 21 |
nicklas |
523 |
{ |
6169 |
15 Mar 21 |
nicklas |
524 |
logger.debug("Creating index: " + name + " (" + indexDir + ")"); |
6169 |
15 Mar 21 |
nicklas |
525 |
IndexWriter create = new IndexWriter(dir, new IndexWriterConfig().setOpenMode(OpenMode.CREATE)); |
6169 |
15 Mar 21 |
nicklas |
526 |
create.close(); |
6169 |
15 Mar 21 |
nicklas |
527 |
} |
6169 |
15 Mar 21 |
nicklas |
528 |
return dir; |
6169 |
15 Mar 21 |
nicklas |
529 |
} |
6169 |
15 Mar 21 |
nicklas |
530 |
|
6168 |
12 Mar 21 |
nicklas |
531 |
/**
  Add new RawBioassays to the index. Indexing is automatically
  using multiple threads; the number of merge threads is taken from
  getNumThreads(1, 8).

  @param dc DbControl that is passed on to the indexing step; its session
    control is also used for creating a Pinger while the writer is closing
  @param rawBioAssays The raw bioassays to add to the index
  @param p Progress reporter that is wrapped in a SimpleProgressReporter
    for the duration of the call
  @return The list returned by the indexing step, or null if the index is
    disabled or if anything failed (the error is registered with setError()
    and the writer is rolled back)
*/
public List<RawBioAssay> addToIndex(DbControl dc, Collection<RawBioAssay> rawBioAssays, ProgressReporter p)
{
  if (status == Status.DISABLED) return null;
  logger.debug("Adding " + rawBioAssays.size() + " rawbioassays to index: " + name);

  extendedWaitUntil = 0; // Reset waiting time
  IndexWriter writer = null;
  List<RawBioAssay> processed = null;
  try
  {
    this.progress = new SimpleProgressReporter(p);
    this.status = Status.INDEXING;
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    config.setOpenMode(OpenMode.CREATE_OR_APPEND);

    // Create a multi-threaded MergeScheduler with 1-8 threads
    int numMergeThreads = getNumThreads(1, 8);
    ConcurrentMergeScheduler mergeSceduler = new ConcurrentMergeScheduler();
    mergeSceduler.setMaxMergesAndThreads(numMergeThreads*2, numMergeThreads);
    config.setMergeScheduler(mergeSceduler);

    // Create a MergePolicy for 1GB segments
    TieredMergePolicy mergePolicy = new TieredMergePolicy();
    mergePolicy.setMaxMergedSegmentMB(1000); // The default is 5000, but a lower value will be faster
    config.setMergePolicy(mergePolicy);

    writer = createIndexWriter(config);
    processed = addToIndex(dc, rawBioAssays, writer);

    // Abort before committing if the index started to close while indexing;
    // the exception is handled by the catch-block below (rollback)
    if (isClosing()) throw new RuntimeException("Index is closing");

    logger.debug("Committing index to disk ("+name+")");
    progress.display(98, "Committing changes...");
    writer.commit();
    logger.debug("Commit completed ("+name+")");
    progress.display(99, "Commit complete. Cleaning up...");

    reOpen();
  }
  catch (Exception ex)
  {
    // Register the failure, report nothing as processed and undo uncommitted changes
    setError(new RuntimeException("Indexing failed: "+ex.getMessage(), ex));
    logger.error("Indexing failed ("+name+")", ex);
    processed = null;
    if (writer != null)
    {
      try
      {
        writer.rollback();
      }
      catch (Exception ex2)
      {
        logger.warn("Exception during rollback", ex2);
      }
    }
  }
  finally
  {
    logger.debug("Closing writer ("+name+")");
    // This can take several minutes after commit() as noted in the Lucene API docs
    // It is bad if we call LuceneIndex.close() while waiting here since
    // that will close the directory which leaves a lock file that needs to be removed
    // before we can start-up again
    // TODO - It would be nice if this could be solved, though I don't know how...
    // The pinger is active only while the writer is closing
    // (presumably to keep the session alive -- TODO confirm)
    Pinger pinger = Application.newPinger(dc.getSessionControl());
    FileUtil.close(writer);
    pinger.stop();
    logger.debug("Closed writer ("+name+")");
    this.status = Status.IDLE;
    this.progress = null;
  }
  return processed;
}
6168 |
12 Mar 21 |
nicklas |
608 |
|
6241 |
21 May 21 |
nicklas |
609 |
/** |
6241 |
21 May 21 |
nicklas |
Get number of threads to use.The implementation will try to use |
6241 |
21 May 21 |
nicklas |
half of the available processors but not more than maxThreads |
6241 |
21 May 21 |
nicklas |
and not less than minThreads. |
6241 |
21 May 21 |
nicklas |
613 |
*/ |
6545 |
21 Jan 22 |
nicklas |
614 |
protected int getNumThreads(int minThreads, int maxThreads) |
6241 |
21 May 21 |
nicklas |
615 |
{ |
6241 |
21 May 21 |
nicklas |
616 |
int numProc = Runtime.getRuntime().availableProcessors(); |
6241 |
21 May 21 |
nicklas |
617 |
int numThreads = Math.min(maxThreads, Math.max(1, numProc / 2)); |
6241 |
21 May 21 |
nicklas |
618 |
if (SLOW_MODE && numThreads > 2) numThreads = 2; |
6241 |
21 May 21 |
nicklas |
619 |
if (numThreads < minThreads) numThreads = minThreads; |
6241 |
21 May 21 |
nicklas |
620 |
return numThreads; |
6241 |
21 May 21 |
nicklas |
621 |
} |
6241 |
21 May 21 |
nicklas |
622 |
|
6241 |
21 May 21 |
nicklas |
623 |
/** |
6241 |
21 May 21 |
nicklas |
Index the given collection of raw bioassays. |
6241 |
21 May 21 |
nicklas |
@return The raw bioassays that was processed. Note that all may not have been |
6241 |
21 May 21 |
nicklas |
indexed due to missing VCF file or other issues |
6241 |
21 May 21 |
nicklas |
627 |
*/ |
6241 |
21 May 21 |
nicklas |
628 |
private List<RawBioAssay> addToIndex(DbControl dc, Collection<RawBioAssay> rawBioAssays, IndexWriter writer) |
6168 |
12 Mar 21 |
nicklas |
629 |
throws IOException, InterruptedException |
6168 |
12 Mar 21 |
nicklas |
630 |
{ |
6168 |
12 Mar 21 |
nicklas |
631 |
ExecutorService threadPool = null; |
6168 |
12 Mar 21 |
nicklas |
632 |
int numRba = rawBioAssays.size(); |
6241 |
21 May 21 |
nicklas |
633 |
int numThreads = getNumThreads(1, 8); |
6168 |
12 Mar 21 |
nicklas |
634 |
|
6168 |
12 Mar 21 |
nicklas |
635 |
progress.display(1, "Indexing... (0 of " + numRba + "; " + numThreads + " threads)"); |
6241 |
21 May 21 |
nicklas |
636 |
List<RawBioAssay> processedRba = new ArrayList<>(numRba); |
7074 |
24 Mar 23 |
nicklas |
637 |
|
7074 |
24 Mar 23 |
nicklas |
638 |
logger.info("Adding " + numRba + " raw bioassays to index; Using " + numThreads + " threads: "+name); |
6168 |
12 Mar 21 |
nicklas |
639 |
try |
6168 |
12 Mar 21 |
nicklas |
640 |
{ |
6168 |
12 Mar 21 |
nicklas |
641 |
ThreadFactory threadFactory = new IndexThreadFactory(id); |
6168 |
12 Mar 21 |
nicklas |
642 |
threadPool = Executors.newFixedThreadPool(numThreads, threadFactory); |
6541 |
17 Jan 22 |
nicklas |
643 |
ExecutorCompletionService<Indexer> executor = new ExecutorCompletionService<>(threadPool); |
6168 |
12 Mar 21 |
nicklas |
644 |
|
6168 |
12 Mar 21 |
nicklas |
645 |
numRba = 0; |
6541 |
17 Jan 22 |
nicklas |
646 |
int allRba = rawBioAssays.size(); |
6168 |
12 Mar 21 |
nicklas |
647 |
for (RawBioAssay rba : rawBioAssays) |
6168 |
12 Mar 21 |
nicklas |
648 |
{ |
6528 |
20 Dec 21 |
nicklas |
649 |
List<VcfFile> vcfFiles = vcfLocator.getVcfFiles(dc, rba); |
6375 |
07 Sep 21 |
nicklas |
650 |
if (vcfFiles != null && vcfFiles.size() > 0) |
6112 |
02 Feb 21 |
nicklas |
651 |
{ |
6112 |
02 Feb 21 |
nicklas |
652 |
numRba++; |
6541 |
17 Jan 22 |
nicklas |
653 |
executor.submit(createIndexer(writer, numRba, rba, vcfFiles)); |
6531 |
20 Dec 21 |
nicklas |
654 |
if (numRba % 100 == 0) |
6531 |
20 Dec 21 |
nicklas |
655 |
{ |
6531 |
20 Dec 21 |
nicklas |
656 |
progress.display(1+(10*numRba)/allRba, "Submitting... (" + numRba+ " of " + allRba + ")"); |
6531 |
20 Dec 21 |
nicklas |
657 |
} |
6112 |
02 Feb 21 |
nicklas |
658 |
} |
6241 |
21 May 21 |
nicklas |
659 |
else |
6241 |
21 May 21 |
nicklas |
660 |
{ |
6241 |
21 May 21 |
nicklas |
661 |
processedRba.add(rba); |
6241 |
21 May 21 |
nicklas |
662 |
} |
6112 |
02 Feb 21 |
nicklas |
663 |
} |
6112 |
02 Feb 21 |
nicklas |
664 |
|
6112 |
02 Feb 21 |
nicklas |
665 |
logger.debug("Submitted " + numRba + " rawbioassays for indexing ("+name+")"); |
6112 |
02 Feb 21 |
nicklas |
666 |
|
6112 |
02 Feb 21 |
nicklas |
667 |
int numVariants = 0; |
6377 |
08 Sep 21 |
nicklas |
668 |
int numGenotypes = 0; |
6135 |
17 Feb 21 |
nicklas |
669 |
int numIndexed = 0; |
6135 |
17 Feb 21 |
nicklas |
670 |
int numFailed = 0; |
6241 |
21 May 21 |
nicklas |
671 |
int numAborted = 0; |
6541 |
17 Jan 22 |
nicklas |
672 |
|
6112 |
02 Feb 21 |
nicklas |
673 |
for (int i = 0; i < numRba; i++) |
6112 |
02 Feb 21 |
nicklas |
674 |
{ |
6541 |
17 Jan 22 |
nicklas |
675 |
Future<Indexer> result = executor.take(); |
6234 |
17 May 21 |
nicklas |
676 |
dc.getSessionControl().updateLastAccess(); // To avoid session timeout |
6241 |
21 May 21 |
nicklas |
677 |
if (result != null && !result.isCancelled()) |
6135 |
17 Feb 21 |
nicklas |
678 |
{ |
6135 |
17 Feb 21 |
nicklas |
679 |
try |
6135 |
17 Feb 21 |
nicklas |
680 |
{ |
6541 |
17 Jan 22 |
nicklas |
681 |
Indexer indexer = result.get(); |
6241 |
21 May 21 |
nicklas |
682 |
if (!indexer.wasAborted()) |
6241 |
21 May 21 |
nicklas |
683 |
{ |
6241 |
21 May 21 |
nicklas |
684 |
numVariants += indexer.getNumVariants(); |
6377 |
08 Sep 21 |
nicklas |
685 |
numGenotypes += indexer.getNumGenotypes(); |
6241 |
21 May 21 |
nicklas |
686 |
processedRba.add(indexer.getRawBioAssay()); |
6241 |
21 May 21 |
nicklas |
687 |
numIndexed++; |
6531 |
20 Dec 21 |
nicklas |
688 |
progress.display(11+(85*numIndexed)/numRba, "Indexing... (" + numIndexed+ " of " + numRba + "; " + numThreads + " threads)"); |
6241 |
21 May 21 |
nicklas |
689 |
} |
6241 |
21 May 21 |
nicklas |
690 |
else |
6241 |
21 May 21 |
nicklas |
691 |
{ |
6241 |
21 May 21 |
nicklas |
692 |
numAborted++; |
6241 |
21 May 21 |
nicklas |
693 |
} |
6135 |
17 Feb 21 |
nicklas |
694 |
} |
6135 |
17 Feb 21 |
nicklas |
695 |
catch (Exception ex) |
6135 |
17 Feb 21 |
nicklas |
696 |
{ |
6135 |
17 Feb 21 |
nicklas |
// TODO -- we should keep track of this in some way |
6135 |
17 Feb 21 |
nicklas |
// so that an admin may take a look at it |
6135 |
17 Feb 21 |
nicklas |
699 |
numFailed++; |
6135 |
17 Feb 21 |
nicklas |
700 |
} |
6135 |
17 Feb 21 |
nicklas |
701 |
} |
6112 |
02 Feb 21 |
nicklas |
702 |
} |
7074 |
24 Mar 23 |
nicklas |
703 |
logger.info("Indexing complete, " +numIndexed + " rawbioassays (" + |
6241 |
21 May 21 |
nicklas |
704 |
numFailed + " failed, " + numAborted + " aborted); " + |
6377 |
08 Sep 21 |
nicklas |
705 |
numVariants + " variants; " + numGenotypes + " genotypes ("+name+")"); |
6168 |
12 Mar 21 |
nicklas |
706 |
} |
6168 |
12 Mar 21 |
nicklas |
707 |
finally |
6168 |
12 Mar 21 |
nicklas |
708 |
{ |
6168 |
12 Mar 21 |
nicklas |
709 |
if (threadPool != null) threadPool.shutdown(); |
6168 |
12 Mar 21 |
nicklas |
710 |
} |
6241 |
21 May 21 |
nicklas |
711 |
return processedRba; |
6168 |
12 Mar 21 |
nicklas |
712 |
} |
6168 |
12 Mar 21 |
nicklas |
713 |
|
6168 |
12 Mar 21 |
nicklas |
714 |
/**
  Create an Indexer implementation that knows how to index the information
  in the given raw bioassay. The returned indexer is submitted to the
  thread pool used by addToIndex().

  @param writer The index writer that is in use for the current indexing run
  @param num Running number (starting at 1) of the raw bioassays that are
    submitted for indexing in the current run
  @param rba The raw bioassay to index
  @param vcfFiles The VCF files located for the raw bioassay; addToIndex()
    only calls this method with a non-empty list
*/
protected abstract Indexer createIndexer(IndexWriter writer, int num, RawBioAssay rba, List<VcfFile> vcfFiles);
6541 |
17 Jan 22 |
nicklas |
719 |
|
6541 |
17 Jan 22 |
nicklas |
720 |
/** |
6168 |
12 Mar 21 |
nicklas |
Remove RawBioassays from the index. |
6168 |
12 Mar 21 |
nicklas |
722 |
*/ |
6168 |
12 Mar 21 |
nicklas |
723 |
public void removeFromIndex(DbControl dc, Collection<Integer> rawBioAssays, ProgressReporter p) |
6168 |
12 Mar 21 |
nicklas |
724 |
{ |
6241 |
21 May 21 |
nicklas |
725 |
if (status == Status.DISABLED) return; |
7074 |
24 Mar 23 |
nicklas |
726 |
logger.info("Removing " + rawBioAssays.size() + " rawbioassays from index: " + name); |
6168 |
12 Mar 21 |
nicklas |
727 |
|
6168 |
12 Mar 21 |
nicklas |
728 |
IndexWriter writer = null; |
6168 |
12 Mar 21 |
nicklas |
729 |
try |
6168 |
12 Mar 21 |
nicklas |
730 |
{ |
6168 |
12 Mar 21 |
nicklas |
731 |
this.progress = new SimpleProgressReporter(p); |
6168 |
12 Mar 21 |
nicklas |
732 |
this.status = Status.REMOVING; |
6168 |
12 Mar 21 |
nicklas |
733 |
IndexWriterConfig config = new IndexWriterConfig(analyzer); |
6168 |
12 Mar 21 |
nicklas |
734 |
config.setOpenMode(OpenMode.CREATE_OR_APPEND); |
6545 |
21 Jan 22 |
nicklas |
735 |
writer = createIndexWriter(config); |
6168 |
12 Mar 21 |
nicklas |
736 |
|
6168 |
12 Mar 21 |
nicklas |
737 |
int numRba = rawBioAssays.size(); |
6168 |
12 Mar 21 |
nicklas |
738 |
int numRemoved = 0; |
6168 |
12 Mar 21 |
nicklas |
739 |
|
6168 |
12 Mar 21 |
nicklas |
740 |
for (int rbaId : rawBioAssays) |
6168 |
12 Mar 21 |
nicklas |
741 |
{ |
6168 |
12 Mar 21 |
nicklas |
// delete existing information about this raw bioassay id |
6541 |
17 Jan 22 |
nicklas |
// TODO -- maybe this should also be in the subclasses |
6168 |
12 Mar 21 |
nicklas |
744 |
writer.deleteDocuments(IntPoint.newExactQuery("rbaId", rbaId)); |
6168 |
12 Mar 21 |
nicklas |
745 |
writer.deleteDocuments(IntPoint.newExactQuery("mainId", rbaId)); |
6168 |
12 Mar 21 |
nicklas |
746 |
numRemoved++; |
6168 |
12 Mar 21 |
nicklas |
747 |
if (SLOW_MODE) Thread.sleep(5); |
6168 |
12 Mar 21 |
nicklas |
748 |
progress.display(100*numRemoved/numRba, "Removing... (" + numRemoved+ " of " + numRba+")"); |
6241 |
21 May 21 |
nicklas |
749 |
if (isClosing()) break; |
6168 |
12 Mar 21 |
nicklas |
750 |
} |
6168 |
12 Mar 21 |
nicklas |
751 |
|
6168 |
12 Mar 21 |
nicklas |
752 |
logger.debug("Removed " + numRemoved + " variants ("+name+")"); |
6112 |
02 Feb 21 |
nicklas |
753 |
logger.debug("Committing index to disk ("+name+")"); |
6164 |
05 Mar 21 |
nicklas |
754 |
progress.display(100, "Comitting changes..."); |
6112 |
02 Feb 21 |
nicklas |
755 |
writer.commit(); |
6164 |
05 Mar 21 |
nicklas |
756 |
|
7074 |
24 Mar 23 |
nicklas |
757 |
logger.info("Indexing complete, " +numRemoved + " rawbioassays removed"); |
6112 |
02 Feb 21 |
nicklas |
758 |
|
6545 |
21 Jan 22 |
nicklas |
759 |
reOpen(); |
6112 |
02 Feb 21 |
nicklas |
760 |
} |
6112 |
02 Feb 21 |
nicklas |
761 |
catch (Exception ex) |
6112 |
02 Feb 21 |
nicklas |
762 |
{ |
6556 |
28 Jan 22 |
nicklas |
763 |
setError(new RuntimeException("Remove failed: "+ex.getMessage(), ex)); |
6556 |
28 Jan 22 |
nicklas |
764 |
logger.error("Remove failed ("+name+")", ex); |
6112 |
02 Feb 21 |
nicklas |
765 |
if (writer != null) |
6112 |
02 Feb 21 |
nicklas |
766 |
{ |
6112 |
02 Feb 21 |
nicklas |
767 |
try |
6112 |
02 Feb 21 |
nicklas |
768 |
{ |
6112 |
02 Feb 21 |
nicklas |
769 |
writer.rollback(); |
6112 |
02 Feb 21 |
nicklas |
770 |
} |
6112 |
02 Feb 21 |
nicklas |
771 |
catch (Exception ex2) |
6112 |
02 Feb 21 |
nicklas |
772 |
{ |
6112 |
02 Feb 21 |
nicklas |
773 |
logger.warn("Exception during rollback", ex2); |
6112 |
02 Feb 21 |
nicklas |
774 |
} |
6112 |
02 Feb 21 |
nicklas |
775 |
} |
6112 |
02 Feb 21 |
nicklas |
776 |
} |
6112 |
02 Feb 21 |
nicklas |
777 |
finally |
6112 |
02 Feb 21 |
nicklas |
778 |
{ |
6112 |
02 Feb 21 |
nicklas |
779 |
FileUtil.close(writer); |
6164 |
05 Mar 21 |
nicklas |
780 |
this.status = Status.IDLE; |
6164 |
05 Mar 21 |
nicklas |
781 |
this.progress = null; |
6112 |
02 Feb 21 |
nicklas |
782 |
} |
6112 |
02 Feb 21 |
nicklas |
783 |
} |
6168 |
12 Mar 21 |
nicklas |
784 |
|
6167 |
09 Mar 21 |
nicklas |
785 |
/**
  Delete the index and create a new empty index. The empty index is
  created in a new directory first. After that, the reader, searcher and
  query cache are switched to the new index while holding the write lock.
  Finally, the directory of the old index is deleted. Nothing is done if
  the index is disabled. A failure is registered with setError().
*/
public void deleteIndex()
{
  if (status == Status.DISABLED) return;
  logger.debug("Deleting index: " + name);

  Directory newDir = null;
  // A directory that we need to get rid of at the end -- either the old index, or the new directory (if something failed)
  java.io.File toDelete = null;
  boolean failed = false;
  try
  {
    this.status = Status.REMOVING;

    // Create a new empty index
    java.io.File newPath = getNewPath(pathPrefix);
    toDelete = newPath; // If we fail after this, the newPath should be removed
    newDir = createIndexIfNeeded(newPath);
    FileUtil.close(newDir);

    // NOTE(review): toDelete points at the old index from here on, also if the
    // switch below throws; in that case the finally-block deletes the old index
    // directory (and logs it as the "new" index) -- confirm that this is intended
    toDelete = path; // The new index was created, it is now safe to delete the old one
    rwLock.writeLock().lock();
    try
    {
      logger.debug("Switching to new index ("+name+"): " + newDir);
      // Close the old index before opening the new one
      FileUtil.close(reader);
      FileUtil.close(directory);

      path = newPath;
      directory = new NIOFSDirectory(path.toPath());
      reader = createIndexReader(directory);
      searcher = createIndexSearcher(reader);
      cache = createQueryCache();
    }
    finally
    {
      rwLock.writeLock().unlock();
    }

  }
  catch (Exception ex)
  {
    setError(new RuntimeException("Delete index failed: "+ex.getMessage(), ex));
    failed = true;
    logger.error("Delete index failed ("+name+")", ex);
  }
  finally
  {
    // newDir has already been closed on the normal path; this call covers early failures
    FileUtil.close(newDir);
    if (toDelete != null)
    {
      logger.debug("Deleting "+(failed?"new":"old")+" index ("+name+"): " + toDelete);
      // The return value is not used; the exists() check below decides what is logged
      int numDeleted = FileUtil.deleteTempDirectory(toDelete);
      if (toDelete.exists())
      {
        logger.warn((failed?"New":"Old")+" index could not be deleted ("+name+"): " + toDelete);
      }
      else
      {
        logger.debug((failed?"New":"Old")+" index deleted ("+name+"): " + toDelete);
      }
    }
    this.status = Status.IDLE;
  }

}
6169 |
15 Mar 21 |
nicklas |
853 |
|
6544 |
19 Jan 22 |
nicklas |
854 |
/**
  Perform a custom action on the index. The default
  implementation does nothing.

  @param customAction Identifies the action to perform; ignored by
    the default implementation
  @since 1.5
*/
public void doCustomAction(String customAction)
{}
6169 |
15 Mar 21 |
nicklas |
861 |
|
6169 |
15 Mar 21 |
nicklas |
862 |
/**
  Fully rebuild the index by re-indexing the VCF files
  again. The old index will be usable while the rebuilding is
  going on. This method should be called from a separate thread.
  <p>
  The raw bioassays that are currently indexed (and not flagged as removed)
  are loaded and indexed into a new directory. After a successful commit
  the reader, searcher and query cache are switched to the new index while
  holding the write lock, and the directory of the old index is deleted. If
  something fails, the error is registered with setError() and the writer
  is rolled back. Nothing is done if the index is disabled or if there
  are no raw bioassays to re-index.

  @param dc DbControl used for loading the raw bioassays and for indexing
  @param p Progress reporter that is wrapped in a SimpleProgressReporter
    for the duration of the call
*/
public void fullRebuild(DbControl dc, ProgressReporter p)
{
  if (status == Status.DISABLED) return;
  logger.info("Full rebuild of index: " + name);

  IndexWriter writer = null;
  Directory rebuildDir = null;
  // A directory that we need to get rid of at the end -- either the old index, or the rebuild-directory (if something failed)
  java.io.File toDelete = null;
  boolean failed = false;
  try
  {
    this.progress = new SimpleProgressReporter(p);
    this.status = Status.REBUILDING;

    // Load the raw bioassays that are in the index today, skipping those flagged as removed
    ItemQuery<RawBioAssay> rbaQuery = RawBioAssay.getQuery();
    rbaQuery.setIncludes(Include.ALL);
    rbaQuery.exclude(Include.REMOVED);
    rbaQuery.restrict(new IdListRestriction(getIndexedRawBioAssays()));
    //rbaQuery.setMaxResults(2000);
    List<RawBioAssay> existing = rbaQuery.list(dc);
    // Nothing to rebuild; the finally-block still resets status and progress
    if (existing.size() == 0) return;

    java.io.File rebuildPath = getNewPath(pathPrefix);
    toDelete = rebuildPath; // If we fail after this, the rebuild directory should be removed
    rebuildDir = new NIOFSDirectory(rebuildPath.toPath());
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    config.setOpenMode(OpenMode.CREATE_OR_APPEND);

    // Create a multi-threaded MergeScheduler with 1-8 threads
    int numMergeThreads = getNumThreads(1, 8);
    ConcurrentMergeScheduler mergeSceduler = new ConcurrentMergeScheduler();
    mergeSceduler.setMaxMergesAndThreads(numMergeThreads*2, numMergeThreads);
    config.setMergeScheduler(mergeSceduler);

    // Create a MergePolicy for default 5GB segments
    TieredMergePolicy mergePolicy = new TieredMergePolicy();
    config.setMergePolicy(mergePolicy);

    writer = new IndexWriter(rebuildDir, config);

    // The returned list is not used here
    List<RawBioAssay> indexed = addToIndex(dc, existing, writer);
    // Abort before committing if the index started to close while indexing;
    // the exception is handled by the catch-block below (rollback)
    if (isClosing()) throw new RuntimeException("Index is closing");

    logger.debug("Committing index to disk: "+name);
    progress.display(98, "Comitting changes...");
    writer.commit();
    logger.debug("Commit completed: "+name);
    progress.display(99, "Commit complete. Cleaning up...");

    logger.debug("Closing writer: "+name);
    // This can take several minutes after commit() as noted in the Lucene API docs
    // It is bad if we call LuceneIndex.close() while waiting here since
    // that will close the directory which leaves a lock file that needs to be removed
    // before we can start-up again
    // TODO - It would be nice if this could be solved, though I don't know how...
    // The pinger is active only while the writer is closing
    // (presumably to keep the session alive -- TODO confirm)
    Pinger pinger = Application.newPinger(dc.getSessionControl());
    FileUtil.close(writer);
    pinger.stop();
    writer = null; // So that the catch- and finally-blocks leave the closed writer alone
    logger.debug("Closed writer: "+name);

    FileUtil.close(rebuildDir);
    // NOTE(review): toDelete points at the old index from here on, also if the
    // switch below throws; in that case the finally-block deletes the old index
    // directory (and logs it as the "rebuild" index) -- confirm that this is intended
    toDelete = path;
    logger.info("Index rebuilt successfully: "+name);

    rwLock.writeLock().lock();
    try
    {
      logger.debug("Switching to new index ("+name+"): " + rebuildPath);
      // Close the old index before opening the rebuilt one
      FileUtil.close(reader);
      FileUtil.close(directory);

      path = rebuildPath;
      directory = new NIOFSDirectory(path.toPath());
      reader = createIndexReader(directory);
      searcher = createIndexSearcher(reader);
      cache = createQueryCache();
    }
    finally
    {
      rwLock.writeLock().unlock();
    }

  }
  catch (Exception ex)
  {
    setError(new RuntimeException("Full rebuild failed: "+ex.getMessage(), ex));
    failed = true;
    logger.error("Full rebuild failed: "+name, ex);
    if (writer != null)
    {
      try
      {
        writer.rollback();
      }
      catch (Exception ex2)
      {
        logger.warn("Exception during rollback", ex2);
      }
    }
  }
  finally
  {
    // Both have already been closed on the normal path; these calls cover failures
    FileUtil.close(writer);
    FileUtil.close(rebuildDir);
    if (toDelete != null)
    {
      logger.debug("Deleting "+(failed?"rebuild":"old")+" index ("+name+"): " + toDelete);
      // The return value is not used; the exists() check below decides what is logged
      int numDeleted = FileUtil.deleteTempDirectory(toDelete);
      if (toDelete.exists())
      {
        logger.warn((failed?"Rebuild":"Old")+" index could not be deleted ("+name+"): " + toDelete);
      }
      else
      {
        logger.debug((failed?"Rebuild":"Old")+" index deleted ("+name+"): " + toDelete);
      }
    }
    this.status = Status.IDLE;
    this.progress = null;
  }
}
6167 |
09 Mar 21 |
nicklas |
990 |
|
6168 |
12 Mar 21 |
nicklas |
991 |
/** |
6168 |
12 Mar 21 |
nicklas |
Get progress information for current action. Return null |
6168 |
12 Mar 21 |
nicklas |
if there is no ongoing action. |
6168 |
12 Mar 21 |
nicklas |
994 |
*/ |
6164 |
05 Mar 21 |
nicklas |
995 |
public SimpleProgressReporter getProgress() |
6164 |
05 Mar 21 |
nicklas |
996 |
{ |
6164 |
05 Mar 21 |
nicklas |
997 |
return progress; |
6164 |
05 Mar 21 |
nicklas |
998 |
} |
6164 |
05 Mar 21 |
nicklas |
999 |
|
6544 |
19 Jan 22 |
nicklas |
1000 |
protected SimpleProgressReporter setProgressReporter(SimpleProgressReporter p) |
6544 |
19 Jan 22 |
nicklas |
1001 |
{ |
6544 |
19 Jan 22 |
nicklas |
1002 |
this.progress = p; |
6544 |
19 Jan 22 |
nicklas |
1003 |
return progress; |
6544 |
19 Jan 22 |
nicklas |
1004 |
} |
6544 |
19 Jan 22 |
nicklas |
1005 |
|
6132 |
16 Feb 21 |
nicklas |
1006 |
/**
  Create a filter action implementation that is suitable for the index.

  @param context The invokation context that the filter action is created for
  @return A filter action for this index
  @since 1.5
*/
public abstract LuceneFilterAction<?> createFilterAction(InvokationContext<? super QueryFilterAction> context);
6546 |
24 Jan 22 |
nicklas |
1011 |
|
6546 |
24 Jan 22 |
nicklas |
1012 |
/**
  Create a column action implementation that is suitable for the index.

  @param filter A filter action (presumably one created by
    createFilterAction() -- TODO confirm)
  @return A column action for this index
  @since 1.5
*/
public abstract LuceneColumnAction<?, ?> createColumnAction(LuceneFilterAction<?> filter);
6552 |
26 Jan 22 |
nicklas |
1017 |
|
6552 |
26 Jan 22 |
nicklas |
1018 |
/** |
6377 |
08 Sep 21 |
nicklas |
Get the number of variants indexed for the rawbioassay. Note that this count |
6377 |
08 Sep 21 |
nicklas |
doesn't include variants with 0/0 genotype. |
6241 |
21 May 21 |
nicklas |
@return Number of variants or null if the raw bioassay is not indexed or if the index is closed |
6132 |
16 Feb 21 |
nicklas |
1022 |
*/ |
6141 |
19 Feb 21 |
nicklas |
1023 |
public Integer countVariants(RawBioAssay rba) |
6112 |
02 Feb 21 |
nicklas |
1024 |
throws IOException |
6112 |
02 Feb 21 |
nicklas |
1025 |
{ |
6377 |
08 Sep 21 |
nicklas |
1026 |
return countGenotypes(rba, false); |
6377 |
08 Sep 21 |
nicklas |
1027 |
} |
6377 |
08 Sep 21 |
nicklas |
1028 |
|
6377 |
08 Sep 21 |
nicklas |
1029 |
|
6377 |
08 Sep 21 |
nicklas |
1030 |
/** |
6377 |
08 Sep 21 |
nicklas |
Get the number of genotypes indexed for the rawbioassay. This count |
6377 |
08 Sep 21 |
nicklas |
may include variants with 0/0 genotype. |
6377 |
08 Sep 21 |
nicklas |
@return Number of variants or null if the raw bioassay is not indexed or if the index is closed |
6377 |
08 Sep 21 |
nicklas |
@since 1.2 |
6377 |
08 Sep 21 |
nicklas |
1035 |
*/ |
6377 |
08 Sep 21 |
nicklas |
1036 |
public Integer countGenotypes(RawBioAssay rba) |
6377 |
08 Sep 21 |
nicklas |
1037 |
throws IOException |
6377 |
08 Sep 21 |
nicklas |
1038 |
{ |
6377 |
08 Sep 21 |
nicklas |
1039 |
return countGenotypes(rba, true); |
6377 |
08 Sep 21 |
nicklas |
1040 |
} |
6377 |
08 Sep 21 |
nicklas |
1041 |
|
6541 |
17 Jan 22 |
nicklas |
1042 |
protected Integer countGenotypes(RawBioAssay rba, boolean allGenotypes) |
6377 |
08 Sep 21 |
nicklas |
1043 |
throws IOException |
6377 |
08 Sep 21 |
nicklas |
1044 |
{ |
6241 |
21 May 21 |
nicklas |
1045 |
if (status == Status.DISABLED) return null; |
6241 |
21 May 21 |
nicklas |
1046 |
|
6541 |
17 Jan 22 |
nicklas |
1047 |
Query query = IntPoint.newExactQuery("mainId", rba.getId()); |
6141 |
19 Feb 21 |
nicklas |
1048 |
|
6550 |
25 Jan 22 |
nicklas |
1049 |
AllDocsCollector hits = new AllDocsCollector(1, null); |
6541 |
17 Jan 22 |
nicklas |
1050 |
getIndexSearcher().search(query, hits); |
6541 |
17 Jan 22 |
nicklas |
1051 |
if (hits.getTotalHits() == 0) return null; |
6541 |
17 Jan 22 |
nicklas |
1052 |
|
6541 |
17 Jan 22 |
nicklas |
1053 |
Document doc = hits.getDocuments().iterator().next(); |
6541 |
17 Jan 22 |
nicklas |
1054 |
return Values.getInt(doc.get(allGenotypes ? "numGenotypes" : "numVariants")); |
6112 |
02 Feb 21 |
nicklas |
1055 |
} |
6541 |
17 Jan 22 |
nicklas |
1056 |
|
6112 |
02 Feb 21 |
nicklas |
1057 |
|
6132 |
16 Feb 21 |
nicklas |
1058 |
/** |
6140 |
19 Feb 21 |
nicklas |
Get the point in time when extended waiting for more rawbioassays |
6140 |
19 Feb 21 |
nicklas |
should stop. |
6140 |
19 Feb 21 |
nicklas |
1061 |
*/ |
6140 |
19 Feb 21 |
nicklas |
1062 |
public long getExtendedWaitUntil(long maxExtendedWait) |
6140 |
19 Feb 21 |
nicklas |
1063 |
{ |
6140 |
19 Feb 21 |
nicklas |
1064 |
if (extendedWaitUntil == 0) |
6140 |
19 Feb 21 |
nicklas |
1065 |
{ |
6140 |
19 Feb 21 |
nicklas |
1066 |
extendedWaitUntil = System.currentTimeMillis()+maxExtendedWait; |
6140 |
19 Feb 21 |
nicklas |
1067 |
} |
6140 |
19 Feb 21 |
nicklas |
1068 |
return extendedWaitUntil; |
6140 |
19 Feb 21 |
nicklas |
1069 |
} |
6140 |
19 Feb 21 |
nicklas |
1070 |
|
6140 |
19 Feb 21 |
nicklas |
1071 |
/** |
6132 |
16 Feb 21 |
nicklas |
Get a thread-safe index reader. |
6132 |
16 Feb 21 |
nicklas |
1073 |
*/ |
6111 |
29 Jan 21 |
nicklas |
1074 |
public IndexReader getIndexReader() |
6111 |
29 Jan 21 |
nicklas |
1075 |
{ |
6241 |
21 May 21 |
nicklas |
1076 |
if (status == Status.DISABLED) return null; |
6135 |
17 Feb 21 |
nicklas |
1077 |
rwLock.readLock().lock(); |
6135 |
17 Feb 21 |
nicklas |
1078 |
try |
6135 |
17 Feb 21 |
nicklas |
1079 |
{ |
6135 |
17 Feb 21 |
nicklas |
1080 |
return reader; |
6135 |
17 Feb 21 |
nicklas |
1081 |
} |
6135 |
17 Feb 21 |
nicklas |
1082 |
finally |
6135 |
17 Feb 21 |
nicklas |
1083 |
{ |
6135 |
17 Feb 21 |
nicklas |
1084 |
rwLock.readLock().unlock(); |
6135 |
17 Feb 21 |
nicklas |
1085 |
} |
6111 |
29 Jan 21 |
nicklas |
1086 |
} |
6111 |
29 Jan 21 |
nicklas |
1087 |
|
6132 |
16 Feb 21 |
nicklas |
1088 |
/** |
6132 |
16 Feb 21 |
nicklas |
Get a thread-safe index searcher. |
6132 |
16 Feb 21 |
nicklas |
1090 |
*/ |
6111 |
29 Jan 21 |
nicklas |
1091 |
public IndexSearcher getIndexSearcher() |
6111 |
29 Jan 21 |
nicklas |
1092 |
{ |
6241 |
21 May 21 |
nicklas |
1093 |
if (status == Status.DISABLED) return null; |
6135 |
17 Feb 21 |
nicklas |
1094 |
rwLock.readLock().lock(); |
6135 |
17 Feb 21 |
nicklas |
1095 |
try |
6135 |
17 Feb 21 |
nicklas |
1096 |
{ |
6135 |
17 Feb 21 |
nicklas |
1097 |
return searcher; |
6135 |
17 Feb 21 |
nicklas |
1098 |
} |
6135 |
17 Feb 21 |
nicklas |
1099 |
finally |
6135 |
17 Feb 21 |
nicklas |
1100 |
{ |
6135 |
17 Feb 21 |
nicklas |
1101 |
rwLock.readLock().unlock(); |
6135 |
17 Feb 21 |
nicklas |
1102 |
} |
6111 |
29 Jan 21 |
nicklas |
1103 |
} |
6114 |
04 Feb 21 |
nicklas |
1104 |
|
6132 |
16 Feb 21 |
nicklas |
1105 |
/** |
6132 |
16 Feb 21 |
nicklas |
Get the analyzer that is used for indexing and query parsing. |
6132 |
16 Feb 21 |
nicklas |
1107 |
*/ |
6114 |
04 Feb 21 |
nicklas |
1108 |
public Analyzer getAnalyzer() |
6114 |
04 Feb 21 |
nicklas |
1109 |
{ |
6114 |
04 Feb 21 |
nicklas |
1110 |
return analyzer; |
6114 |
04 Feb 21 |
nicklas |
1111 |
} |
6112 |
02 Feb 21 |
nicklas |
1112 |
|
6132 |
16 Feb 21 |
nicklas |
1113 |
/** |
6132 |
16 Feb 21 |
nicklas |
Create a new parser for creating queries from strings. |
6132 |
16 Feb 21 |
nicklas |
1115 |
*/ |
6541 |
17 Jan 22 |
nicklas |
1116 |
public abstract QueryParser createQueryParser(); |
6114 |
04 Feb 21 |
nicklas |
1117 |
|
6137 |
18 Feb 21 |
nicklas |
1118 |
/** |
6137 |
18 Feb 21 |
nicklas |
Get the current query cache. |
6137 |
18 Feb 21 |
nicklas |
1120 |
*/ |
6137 |
18 Feb 21 |
nicklas |
1121 |
public QueryCache getQueryCache() |
6137 |
18 Feb 21 |
nicklas |
1122 |
{ |
6137 |
18 Feb 21 |
nicklas |
1123 |
return cache; |
6137 |
18 Feb 21 |
nicklas |
1124 |
} |
6137 |
18 Feb 21 |
nicklas |
1125 |
|
6163 |
05 Mar 21 |
nicklas |
1126 |
/** |
6163 |
05 Mar 21 |
nicklas |
Get the total number of variants in the index. |
6241 |
21 May 21 |
nicklas |
@return The number of variants or -1 if the index is not open |
6163 |
05 Mar 21 |
nicklas |
1129 |
*/ |
6541 |
17 Jan 22 |
nicklas |
1130 |
public abstract long getNumVariants() |
6541 |
17 Jan 22 |
nicklas |
1131 |
throws IOException; |
6163 |
05 Mar 21 |
nicklas |
1132 |
|
6163 |
05 Mar 21 |
nicklas |
1133 |
/** |
6163 |
05 Mar 21 |
nicklas |
Get the total number of indexed raw bioassays. |
6241 |
21 May 21 |
nicklas |
@return The number of raw bioassays or -1 if the index is not open |
6163 |
05 Mar 21 |
nicklas |
1136 |
*/ |
6163 |
05 Mar 21 |
nicklas |
1137 |
public int getNumRawBioAssays() |
6163 |
05 Mar 21 |
nicklas |
1138 |
throws IOException |
6163 |
05 Mar 21 |
nicklas |
1139 |
{ |
6241 |
21 May 21 |
nicklas |
1140 |
if (status == Status.DISABLED) return -1; |
6163 |
05 Mar 21 |
nicklas |
1141 |
Query query = IntPoint.newRangeQuery("mainId", 0, Integer.MAX_VALUE); |
6163 |
05 Mar 21 |
nicklas |
1142 |
return searcher.count(query); |
6163 |
05 Mar 21 |
nicklas |
1143 |
} |
6163 |
05 Mar 21 |
nicklas |
1144 |
|
6168 |
12 Mar 21 |
nicklas |
1145 |
/** |
6168 |
12 Mar 21 |
nicklas |
Get the id values of all indexed raw bioassays. |
6168 |
12 Mar 21 |
nicklas |
1147 |
*/ |
6168 |
12 Mar 21 |
nicklas |
1148 |
public Set<Integer> getIndexedRawBioAssays() |
6168 |
12 Mar 21 |
nicklas |
1149 |
throws IOException |
6168 |
12 Mar 21 |
nicklas |
1150 |
{ |
6241 |
21 May 21 |
nicklas |
1151 |
if (status == Status.DISABLED) return Collections.emptySet(); |
6168 |
12 Mar 21 |
nicklas |
1152 |
Query query = IntPoint.newRangeQuery("mainId", 0, Integer.MAX_VALUE); |
6553 |
27 Jan 22 |
nicklas |
1153 |
RawBioAssayIdCollector collector = new RawBioAssayIdCollector("mainId", -1); |
6246 |
24 May 21 |
nicklas |
1154 |
searcher.search(query, collector); |
6246 |
24 May 21 |
nicklas |
1155 |
return collector.getRbaIds(); |
6168 |
12 Mar 21 |
nicklas |
1156 |
} |
6163 |
05 Mar 21 |
nicklas |
1157 |
|
6163 |
05 Mar 21 |
nicklas |
1158 |
/** |
6173 |
18 Mar 21 |
nicklas |
Get the id of all raw bioassays where the specified variant has been found. |
6173 |
18 Mar 21 |
nicklas |
1160 |
*/ |
6551 |
26 Jan 22 |
nicklas |
1161 |
public abstract Set<Integer> getRawBioAssaysWithVariant(String chrom, long pos, String ref, String alt, String snpId) |
6551 |
26 Jan 22 |
nicklas |
1162 |
throws IOException; |
6173 |
18 Mar 21 |
nicklas |
1163 |
|
6173 |
18 Mar 21 |
nicklas |
1164 |
|
6173 |
18 Mar 21 |
nicklas |
1165 |
/** |
6246 |
24 May 21 |
nicklas |
Get the id values of non-existing raw bioassays. This can happen if |
6246 |
24 May 21 |
nicklas |
raw bioassays that have been indexed are deleted from the BASE server. |
6163 |
05 Mar 21 |
nicklas |
1168 |
*/ |
6163 |
05 Mar 21 |
nicklas |
1169 |
public Set<Integer> getNonExistingRawBioAssays(DbControl dc) |
6163 |
05 Mar 21 |
nicklas |
1170 |
throws IOException |
6163 |
05 Mar 21 |
nicklas |
1171 |
{ |
6241 |
21 May 21 |
nicklas |
1172 |
if (status == Status.DISABLED) return Collections.emptySet(); |
6246 |
24 May 21 |
nicklas |
1173 |
Set<Integer> rbaIds = getIndexedRawBioAssays(); |
6163 |
05 Mar 21 |
nicklas |
1174 |
|
6163 |
05 Mar 21 |
nicklas |
1175 |
ItemQuery<RawBioAssay> rbaQuery = RawBioAssay.getQuery(); |
6163 |
05 Mar 21 |
nicklas |
1176 |
rbaQuery.setIncludes(Include.ALL); |
6163 |
05 Mar 21 |
nicklas |
1177 |
rbaQuery.exclude(Include.REMOVED); |
6163 |
05 Mar 21 |
nicklas |
1178 |
rbaQuery.restrict(new IdListRestriction(rbaIds)); |
6167 |
09 Mar 21 |
nicklas |
//rbaQuery.setMaxResults(2000); |
6163 |
05 Mar 21 |
nicklas |
1180 |
|
6163 |
05 Mar 21 |
nicklas |
1181 |
List<Integer> existing = rbaQuery.idList(dc); |
6163 |
05 Mar 21 |
nicklas |
1182 |
rbaIds.removeAll(existing); |
6163 |
05 Mar 21 |
nicklas |
1183 |
return rbaIds; |
6163 |
05 Mar 21 |
nicklas |
1184 |
} |
6163 |
05 Mar 21 |
nicklas |
1185 |
|
6163 |
05 Mar 21 |
nicklas |
1186 |
/** |
6168 |
12 Mar 21 |
nicklas |
Get the path to the index directory. |
6168 |
12 Mar 21 |
nicklas |
1188 |
*/ |
6168 |
12 Mar 21 |
nicklas |
1189 |
public java.io.File getPath() |
6168 |
12 Mar 21 |
nicklas |
1190 |
{ |
6168 |
12 Mar 21 |
nicklas |
1191 |
return path; |
6168 |
12 Mar 21 |
nicklas |
1192 |
} |
6168 |
12 Mar 21 |
nicklas |
1193 |
|
6168 |
12 Mar 21 |
nicklas |
1194 |
/** |
6163 |
05 Mar 21 |
nicklas |
Get the size of the index database on disk. |
6163 |
05 Mar 21 |
nicklas |
1196 |
*/ |
6163 |
05 Mar 21 |
nicklas |
1197 |
public long getSizeOnDisk() |
6163 |
05 Mar 21 |
nicklas |
1198 |
{ |
6408 |
20 Sep 21 |
nicklas |
1199 |
if (path == null) return -1; |
6545 |
21 Jan 22 |
nicklas |
1200 |
return getSizeOfDir(path); |
6545 |
21 Jan 22 |
nicklas |
1201 |
} |
6545 |
21 Jan 22 |
nicklas |
1202 |
|
6545 |
21 Jan 22 |
nicklas |
1203 |
protected long getSizeOfDir(java.io.File dir) |
6545 |
21 Jan 22 |
nicklas |
1204 |
{ |
6163 |
05 Mar 21 |
nicklas |
1205 |
long size = 0; |
6545 |
21 Jan 22 |
nicklas |
1206 |
for (java.io.File f : dir.listFiles()) |
6163 |
05 Mar 21 |
nicklas |
1207 |
{ |
6163 |
05 Mar 21 |
nicklas |
1208 |
if (f.isFile()) size += f.length(); |
6163 |
05 Mar 21 |
nicklas |
1209 |
} |
6163 |
05 Mar 21 |
nicklas |
1210 |
return size; |
6163 |
05 Mar 21 |
nicklas |
1211 |
} |
6545 |
21 Jan 22 |
nicklas |
1212 |
|
6163 |
05 Mar 21 |
nicklas |
1213 |
|
6167 |
09 Mar 21 |
nicklas |
1214 |
/**
	Current status of the index. The query methods of the index return
	null, -1 or an empty set when the status is DISABLED.
	NOTE: The order of the constants is kept unchanged since it
	decides the ordinal values.
*/
public enum Status
{
	DISABLED,
	ENABLED,
	IDLE,
	INDEXING,
	REMOVING,
	REBUILDING
}
6164 |
05 Mar 21 |
nicklas |
1226 |
|
6167 |
09 Mar 21 |
nicklas |
1227 |
/**
	Action that should be performed by the next auto-update.
	NOTE: The order of the constants is kept unchanged since it
	decides the ordinal values.
*/
public enum AutoUpdate
{
	DISABLED,
	DEFAULT,
	REMOVE_NON_EXISTING,
	ADD_TO_INDEX,
	FULL_REBUILD,
	DELETE,
	CUSTOM
}
6167 |
09 Mar 21 |
nicklas |
1240 |
|
6168 |
12 Mar 21 |
nicklas |
1241 |
|
6168 |
12 Mar 21 |
nicklas |
1242 |
/** |
6168 |
12 Mar 21 |
nicklas |
Runnable implementation for starting a full index rebuild |
6168 |
12 Mar 21 |
nicklas |
in a new thread. |
6168 |
12 Mar 21 |
nicklas |
1245 |
*/ |
6168 |
12 Mar 21 |
nicklas |
1246 |
public static class FullRebuildRunnable |
6168 |
12 Mar 21 |
nicklas |
1247 |
implements Runnable |
6168 |
12 Mar 21 |
nicklas |
1248 |
{ |
6168 |
12 Mar 21 |
nicklas |
1249 |
|
6168 |
12 Mar 21 |
nicklas |
1250 |
private final LuceneIndex idx; |
6168 |
12 Mar 21 |
nicklas |
1251 |
|
6168 |
12 Mar 21 |
nicklas |
1252 |
public FullRebuildRunnable(LuceneIndex idx) |
6168 |
12 Mar 21 |
nicklas |
1253 |
{ |
6168 |
12 Mar 21 |
nicklas |
1254 |
this.idx = idx; |
6168 |
12 Mar 21 |
nicklas |
1255 |
} |
6168 |
12 Mar 21 |
nicklas |
1256 |
|
6168 |
12 Mar 21 |
nicklas |
1257 |
@Override |
6168 |
12 Mar 21 |
nicklas |
1258 |
public void run() |
6168 |
12 Mar 21 |
nicklas |
1259 |
{ |
6168 |
12 Mar 21 |
nicklas |
1260 |
DbControl dc = null; |
6168 |
12 Mar 21 |
nicklas |
1261 |
try |
6168 |
12 Mar 21 |
nicklas |
1262 |
{ |
6604 |
23 Feb 22 |
nicklas |
1263 |
dc = VarSearchService.getInstance().getRootSessionControl().newDbControl("Variant search: Full index rebuild"); |
6168 |
12 Mar 21 |
nicklas |
1264 |
idx.fullRebuild(dc, null); |
6528 |
20 Dec 21 |
nicklas |
1265 |
dc.commit(); |
6168 |
12 Mar 21 |
nicklas |
1266 |
} |
6168 |
12 Mar 21 |
nicklas |
1267 |
finally |
6168 |
12 Mar 21 |
nicklas |
1268 |
{ |
6241 |
21 May 21 |
nicklas |
1269 |
if (dc != null) dc.close(); |
6544 |
19 Jan 22 |
nicklas |
1270 |
idx.setNextAutoUpdateAction(AutoUpdate.DEFAULT, null); |
6241 |
21 May 21 |
nicklas |
1271 |
idx.releaseCloseWait(); |
6168 |
12 Mar 21 |
nicklas |
1272 |
} |
6168 |
12 Mar 21 |
nicklas |
1273 |
} |
6168 |
12 Mar 21 |
nicklas |
1274 |
} |
6168 |
12 Mar 21 |
nicklas |
1275 |
|
6168 |
12 Mar 21 |
nicklas |
1276 |
/** |
6168 |
12 Mar 21 |
nicklas |
Thread factory for indexing threads. Priority is set |
6168 |
12 Mar 21 |
nicklas |
to MIN_PRIORITY. |
6168 |
12 Mar 21 |
nicklas |
1279 |
*/ |
6168 |
12 Mar 21 |
nicklas |
1280 |
public static class IndexThreadFactory |
6168 |
12 Mar 21 |
nicklas |
1281 |
implements ThreadFactory |
6168 |
12 Mar 21 |
nicklas |
1282 |
{ |
6168 |
12 Mar 21 |
nicklas |
1283 |
|
6168 |
12 Mar 21 |
nicklas |
1284 |
private final ThreadGroup grp; |
6168 |
12 Mar 21 |
nicklas |
1285 |
private int nThreads; |
6168 |
12 Mar 21 |
nicklas |
1286 |
|
6168 |
12 Mar 21 |
nicklas |
1287 |
public IndexThreadFactory(String idxId) |
6168 |
12 Mar 21 |
nicklas |
1288 |
{ |
6168 |
12 Mar 21 |
nicklas |
1289 |
this.grp = new ThreadGroup("Varsearch-Index-"+idxId); |
6168 |
12 Mar 21 |
nicklas |
1290 |
} |
6168 |
12 Mar 21 |
nicklas |
1291 |
|
6168 |
12 Mar 21 |
nicklas |
1292 |
@Override |
6168 |
12 Mar 21 |
nicklas |
1293 |
public Thread newThread(Runnable r) |
6168 |
12 Mar 21 |
nicklas |
1294 |
{ |
6168 |
12 Mar 21 |
nicklas |
1295 |
nThreads++; |
6168 |
12 Mar 21 |
nicklas |
1296 |
Thread t = new Thread(grp, r, grp.getName()+"-"+nThreads); |
6168 |
12 Mar 21 |
nicklas |
1297 |
t.setPriority(Thread.MIN_PRIORITY); |
6168 |
12 Mar 21 |
nicklas |
1298 |
return t; |
6168 |
12 Mar 21 |
nicklas |
1299 |
} |
6168 |
12 Mar 21 |
nicklas |
1300 |
} |
6168 |
12 Mar 21 |
nicklas |
1301 |
|
6168 |
12 Mar 21 |
nicklas |
1302 |
/** |
6168 |
12 Mar 21 |
nicklas |
Thread factory for query threads. Priority is set |
6168 |
12 Mar 21 |
nicklas |
to NORM_PRIORITY. |
6168 |
12 Mar 21 |
nicklas |
1305 |
*/ |
6168 |
12 Mar 21 |
nicklas |
1306 |
public static class QueryThreadFactory |
6168 |
12 Mar 21 |
nicklas |
1307 |
implements ThreadFactory |
6168 |
12 Mar 21 |
nicklas |
1308 |
{ |
6168 |
12 Mar 21 |
nicklas |
1309 |
|
6168 |
12 Mar 21 |
nicklas |
1310 |
private final ThreadGroup grp; |
6168 |
12 Mar 21 |
nicklas |
1311 |
private int nThreads; |
6168 |
12 Mar 21 |
nicklas |
1312 |
|
6168 |
12 Mar 21 |
nicklas |
1313 |
public QueryThreadFactory(String idxId) |
6168 |
12 Mar 21 |
nicklas |
1314 |
{ |
6168 |
12 Mar 21 |
nicklas |
1315 |
this.grp = new ThreadGroup("Varsearch-Query-"+idxId); |
6168 |
12 Mar 21 |
nicklas |
1316 |
} |
6168 |
12 Mar 21 |
nicklas |
1317 |
|
6168 |
12 Mar 21 |
nicklas |
1318 |
@Override |
6168 |
12 Mar 21 |
nicklas |
1319 |
public Thread newThread(Runnable r) |
6168 |
12 Mar 21 |
nicklas |
1320 |
{ |
6168 |
12 Mar 21 |
nicklas |
1321 |
nThreads++; |
6168 |
12 Mar 21 |
nicklas |
1322 |
Thread t = new Thread(grp, r, grp.getName()+"-"+nThreads); |
6168 |
12 Mar 21 |
nicklas |
1323 |
t.setPriority(Thread.NORM_PRIORITY); |
6168 |
12 Mar 21 |
nicklas |
1324 |
return t; |
6168 |
12 Mar 21 |
nicklas |
1325 |
} |
6168 |
12 Mar 21 |
nicklas |
1326 |
} |
6168 |
12 Mar 21 |
nicklas |
1327 |
|
6111 |
29 Jan 21 |
nicklas |
1328 |
} |