4647 |
14 Dec 17 |
nicklas |
1 |
package net.sf.basedb.reggie.vcf; |
4647 |
14 Dec 17 |
nicklas |
2 |
|
4647 |
14 Dec 17 |
nicklas |
3 |
import java.io.IOException; |
4647 |
14 Dec 17 |
nicklas |
4 |
import java.io.InputStream; |
6594 |
21 Feb 22 |
nicklas |
5 |
import java.io.OutputStream; |
4818 |
22 May 18 |
nicklas |
6 |
import java.util.Collection; |
6589 |
21 Feb 22 |
nicklas |
7 |
import java.util.Collections; |
4794 |
07 May 18 |
nicklas |
8 |
import java.util.Date; |
4647 |
14 Dec 17 |
nicklas |
9 |
import java.util.HashMap; |
4649 |
20 Dec 17 |
nicklas |
10 |
import java.util.HashSet; |
4647 |
14 Dec 17 |
nicklas |
11 |
import java.util.List; |
4647 |
14 Dec 17 |
nicklas |
12 |
import java.util.Map; |
4649 |
20 Dec 17 |
nicklas |
13 |
import java.util.Set; |
7071 |
20 Mar 23 |
nicklas |
14 |
import java.util.concurrent.Callable; |
4647 |
14 Dec 17 |
nicklas |
15 |
import java.util.concurrent.CountDownLatch; |
7071 |
20 Mar 23 |
nicklas |
16 |
import java.util.concurrent.ExecutorCompletionService; |
7071 |
20 Mar 23 |
nicklas |
17 |
import java.util.concurrent.ExecutorService; |
7071 |
20 Mar 23 |
nicklas |
18 |
import java.util.concurrent.Executors; |
7071 |
20 Mar 23 |
nicklas |
19 |
import java.util.concurrent.Future; |
4647 |
14 Dec 17 |
nicklas |
20 |
import java.util.concurrent.TimeUnit; |
4647 |
14 Dec 17 |
nicklas |
21 |
import java.util.concurrent.TimeoutException; |
6597 |
22 Feb 22 |
nicklas |
22 |
import java.util.zip.GZIPInputStream; |
4647 |
14 Dec 17 |
nicklas |
23 |
|
4647 |
14 Dec 17 |
nicklas |
24 |
import net.sf.basedb.core.AnyToAny; |
7073 |
21 Mar 23 |
nicklas |
25 |
import net.sf.basedb.core.AnyToAny.FastLinkLoader; |
4647 |
14 Dec 17 |
nicklas |
26 |
import net.sf.basedb.core.DbControl; |
4647 |
14 Dec 17 |
nicklas |
27 |
import net.sf.basedb.core.DerivedBioAssay; |
4794 |
07 May 18 |
nicklas |
28 |
import net.sf.basedb.core.Extract; |
4647 |
14 Dec 17 |
nicklas |
29 |
import net.sf.basedb.core.File; |
4647 |
14 Dec 17 |
nicklas |
30 |
import net.sf.basedb.core.Item; |
4649 |
20 Dec 17 |
nicklas |
31 |
import net.sf.basedb.core.ItemList; |
4647 |
14 Dec 17 |
nicklas |
32 |
import net.sf.basedb.core.ItemQuery; |
6594 |
21 Feb 22 |
nicklas |
33 |
import net.sf.basedb.core.Location; |
4647 |
14 Dec 17 |
nicklas |
34 |
import net.sf.basedb.core.Nameable; |
4647 |
14 Dec 17 |
nicklas |
35 |
import net.sf.basedb.core.Sample; |
6589 |
21 Feb 22 |
nicklas |
36 |
import net.sf.basedb.core.SyncFilter.SourceItemTransform; |
4794 |
07 May 18 |
nicklas |
37 |
import net.sf.basedb.core.Type; |
4647 |
14 Dec 17 |
nicklas |
38 |
import net.sf.basedb.core.query.Annotations; |
4794 |
07 May 18 |
nicklas |
39 |
import net.sf.basedb.core.query.Expression; |
4647 |
14 Dec 17 |
nicklas |
40 |
import net.sf.basedb.core.query.Expressions; |
4647 |
14 Dec 17 |
nicklas |
41 |
import net.sf.basedb.core.query.Hql; |
6589 |
21 Feb 22 |
nicklas |
42 |
import net.sf.basedb.core.query.IdListRestriction; |
7217 |
30 May 23 |
nicklas |
43 |
import net.sf.basedb.core.query.Restriction; |
4647 |
14 Dec 17 |
nicklas |
44 |
import net.sf.basedb.core.query.Restrictions; |
7071 |
20 Mar 23 |
nicklas |
45 |
import net.sf.basedb.reggie.ReggieThreadFactory; |
4647 |
14 Dec 17 |
nicklas |
46 |
import net.sf.basedb.reggie.Reggie; |
4681 |
21 Feb 18 |
nicklas |
47 |
import net.sf.basedb.reggie.autoconfirm.CufflinksAutoConfirmer; |
4647 |
14 Dec 17 |
nicklas |
48 |
import net.sf.basedb.reggie.dao.Annotationtype; |
4649 |
20 Dec 17 |
nicklas |
49 |
import net.sf.basedb.reggie.dao.BiomaterialList; |
7001 |
20 Jan 23 |
nicklas |
50 |
import net.sf.basedb.reggie.dao.Pipeline; |
4647 |
14 Dec 17 |
nicklas |
51 |
import net.sf.basedb.reggie.dao.Subtype; |
4647 |
14 Dec 17 |
nicklas |
52 |
import net.sf.basedb.reggie.query.AnyToAnyRestriction; |
4794 |
07 May 18 |
nicklas |
53 |
import net.sf.basedb.reggie.vcf.GenoTypeMessage.Category; |
6588 |
18 Feb 22 |
nicklas |
54 |
import net.sf.basedb.reggie.vcf.SpecimenData.SpecimenType; |
4647 |
14 Dec 17 |
nicklas |
55 |
import net.sf.basedb.util.FileUtil; |
6594 |
21 Feb 22 |
nicklas |
56 |
import net.sf.basedb.util.InputStreamSplitter; |
4671 |
07 Feb 18 |
nicklas |
57 |
import net.sf.basedb.util.Values; |
6589 |
21 Feb 22 |
nicklas |
58 |
import net.sf.basedb.util.listable.ListableUtil; |
6589 |
21 Feb 22 |
nicklas |
59 |
import net.sf.basedb.util.listable.SourceItemTransformerFactory; |
6589 |
21 Feb 22 |
nicklas |
60 |
import net.sf.basedb.util.listable.TransformContext; |
4647 |
14 Dec 17 |
nicklas |
61 |
|
4647 |
14 Dec 17 |
nicklas |
62 |
/** |
4647 |
14 Dec 17 |
nicklas |
Helper class for performing genotype checks between pairs of |
4647 |
14 Dec 17 |
nicklas |
samples. |
4647 |
14 Dec 17 |
nicklas |
65 |
|
4647 |
14 Dec 17 |
nicklas |
Before it can be used existing alignments and VCF files |
4647 |
14 Dec 17 |
nicklas |
that should be used in the comparison should be preloaded |
4647 |
14 Dec 17 |
nicklas |
with {@link #preloadVcfForCheckedItems(DbControl)} and |
4647 |
14 Dec 17 |
nicklas |
existing specimen and patient information should be preloaded |
4647 |
14 Dec 17 |
nicklas |
with {@link #preloadSampleData(DbControl)}. The preloading |
4647 |
14 Dec 17 |
nicklas |
methods can (but don't have to) be executed in a separate |
4647 |
14 Dec 17 |
nicklas |
thread. See {@link GenoTypePreloaderRunnable}. |
4647 |
14 Dec 17 |
nicklas |
73 |
|
4647 |
14 Dec 17 |
nicklas |
When the preload has completed, additional alignments |
4647 |
14 Dec 17 |
nicklas |
can be checked with {@link #check(DbControl, DerivedBioAssay)}. |
4647 |
14 Dec 17 |
nicklas |
76 |
|
4647 |
14 Dec 17 |
nicklas |
@author nicklas |
4647 |
14 Dec 17 |
nicklas |
@since 4.14 |
4647 |
14 Dec 17 |
nicklas |
79 |
*/ |
4647 |
14 Dec 17 |
nicklas |
80 |
public class GenoTypeChecker |
4647 |
14 Dec 17 |
nicklas |
81 |
{ |
7251 |
08 Jun 23 |
nicklas |
82 |
/** |
7251 |
08 Jun 23 |
nicklas |
The max number of threads to use if the default setting is used. It is always |
7251 |
08 Jun 23 |
nicklas |
possible to use more threads by calling {@link #setPreloadThreads(int)}. |
7251 |
08 Jun 23 |
nicklas |
@since 4.48.2 |
7251 |
08 Jun 23 |
nicklas |
86 |
*/ |
7251 |
08 Jun 23 |
nicklas |
87 |
public static final int MAX_DEFAULT_THREADS = 8; |
4671 |
07 Feb 18 |
nicklas |
88 |
|
4647 |
14 Dec 17 |
nicklas |
89 |
private final VcfParser parser; |
4647 |
14 Dec 17 |
nicklas |
90 |
private final Map<DerivedBioAssay, VcfData> preloaded; |
4649 |
20 Dec 17 |
nicklas |
91 |
private final Map<String, SpecimenData> specimenData; |
4649 |
20 Dec 17 |
nicklas |
92 |
private final Set<Integer> flaggedAlignments; |
4647 |
14 Dec 17 |
nicklas |
93 |
|
4647 |
14 Dec 17 |
nicklas |
94 |
private final CountDownLatch countCompleted; |
4647 |
14 Dec 17 |
nicklas |
95 |
private final CountDownLatch preloadCompleted; |
6591 |
21 Feb 22 |
nicklas |
96 |
private RuntimeException preloadError; |
4647 |
14 Dec 17 |
nicklas |
97 |
|
7217 |
30 May 23 |
nicklas |
98 |
private Restriction tumorPreloadFilter; |
7217 |
30 May 23 |
nicklas |
99 |
private Restriction normalPreloadFilter; |
4647 |
14 Dec 17 |
nicklas |
100 |
private int totalVcfsToPreload; |
7199 |
25 May 23 |
nicklas |
101 |
private int tumorItemsToPreload; |
7199 |
25 May 23 |
nicklas |
102 |
private int normalItemsToPreload; |
4647 |
14 Dec 17 |
nicklas |
103 |
private int currentVcfPreloadCount; |
7071 |
20 Mar 23 |
nicklas |
104 |
private int preloadThreads; |
4647 |
14 Dec 17 |
nicklas |
105 |
|
4671 |
07 Feb 18 |
nicklas |
106 |
private int NUM_REF_GT = 213; |
4671 |
07 Feb 18 |
nicklas |
107 |
|
6452 |
22 Oct 21 |
nicklas |
108 |
private float minCommonGtPct = 90f; // We want at least 90% of the SNPs |
4681 |
21 Feb 18 |
nicklas |
109 |
private float maxMismatchForSamePatPct = 10f; // At most 10% mismatches for same patient |
4671 |
07 Feb 18 |
nicklas |
110 |
private float minMismatchForDiffPatPct = 20f; // 20% or more mismatches for different patients |
4671 |
07 Feb 18 |
nicklas |
111 |
private float maxHetPct = 65f; |
4647 |
14 Dec 17 |
nicklas |
112 |
|
4681 |
21 Feb 18 |
nicklas |
113 |
private float highMismatchPct = 35f; |
4681 |
21 Feb 18 |
nicklas |
114 |
private float highHomHomMismatchPct = 10f; |
4681 |
21 Feb 18 |
nicklas |
115 |
|
7199 |
25 May 23 |
nicklas |
116 |
public GenoTypeChecker() |
4647 |
14 Dec 17 |
nicklas |
117 |
{ |
4647 |
14 Dec 17 |
nicklas |
118 |
this.parser = new VcfParser(); |
4647 |
14 Dec 17 |
nicklas |
119 |
this.preloaded = new HashMap<>(); |
4649 |
20 Dec 17 |
nicklas |
120 |
this.specimenData = new HashMap<>(); |
4649 |
20 Dec 17 |
nicklas |
121 |
this.flaggedAlignments = new HashSet<>(); |
4647 |
14 Dec 17 |
nicklas |
122 |
this.countCompleted = new CountDownLatch(1); |
4647 |
14 Dec 17 |
nicklas |
123 |
this.preloadCompleted = new CountDownLatch(2); |
7251 |
08 Jun 23 |
nicklas |
124 |
this.preloadThreads = Math.max(1, Math.min(Runtime.getRuntime().availableProcessors() / 3, MAX_DEFAULT_THREADS)); |
4647 |
14 Dec 17 |
nicklas |
125 |
} |
4647 |
14 Dec 17 |
nicklas |
126 |
|
4647 |
14 Dec 17 |
nicklas |
127 |
/** |
6444 |
19 Oct 21 |
nicklas |
Load a VCF file to use as reference when comparing. |
6444 |
19 Oct 21 |
nicklas |
@since 4.34 |
6444 |
19 Oct 21 |
nicklas |
130 |
*/ |
6444 |
19 Oct 21 |
nicklas |
131 |
public void loadReference(InputStream in, String name) |
6444 |
19 Oct 21 |
nicklas |
132 |
throws IOException |
6444 |
19 Oct 21 |
nicklas |
133 |
{ |
6444 |
19 Oct 21 |
nicklas |
134 |
parser.parseRef(in, name); |
6444 |
19 Oct 21 |
nicklas |
135 |
NUM_REF_GT = parser.getSnpCount(); |
6444 |
19 Oct 21 |
nicklas |
136 |
} |
6444 |
19 Oct 21 |
nicklas |
137 |
|
6444 |
19 Oct 21 |
nicklas |
138 |
/** |
7071 |
20 Mar 23 |
nicklas |
Get the number of threads that are used for pre-loading |
7071 |
20 Mar 23 |
nicklas |
VCF files. The default is to use 1/3 of available CPU cores |
7071 |
20 Mar 23 |
nicklas |
but not more than 8. |
7071 |
20 Mar 23 |
nicklas |
@since 4.46 |
7071 |
20 Mar 23 |
nicklas |
143 |
*/ |
7071 |
20 Mar 23 |
nicklas |
144 |
public int getPreloadThreads() |
7071 |
20 Mar 23 |
nicklas |
145 |
{ |
7071 |
20 Mar 23 |
nicklas |
146 |
return preloadThreads; |
7071 |
20 Mar 23 |
nicklas |
147 |
} |
7071 |
20 Mar 23 |
nicklas |
148 |
|
7071 |
20 Mar 23 |
nicklas |
149 |
/** |
7071 |
20 Mar 23 |
nicklas |
Set the number of threads that should be used for pre-loading |
7071 |
20 Mar 23 |
nicklas |
VCF files. It must be between 1 and the number of available |
7071 |
20 Mar 23 |
nicklas |
CPU cores. |
7071 |
20 Mar 23 |
nicklas |
@since 4.46 |
7071 |
20 Mar 23 |
nicklas |
154 |
*/ |
7071 |
20 Mar 23 |
nicklas |
155 |
public void setPreloadThreads(int numThreads) |
7071 |
20 Mar 23 |
nicklas |
156 |
{ |
7071 |
20 Mar 23 |
nicklas |
157 |
this.preloadThreads = Math.max(1, Math.min(numThreads, Runtime.getRuntime().availableProcessors())); |
7071 |
20 Mar 23 |
nicklas |
158 |
} |
7071 |
20 Mar 23 |
nicklas |
159 |
|
7071 |
20 Mar 23 |
nicklas |
160 |
/** |
4647 |
14 Dec 17 |
nicklas |
Get the total number of VCF files that need to |
4647 |
14 Dec 17 |
nicklas |
be pre-loaded before the genotype checker can |
4647 |
14 Dec 17 |
nicklas |
be used. This value is only available some |
4647 |
14 Dec 17 |
nicklas |
time after calling one of the preload methods. |
4647 |
14 Dec 17 |
nicklas |
Use {@link #awaitCounts(int)} to wait for this |
4647 |
14 Dec 17 |
nicklas |
value to be available. |
4647 |
14 Dec 17 |
nicklas |
167 |
|
4647 |
14 Dec 17 |
nicklas |
168 |
*/ |
4647 |
14 Dec 17 |
nicklas |
169 |
public int getTotalVcfsToPreload() |
4647 |
14 Dec 17 |
nicklas |
170 |
{ |
4647 |
14 Dec 17 |
nicklas |
171 |
return totalVcfsToPreload; |
4647 |
14 Dec 17 |
nicklas |
172 |
} |
4647 |
14 Dec 17 |
nicklas |
173 |
|
4647 |
14 Dec 17 |
nicklas |
174 |
/** |
4647 |
14 Dec 17 |
nicklas |
Get the number of VCF that has currently been |
4647 |
14 Dec 17 |
nicklas |
pre-loaded. |
4647 |
14 Dec 17 |
nicklas |
177 |
*/ |
4647 |
14 Dec 17 |
nicklas |
178 |
public int getCurrentVcfPreloadCount() |
4647 |
14 Dec 17 |
nicklas |
179 |
{ |
4647 |
14 Dec 17 |
nicklas |
180 |
return currentVcfPreloadCount; |
4647 |
14 Dec 17 |
nicklas |
181 |
} |
4647 |
14 Dec 17 |
nicklas |
182 |
|
4647 |
14 Dec 17 |
nicklas |
183 |
/** |
7199 |
25 May 23 |
nicklas |
Get the number of tumor alignments to preload. |
7199 |
25 May 23 |
nicklas |
@since 4.48 |
6591 |
21 Feb 22 |
nicklas |
186 |
*/ |
7199 |
25 May 23 |
nicklas |
187 |
public int getNumTumorItemsToPreload() |
6591 |
21 Feb 22 |
nicklas |
188 |
{ |
7199 |
25 May 23 |
nicklas |
189 |
return tumorItemsToPreload; |
6591 |
21 Feb 22 |
nicklas |
190 |
} |
6591 |
21 Feb 22 |
nicklas |
191 |
|
6591 |
21 Feb 22 |
nicklas |
192 |
/** |
7199 |
25 May 23 |
nicklas |
Get the number of normal alignments/genotype calls to preload. |
7199 |
25 May 23 |
nicklas |
@since 4.48 |
6591 |
21 Feb 22 |
nicklas |
195 |
*/ |
7199 |
25 May 23 |
nicklas |
196 |
public int getNumNormalItemsToPreload() |
6591 |
21 Feb 22 |
nicklas |
197 |
{ |
7199 |
25 May 23 |
nicklas |
198 |
return normalItemsToPreload; |
6591 |
21 Feb 22 |
nicklas |
199 |
} |
4647 |
14 Dec 17 |
nicklas |
200 |
|
4647 |
14 Dec 17 |
nicklas |
201 |
/** |
7210 |
29 May 23 |
nicklas |
Create a new QueryBuilder instance for generating a query that return |
7210 |
29 May 23 |
nicklas |
derived bioassays. |
7199 |
25 May 23 |
nicklas |
@since 4.48 |
4647 |
14 Dec 17 |
nicklas |
205 |
*/ |
7210 |
29 May 23 |
nicklas |
206 |
public QueryBuilder queryBuilder(DbControl dc) |
4647 |
14 Dec 17 |
nicklas |
207 |
{ |
7210 |
29 May 23 |
nicklas |
208 |
return new QueryBuilder(dc, DerivedBioAssay.getQuery()); |
4821 |
23 May 18 |
nicklas |
209 |
} |
4821 |
23 May 18 |
nicklas |
210 |
|
6588 |
18 Feb 22 |
nicklas |
211 |
/** |
7199 |
25 May 23 |
nicklas |
Get a query that returns all flagged tumor items (alignments). The alignments must have |
4794 |
07 May 18 |
nicklas |
a 'qc_genotype.vcf' file, but the value of the QC_GENOTYPE_STATUS annotation |
4794 |
07 May 18 |
nicklas |
doesn't matter. Sort order is not specified. |
7199 |
25 May 23 |
nicklas |
@since 4.48 |
4794 |
07 May 18 |
nicklas |
216 |
*/ |
7199 |
25 May 23 |
nicklas |
217 |
public ItemQuery<DerivedBioAssay> getFlaggedTumorItems(DbControl dc) |
4794 |
07 May 18 |
nicklas |
218 |
{ |
4794 |
07 May 18 |
nicklas |
219 |
ItemList flaggedAlignments = BiomaterialList.FLAGGED_ALIGNMENT.get(dc); |
7217 |
30 May 23 |
nicklas |
220 |
return new QueryBuilder(dc, flaggedAlignments.getMembers()).tumors(null).query(); |
4794 |
07 May 18 |
nicklas |
221 |
} |
4794 |
07 May 18 |
nicklas |
222 |
|
4794 |
07 May 18 |
nicklas |
223 |
/** |
4975 |
21 Sep 18 |
nicklas |
Is the given derived bioassay flagged? |
7199 |
25 May 23 |
nicklas |
(=member of the "Flagged alignments" list). |
4975 |
21 Sep 18 |
nicklas |
@since 4.20 |
4975 |
21 Sep 18 |
nicklas |
227 |
*/ |
4975 |
21 Sep 18 |
nicklas |
228 |
public boolean isFlagged(DerivedBioAssay alignment) |
4975 |
21 Sep 18 |
nicklas |
229 |
{ |
4975 |
21 Sep 18 |
nicklas |
230 |
return flaggedAlignments.contains(alignment.getId()); |
4975 |
21 Sep 18 |
nicklas |
231 |
} |
4975 |
21 Sep 18 |
nicklas |
232 |
|
4975 |
21 Sep 18 |
nicklas |
233 |
/** |
4975 |
21 Sep 18 |
nicklas |
Remove the given derived bioassay from genotype comparisons. |
4975 |
21 Sep 18 |
nicklas |
Intended to be used as part of unflagging and resolving process |
4975 |
21 Sep 18 |
nicklas |
so that we can check if remaining flagged alignments still get |
4975 |
21 Sep 18 |
nicklas |
error or not. |
4975 |
21 Sep 18 |
nicklas |
@since 4.20 |
4975 |
21 Sep 18 |
nicklas |
239 |
*/ |
4975 |
21 Sep 18 |
nicklas |
240 |
public boolean remove(DerivedBioAssay alignment) |
4975 |
21 Sep 18 |
nicklas |
241 |
{ |
4975 |
21 Sep 18 |
nicklas |
242 |
flaggedAlignments.remove(alignment.getId()); |
4975 |
21 Sep 18 |
nicklas |
243 |
return preloaded.remove(alignment) != null; |
4975 |
21 Sep 18 |
nicklas |
244 |
} |
4975 |
21 Sep 18 |
nicklas |
245 |
|
4975 |
21 Sep 18 |
nicklas |
246 |
/** |
4818 |
22 May 18 |
nicklas |
Get a query that return all RNA items on the given QiaCube run. |
4794 |
07 May 18 |
nicklas |
The QiaCube run is specified by date and run number. |
4794 |
07 May 18 |
nicklas |
249 |
*/ |
4818 |
22 May 18 |
nicklas |
250 |
public ItemQuery<Extract> getRnaRelatedToQiacube(DbControl dc, Date qiacubeDate, int runNo) |
4794 |
07 May 18 |
nicklas |
251 |
{ |
4794 |
07 May 18 |
nicklas |
// Load all RNA items on the given QiaCube |
4794 |
07 May 18 |
nicklas |
253 |
ItemQuery<Extract> rnaQuery = Extract.getQuery(); |
4794 |
07 May 18 |
nicklas |
254 |
Subtype.RNA.addFilter(dc, rnaQuery); |
4794 |
07 May 18 |
nicklas |
255 |
rnaQuery.setIncludes(Reggie.INCLUDE_IN_CURRENT_PROJECT); |
4794 |
07 May 18 |
nicklas |
256 |
|
4794 |
07 May 18 |
nicklas |
// A date and run number is required |
4794 |
07 May 18 |
nicklas |
258 |
rnaQuery.join(Annotations.innerJoin(null, Annotationtype.QIACUBE_DATE.load(dc), "qcDate")); |
4794 |
07 May 18 |
nicklas |
259 |
rnaQuery.restrict(Restrictions.eq(Hql.alias("qcDate"), Expressions.parameter("qcDate", qiacubeDate, Type.DATE))); |
4794 |
07 May 18 |
nicklas |
260 |
|
4794 |
07 May 18 |
nicklas |
261 |
rnaQuery.join(Annotations.innerJoin(null, Annotationtype.QIACUBE_RUN_NO.load(dc), "qcRun")); |
4794 |
07 May 18 |
nicklas |
262 |
rnaQuery.restrict(Restrictions.eq(Hql.alias("qcRun"), Expressions.integer(runNo))); |
4794 |
07 May 18 |
nicklas |
263 |
|
4818 |
22 May 18 |
nicklas |
264 |
return rnaQuery; |
4818 |
22 May 18 |
nicklas |
265 |
} |
4818 |
22 May 18 |
nicklas |
266 |
|
4818 |
22 May 18 |
nicklas |
267 |
/** |
4818 |
22 May 18 |
nicklas |
Get a query that return all alignments related to the specified items. |
4818 |
22 May 18 |
nicklas |
269 |
|
4818 |
22 May 18 |
nicklas |
The query may optionally be configured to return an extended set of |
4818 |
22 May 18 |
nicklas |
alignments that include all other alignments for patients that has at least |
4818 |
22 May 18 |
nicklas |
one sample in the given list of items. |
4821 |
23 May 18 |
nicklas |
273 |
|
4821 |
23 May 18 |
nicklas |
The query may also be configured to include alignments that has been "Disabled" |
4821 |
23 May 18 |
nicklas |
for genotyping. |
7199 |
25 May 23 |
nicklas |
@since 4.48 |
4818 |
22 May 18 |
nicklas |
277 |
*/ |
7199 |
25 May 23 |
nicklas |
278 |
public ItemQuery<DerivedBioAssay> getTumorItemsRelatedTo(DbControl dc, Collection<? extends Nameable> items, boolean extended, boolean includeDisabled) |
4818 |
22 May 18 |
nicklas |
279 |
{ |
4818 |
22 May 18 |
nicklas |
// Get the names of all items (or all specimen related to the same patients if 'extended') |
4818 |
22 May 18 |
nicklas |
281 |
Set<String> names = getAllNames(items, extended); |
4818 |
22 May 18 |
nicklas |
282 |
|
4794 |
07 May 18 |
nicklas |
// Load all checked alignments derived from the given rna (or specimen) |
7217 |
30 May 23 |
nicklas |
284 |
ItemQuery<DerivedBioAssay> query = queryBuilder(dc).tumors(null).checked(includeDisabled).query(); |
5005 |
04 Oct 18 |
nicklas |
285 |
if (names.size() > 0) |
5005 |
04 Oct 18 |
nicklas |
286 |
{ |
5005 |
04 Oct 18 |
nicklas |
287 |
Expression[] nameFilter = createNameFilter(names); |
5005 |
04 Oct 18 |
nicklas |
288 |
query.restrict(Restrictions.like_in(Hql.property("name"), nameFilter)); |
5005 |
04 Oct 18 |
nicklas |
289 |
} |
5005 |
04 Oct 18 |
nicklas |
290 |
else |
5005 |
04 Oct 18 |
nicklas |
291 |
{ |
5005 |
04 Oct 18 |
nicklas |
292 |
query.restrict(Restrictions.eq(Expressions.integer(1), Expressions.integer(0))); |
5005 |
04 Oct 18 |
nicklas |
293 |
} |
4794 |
07 May 18 |
nicklas |
294 |
return query; |
4794 |
07 May 18 |
nicklas |
295 |
} |
4818 |
22 May 18 |
nicklas |
296 |
|
4818 |
22 May 18 |
nicklas |
297 |
|
4794 |
07 May 18 |
nicklas |
298 |
/** |
4818 |
22 May 18 |
nicklas |
Get a query that return all libraries on the specified library plate. |
4794 |
07 May 18 |
nicklas |
The plate is specified by name. |
4794 |
07 May 18 |
nicklas |
301 |
*/ |
4818 |
22 May 18 |
nicklas |
302 |
public ItemQuery<Extract> getLibrariesRelatedToLibPlate(DbControl dc, String libPlate) |
4794 |
07 May 18 |
nicklas |
303 |
{ |
4794 |
07 May 18 |
nicklas |
// Load all Library items on the given LibPlate |
4794 |
07 May 18 |
nicklas |
305 |
ItemQuery<Extract> libQuery = Extract.getQuery(); |
4794 |
07 May 18 |
nicklas |
306 |
Subtype.LIBRARY.addFilter(dc, libQuery); |
4794 |
07 May 18 |
nicklas |
307 |
libQuery.setIncludes(Reggie.INCLUDE_IN_CURRENT_PROJECT); |
4794 |
07 May 18 |
nicklas |
308 |
|
4794 |
07 May 18 |
nicklas |
// The name of the plate is required |
4794 |
07 May 18 |
nicklas |
310 |
libQuery.join(Hql.innerJoin("bioWell", "bw")); |
4794 |
07 May 18 |
nicklas |
311 |
libQuery.join(Hql.innerJoin("bw", "bioPlate", "bp")); |
4794 |
07 May 18 |
nicklas |
312 |
libQuery.restrict(Restrictions.eq(Hql.property("bp", "name"), Expressions.string(libPlate))); |
4794 |
07 May 18 |
nicklas |
313 |
|
4818 |
22 May 18 |
nicklas |
314 |
return libQuery; |
4794 |
07 May 18 |
nicklas |
315 |
} |
4818 |
22 May 18 |
nicklas |
316 |
|
4794 |
07 May 18 |
nicklas |
317 |
/** |
7199 |
25 May 23 |
nicklas |
Get a query that return all normal items (genotype calls) that are related to the given |
6589 |
21 Feb 22 |
nicklas |
list of patients. |
6589 |
21 Feb 22 |
nicklas |
320 |
|
6589 |
21 Feb 22 |
nicklas |
The query may optionally be configured to include genotype calls that |
6589 |
21 Feb 22 |
nicklas |
has been "Disabled" for genotyping. |
7199 |
25 May 23 |
nicklas |
@since 4.48 |
7199 |
25 May 23 |
nicklas |
324 |
*/ |
7199 |
25 May 23 |
nicklas |
325 |
public ItemQuery<DerivedBioAssay> getNormalItemsRelatedToPatients(DbControl dc, Set<Integer> patientIds, boolean includeDisabled) |
6589 |
21 Feb 22 |
nicklas |
326 |
{ |
6589 |
21 Feb 22 |
nicklas |
// Convert the list of patient id:s to a list of derived bioassay id:s |
6589 |
21 Feb 22 |
nicklas |
328 |
TransformContext ctx = new TransformContext(dc); |
6589 |
21 Feb 22 |
nicklas |
329 |
SourceItemTransformerFactory factory = ListableUtil.getTransformerFactory(Item.DERIVEDBIOASSAY); |
6589 |
21 Feb 22 |
nicklas |
330 |
Set<Integer> gtcIds = factory.create(Item.BIOSOURCE, SourceItemTransform.PARENT_TO_CHILD).transform(ctx, patientIds); |
7217 |
30 May 23 |
nicklas |
331 |
ItemQuery<DerivedBioAssay> query = queryBuilder(dc).checked(includeDisabled).normals(null).query(); |
6589 |
21 Feb 22 |
nicklas |
332 |
query.restrict(new IdListRestriction(gtcIds)); |
6589 |
21 Feb 22 |
nicklas |
333 |
return query; |
6589 |
21 Feb 22 |
nicklas |
334 |
} |
6589 |
21 Feb 22 |
nicklas |
335 |
|
6589 |
21 Feb 22 |
nicklas |
336 |
/** |
4816 |
21 May 18 |
nicklas |
Get a query that return all alignments with a high HET percentage (above 65%). |
4816 |
21 May 18 |
nicklas |
338 |
|
4816 |
21 May 18 |
nicklas |
The query may optionally be configured to return an extended set of |
4816 |
21 May 18 |
nicklas |
alignments that include all other alignments for patients that has at |
4816 |
21 May 18 |
nicklas |
least one sample in the original list. |
7199 |
25 May 23 |
nicklas |
@since 4.48 |
4816 |
21 May 18 |
nicklas |
343 |
*/ |
7199 |
25 May 23 |
nicklas |
344 |
public ItemQuery<DerivedBioAssay> getTumorItemsWithHighHet(DbControl dc, boolean extended) |
4816 |
21 May 18 |
nicklas |
345 |
{ |
7199 |
25 May 23 |
nicklas |
346 |
ItemQuery<DerivedBioAssay> flaggedQuery = getFlaggedTumorItems(dc); |
4816 |
21 May 18 |
nicklas |
347 |
flaggedQuery.join(Annotations.innerJoin(null, Annotationtype.QC_GENOTYPE_HET_PCT.load(dc), "het")); |
4816 |
21 May 18 |
nicklas |
348 |
flaggedQuery.restrict(Restrictions.gt(Hql.alias("het"), Expressions.aFloat(maxHetPct))); |
4816 |
21 May 18 |
nicklas |
349 |
|
4816 |
21 May 18 |
nicklas |
350 |
ItemQuery<DerivedBioAssay> query = flaggedQuery; |
4816 |
21 May 18 |
nicklas |
351 |
if (extended) |
4816 |
21 May 18 |
nicklas |
352 |
{ |
4816 |
21 May 18 |
nicklas |
// Get the names of all Specimen related to the same patients |
4816 |
21 May 18 |
nicklas |
354 |
Set<String> names = getAllNames(flaggedQuery.list(dc), extended); |
7217 |
30 May 23 |
nicklas |
355 |
query = queryBuilder(dc).checked().tumors(null).query(); |
5005 |
04 Oct 18 |
nicklas |
356 |
if (names.size() > 0) |
5005 |
04 Oct 18 |
nicklas |
357 |
{ |
5005 |
04 Oct 18 |
nicklas |
358 |
Expression[] nameFilter = createNameFilter(names); |
5005 |
04 Oct 18 |
nicklas |
359 |
query.restrict(Restrictions.like_in(Hql.property("name"), nameFilter)); |
5005 |
04 Oct 18 |
nicklas |
360 |
} |
5005 |
04 Oct 18 |
nicklas |
361 |
else |
5005 |
04 Oct 18 |
nicklas |
362 |
{ |
5005 |
04 Oct 18 |
nicklas |
363 |
query.restrict(Restrictions.eq(Expressions.integer(1), Expressions.integer(0))); |
5005 |
04 Oct 18 |
nicklas |
364 |
} |
4816 |
21 May 18 |
nicklas |
365 |
} |
4816 |
21 May 18 |
nicklas |
366 |
return query; |
4816 |
21 May 18 |
nicklas |
367 |
} |
4816 |
21 May 18 |
nicklas |
368 |
|
4816 |
21 May 18 |
nicklas |
369 |
|
4816 |
21 May 18 |
nicklas |
370 |
/** |
4794 |
07 May 18 |
nicklas |
Get the names of all items in the list or, if the 'extended' parameter |
4794 |
07 May 18 |
nicklas |
is true, the names of all specimen that have the same patient as at |
4794 |
07 May 18 |
nicklas |
least one item in the list. |
4794 |
07 May 18 |
nicklas |
374 |
|
4794 |
07 May 18 |
nicklas |
For example, if we have a list of RNA items, we simply return the names of |
4794 |
07 May 18 |
nicklas |
those RNA if 'extended'=false. |
4794 |
07 May 18 |
nicklas |
377 |
|
4794 |
07 May 18 |
nicklas |
If 'extended'=true we find the patient for each of the RNA items, |
4794 |
07 May 18 |
nicklas |
and then for each of the patient we find all specimen items and |
4794 |
07 May 18 |
nicklas |
return those items. |
4794 |
07 May 18 |
nicklas |
381 |
*/ |
4818 |
22 May 18 |
nicklas |
382 |
private Set<String> getAllNames(Collection<? extends Nameable> list, boolean extended) |
4794 |
07 May 18 |
nicklas |
383 |
{ |
4794 |
07 May 18 |
nicklas |
384 |
Set<String> names = new HashSet<>(); |
4821 |
23 May 18 |
nicklas |
385 |
if (extended) |
4794 |
07 May 18 |
nicklas |
386 |
{ |
4821 |
23 May 18 |
nicklas |
// We should find the patient name for each item |
4821 |
23 May 18 |
nicklas |
388 |
Set<String> patientNames = new HashSet<>(); |
4821 |
23 May 18 |
nicklas |
389 |
for (Nameable item : list) |
4794 |
07 May 18 |
nicklas |
390 |
{ |
4821 |
23 May 18 |
nicklas |
391 |
SpecimenData sp = getSpecimenData(item); |
4821 |
23 May 18 |
nicklas |
392 |
if (sp == null) |
4821 |
23 May 18 |
nicklas |
393 |
{ |
4821 |
23 May 18 |
nicklas |
// If there is no patient info (eg. external sample) we use the item as is |
4821 |
23 May 18 |
nicklas |
395 |
names.add(item.getName()); |
4821 |
23 May 18 |
nicklas |
396 |
} |
4821 |
23 May 18 |
nicklas |
397 |
else |
4821 |
23 May 18 |
nicklas |
398 |
{ |
4821 |
23 May 18 |
nicklas |
399 |
patientNames.add(sp.getPatientName()); |
4821 |
23 May 18 |
nicklas |
400 |
} |
4794 |
07 May 18 |
nicklas |
401 |
} |
4821 |
23 May 18 |
nicklas |
402 |
|
4821 |
23 May 18 |
nicklas |
403 |
if (patientNames.size() > 0) |
4794 |
07 May 18 |
nicklas |
404 |
{ |
4821 |
23 May 18 |
nicklas |
// Get all specimen names for the patients that we found earlier |
4821 |
23 May 18 |
nicklas |
406 |
for (SpecimenData sp : specimenData.values()) |
4794 |
07 May 18 |
nicklas |
407 |
{ |
4821 |
23 May 18 |
nicklas |
408 |
if (patientNames.contains(sp.getPatientName())) |
4821 |
23 May 18 |
nicklas |
409 |
{ |
4821 |
23 May 18 |
nicklas |
410 |
names.add(sp.getSpecimenName()); |
4821 |
23 May 18 |
nicklas |
411 |
} |
4794 |
07 May 18 |
nicklas |
412 |
} |
4794 |
07 May 18 |
nicklas |
413 |
} |
4794 |
07 May 18 |
nicklas |
414 |
} |
4821 |
23 May 18 |
nicklas |
415 |
else |
4821 |
23 May 18 |
nicklas |
416 |
{ |
4821 |
23 May 18 |
nicklas |
417 |
for (Nameable item : list) |
4821 |
23 May 18 |
nicklas |
418 |
{ |
4821 |
23 May 18 |
nicklas |
419 |
names.add(item.getName()); |
4821 |
23 May 18 |
nicklas |
420 |
} |
4821 |
23 May 18 |
nicklas |
421 |
} |
4794 |
07 May 18 |
nicklas |
422 |
|
4794 |
07 May 18 |
nicklas |
423 |
return names; |
4794 |
07 May 18 |
nicklas |
424 |
} |
4794 |
07 May 18 |
nicklas |
425 |
|
4794 |
07 May 18 |
nicklas |
426 |
/** |
4794 |
07 May 18 |
nicklas |
Convert a set of names into 'LIKE' expressions using the |
4794 |
07 May 18 |
nicklas |
names as a prefix. For example: 1234567 --> 1234567% |
4794 |
07 May 18 |
nicklas |
429 |
*/ |
4794 |
07 May 18 |
nicklas |
430 |
private Expression[] createNameFilter(Set<String> names) |
4794 |
07 May 18 |
nicklas |
431 |
{ |
4794 |
07 May 18 |
nicklas |
432 |
Expression[] nameFilter = new Expression[names.size()]; |
4794 |
07 May 18 |
nicklas |
433 |
int i = 0; |
4794 |
07 May 18 |
nicklas |
434 |
for (String name : names) |
4794 |
07 May 18 |
nicklas |
435 |
{ |
4794 |
07 May 18 |
nicklas |
436 |
nameFilter[i] = Expressions.string(name + "%"); |
4794 |
07 May 18 |
nicklas |
437 |
i++; |
4794 |
07 May 18 |
nicklas |
438 |
} |
4794 |
07 May 18 |
nicklas |
439 |
return nameFilter; |
4794 |
07 May 18 |
nicklas |
440 |
} |
7217 |
30 May 23 |
nicklas |
441 |
|
4794 |
07 May 18 |
nicklas |
442 |
/** |
7217 |
30 May 23 |
nicklas |
Set an extra filter that is used when pre-loading tumor items. |
7217 |
30 May 23 |
nicklas |
@since 4.48 |
7217 |
30 May 23 |
nicklas |
445 |
*/ |
7217 |
30 May 23 |
nicklas |
446 |
public void setTumorPreloadFilter(Restriction filter) |
7217 |
30 May 23 |
nicklas |
447 |
{ |
7217 |
30 May 23 |
nicklas |
448 |
this.tumorPreloadFilter = filter; |
7217 |
30 May 23 |
nicklas |
449 |
} |
7217 |
30 May 23 |
nicklas |
450 |
|
7217 |
30 May 23 |
nicklas |
451 |
/** |
7217 |
30 May 23 |
nicklas |
Set an extra filter that is used when pre-loading normal items. |
7217 |
30 May 23 |
nicklas |
@since 4.48 |
7217 |
30 May 23 |
nicklas |
454 |
*/ |
7217 |
30 May 23 |
nicklas |
455 |
public void setNormalPreloadFilter(Restriction filter) |
7217 |
30 May 23 |
nicklas |
456 |
{ |
7217 |
30 May 23 |
nicklas |
457 |
this.normalPreloadFilter = filter; |
7217 |
30 May 23 |
nicklas |
458 |
} |
7217 |
30 May 23 |
nicklas |
459 |
|
7217 |
30 May 23 |
nicklas |
460 |
/** |
7199 |
25 May 23 |
nicklas |
Pre-load VCF files from existing items that has |
4647 |
14 Dec 17 |
nicklas |
the {@link Annotationtype#QC_GENOTYPE_STATUS} set to |
4647 |
14 Dec 17 |
nicklas |
"Checked". This method should be called before comparing |
4647 |
14 Dec 17 |
nicklas |
new VCF files with the existing VCF files. It can be |
4647 |
14 Dec 17 |
nicklas |
called from a separate thread. Use {@link #preloadCompleted()} |
4647 |
14 Dec 17 |
nicklas |
to check if it is ok to start the comparison. |
7199 |
25 May 23 |
nicklas |
@since 4.48 |
4647 |
14 Dec 17 |
nicklas |
468 |
*/ |
7199 |
25 May 23 |
nicklas |
469 |
public void preloadVcfForCheckedItems(DbControl dc) |
4647 |
14 Dec 17 |
nicklas |
470 |
{ |
6591 |
21 Feb 22 |
nicklas |
471 |
this.preloadError = null; |
7071 |
20 Mar 23 |
nicklas |
472 |
ExecutorService threadPool = null; |
7071 |
20 Mar 23 |
nicklas |
473 |
ExecutorCompletionService<VcfLoaderCallable> executor = null; |
6591 |
21 Feb 22 |
nicklas |
474 |
try |
6588 |
18 Feb 22 |
nicklas |
475 |
{ |
7073 |
21 Mar 23 |
nicklas |
//long time = System.currentTimeMillis(); |
7217 |
30 May 23 |
nicklas |
477 |
ItemQuery<DerivedBioAssay> queryTumor = queryBuilder(dc).tumors(tumorPreloadFilter).checked().query(); |
7199 |
25 May 23 |
nicklas |
478 |
Set<Integer> allBioAssays = new HashSet<>(queryTumor.idList(dc)); |
7199 |
25 May 23 |
nicklas |
479 |
tumorItemsToPreload = allBioAssays.size(); |
6591 |
21 Feb 22 |
nicklas |
480 |
|
7199 |
25 May 23 |
nicklas |
481 |
ItemQuery<DerivedBioAssay> queryNormal = null; |
7217 |
30 May 23 |
nicklas |
482 |
queryNormal = queryBuilder(dc).normals(normalPreloadFilter).checked().query(); |
7199 |
25 May 23 |
nicklas |
483 |
allBioAssays.addAll(queryNormal.idList(dc)); |
7199 |
25 May 23 |
nicklas |
484 |
|
7073 |
21 Mar 23 |
nicklas |
485 |
totalVcfsToPreload = allBioAssays.size(); |
7199 |
25 May 23 |
nicklas |
486 |
normalItemsToPreload = totalVcfsToPreload-tumorItemsToPreload; |
6591 |
21 Feb 22 |
nicklas |
487 |
countCompleted.countDown(); |
7073 |
21 Mar 23 |
nicklas |
//System.out.println("Counted:" + (System.currentTimeMillis()-time)+" ms"); |
6591 |
21 Feb 22 |
nicklas |
489 |
|
6591 |
21 Feb 22 |
nicklas |
// This will increase total performance later since |
6591 |
21 Feb 22 |
nicklas |
// loadVcf() don't have to SELECT each file by id |
7073 |
21 Mar 23 |
nicklas |
492 |
FastLinkLoader<DerivedBioAssay, File> vcfLinkLoader = preloadFiles(dc, "qc_genotype.vcf", allBioAssays); |
7073 |
21 Mar 23 |
nicklas |
//System.out.println("Preloaded:" + (System.currentTimeMillis()-time)+" ms"); |
7071 |
20 Mar 23 |
nicklas |
494 |
threadPool = Executors.newFixedThreadPool(preloadThreads, new ReggieThreadFactory("VcfPreloaderThread")); |
7071 |
20 Mar 23 |
nicklas |
495 |
executor = new ExecutorCompletionService<>(threadPool); |
7071 |
20 Mar 23 |
nicklas |
// Submit jobs to the executor |
7071 |
20 Mar 23 |
nicklas |
497 |
int numSubmitted = 0; |
7199 |
25 May 23 |
nicklas |
498 |
List<DerivedBioAssay> tumors = queryTumor.list(dc); |
7199 |
25 May 23 |
nicklas |
499 |
for (int i = 0; i < tumors.size(); i++) |
6591 |
21 Feb 22 |
nicklas |
500 |
{ |
7199 |
25 May 23 |
nicklas |
501 |
DerivedBioAssay dba = tumors.get(i); |
7073 |
21 Mar 23 |
nicklas |
502 |
File vcfFile = vcfLinkLoader.getTo(dba); |
7071 |
20 Mar 23 |
nicklas |
503 |
executor.submit(new VcfLoaderCallable(this, dba, vcfFile, true)); |
7071 |
20 Mar 23 |
nicklas |
504 |
numSubmitted++; |
6588 |
18 Feb 22 |
nicklas |
505 |
} |
7199 |
25 May 23 |
nicklas |
506 |
List<DerivedBioAssay> normals = queryNormal.list(dc); |
7199 |
25 May 23 |
nicklas |
507 |
for (int i = 0; i < normals.size(); i++) |
6591 |
21 Feb 22 |
nicklas |
508 |
{ |
7199 |
25 May 23 |
nicklas |
509 |
DerivedBioAssay dba = normals.get(i); |
7199 |
25 May 23 |
nicklas |
510 |
File vcfFile = vcfLinkLoader.getTo(dba); |
7199 |
25 May 23 |
nicklas |
511 |
executor.submit(new VcfLoaderCallable(this, dba, vcfFile, true)); |
7199 |
25 May 23 |
nicklas |
512 |
numSubmitted++; |
6591 |
21 Feb 22 |
nicklas |
513 |
} |
7073 |
21 Mar 23 |
nicklas |
//System.out.println("Submitted:" + (System.currentTimeMillis()-time)+" ms"); |
7071 |
20 Mar 23 |
nicklas |
// Wait for the jobs to complete and collect the results |
7071 |
20 Mar 23 |
nicklas |
516 |
for (int i = 0; i < numSubmitted; i++) |
7071 |
20 Mar 23 |
nicklas |
517 |
{ |
7071 |
20 Mar 23 |
nicklas |
518 |
Future<VcfLoaderCallable> result = executor.take(); |
7071 |
20 Mar 23 |
nicklas |
519 |
VcfLoaderCallable caller = result.get(); |
7071 |
20 Mar 23 |
nicklas |
520 |
preloaded.put(caller.alignment, caller.vcfData); |
7071 |
20 Mar 23 |
nicklas |
521 |
currentVcfPreloadCount++; |
7071 |
20 Mar 23 |
nicklas |
522 |
} |
7073 |
21 Mar 23 |
nicklas |
//System.out.println("Loaded:" + (System.currentTimeMillis()-time)+" ms"); |
7071 |
20 Mar 23 |
nicklas |
524 |
|
6591 |
21 Feb 22 |
nicklas |
525 |
preloadCompleted.countDown(); |
6588 |
18 Feb 22 |
nicklas |
526 |
} |
6591 |
21 Feb 22 |
nicklas |
527 |
catch (RuntimeException ex) |
6591 |
21 Feb 22 |
nicklas |
528 |
{ |
6591 |
21 Feb 22 |
nicklas |
529 |
preloadError = ex; |
6591 |
21 Feb 22 |
nicklas |
530 |
throw ex; |
6591 |
21 Feb 22 |
nicklas |
531 |
} |
7071 |
20 Mar 23 |
nicklas |
532 |
catch (Exception ex) |
7071 |
20 Mar 23 |
nicklas |
533 |
{ |
7071 |
20 Mar 23 |
nicklas |
534 |
preloadError = new RuntimeException(ex); |
7071 |
20 Mar 23 |
nicklas |
535 |
throw preloadError; |
7071 |
20 Mar 23 |
nicklas |
536 |
} |
7071 |
20 Mar 23 |
nicklas |
537 |
finally |
7071 |
20 Mar 23 |
nicklas |
538 |
{ |
7071 |
20 Mar 23 |
nicklas |
539 |
try |
7071 |
20 Mar 23 |
nicklas |
540 |
{ |
7071 |
20 Mar 23 |
nicklas |
// This will send an interrupt to all running threads |
7071 |
20 Mar 23 |
nicklas |
// There should be none if everything went ok, but if one |
7071 |
20 Mar 23 |
nicklas |
// thread fails, Future.get() will throw an exception and we |
7071 |
20 Mar 23 |
nicklas |
// will ask the other threads to abort. We give them 10 seconds... |
7071 |
20 Mar 23 |
nicklas |
545 |
if (threadPool != null) |
7071 |
20 Mar 23 |
nicklas |
546 |
{ |
7071 |
20 Mar 23 |
nicklas |
547 |
threadPool.shutdownNow(); |
7071 |
20 Mar 23 |
nicklas |
548 |
threadPool.awaitTermination(10, TimeUnit.SECONDS); |
7071 |
20 Mar 23 |
nicklas |
549 |
} |
7071 |
20 Mar 23 |
nicklas |
550 |
} |
7071 |
20 Mar 23 |
nicklas |
551 |
catch (InterruptedException ex) |
7071 |
20 Mar 23 |
nicklas |
552 |
{} |
7071 |
20 Mar 23 |
nicklas |
553 |
} |
4647 |
14 Dec 17 |
nicklas |
554 |
} |
4647 |
14 Dec 17 |
nicklas |
555 |
|
4647 |
14 Dec 17 |
nicklas |
556 |
/** |
4817 |
21 May 18 |
nicklas |
Pre-load VCF files from existing alignments that are |
4817 |
21 May 18 |
nicklas |
members of the {@link BiomaterialList#FLAGGED_ALIGNMENT} |
4817 |
21 May 18 |
nicklas |
list. This method can be used as an alternative to |
4817 |
21 May 18 |
nicklas |
{@link #preloadVcfForCheckedAlignments(DbControl)} when |
4817 |
21 May 18 |
nicklas |
we are mainly interested in the flagged alignments. |
6589 |
21 Feb 22 |
nicklas |
@return A list with the flagged alignments |
7199 |
25 May 23 |
nicklas |
@since 4.48 |
4647 |
14 Dec 17 |
nicklas |
564 |
*/ |
7199 |
25 May 23 |
nicklas |
565 |
public List<DerivedBioAssay> preloadVcfForFlaggedTumorItems(DbControl dc) |
4794 |
07 May 18 |
nicklas |
566 |
{ |
6591 |
21 Feb 22 |
nicklas |
567 |
this.preloadError = null; |
6591 |
21 Feb 22 |
nicklas |
568 |
try |
4794 |
07 May 18 |
nicklas |
569 |
{ |
7199 |
25 May 23 |
nicklas |
570 |
ItemQuery<DerivedBioAssay> query = getFlaggedTumorItems(dc); |
7073 |
21 Mar 23 |
nicklas |
571 |
List<Integer> idList = query.idList(dc); |
7073 |
21 Mar 23 |
nicklas |
572 |
totalVcfsToPreload = idList.size(); |
6591 |
21 Feb 22 |
nicklas |
573 |
countCompleted.countDown(); |
6591 |
21 Feb 22 |
nicklas |
574 |
|
6591 |
21 Feb 22 |
nicklas |
// This will increase total performance later since |
6591 |
21 Feb 22 |
nicklas |
// loadVcf() don't have to SELECT each file by id |
7073 |
21 Mar 23 |
nicklas |
577 |
FastLinkLoader<DerivedBioAssay, File> vcfLinkLoader = preloadFiles(dc, "qc_genotype.vcf", idList); |
6591 |
21 Feb 22 |
nicklas |
578 |
|
6591 |
21 Feb 22 |
nicklas |
579 |
List<DerivedBioAssay> alignments = query.list(dc); |
6591 |
21 Feb 22 |
nicklas |
580 |
for (int i = 0; i < alignments.size(); i++) |
6591 |
21 Feb 22 |
nicklas |
581 |
{ |
6591 |
21 Feb 22 |
nicklas |
582 |
DerivedBioAssay dba = alignments.get(i); |
7073 |
21 Mar 23 |
nicklas |
583 |
File vcfFile = vcfLinkLoader.getTo(dba); |
7073 |
21 Mar 23 |
nicklas |
584 |
preloaded.put(dba, loadVcf(dc, dba, vcfFile, true)); |
6591 |
21 Feb 22 |
nicklas |
585 |
currentVcfPreloadCount++; |
6591 |
21 Feb 22 |
nicklas |
586 |
} |
6591 |
21 Feb 22 |
nicklas |
587 |
|
6591 |
21 Feb 22 |
nicklas |
588 |
preloadCompleted.countDown(); |
6591 |
21 Feb 22 |
nicklas |
589 |
return alignments; |
4794 |
07 May 18 |
nicklas |
590 |
} |
6591 |
21 Feb 22 |
nicklas |
591 |
catch (RuntimeException ex) |
6591 |
21 Feb 22 |
nicklas |
592 |
{ |
6591 |
21 Feb 22 |
nicklas |
593 |
preloadError = ex; |
6591 |
21 Feb 22 |
nicklas |
594 |
throw ex; |
6591 |
21 Feb 22 |
nicklas |
595 |
} |
4794 |
07 May 18 |
nicklas |
596 |
} |
4647 |
14 Dec 17 |
nicklas |
597 |
|
6589 |
21 Feb 22 |
nicklas |
598 |
/** |
6589 |
21 Feb 22 |
nicklas |
Pre-load VCF files from existing genotype calls that are |
6589 |
21 Feb 22 |
nicklas |
related to the currently pre-loaded alignments and an |
6589 |
21 Feb 22 |
nicklas |
optional list with additional patients. |
6589 |
21 Feb 22 |
nicklas |
602 |
|
6589 |
21 Feb 22 |
nicklas |
@return A list with the genotype calls |
6589 |
21 Feb 22 |
nicklas |
@since 4.37 |
6589 |
21 Feb 22 |
nicklas |
605 |
*/ |
7199 |
25 May 23 |
nicklas |
606 |
public List<DerivedBioAssay> preloadRelatedNormalItems(DbControl dc, Set<Integer> morePatients) |
6589 |
21 Feb 22 |
nicklas |
607 |
{ |
6589 |
21 Feb 22 |
nicklas |
608 |
if (preloaded.isEmpty()) return Collections.emptyList(); |
6589 |
21 Feb 22 |
nicklas |
609 |
Set<Integer> patients = new HashSet<>(); |
6589 |
21 Feb 22 |
nicklas |
610 |
if (morePatients != null) patients.addAll(morePatients); |
6589 |
21 Feb 22 |
nicklas |
611 |
for (DerivedBioAssay alignment : preloaded.keySet()) |
6589 |
21 Feb 22 |
nicklas |
612 |
{ |
6589 |
21 Feb 22 |
nicklas |
613 |
SpecimenData sp = getSpecimenData(alignment); |
6589 |
21 Feb 22 |
nicklas |
614 |
if (sp != null) patients.add(sp.getPatientId()); |
6589 |
21 Feb 22 |
nicklas |
615 |
} |
6589 |
21 Feb 22 |
nicklas |
616 |
if (patients.isEmpty()) return Collections.emptyList(); |
6589 |
21 Feb 22 |
nicklas |
617 |
|
7199 |
25 May 23 |
nicklas |
618 |
ItemQuery<DerivedBioAssay> query = getNormalItemsRelatedToPatients(dc, patients, false); |
7199 |
25 May 23 |
nicklas |
619 |
List<DerivedBioAssay> normals = query.list(dc); |
7199 |
25 May 23 |
nicklas |
620 |
for (DerivedBioAssay dba : normals) |
6589 |
21 Feb 22 |
nicklas |
621 |
{ |
7211 |
29 May 23 |
nicklas |
622 |
preloadItem(dc, dba); |
6589 |
21 Feb 22 |
nicklas |
623 |
} |
7199 |
25 May 23 |
nicklas |
624 |
return normals; |
6589 |
21 Feb 22 |
nicklas |
625 |
} |
6589 |
21 Feb 22 |
nicklas |
626 |
|
6588 |
18 Feb 22 |
nicklas |
627 |
public void preloadSpecimenData(DbControl dc, boolean includeBlood) |
4647 |
14 Dec 17 |
nicklas |
628 |
{ |
6591 |
21 Feb 22 |
nicklas |
629 |
this.preloadError = null; |
6591 |
21 Feb 22 |
nicklas |
630 |
try |
4647 |
14 Dec 17 |
nicklas |
631 |
{ |
6591 |
21 Feb 22 |
nicklas |
632 |
ItemQuery<Sample> query = Sample.getQuery(); |
6591 |
21 Feb 22 |
nicklas |
633 |
Subtype.SPECIMEN.addFilter(dc, query); |
6588 |
18 Feb 22 |
nicklas |
634 |
query.setIncludes(Reggie.INCLUDE_IN_CURRENT_PROJECT); |
6591 |
21 Feb 22 |
nicklas |
635 |
query.join(Hql.innerJoin(null, "parent", "cse", true)); |
6588 |
18 Feb 22 |
nicklas |
636 |
query.join(Hql.innerJoin(null, "creationEvent", "ce1", true)); |
6591 |
21 Feb 22 |
nicklas |
637 |
query.join(Hql.innerJoin("cse", "parent", "pat", true)); |
6591 |
21 Feb 22 |
nicklas |
638 |
query.join(Hql.innerJoin("cse", "creationEvent", "ce2", true)); |
6591 |
21 Feb 22 |
nicklas |
639 |
|
6591 |
21 Feb 22 |
nicklas |
640 |
for (Sample specimen : query.list(dc)) |
6588 |
18 Feb 22 |
nicklas |
641 |
{ |
6591 |
21 Feb 22 |
nicklas |
642 |
specimenData.put(specimen.getName(), new SpecimenData(SpecimenType.SPECIMEN, specimen)); |
6588 |
18 Feb 22 |
nicklas |
643 |
} |
6591 |
21 Feb 22 |
nicklas |
644 |
|
6591 |
21 Feb 22 |
nicklas |
645 |
if (includeBlood) |
6591 |
21 Feb 22 |
nicklas |
646 |
{ |
6591 |
21 Feb 22 |
nicklas |
647 |
query = Sample.getQuery(); |
6591 |
21 Feb 22 |
nicklas |
648 |
Subtype.BLOOD.addFilter(dc, query); |
6591 |
21 Feb 22 |
nicklas |
649 |
query.setIncludes(Reggie.INCLUDE_IN_CURRENT_PROJECT); |
6591 |
21 Feb 22 |
nicklas |
650 |
query.join(Hql.innerJoin(null, "parent", "pat", true)); |
6591 |
21 Feb 22 |
nicklas |
651 |
query.join(Hql.innerJoin(null, "creationEvent", "ce1", true)); |
6591 |
21 Feb 22 |
nicklas |
652 |
query.join(Annotations.leftJoin(Annotationtype.BLOOD_SAMPLE.get(dc), "bs")); |
6591 |
21 Feb 22 |
nicklas |
653 |
query.restrict(Restrictions.or( |
6591 |
21 Feb 22 |
nicklas |
654 |
Restrictions.eq(Hql.alias("bs"), null), |
6591 |
21 Feb 22 |
nicklas |
655 |
Restrictions.eq(Hql.alias("bs"), Expressions.string("PreOp")), |
6591 |
21 Feb 22 |
nicklas |
656 |
Restrictions.eq(Hql.alias("bs"), Expressions.string("PreNeo")) |
6591 |
21 Feb 22 |
nicklas |
657 |
)); |
6591 |
21 Feb 22 |
nicklas |
658 |
for (Sample blood : query.list(dc)) |
6591 |
21 Feb 22 |
nicklas |
659 |
{ |
6591 |
21 Feb 22 |
nicklas |
660 |
specimenData.put(blood.getName(), new SpecimenData(SpecimenType.BLOOD, blood)); |
6591 |
21 Feb 22 |
nicklas |
661 |
} |
6591 |
21 Feb 22 |
nicklas |
662 |
} |
6591 |
21 Feb 22 |
nicklas |
663 |
|
6591 |
21 Feb 22 |
nicklas |
664 |
ItemList flagged = BiomaterialList.FLAGGED_ALIGNMENT.get(dc); |
6591 |
21 Feb 22 |
nicklas |
665 |
ItemQuery<DerivedBioAssay> flaggedQuery = flagged.getMembers(); |
6591 |
21 Feb 22 |
nicklas |
666 |
flaggedQuery.setIncludes(Reggie.INCLUDE_IN_CURRENT_PROJECT); |
7199 |
25 May 23 |
nicklas |
667 |
Pipeline.RNASEQ_HISAT_STRINGTIE.addFilter(dc, flaggedQuery); |
6591 |
21 Feb 22 |
nicklas |
668 |
flaggedAlignments.addAll(flaggedQuery.idList(dc)); |
6588 |
18 Feb 22 |
nicklas |
669 |
} |
6591 |
21 Feb 22 |
nicklas |
670 |
catch (RuntimeException ex) |
6591 |
21 Feb 22 |
nicklas |
671 |
{ |
6591 |
21 Feb 22 |
nicklas |
672 |
preloadError = ex; |
6591 |
21 Feb 22 |
nicklas |
673 |
throw ex; |
6591 |
21 Feb 22 |
nicklas |
674 |
} |
6591 |
21 Feb 22 |
nicklas |
675 |
finally |
6591 |
21 Feb 22 |
nicklas |
676 |
{ |
6591 |
21 Feb 22 |
nicklas |
677 |
preloadCompleted.countDown(); |
6591 |
21 Feb 22 |
nicklas |
678 |
} |
4647 |
14 Dec 17 |
nicklas |
679 |
} |
4647 |
14 Dec 17 |
nicklas |
680 |
|
4647 |
14 Dec 17 |
nicklas |
681 |
|
7073 |
21 Mar 23 |
nicklas |
682 |
private FastLinkLoader<DerivedBioAssay, File> preloadFiles(DbControl dc, String name, Collection<Integer> alignedIds) |
7073 |
21 Mar 23 |
nicklas |
683 |
{ |
7073 |
21 Mar 23 |
nicklas |
684 |
FastLinkLoader<DerivedBioAssay, File> loader = AnyToAny.getFastLoader(dc, Item.DERIVEDBIOASSAY, name, Item.FILE); |
7073 |
21 Mar 23 |
nicklas |
685 |
if (alignedIds != null) |
7071 |
20 Mar 23 |
nicklas |
686 |
{ |
7073 |
21 Mar 23 |
nicklas |
687 |
loader.preload(new IdListRestriction("fromId", alignedIds), null); |
7071 |
20 Mar 23 |
nicklas |
688 |
} |
7073 |
21 Mar 23 |
nicklas |
689 |
else |
7073 |
21 Mar 23 |
nicklas |
690 |
{ |
7073 |
21 Mar 23 |
nicklas |
691 |
loader.preloadAll(); |
7073 |
21 Mar 23 |
nicklas |
692 |
} |
7073 |
21 Mar 23 |
nicklas |
693 |
return loader; |
4647 |
14 Dec 17 |
nicklas |
694 |
} |
4647 |
14 Dec 17 |
nicklas |
695 |
|
4647 |
14 Dec 17 |
nicklas |
696 |
/** |
4647 |
14 Dec 17 |
nicklas |
Wait for the pre-loading thread to have counted how many |
4647 |
14 Dec 17 |
nicklas |
VCF files that need to be loaded. |
4647 |
14 Dec 17 |
nicklas |
@param seconds Max number of seconds to wait before a TimeoutException is thrown |
4647 |
14 Dec 17 |
nicklas |
700 |
*/ |
4647 |
14 Dec 17 |
nicklas |
701 |
public void awaitCounts(int seconds) |
4647 |
14 Dec 17 |
nicklas |
702 |
throws InterruptedException, TimeoutException |
4647 |
14 Dec 17 |
nicklas |
703 |
{ |
6591 |
21 Feb 22 |
nicklas |
704 |
long waitUntil = System.currentTimeMillis()+1000*seconds; |
6591 |
21 Feb 22 |
nicklas |
705 |
while (System.currentTimeMillis() < waitUntil) |
4647 |
14 Dec 17 |
nicklas |
706 |
{ |
6591 |
21 Feb 22 |
nicklas |
707 |
if (countCompleted.await(1, TimeUnit.SECONDS)) return; // Count reached 0 --> all is ok |
6591 |
21 Feb 22 |
nicklas |
708 |
if (preloadError != null) throw preloadError; |
4647 |
14 Dec 17 |
nicklas |
709 |
} |
6591 |
21 Feb 22 |
nicklas |
710 |
throw new TimeoutException("Timeout while waiting for pre-load count in " + seconds + " seconds"); |
4647 |
14 Dec 17 |
nicklas |
711 |
} |
4647 |
14 Dec 17 |
nicklas |
712 |
|
4647 |
14 Dec 17 |
nicklas |
713 |
/** |
4647 |
14 Dec 17 |
nicklas |
Check if the pre-load count phase has been completed yet. |
4647 |
14 Dec 17 |
nicklas |
715 |
*/ |
4647 |
14 Dec 17 |
nicklas |
716 |
public boolean countCompleted() |
4647 |
14 Dec 17 |
nicklas |
717 |
{ |
4647 |
14 Dec 17 |
nicklas |
718 |
return countCompleted.getCount() == 0; |
4647 |
14 Dec 17 |
nicklas |
719 |
} |
4647 |
14 Dec 17 |
nicklas |
720 |
|
4647 |
14 Dec 17 |
nicklas |
721 |
/** |
4647 |
14 Dec 17 |
nicklas |
Check if the pre-load phase has been completed yet. |
4647 |
14 Dec 17 |
nicklas |
723 |
*/ |
4647 |
14 Dec 17 |
nicklas |
724 |
public boolean preloadCompleted() |
4647 |
14 Dec 17 |
nicklas |
725 |
{ |
4647 |
14 Dec 17 |
nicklas |
726 |
return preloadCompleted.getCount() == 0; |
4647 |
14 Dec 17 |
nicklas |
727 |
} |
4647 |
14 Dec 17 |
nicklas |
728 |
|
4647 |
14 Dec 17 |
nicklas |
729 |
/** |
4647 |
14 Dec 17 |
nicklas |
Wait for the pre-loading to complete. |
4647 |
14 Dec 17 |
nicklas |
@param seconds Max number of seconds to wait before a TimeoutException is thrown |
4647 |
14 Dec 17 |
nicklas |
732 |
*/ |
4647 |
14 Dec 17 |
nicklas |
733 |
public void awaitPreload(int seconds) |
4647 |
14 Dec 17 |
nicklas |
734 |
throws InterruptedException, TimeoutException |
4647 |
14 Dec 17 |
nicklas |
735 |
{ |
6591 |
21 Feb 22 |
nicklas |
736 |
long waitUntil = System.currentTimeMillis()+1000*seconds; |
6591 |
21 Feb 22 |
nicklas |
737 |
while (System.currentTimeMillis() < waitUntil) |
4647 |
14 Dec 17 |
nicklas |
738 |
{ |
6591 |
21 Feb 22 |
nicklas |
739 |
if (preloadCompleted.await(1, TimeUnit.SECONDS)) return; // Count reached 0 --> all is ok |
6591 |
21 Feb 22 |
nicklas |
740 |
if (preloadError != null) throw preloadError; |
4647 |
14 Dec 17 |
nicklas |
741 |
} |
6591 |
21 Feb 22 |
nicklas |
742 |
throw new TimeoutException("Timeout while waiting for pre-load in " + seconds + " seconds"); |
4647 |
14 Dec 17 |
nicklas |
743 |
} |
4647 |
14 Dec 17 |
nicklas |
744 |
|
6591 |
21 Feb 22 |
nicklas |
745 |
/** |
6591 |
21 Feb 22 |
nicklas |
Throw the pre-load exception if there is one. Otherwise do nothing. |
6591 |
21 Feb 22 |
nicklas |
@since 4.37 |
6591 |
21 Feb 22 |
nicklas |
748 |
*/ |
6591 |
21 Feb 22 |
nicklas |
749 |
public void throwIfPreloadError() |
6591 |
21 Feb 22 |
nicklas |
750 |
{ |
6591 |
21 Feb 22 |
nicklas |
751 |
if (preloadError != null) throw preloadError; |
6591 |
21 Feb 22 |
nicklas |
752 |
} |
4794 |
07 May 18 |
nicklas |
753 |
|
4671 |
07 Feb 18 |
nicklas |
754 |
/** |
7199 |
25 May 23 |
nicklas |
Preload the VCF file from the given alignment. |
7199 |
25 May 23 |
nicklas |
@since 4.48 |
4671 |
07 Feb 18 |
nicklas |
757 |
*/ |
7211 |
29 May 23 |
nicklas |
758 |
public VcfData preloadItem(DbControl dc, DerivedBioAssay item) |
4671 |
07 Feb 18 |
nicklas |
759 |
{ |
4671 |
07 Feb 18 |
nicklas |
760 |
if (preloadCompleted.getCount() != 0) |
4671 |
07 Feb 18 |
nicklas |
761 |
{ |
4671 |
07 Feb 18 |
nicklas |
762 |
throw new IllegalStateException("Pre-load has not completed"); |
4671 |
07 Feb 18 |
nicklas |
763 |
} |
4671 |
07 Feb 18 |
nicklas |
764 |
|
7211 |
29 May 23 |
nicklas |
765 |
VcfData vcf = preloaded.get(item); |
4671 |
07 Feb 18 |
nicklas |
766 |
if (vcf == null) |
4671 |
07 Feb 18 |
nicklas |
767 |
{ |
7211 |
29 May 23 |
nicklas |
768 |
File vcfFile = getVcfFile(dc, item); |
7211 |
29 May 23 |
nicklas |
769 |
vcf = loadVcf(dc, item, vcfFile, false); |
7211 |
29 May 23 |
nicklas |
770 |
if (vcf != null) preloaded.put(item, vcf); |
4671 |
07 Feb 18 |
nicklas |
771 |
} |
4671 |
07 Feb 18 |
nicklas |
772 |
return vcf; |
4671 |
07 Feb 18 |
nicklas |
773 |
} |
4671 |
07 Feb 18 |
nicklas |
774 |
|
4647 |
14 Dec 17 |
nicklas |
775 |
public CompareData check(DbControl dc, DerivedBioAssay alignment) |
4647 |
14 Dec 17 |
nicklas |
776 |
{ |
4647 |
14 Dec 17 |
nicklas |
777 |
if (preloadCompleted.getCount() != 0) |
4647 |
14 Dec 17 |
nicklas |
778 |
{ |
4647 |
14 Dec 17 |
nicklas |
779 |
throw new IllegalStateException("Pre-load has not completed"); |
4647 |
14 Dec 17 |
nicklas |
780 |
} |
4647 |
14 Dec 17 |
nicklas |
781 |
|
4671 |
07 Feb 18 |
nicklas |
782 |
VcfData vcf = preloaded.get(alignment); |
4794 |
07 May 18 |
nicklas |
783 |
CompareData cmp = new CompareData(alignment, vcf, flaggedAlignments.contains(alignment.getId())); |
4647 |
14 Dec 17 |
nicklas |
784 |
|
4647 |
14 Dec 17 |
nicklas |
785 |
SpecimenData sp1 = getSpecimenData(alignment); |
4647 |
14 Dec 17 |
nicklas |
786 |
boolean skipComparison = false; |
6452 |
22 Oct 21 |
nicklas |
787 |
int minCommonGt = (int)(NUM_REF_GT * minCommonGtPct / 100); |
4681 |
21 Feb 18 |
nicklas |
788 |
|
7218 |
30 May 23 |
nicklas |
789 |
Pipeline alignPipeline = Pipeline.getByName((String)Annotationtype.PIPELINE.getAnnotationValue(dc, alignment)); |
4681 |
21 Feb 18 |
nicklas |
790 |
Long alignedPairs = (Long)Annotationtype.ALIGNED_PAIRS.getAnnotationValue(dc, alignment); |
4681 |
21 Feb 18 |
nicklas |
791 |
boolean lowData = alignedPairs == null || alignedPairs < CufflinksAutoConfirmer.MIN_ALIGNED_PAIRS_FLAG; |
4671 |
07 Feb 18 |
nicklas |
792 |
boolean lowGt = vcf.getGtCount() < minCommonGt; |
6513 |
07 Dec 21 |
nicklas |
793 |
boolean lowGq = vcf.getLowGQCount() > vcf.getHighGQCount(); |
4671 |
07 Feb 18 |
nicklas |
794 |
boolean highHet = vcf.getHetPercentage() > maxHetPct; |
4671 |
07 Feb 18 |
nicklas |
795 |
|
4728 |
04 Apr 18 |
nicklas |
796 |
GenoTypeMessage gtm = null; |
4671 |
07 Feb 18 |
nicklas |
797 |
if (highHet) |
4647 |
14 Dec 17 |
nicklas |
798 |
{ |
4794 |
07 May 18 |
nicklas |
799 |
gtm = Category.HIGH_HET.message(" (" + Values.formatNumber(vcf.getHetPercentage(), 1) + "%)"); |
4728 |
04 Apr 18 |
nicklas |
800 |
gtm.setMessageTooltip("A HET value over " + ((int)maxHetPct) + "% may indicate a contaminated sample"); |
4728 |
04 Apr 18 |
nicklas |
801 |
cmp.addMessage(gtm); |
7218 |
30 May 23 |
nicklas |
802 |
if (alignPipeline.isTumorPipeline() && alignPipeline.isRnaPipeline()) |
7215 |
30 May 23 |
nicklas |
803 |
{ |
7215 |
30 May 23 |
nicklas |
804 |
cmp.setRecommendFlag(); |
7215 |
30 May 23 |
nicklas |
805 |
} |
7215 |
30 May 23 |
nicklas |
806 |
else |
7215 |
30 May 23 |
nicklas |
807 |
{ |
7215 |
30 May 23 |
nicklas |
808 |
cmp.setRecommendDisable(); |
7216 |
30 May 23 |
nicklas |
809 |
cmp.setRecommendDoNotUse("Library", "High HET"); |
7215 |
30 May 23 |
nicklas |
810 |
} |
4647 |
14 Dec 17 |
nicklas |
811 |
} |
4671 |
07 Feb 18 |
nicklas |
812 |
|
4681 |
21 Feb 18 |
nicklas |
813 |
if (lowData) |
4648 |
15 Dec 17 |
nicklas |
814 |
{ |
4794 |
07 May 18 |
nicklas |
815 |
gtm = Category.LOW_DATA.message(" (" + Values.formatNumber(alignedPairs / 1000000f, 1) + "M)"); |
4728 |
04 Apr 18 |
nicklas |
816 |
gtm.setMessageTooltip("The genotype calling may be affected if the aligned pairs is less than " + Values.formatNumber(CufflinksAutoConfirmer.MIN_ALIGNED_PAIRS_FLAG / 1000000f, 1) + "M"); |
4728 |
04 Apr 18 |
nicklas |
817 |
cmp.addMessage(gtm); |
4681 |
21 Feb 18 |
nicklas |
818 |
} |
4681 |
21 Feb 18 |
nicklas |
819 |
else if (lowGq) |
4681 |
21 Feb 18 |
nicklas |
820 |
{ |
6513 |
07 Dec 21 |
nicklas |
821 |
gtm = Category.LOW_GQ.message(" (" + vcf.getLowGQCount() + "/" + vcf.getHighGQCount()+")"); |
4728 |
04 Apr 18 |
nicklas |
822 |
gtm.setMessageTooltip("The genotype calling may be affected due to low quality scores"); |
4728 |
04 Apr 18 |
nicklas |
823 |
cmp.addMessage(gtm); |
4648 |
15 Dec 17 |
nicklas |
824 |
} |
4671 |
07 Feb 18 |
nicklas |
825 |
|
4671 |
07 Feb 18 |
nicklas |
826 |
if (lowGt) |
4647 |
14 Dec 17 |
nicklas |
827 |
{ |
4794 |
07 May 18 |
nicklas |
828 |
gtm = Category.LOW_GT_COUNT.message(" (" + vcf.getGtCount() + ")"); |
4728 |
04 Apr 18 |
nicklas |
829 |
gtm.setMessageTooltip("We need at least " + minCommonGt + " genotypes to be able to make comparisons"); |
4728 |
04 Apr 18 |
nicklas |
830 |
cmp.addMessage(gtm); |
4647 |
14 Dec 17 |
nicklas |
831 |
cmp.setRecommendDisable(); |
4671 |
07 Feb 18 |
nicklas |
832 |
cmp.addSkipped(preloaded.size()); |
4671 |
07 Feb 18 |
nicklas |
833 |
return cmp; |
4647 |
14 Dec 17 |
nicklas |
834 |
} |
4647 |
14 Dec 17 |
nicklas |
835 |
|
4647 |
14 Dec 17 |
nicklas |
836 |
for (Map.Entry<DerivedBioAssay, VcfData> entry : preloaded.entrySet()) |
4647 |
14 Dec 17 |
nicklas |
837 |
{ |
6595 |
22 Feb 22 |
nicklas |
838 |
DerivedBioAssay otherAssay = DerivedBioAssay.getById(dc, entry.getKey().getId()); |
4647 |
14 Dec 17 |
nicklas |
839 |
VcfData vcf2 = entry.getValue(); |
4647 |
14 Dec 17 |
nicklas |
840 |
|
4647 |
14 Dec 17 |
nicklas |
// Do not compare to self |
6595 |
22 Feb 22 |
nicklas |
842 |
if (alignment.equals(otherAssay)) continue; |
4671 |
07 Feb 18 |
nicklas |
843 |
|
6595 |
22 Feb 22 |
nicklas |
844 |
SpecimenData sp2 = getSpecimenData(otherAssay); |
4671 |
07 Feb 18 |
nicklas |
845 |
boolean samePat = sp1.hasSamePatient(sp2); |
7210 |
29 May 23 |
nicklas |
846 |
boolean sameSpecimenType = sp1.getSpecimenType() == sp2.getSpecimenType(); |
4712 |
22 Mar 18 |
nicklas |
847 |
boolean sameSpecimen = sp1.hasSameSpecimen(sp2); |
6595 |
22 Feb 22 |
nicklas |
848 |
boolean sameLysate = sameSpecimen && isSameLysate(alignment.getName(), otherAssay.getName()); |
4671 |
07 Feb 18 |
nicklas |
849 |
|
4671 |
07 Feb 18 |
nicklas |
850 |
if (vcf2.getHetPercentage() > maxHetPct && highHet && !samePat) |
4647 |
14 Dec 17 |
nicklas |
851 |
{ |
4671 |
07 Feb 18 |
nicklas |
// Skip comparison if both have high HET and different patients |
4647 |
14 Dec 17 |
nicklas |
853 |
cmp.addSkipped(1); |
4647 |
14 Dec 17 |
nicklas |
854 |
continue; |
4647 |
14 Dec 17 |
nicklas |
855 |
} |
4647 |
14 Dec 17 |
nicklas |
856 |
|
4647 |
14 Dec 17 |
nicklas |
857 |
VcfPair vcfPair = new VcfPair(vcf, vcf2); |
4647 |
14 Dec 17 |
nicklas |
// Skip if there are not enough common pairs |
4647 |
14 Dec 17 |
nicklas |
859 |
if (vcfPair.getCommonGt() < minCommonGt) |
4647 |
14 Dec 17 |
nicklas |
860 |
{ |
4647 |
14 Dec 17 |
nicklas |
861 |
cmp.addSkipped(1); |
4647 |
14 Dec 17 |
nicklas |
862 |
continue; |
4647 |
14 Dec 17 |
nicklas |
863 |
} |
4647 |
14 Dec 17 |
nicklas |
864 |
|
4647 |
14 Dec 17 |
nicklas |
865 |
cmp.addCompared(samePat); |
6595 |
22 Feb 22 |
nicklas |
866 |
boolean flagged = flaggedAlignments.contains(otherAssay.getId()); |
4649 |
20 Dec 17 |
nicklas |
867 |
|
4671 |
07 Feb 18 |
nicklas |
868 |
float mismatchPct = vcfPair.getMismatchPercentage(); |
4647 |
14 Dec 17 |
nicklas |
869 |
if (samePat) |
4647 |
14 Dec 17 |
nicklas |
870 |
{ |
6595 |
22 Feb 22 |
nicklas |
871 |
String doNotUse = (String)Annotationtype.DO_NOT_USE.getAnnotationValue(dc, otherAssay); |
4671 |
07 Feb 18 |
nicklas |
872 |
if (mismatchPct > maxMismatchForSamePatPct) |
4647 |
14 Dec 17 |
nicklas |
873 |
{ |
4681 |
21 Feb 18 |
nicklas |
// A high mismatch if 'total mismatches > 35%' |
4681 |
21 Feb 18 |
nicklas |
// or 'hom-hom mismatches > 10%' |
4681 |
21 Feb 18 |
nicklas |
876 |
boolean high = mismatchPct > highMismatchPct |
4681 |
21 Feb 18 |
nicklas |
877 |
|| vcfPair.getHomHomMismatchPercentage() > highHomHomMismatchPct; |
4648 |
15 Dec 17 |
nicklas |
878 |
|
4794 |
07 May 18 |
nicklas |
879 |
GenoTypeMessage.Category category = high ? GenoTypeMessage.Category.HIGH_MISMATCH : GenoTypeMessage.Category.MEDIUM_MISMATCH; |
6595 |
22 Feb 22 |
nicklas |
880 |
gtm = new GenoTypeMessage(category, category.getDefaultLevel(), category.getMessagePrefix() + " ("+vcfPair.getMismatches()+","+vcfPair.getHomHomMismatches() + "/" + vcfPair.getCommonGt()+")", otherAssay, sp2, vcf2, flagged, null); |
4728 |
04 Apr 18 |
nicklas |
881 |
gtm.setMessageTooltip("There are " + vcfPair.getMismatches() + " mismatches (including " + vcfPair.getHomHomMismatches() + " HOM/HOM mismatches) among the " + vcfPair.getCommonGt() + " common genotypes"); |
4728 |
04 Apr 18 |
nicklas |
882 |
cmp.addMessage(gtm); |
5014 |
08 Oct 18 |
nicklas |
883 |
if (high || doNotUse != null) |
4681 |
21 Feb 18 |
nicklas |
884 |
{ |
7220 |
31 May 23 |
nicklas |
885 |
if (alignPipeline.isRnaPipeline() || alignPipeline.isNormalPipeline()) |
7218 |
30 May 23 |
nicklas |
886 |
{ |
7220 |
31 May 23 |
nicklas |
// RNA items are flagged in the RNAseq pipeline and also in the DNA/Normal pipeline |
7218 |
30 May 23 |
nicklas |
888 |
cmp.setRecommendFlag(); |
7218 |
30 May 23 |
nicklas |
889 |
} |
7220 |
31 May 23 |
nicklas |
890 |
else if (alignPipeline.isTumorPipeline() && alignPipeline.isDnaPipeline()) |
7218 |
30 May 23 |
nicklas |
891 |
{ |
7220 |
31 May 23 |
nicklas |
// DNA/Tumor is disabled and marked as DoNotUse |
7218 |
30 May 23 |
nicklas |
893 |
cmp.setRecommendDisable(); |
7218 |
30 May 23 |
nicklas |
894 |
cmp.setRecommendDoNotUse("Library", "High mismatch: " + otherAssay.getName()); |
7218 |
30 May 23 |
nicklas |
895 |
} |
4681 |
21 Feb 18 |
nicklas |
896 |
} |
4681 |
21 Feb 18 |
nicklas |
897 |
else if (lowData) |
4681 |
21 Feb 18 |
nicklas |
898 |
{ |
4681 |
21 Feb 18 |
nicklas |
899 |
cmp.setRecommendDisable(); |
4681 |
21 Feb 18 |
nicklas |
900 |
} |
4647 |
14 Dec 17 |
nicklas |
901 |
} |
4648 |
15 Dec 17 |
nicklas |
902 |
else |
4648 |
15 Dec 17 |
nicklas |
903 |
{ |
6595 |
22 Feb 22 |
nicklas |
904 |
VerifiedMatch verified = null; |
6595 |
22 Feb 22 |
nicklas |
905 |
if (sameSpecimen) |
5014 |
08 Oct 18 |
nicklas |
906 |
{ |
6595 |
22 Feb 22 |
nicklas |
// If both assays have the same specimen, but different Lysate |
6595 |
22 Feb 22 |
nicklas |
908 |
if (!sameLysate) verified = VerifiedMatch.LYSATE; |
5014 |
08 Oct 18 |
nicklas |
909 |
} |
6595 |
22 Feb 22 |
nicklas |
910 |
else |
5014 |
08 Oct 18 |
nicklas |
911 |
{ |
6595 |
22 Feb 22 |
nicklas |
// The assays have different specimen |
7210 |
29 May 23 |
nicklas |
913 |
verified = sameSpecimenType ? VerifiedMatch.SPECIMEN : VerifiedMatch.DNA; |
5014 |
08 Oct 18 |
nicklas |
914 |
} |
6588 |
18 Feb 22 |
nicklas |
915 |
GenoTypeMessage.Category category = GenoTypeMessage.Category.GOOD_MATCH; |
6588 |
18 Feb 22 |
nicklas |
916 |
String prefix = "GOOD MATCH"; |
6588 |
18 Feb 22 |
nicklas |
917 |
if (doNotUse != null) |
6588 |
18 Feb 22 |
nicklas |
918 |
{ |
6588 |
18 Feb 22 |
nicklas |
919 |
category = GenoTypeMessage.Category.DO_NOT_USE_MATCH; |
6588 |
18 Feb 22 |
nicklas |
920 |
prefix = "DONOTUSE MATCH"; |
6588 |
18 Feb 22 |
nicklas |
921 |
cmp.setRecommendFlag(); |
6588 |
18 Feb 22 |
nicklas |
922 |
} |
6588 |
18 Feb 22 |
nicklas |
923 |
else if (verified != null) |
6588 |
18 Feb 22 |
nicklas |
924 |
{ |
6588 |
18 Feb 22 |
nicklas |
925 |
category = GenoTypeMessage.Category.VERIFIED_MATCH; |
6588 |
18 Feb 22 |
nicklas |
926 |
prefix = "VERIFIED BY "+verified.name(); |
6588 |
18 Feb 22 |
nicklas |
927 |
} |
6595 |
22 Feb 22 |
nicklas |
928 |
gtm = new GenoTypeMessage(category, category.getDefaultLevel(), prefix + " ("+vcfPair.getMismatches() + "/" + vcfPair.getCommonGt()+")", otherAssay, sp2, vcf2, flagged, verified); |
6588 |
18 Feb 22 |
nicklas |
929 |
gtm.setMessageTooltip("There are " + vcfPair.getMismatches() + " mismatches among the " + vcfPair.getCommonGt() + " common genotypes"); |
6588 |
18 Feb 22 |
nicklas |
930 |
cmp.addMessage(gtm); |
6588 |
18 Feb 22 |
nicklas |
931 |
} |
6588 |
18 Feb 22 |
nicklas |
932 |
} |
6588 |
18 Feb 22 |
nicklas |
933 |
else |
6588 |
18 Feb 22 |
nicklas |
934 |
{ |
6588 |
18 Feb 22 |
nicklas |
935 |
if (mismatchPct < minMismatchForDiffPatPct) |
6588 |
18 Feb 22 |
nicklas |
936 |
{ |
6588 |
18 Feb 22 |
nicklas |
937 |
GenoTypeMessage.Category category = Category.LOW_MISMATCH; |
6595 |
22 Feb 22 |
nicklas |
938 |
gtm = new GenoTypeMessage(category, category.getDefaultLevel(), category.getMessagePrefix() + " ("+vcfPair.getMismatches() + "/" + vcfPair.getCommonGt()+")", otherAssay, sp2, vcf2, flagged, null); |
6588 |
18 Feb 22 |
nicklas |
939 |
gtm.setMessageTooltip("There are " + vcfPair.getMismatches() + " mismatches among the " + vcfPair.getCommonGt() + " common genotypes"); |
6588 |
18 Feb 22 |
nicklas |
940 |
cmp.addMessage(gtm); |
7220 |
31 May 23 |
nicklas |
941 |
if (!highHet) |
7220 |
31 May 23 |
nicklas |
942 |
{ |
7220 |
31 May 23 |
nicklas |
943 |
if (alignPipeline.isRnaPipeline() || alignPipeline.isNormalPipeline()) |
7220 |
31 May 23 |
nicklas |
944 |
{ |
7220 |
31 May 23 |
nicklas |
// RNA items are flagged in the RNAseq pipeline and also in the DNA/Normal pipeline |
7220 |
31 May 23 |
nicklas |
946 |
cmp.setRecommendFlag(); |
7220 |
31 May 23 |
nicklas |
947 |
} |
7220 |
31 May 23 |
nicklas |
948 |
else if (alignPipeline.isTumorPipeline() && alignPipeline.isDnaPipeline()) |
7220 |
31 May 23 |
nicklas |
949 |
{ |
7220 |
31 May 23 |
nicklas |
// DNA/Tumor is disabled and marked as DoNotUse |
7220 |
31 May 23 |
nicklas |
951 |
if (!cmp.getRecommendDisable()) |
7220 |
31 May 23 |
nicklas |
952 |
{ |
7220 |
31 May 23 |
nicklas |
953 |
cmp.setRecommendDisable(); |
7220 |
31 May 23 |
nicklas |
954 |
cmp.setRecommendDoNotUse("Library", "Low mismatch: " + otherAssay.getName()); |
7220 |
31 May 23 |
nicklas |
955 |
} |
7220 |
31 May 23 |
nicklas |
956 |
} |
7220 |
31 May 23 |
nicklas |
957 |
} |
6588 |
18 Feb 22 |
nicklas |
958 |
} |
6588 |
18 Feb 22 |
nicklas |
959 |
} |
6588 |
18 Feb 22 |
nicklas |
960 |
} |
6588 |
18 Feb 22 |
nicklas |
961 |
|
4647 |
14 Dec 17 |
nicklas |
962 |
return cmp; |
4647 |
14 Dec 17 |
nicklas |
963 |
} |
4647 |
14 Dec 17 |
nicklas |
964 |
|
6443 |
19 Oct 21 |
nicklas |
965 |
/** |
6443 |
19 Oct 21 |
nicklas |
Check the given VCF file against all currently loaded VCF files. |
6443 |
19 Oct 21 |
nicklas |
@since 4.34 |
6443 |
19 Oct 21 |
nicklas |
968 |
*/ |
6443 |
19 Oct 21 |
nicklas |
969 |
public CompareData check(DbControl dc, File vcfFile) |
6443 |
19 Oct 21 |
nicklas |
970 |
{ |
6443 |
19 Oct 21 |
nicklas |
971 |
if (preloadCompleted.getCount() != 0) |
6443 |
19 Oct 21 |
nicklas |
972 |
{ |
6443 |
19 Oct 21 |
nicklas |
973 |
throw new IllegalStateException("Pre-load has not completed"); |
6443 |
19 Oct 21 |
nicklas |
974 |
} |
6443 |
19 Oct 21 |
nicklas |
975 |
|
7071 |
20 Mar 23 |
nicklas |
976 |
VcfData vcf = loadVcf(vcfFile, vcfFile.getName(), false); |
6443 |
19 Oct 21 |
nicklas |
977 |
CompareData cmp = new CompareData(vcfFile, vcf); |
6452 |
22 Oct 21 |
nicklas |
978 |
int recommendedGt = (int)(NUM_REF_GT * minCommonGtPct / 100); |
6452 |
22 Oct 21 |
nicklas |
979 |
int minCommonGt = 100; |
6443 |
19 Oct 21 |
nicklas |
980 |
|
6452 |
22 Oct 21 |
nicklas |
981 |
String GQheader = vcf.getFormatHeader("GQ"); |
6452 |
22 Oct 21 |
nicklas |
982 |
boolean lowGt = vcf.getGtCount() < recommendedGt; |
6513 |
07 Dec 21 |
nicklas |
983 |
boolean lowGq = vcf.getLowGQCount() > vcf.getHighGQCount(); |
6443 |
19 Oct 21 |
nicklas |
984 |
boolean highHet = vcf.getHetPercentage() > maxHetPct; |
6443 |
19 Oct 21 |
nicklas |
985 |
|
6443 |
19 Oct 21 |
nicklas |
986 |
GenoTypeMessage gtm = null; |
6443 |
19 Oct 21 |
nicklas |
987 |
if (highHet) |
6443 |
19 Oct 21 |
nicklas |
988 |
{ |
6443 |
19 Oct 21 |
nicklas |
989 |
gtm = Category.HIGH_HET.message(" (" + Values.formatNumber(vcf.getHetPercentage(), 1) + "%)"); |
6443 |
19 Oct 21 |
nicklas |
990 |
gtm.setMessageTooltip("A HET value over " + ((int)maxHetPct) + "% may indicate a contaminated sample"); |
6443 |
19 Oct 21 |
nicklas |
991 |
cmp.addMessage(gtm); |
6443 |
19 Oct 21 |
nicklas |
992 |
} |
6443 |
19 Oct 21 |
nicklas |
993 |
if (lowGq) |
6443 |
19 Oct 21 |
nicklas |
994 |
{ |
6513 |
07 Dec 21 |
nicklas |
995 |
gtm = Category.LOW_GQ.message(" (" + vcf.getLowGQCount() + "/" + vcf.getHighGQCount()+")"); |
6443 |
19 Oct 21 |
nicklas |
996 |
gtm.setMessageTooltip("The genotype calling may be affected due to low quality scores"); |
6443 |
19 Oct 21 |
nicklas |
997 |
cmp.addMessage(gtm); |
6443 |
19 Oct 21 |
nicklas |
998 |
} |
6443 |
19 Oct 21 |
nicklas |
999 |
if (lowGt) |
6443 |
19 Oct 21 |
nicklas |
1000 |
{ |
6443 |
19 Oct 21 |
nicklas |
1001 |
gtm = Category.LOW_GT_COUNT.message(" (" + vcf.getGtCount() + ")"); |
6452 |
22 Oct 21 |
nicklas |
1002 |
gtm.setMessageTooltip("There are less than " + recommendedGt + " genotypes which may affect the comparisons"); |
6443 |
19 Oct 21 |
nicklas |
1003 |
cmp.addMessage(gtm); |
6443 |
19 Oct 21 |
nicklas |
1004 |
} |
6443 |
19 Oct 21 |
nicklas |
1005 |
|
6451 |
22 Oct 21 |
nicklas |
1006 |
Map<DerivedBioAssay, VcfPair> comparedPairs = new HashMap<>(); |
6451 |
22 Oct 21 |
nicklas |
1007 |
Set<Integer> matchedPatients = new HashSet<>(); |
6452 |
22 Oct 21 |
nicklas |
1008 |
|
6452 |
22 Oct 21 |
nicklas |
1009 |
int lowestCommonGtForMatchedPatient = vcf.getGtCount(); |
6443 |
19 Oct 21 |
nicklas |
1010 |
for (Map.Entry<DerivedBioAssay, VcfData> entry : preloaded.entrySet()) |
6443 |
19 Oct 21 |
nicklas |
1011 |
{ |
6443 |
19 Oct 21 |
nicklas |
1012 |
DerivedBioAssay alignment2 = DerivedBioAssay.getById(dc, entry.getKey().getId()); |
6443 |
19 Oct 21 |
nicklas |
1013 |
VcfData vcf2 = entry.getValue(); |
6443 |
19 Oct 21 |
nicklas |
1014 |
|
6443 |
19 Oct 21 |
nicklas |
1015 |
if (vcf2.getHetPercentage() > maxHetPct && highHet) |
6443 |
19 Oct 21 |
nicklas |
1016 |
{ |
6443 |
19 Oct 21 |
nicklas |
// Skip comparison if both have high HET |
6443 |
19 Oct 21 |
nicklas |
1018 |
cmp.addSkipped(1); |
6443 |
19 Oct 21 |
nicklas |
1019 |
continue; |
6443 |
19 Oct 21 |
nicklas |
1020 |
} |
6443 |
19 Oct 21 |
nicklas |
1021 |
|
6443 |
19 Oct 21 |
nicklas |
1022 |
VcfPair vcfPair = new VcfPair(vcf, vcf2); |
6443 |
19 Oct 21 |
nicklas |
// Skip if there are not enough common pairs |
6443 |
19 Oct 21 |
nicklas |
1024 |
if (vcfPair.getCommonGt() < minCommonGt) |
6443 |
19 Oct 21 |
nicklas |
1025 |
{ |
6443 |
19 Oct 21 |
nicklas |
1026 |
cmp.addSkipped(1); |
6443 |
19 Oct 21 |
nicklas |
1027 |
continue; |
6443 |
19 Oct 21 |
nicklas |
1028 |
} |
6452 |
22 Oct 21 |
nicklas |
1029 |
|
6451 |
22 Oct 21 |
nicklas |
1030 |
comparedPairs.put(alignment2, vcfPair); |
6451 |
22 Oct 21 |
nicklas |
1031 |
float mismatchPct = vcfPair.getMismatchPercentage(); |
6451 |
22 Oct 21 |
nicklas |
1032 |
if (mismatchPct <= maxMismatchForSamePatPct) |
6451 |
22 Oct 21 |
nicklas |
1033 |
{ |
6452 |
22 Oct 21 |
nicklas |
1034 |
if (vcfPair.getCommonGt() < lowestCommonGtForMatchedPatient) |
6452 |
22 Oct 21 |
nicklas |
1035 |
{ |
6452 |
22 Oct 21 |
nicklas |
1036 |
lowestCommonGtForMatchedPatient = vcfPair.getCommonGt(); |
6452 |
22 Oct 21 |
nicklas |
1037 |
} |
6451 |
22 Oct 21 |
nicklas |
1038 |
SpecimenData sp2 = getSpecimenData(alignment2); |
6451 |
22 Oct 21 |
nicklas |
1039 |
matchedPatients.add(sp2.getPatientId()); |
6451 |
22 Oct 21 |
nicklas |
1040 |
} |
6451 |
22 Oct 21 |
nicklas |
1041 |
} |
6451 |
22 Oct 21 |
nicklas |
1042 |
|
6452 |
22 Oct 21 |
nicklas |
1043 |
if (lowestCommonGtForMatchedPatient < recommendedGt && !lowGt) |
6452 |
22 Oct 21 |
nicklas |
1044 |
{ |
6452 |
22 Oct 21 |
nicklas |
1045 |
gtm = Category.LOW_GT_COUNT.message(" (" + lowestCommonGtForMatchedPatient + ")"); |
6452 |
22 Oct 21 |
nicklas |
1046 |
gtm.setMessageTooltip("There are less than " + lowestCommonGtForMatchedPatient + " common genotypes which may affect the comparisons"); |
6452 |
22 Oct 21 |
nicklas |
1047 |
cmp.addMessage(gtm); |
6452 |
22 Oct 21 |
nicklas |
1048 |
} |
6452 |
22 Oct 21 |
nicklas |
1049 |
|
6451 |
22 Oct 21 |
nicklas |
1050 |
for (Map.Entry<DerivedBioAssay, VcfPair> entry : comparedPairs.entrySet()) |
6451 |
22 Oct 21 |
nicklas |
1051 |
{ |
6451 |
22 Oct 21 |
nicklas |
1052 |
DerivedBioAssay alignment2 = entry.getKey(); |
6451 |
22 Oct 21 |
nicklas |
1053 |
VcfPair vcfPair = entry.getValue(); |
6451 |
22 Oct 21 |
nicklas |
1054 |
|
6451 |
22 Oct 21 |
nicklas |
1055 |
VcfData vcf2 = vcfPair.getVcf2(); |
6443 |
19 Oct 21 |
nicklas |
1056 |
SpecimenData sp2 = getSpecimenData(alignment2); |
6451 |
22 Oct 21 |
nicklas |
1057 |
boolean hasMatchedPatient = matchedPatients.contains(sp2.getPatientId()); |
6451 |
22 Oct 21 |
nicklas |
1058 |
cmp.addCompared(hasMatchedPatient); |
6443 |
19 Oct 21 |
nicklas |
1059 |
boolean flagged = flaggedAlignments.contains(alignment2.getId()); |
6443 |
19 Oct 21 |
nicklas |
1060 |
float mismatchPct = vcfPair.getMismatchPercentage(); |
6443 |
19 Oct 21 |
nicklas |
1061 |
String doNotUse = (String)Annotationtype.DO_NOT_USE.getAnnotationValue(dc, alignment2); |
6443 |
19 Oct 21 |
nicklas |
1062 |
|
6451 |
22 Oct 21 |
nicklas |
1063 |
if (hasMatchedPatient) |
6443 |
19 Oct 21 |
nicklas |
1064 |
{ |
6451 |
22 Oct 21 |
nicklas |
1065 |
if (mismatchPct > maxMismatchForSamePatPct) |
6443 |
19 Oct 21 |
nicklas |
1066 |
{ |
6451 |
22 Oct 21 |
nicklas |
// A high mismatch if 'total mismatches > 35%' |
6451 |
22 Oct 21 |
nicklas |
// or 'hom-hom mismatches > 10%' |
6451 |
22 Oct 21 |
nicklas |
1069 |
boolean high = mismatchPct > highMismatchPct |
6451 |
22 Oct 21 |
nicklas |
1070 |
|| vcfPair.getHomHomMismatchPercentage() > highHomHomMismatchPct; |
6451 |
22 Oct 21 |
nicklas |
1071 |
|
6451 |
22 Oct 21 |
nicklas |
1072 |
GenoTypeMessage.Category category = high ? GenoTypeMessage.Category.HIGH_MISMATCH : GenoTypeMessage.Category.MEDIUM_MISMATCH; |
6451 |
22 Oct 21 |
nicklas |
1073 |
gtm = new GenoTypeMessage(category, category.getDefaultLevel(), category.getMessagePrefix() + " ("+vcfPair.getMismatches()+","+vcfPair.getHomHomMismatches() + "/" + vcfPair.getCommonGt()+")", alignment2, sp2, vcf2, flagged, null); |
6451 |
22 Oct 21 |
nicklas |
1074 |
gtm.setMessageTooltip("There are " + vcfPair.getMismatches() + " mismatches (including " + vcfPair.getHomHomMismatches() + " HOM/HOM mismatches) among the " + vcfPair.getCommonGt() + " common genotypes"); |
6451 |
22 Oct 21 |
nicklas |
1075 |
cmp.addMessage(gtm); |
6443 |
19 Oct 21 |
nicklas |
1076 |
} |
6451 |
22 Oct 21 |
nicklas |
1077 |
else |
6451 |
22 Oct 21 |
nicklas |
1078 |
{ |
6451 |
22 Oct 21 |
nicklas |
1079 |
GenoTypeMessage.Category category = GenoTypeMessage.Category.GOOD_MATCH; |
6451 |
22 Oct 21 |
nicklas |
1080 |
String prefix = "GOOD MATCH"; |
6451 |
22 Oct 21 |
nicklas |
1081 |
if (doNotUse != null) |
6451 |
22 Oct 21 |
nicklas |
1082 |
{ |
6451 |
22 Oct 21 |
nicklas |
1083 |
category = GenoTypeMessage.Category.DO_NOT_USE_MATCH; |
6451 |
22 Oct 21 |
nicklas |
1084 |
prefix = "DONOTUSE MATCH"; |
6451 |
22 Oct 21 |
nicklas |
1085 |
} |
6451 |
22 Oct 21 |
nicklas |
1086 |
else if (matchedPatients.size() > 1) |
6451 |
22 Oct 21 |
nicklas |
1087 |
{ |
6452 |
22 Oct 21 |
nicklas |
1088 |
category = GenoTypeMessage.Category.AMBIGUOUS_PATIENT; |
6452 |
22 Oct 21 |
nicklas |
1089 |
prefix = "AMBIGUOUS PATIENT"; |
6451 |
22 Oct 21 |
nicklas |
1090 |
} |
6451 |
22 Oct 21 |
nicklas |
1091 |
|
6451 |
22 Oct 21 |
nicklas |
1092 |
gtm = new GenoTypeMessage(category, category.getDefaultLevel(), prefix + " ("+vcfPair.getMismatches() + "/" + vcfPair.getCommonGt()+")", alignment2, sp2, vcf2, flagged, null); |
6451 |
22 Oct 21 |
nicklas |
1093 |
gtm.setMessageTooltip("There are " + vcfPair.getMismatches() + " mismatches among the " + vcfPair.getCommonGt() + " common genotypes"); |
6451 |
22 Oct 21 |
nicklas |
1094 |
cmp.addMessage(gtm); |
6451 |
22 Oct 21 |
nicklas |
1095 |
} |
6443 |
19 Oct 21 |
nicklas |
1096 |
} |
6451 |
22 Oct 21 |
nicklas |
1097 |
else |
6443 |
19 Oct 21 |
nicklas |
1098 |
{ |
6451 |
22 Oct 21 |
nicklas |
1099 |
if (mismatchPct < minMismatchForDiffPatPct) |
6451 |
22 Oct 21 |
nicklas |
1100 |
{ |
6451 |
22 Oct 21 |
nicklas |
// Number of mismatches are lower than expected for two different patients |
6451 |
22 Oct 21 |
nicklas |
1102 |
GenoTypeMessage.Category category = Category.LOW_MISMATCH; |
6451 |
22 Oct 21 |
nicklas |
1103 |
gtm = new GenoTypeMessage(category, category.getDefaultLevel(), category.getMessagePrefix() + " ("+vcfPair.getMismatches() + "/" + vcfPair.getCommonGt()+")", alignment2, sp2, vcf2, flagged, null); |
6451 |
22 Oct 21 |
nicklas |
1104 |
gtm.setMessageTooltip("There are " + vcfPair.getMismatches() + " mismatches among the " + vcfPair.getCommonGt() + " common genotypes"); |
6451 |
22 Oct 21 |
nicklas |
1105 |
cmp.addMessage(gtm); |
6451 |
22 Oct 21 |
nicklas |
1106 |
} |
6443 |
19 Oct 21 |
nicklas |
1107 |
} |
6443 |
19 Oct 21 |
nicklas |
1108 |
} |
6443 |
19 Oct 21 |
nicklas |
1109 |
return cmp; |
6443 |
19 Oct 21 |
nicklas |
1110 |
} |
6443 |
19 Oct 21 |
nicklas |
1111 |
|
4794 |
07 May 18 |
nicklas |
1112 |
public SpecimenData getSpecimenData(Nameable item) |
4647 |
14 Dec 17 |
nicklas |
1113 |
{ |
4647 |
14 Dec 17 |
nicklas |
1114 |
return specimenData.get(item.getName().substring(0, 9)); |
4647 |
14 Dec 17 |
nicklas |
1115 |
} |
4647 |
14 Dec 17 |
nicklas |
1116 |
|
4647 |
14 Dec 17 |
nicklas |
1117 |
/** |
4712 |
22 Mar 18 |
nicklas |
Are the items from the same lyste? |
4712 |
22 Mar 18 |
nicklas |
1119 |
*/ |
4712 |
22 Mar 18 |
nicklas |
1120 |
private boolean isSameLysate(String name1, String name2) |
4712 |
22 Mar 18 |
nicklas |
1121 |
{ |
4712 |
22 Mar 18 |
nicklas |
1122 |
int maxIndex = Math.min(name1.length(), name2.length()); |
4712 |
22 Mar 18 |
nicklas |
1123 |
int index = 0; |
4712 |
22 Mar 18 |
nicklas |
1124 |
int numDots = 0; |
4712 |
22 Mar 18 |
nicklas |
1125 |
while (name1.charAt(index) == name2.charAt(index) && index < maxIndex) |
4712 |
22 Mar 18 |
nicklas |
1126 |
{ |
4712 |
22 Mar 18 |
nicklas |
1127 |
if (name1.charAt(index) == '.') numDots++; |
4712 |
22 Mar 18 |
nicklas |
1128 |
index++; |
4712 |
22 Mar 18 |
nicklas |
1129 |
} |
4712 |
22 Mar 18 |
nicklas |
1130 |
return numDots >= 3; // If the names are equal to the third '.' it is the same lysate |
4712 |
22 Mar 18 |
nicklas |
1131 |
} |
4712 |
22 Mar 18 |
nicklas |
1132 |
|
4712 |
22 Mar 18 |
nicklas |
1133 |
/** |
4647 |
14 Dec 17 |
nicklas |
Load the "qc_genotype.vcf" that is linked from |
4647 |
14 Dec 17 |
nicklas |
the given alignment. |
4647 |
14 Dec 17 |
nicklas |
1136 |
*/ |
7071 |
20 Mar 23 |
nicklas |
1137 |
private File getVcfFile(DbControl dc, DerivedBioAssay alignment) |
4647 |
14 Dec 17 |
nicklas |
1138 |
{ |
4647 |
14 Dec 17 |
nicklas |
1139 |
AnyToAny link = AnyToAny.getByName(dc, alignment, "qc_genotype.vcf"); |
7071 |
20 Mar 23 |
nicklas |
1140 |
return (File)link.getTo(); |
6443 |
19 Oct 21 |
nicklas |
1141 |
} |
6443 |
19 Oct 21 |
nicklas |
1142 |
|
7071 |
20 Mar 23 |
nicklas |
1143 |
/** |
7073 |
21 Mar 23 |
nicklas |
Load the "qc_genotype.vcf" that is linked from the given alignment. |
7071 |
20 Mar 23 |
nicklas |
1145 |
*/ |
7073 |
21 Mar 23 |
nicklas |
1146 |
private VcfData loadVcf(DbControl dc, DerivedBioAssay alignment, File vcfFile, boolean copyToBase) |
6443 |
19 Oct 21 |
nicklas |
1147 |
{ |
7071 |
20 Mar 23 |
nicklas |
1148 |
return loadVcf(vcfFile, alignment.getName()+"["+vcfFile.getName()+"]", copyToBase); |
7071 |
20 Mar 23 |
nicklas |
1149 |
} |
7071 |
20 Mar 23 |
nicklas |
1150 |
|
7071 |
20 Mar 23 |
nicklas |
1151 |
VcfData loadVcf(File vcfFile, String name, boolean copyToBase) |
7071 |
20 Mar 23 |
nicklas |
1152 |
{ |
4647 |
14 Dec 17 |
nicklas |
1153 |
InputStream in = null; |
6594 |
21 Feb 22 |
nicklas |
1154 |
OutputStream copy = null; |
4647 |
14 Dec 17 |
nicklas |
1155 |
VcfData vcf = null; |
4647 |
14 Dec 17 |
nicklas |
1156 |
try |
4647 |
14 Dec 17 |
nicklas |
1157 |
{ |
4647 |
14 Dec 17 |
nicklas |
1158 |
in = vcfFile.getDownloadStream(0); |
6594 |
21 Feb 22 |
nicklas |
1159 |
if (copyToBase && vcfFile.getLocation() != Location.PRIMARY && !vcfFile.isWriteProtected()) |
6594 |
21 Feb 22 |
nicklas |
1160 |
{ |
6594 |
21 Feb 22 |
nicklas |
1161 |
try |
6594 |
21 Feb 22 |
nicklas |
1162 |
{ |
6598 |
22 Feb 22 |
nicklas |
1163 |
copy = vcfFile.getUploadStream(false, false); |
6594 |
21 Feb 22 |
nicklas |
1164 |
in = new InputStreamSplitter(in, true, true, copy); |
6594 |
21 Feb 22 |
nicklas |
1165 |
} |
6594 |
21 Feb 22 |
nicklas |
1166 |
catch (Throwable t) |
6594 |
21 Feb 22 |
nicklas |
1167 |
{} // Ignore all error and just read the file as is |
6594 |
21 Feb 22 |
nicklas |
1168 |
} |
6597 |
22 Feb 22 |
nicklas |
1169 |
if (vcfFile.getName().endsWith(".gz")) in = new GZIPInputStream(in); |
6443 |
19 Oct 21 |
nicklas |
1170 |
vcf = parser.parse(in, name); |
4647 |
14 Dec 17 |
nicklas |
1171 |
vcf.setFileId(vcfFile.getId()); |
4647 |
14 Dec 17 |
nicklas |
1172 |
} |
4647 |
14 Dec 17 |
nicklas |
1173 |
catch (IOException ex) |
4647 |
14 Dec 17 |
nicklas |
1174 |
{ |
4647 |
14 Dec 17 |
nicklas |
1175 |
throw new RuntimeException(ex); |
4647 |
14 Dec 17 |
nicklas |
1176 |
} |
4647 |
14 Dec 17 |
nicklas |
1177 |
finally |
4647 |
14 Dec 17 |
nicklas |
1178 |
{ |
4647 |
14 Dec 17 |
nicklas |
1179 |
FileUtil.close(in); |
6594 |
21 Feb 22 |
nicklas |
1180 |
FileUtil.close(copy); |
4647 |
14 Dec 17 |
nicklas |
1181 |
} |
4647 |
14 Dec 17 |
nicklas |
1182 |
return vcf; |
4647 |
14 Dec 17 |
nicklas |
1183 |
} |
4647 |
14 Dec 17 |
nicklas |
1184 |
|
7071 |
20 Mar 23 |
nicklas |
1185 |
|
7071 |
20 Mar 23 |
nicklas |
1186 |
static class VcfLoaderCallable |
7071 |
20 Mar 23 |
nicklas |
1187 |
implements Callable<VcfLoaderCallable> |
7071 |
20 Mar 23 |
nicklas |
1188 |
{ |
7071 |
20 Mar 23 |
nicklas |
1189 |
|
7071 |
20 Mar 23 |
nicklas |
1190 |
final DerivedBioAssay alignment; |
7071 |
20 Mar 23 |
nicklas |
1191 |
final File vcfFile; |
7071 |
20 Mar 23 |
nicklas |
1192 |
final boolean copyToBase; |
7071 |
20 Mar 23 |
nicklas |
1193 |
final GenoTypeChecker checker; |
7071 |
20 Mar 23 |
nicklas |
1194 |
VcfData vcfData; |
7071 |
20 Mar 23 |
nicklas |
1195 |
|
7071 |
20 Mar 23 |
nicklas |
1196 |
VcfLoaderCallable(GenoTypeChecker checker, DerivedBioAssay alignment, File vcfFile, boolean copyToBase) |
7071 |
20 Mar 23 |
nicklas |
1197 |
{ |
7071 |
20 Mar 23 |
nicklas |
1198 |
this.checker = checker; |
7071 |
20 Mar 23 |
nicklas |
1199 |
this.alignment = alignment; |
7071 |
20 Mar 23 |
nicklas |
1200 |
this.vcfFile = vcfFile; |
7071 |
20 Mar 23 |
nicklas |
1201 |
this.copyToBase = copyToBase; |
7071 |
20 Mar 23 |
nicklas |
1202 |
} |
7071 |
20 Mar 23 |
nicklas |
1203 |
|
7071 |
20 Mar 23 |
nicklas |
1204 |
@Override |
7071 |
20 Mar 23 |
nicklas |
1205 |
public VcfLoaderCallable call() |
7071 |
20 Mar 23 |
nicklas |
1206 |
throws Exception |
7071 |
20 Mar 23 |
nicklas |
1207 |
{ |
7071 |
20 Mar 23 |
nicklas |
1208 |
vcfData = checker.loadVcf(vcfFile, alignment.getName()+"["+vcfFile.getName()+"]", copyToBase); |
7071 |
20 Mar 23 |
nicklas |
1209 |
return this; |
7071 |
20 Mar 23 |
nicklas |
1210 |
} |
7071 |
20 Mar 23 |
nicklas |
1211 |
|
7071 |
20 Mar 23 |
nicklas |
1212 |
} |
7210 |
29 May 23 |
nicklas |
1213 |
|
7210 |
29 May 23 |
nicklas |
1214 |
/** |
7210 |
29 May 23 |
nicklas |
A builder for generating queries that return derived bioassays that can be used by the |
7210 |
29 May 23 |
nicklas |
genotype checked. The default settings will return all derived bioassays that has |
7210 |
29 May 23 |
nicklas |
a qc_genotype.vcf file linked to them. Use the other methods if the builder to |
7210 |
29 May 23 |
nicklas |
further restrict which items to return. |
7210 |
29 May 23 |
nicklas |
1219 |
*/ |
7210 |
29 May 23 |
nicklas |
1220 |
public static class QueryBuilder |
7210 |
29 May 23 |
nicklas |
1221 |
{ |
7210 |
29 May 23 |
nicklas |
1222 |
private final DbControl dc; |
7210 |
29 May 23 |
nicklas |
1223 |
private final ItemQuery<DerivedBioAssay> query; |
7210 |
29 May 23 |
nicklas |
1224 |
|
7210 |
29 May 23 |
nicklas |
1225 |
QueryBuilder(DbControl dc, ItemQuery<DerivedBioAssay> query) |
7210 |
29 May 23 |
nicklas |
1226 |
{ |
7210 |
29 May 23 |
nicklas |
1227 |
this.dc = dc; |
7210 |
29 May 23 |
nicklas |
1228 |
this.query = query; |
7210 |
29 May 23 |
nicklas |
1229 |
query.setIncludes(Reggie.INCLUDE_IN_CURRENT_PROJECT); |
7210 |
29 May 23 |
nicklas |
1230 |
query.join(Annotations.innerJoin(null, Annotationtype.PIPELINE.load(dc), "ppln")); |
7210 |
29 May 23 |
nicklas |
1231 |
withQcVcf(); |
7210 |
29 May 23 |
nicklas |
1232 |
} |
7210 |
29 May 23 |
nicklas |
1233 |
|
7210 |
29 May 23 |
nicklas |
1234 |
private QueryBuilder withQcVcf() |
7210 |
29 May 23 |
nicklas |
1235 |
{ |
7210 |
29 May 23 |
nicklas |
1236 |
query.restrict(AnyToAnyRestriction.exists("qc_genotype.vcf", Item.FILE)); |
7210 |
29 May 23 |
nicklas |
1237 |
return this; |
7210 |
29 May 23 |
nicklas |
1238 |
} |
7210 |
29 May 23 |
nicklas |
1239 |
|
7210 |
29 May 23 |
nicklas |
1240 |
/** |
7210 |
29 May 23 |
nicklas |
Filter the query to only return items with QC_GenotypeStatus=Checked |
7210 |
29 May 23 |
nicklas |
1242 |
*/ |
7210 |
29 May 23 |
nicklas |
1243 |
public QueryBuilder checked() |
7210 |
29 May 23 |
nicklas |
1244 |
{ |
7210 |
29 May 23 |
nicklas |
1245 |
query.join(Annotations.innerJoin(null, Annotationtype.QC_GENOTYPE_STATUS.load(dc), "qc")); |
7210 |
29 May 23 |
nicklas |
1246 |
query.restrict(Restrictions.eq(Hql.alias("qc"), Expressions.string("Checked"))); |
7210 |
29 May 23 |
nicklas |
1247 |
return this; |
7210 |
29 May 23 |
nicklas |
1248 |
} |
7210 |
29 May 23 |
nicklas |
1249 |
|
7210 |
29 May 23 |
nicklas |
1250 |
/** |
7210 |
29 May 23 |
nicklas |
Filter the query to only return items with QC_GenotypeStatus=any value (not null) |
7210 |
29 May 23 |
nicklas |
1252 |
*/ |
7210 |
29 May 23 |
nicklas |
1253 |
public QueryBuilder checked(boolean includeDisabled) |
7210 |
29 May 23 |
nicklas |
1254 |
{ |
7210 |
29 May 23 |
nicklas |
1255 |
query.join(Annotations.innerJoin(null, Annotationtype.QC_GENOTYPE_STATUS.load(dc), "qc")); |
7210 |
29 May 23 |
nicklas |
1256 |
if (!includeDisabled) |
7210 |
29 May 23 |
nicklas |
1257 |
{ |
7210 |
29 May 23 |
nicklas |
1258 |
query.restrict(Restrictions.eq(Hql.alias("qc"), Expressions.string("Checked"))); |
7210 |
29 May 23 |
nicklas |
1259 |
} |
7210 |
29 May 23 |
nicklas |
1260 |
return this; |
7210 |
29 May 23 |
nicklas |
1261 |
} |
7210 |
29 May 23 |
nicklas |
1262 |
|
7210 |
29 May 23 |
nicklas |
1263 |
/** |
7210 |
29 May 23 |
nicklas |
Filter the query to only return items without QC_GenotypeStatus annotation. |
7210 |
29 May 23 |
nicklas |
1265 |
*/ |
7210 |
29 May 23 |
nicklas |
1266 |
public QueryBuilder notChecked() |
7210 |
29 May 23 |
nicklas |
1267 |
{ |
7210 |
29 May 23 |
nicklas |
1268 |
query.join(Annotations.leftJoin(null, Annotationtype.QC_GENOTYPE_STATUS.load(dc), "qc")); |
7210 |
29 May 23 |
nicklas |
1269 |
query.restrict(Restrictions.eq(Hql.alias("qc"), null)); |
7210 |
29 May 23 |
nicklas |
1270 |
return this; |
7210 |
29 May 23 |
nicklas |
1271 |
} |
7210 |
29 May 23 |
nicklas |
1272 |
|
7210 |
29 May 23 |
nicklas |
1273 |
/** |
7210 |
29 May 23 |
nicklas |
Filter the query to only return items with the given pipeline. If |
7210 |
29 May 23 |
nicklas |
the pipeline is null, the query is not modified. |
7210 |
29 May 23 |
nicklas |
1276 |
*/ |
7210 |
29 May 23 |
nicklas |
1277 |
public QueryBuilder pipeline(Pipeline pipeline) |
7210 |
29 May 23 |
nicklas |
1278 |
{ |
7210 |
29 May 23 |
nicklas |
1279 |
if (pipeline != null) query.restrict(pipeline.restriction("ppln")); |
7210 |
29 May 23 |
nicklas |
1280 |
return this; |
7210 |
29 May 23 |
nicklas |
1281 |
} |
7210 |
29 May 23 |
nicklas |
1282 |
|
7210 |
29 May 23 |
nicklas |
1283 |
/** |
7210 |
29 May 23 |
nicklas |
Filter the query to only return items with the given subtype. If |
7210 |
29 May 23 |
nicklas |
the subtype is null, the query is not modified. |
7210 |
29 May 23 |
nicklas |
1286 |
*/ |
7210 |
29 May 23 |
nicklas |
1287 |
public QueryBuilder subtype(Subtype subtype) |
7210 |
29 May 23 |
nicklas |
1288 |
{ |
7210 |
29 May 23 |
nicklas |
1289 |
if (subtype != null) query.restrict(subtype.restriction(dc, null)); |
7210 |
29 May 23 |
nicklas |
1290 |
return this; |
7210 |
29 May 23 |
nicklas |
1291 |
} |
7210 |
29 May 23 |
nicklas |
1292 |
|
7210 |
29 May 23 |
nicklas |
1293 |
/** |
7210 |
29 May 23 |
nicklas |
Filter the query to only return tumor items that can be used |
7210 |
29 May 23 |
nicklas |
by the genotype checker. |
7210 |
29 May 23 |
nicklas |
Subtype=AlignedSequences and Pipeline=RNAseq/Hisat/Stringtie |
7210 |
29 May 23 |
nicklas |
1297 |
*/ |
7217 |
30 May 23 |
nicklas |
1298 |
public QueryBuilder tumors(Restriction extraFilter) |
7210 |
29 May 23 |
nicklas |
1299 |
{ |
7210 |
29 May 23 |
nicklas |
1300 |
Subtype.ALIGNED_SEQUENCES.addFilter(dc, query); |
7210 |
29 May 23 |
nicklas |
1301 |
query.restrict(Pipeline.RNASEQ_HISAT_STRINGTIE.restriction("ppln")); |
7217 |
30 May 23 |
nicklas |
1302 |
return filter(extraFilter); |
7210 |
29 May 23 |
nicklas |
1303 |
} |
7210 |
29 May 23 |
nicklas |
1304 |
|
7210 |
29 May 23 |
nicklas |
1305 |
/** |
7210 |
29 May 23 |
nicklas |
Filter the query to only return normal items that can be used |
7210 |
29 May 23 |
nicklas |
by the genotype checker. |
7210 |
29 May 23 |
nicklas |
Subtype=GenotypeCall and Pipeline=DNA/Genotyping |
7210 |
29 May 23 |
nicklas |
or Subtype=AlignedSequences and Pipeline=DNA/Normal/WGS |
7210 |
29 May 23 |
nicklas |
1310 |
*/ |
7217 |
30 May 23 |
nicklas |
1311 |
public QueryBuilder normals(Restriction extraFilter) |
7210 |
29 May 23 |
nicklas |
1312 |
{ |
7210 |
29 May 23 |
nicklas |
1313 |
query.restrict(Restrictions.or( |
7210 |
29 May 23 |
nicklas |
1314 |
Restrictions.and( |
7210 |
29 May 23 |
nicklas |
1315 |
Subtype.GENOTYPE_CALL.restriction(dc, null), |
7210 |
29 May 23 |
nicklas |
1316 |
Pipeline.DNA_GENOTYPING.restriction("ppln") |
7210 |
29 May 23 |
nicklas |
1317 |
), |
7210 |
29 May 23 |
nicklas |
1318 |
Restrictions.and( |
7210 |
29 May 23 |
nicklas |
1319 |
Subtype.ALIGNED_SEQUENCES.restriction(dc, null), |
7210 |
29 May 23 |
nicklas |
1320 |
Pipeline.DNA_NORMAL_WGS.restriction("ppln") |
7210 |
29 May 23 |
nicklas |
1321 |
) |
7210 |
29 May 23 |
nicklas |
1322 |
)); |
7210 |
29 May 23 |
nicklas |
1323 |
return this; |
7210 |
29 May 23 |
nicklas |
1324 |
} |
7210 |
29 May 23 |
nicklas |
1325 |
|
7210 |
29 May 23 |
nicklas |
1326 |
/** |
7217 |
30 May 23 |
nicklas |
Additional filter to apply to the query. |
7217 |
30 May 23 |
nicklas |
1328 |
*/ |
7217 |
30 May 23 |
nicklas |
1329 |
public QueryBuilder filter(Restriction filter) |
7217 |
30 May 23 |
nicklas |
1330 |
{ |
7217 |
30 May 23 |
nicklas |
1331 |
if (filter != null) query.restrict(filter); |
7217 |
30 May 23 |
nicklas |
1332 |
return this; |
7217 |
30 May 23 |
nicklas |
1333 |
} |
7217 |
30 May 23 |
nicklas |
1334 |
|
7217 |
30 May 23 |
nicklas |
1335 |
/** |
7210 |
29 May 23 |
nicklas |
Return the query. |
7210 |
29 May 23 |
nicklas |
1337 |
*/ |
7210 |
29 May 23 |
nicklas |
1338 |
public ItemQuery<DerivedBioAssay> query() |
7210 |
29 May 23 |
nicklas |
1339 |
{ |
7210 |
29 May 23 |
nicklas |
1340 |
return query; |
7210 |
29 May 23 |
nicklas |
1341 |
} |
7210 |
29 May 23 |
nicklas |
1342 |
} |
7210 |
29 May 23 |
nicklas |
1343 |
|
4647 |
14 Dec 17 |
nicklas |
1344 |
} |