7119 |
19 Apr 23 |
nicklas |
1 |
package net.sf.basedb.reggie.autoconfirm; |
7119 |
19 Apr 23 |
nicklas |
2 |
|
7119 |
19 Apr 23 |
nicklas |
3 |
|
7119 |
19 Apr 23 |
nicklas |
4 |
import org.slf4j.LoggerFactory; |
7119 |
19 Apr 23 |
nicklas |
5 |
|
7119 |
19 Apr 23 |
nicklas |
6 |
import net.sf.basedb.core.DbControl; |
7119 |
19 Apr 23 |
nicklas |
7 |
import net.sf.basedb.core.DerivedBioAssay; |
7119 |
19 Apr 23 |
nicklas |
8 |
import net.sf.basedb.core.Job; |
7119 |
19 Apr 23 |
nicklas |
9 |
import net.sf.basedb.core.SessionControl; |
7119 |
19 Apr 23 |
nicklas |
10 |
import net.sf.basedb.reggie.activity.ActivityDef; |
7119 |
19 Apr 23 |
nicklas |
11 |
import net.sf.basedb.reggie.dao.AlignedSequences; |
7119 |
19 Apr 23 |
nicklas |
12 |
import net.sf.basedb.reggie.dao.Annotationtype; |
7294 |
23 Aug 23 |
nicklas |
13 |
import net.sf.basedb.reggie.dao.BiomaterialList; |
7294 |
23 Aug 23 |
nicklas |
14 |
import net.sf.basedb.reggie.dao.Pipeline; |
7119 |
19 Apr 23 |
nicklas |
15 |
import net.sf.basedb.util.extensions.logging.ExtensionsLog; |
7119 |
19 Apr 23 |
nicklas |
16 |
import net.sf.basedb.util.extensions.logging.ExtensionsLogger; |
7119 |
19 Apr 23 |
nicklas |
17 |
|
7119 |
19 Apr 23 |
nicklas |
18 |
/**
	Auto-confirm implementation for sequences aligned with BwaMem2.
	An alignment is auto-confirmed when all of the following rules are met:

	* {@link DerivedBioAssay#getJob()} job status must be {@link net.sf.basedb.core.Job.Status#DONE}.
	* Number of aligned pairs must be at least 400 million.
	* HET percentage must be at most 65%.
	* Duplication fraction must be at most 30%.
	* Mean coverage must be at least 25.

	If the job was run with the "debug" parameter set, only the job status
	rule is checked since the numeric values are not meaningful.

	@author nicklas
	@since 4.46
*/
7119 |
19 Apr 23 |
nicklas |
31 |
public class BwaMem2AutoConfirmer |
7119 |
19 Apr 23 |
nicklas |
32 |
extends AutoConfirmer<DerivedBioAssay> |
7119 |
19 Apr 23 |
nicklas |
33 |
{ |
7119 |
19 Apr 23 |
nicklas |
34 |
|
7119 |
19 Apr 23 |
nicklas |
35 |
private static final ExtensionsLogger logger = |
7119 |
19 Apr 23 |
nicklas |
36 |
ExtensionsLog.getLogger(AutoConfirmService.ID, true).wrap(LoggerFactory.getLogger(BwaMem2AutoConfirmer.class)); |
7119 |
19 Apr 23 |
nicklas |
37 |
|
7119 |
19 Apr 23 |
nicklas |
38 |
|
7119 |
19 Apr 23 |
nicklas |
39 |
/** |
7119 |
19 Apr 23 |
nicklas |
At least 400 million aligned pairs is required for |
7119 |
19 Apr 23 |
nicklas |
alignment to be auto-confirmed. |
7119 |
19 Apr 23 |
nicklas |
42 |
*/ |
7119 |
19 Apr 23 |
nicklas |
43 |
public static final long MIN_ALIGNED_PAIRS = 400*M; |
7119 |
19 Apr 23 |
nicklas |
44 |
|
7119 |
19 Apr 23 |
nicklas |
45 |
/** |
7119 |
19 Apr 23 |
nicklas |
At most 30 percent duplication rate. |
7119 |
19 Apr 23 |
nicklas |
47 |
*/ |
7119 |
19 Apr 23 |
nicklas |
48 |
public static final float MAX_FRACTION_DUPLICATION = 0.3f; |
7119 |
19 Apr 23 |
nicklas |
49 |
|
7119 |
19 Apr 23 |
nicklas |
50 |
/** |
7119 |
19 Apr 23 |
nicklas |
At least 20 in mean coverage. |
7119 |
19 Apr 23 |
nicklas |
52 |
*/ |
7132 |
24 Apr 23 |
nicklas |
53 |
public static final float MIN_MEAN_COVERAGE = 25f; |
7119 |
19 Apr 23 |
nicklas |
54 |
|
7119 |
19 Apr 23 |
nicklas |
55 |
/** |
7119 |
19 Apr 23 |
nicklas |
A higher HET percentage may indicate contamination. |
7119 |
19 Apr 23 |
nicklas |
57 |
*/ |
7119 |
19 Apr 23 |
nicklas |
58 |
public static final float MAX_HET_PERCENTAGE = 65f; |
7119 |
19 Apr 23 |
nicklas |
59 |
|
7119 |
19 Apr 23 |
nicklas |
60 |
|
7119 |
19 Apr 23 |
nicklas |
61 |
public BwaMem2AutoConfirmer(DerivedBioAssay aligned) |
7119 |
19 Apr 23 |
nicklas |
62 |
{ |
7119 |
19 Apr 23 |
nicklas |
63 |
super(aligned, aligned.getJob()); |
7119 |
19 Apr 23 |
nicklas |
64 |
} |
7119 |
19 Apr 23 |
nicklas |
65 |
|
7119 |
19 Apr 23 |
nicklas |
66 |
/** |
7119 |
19 Apr 23 |
nicklas |
Passes if the job ended successfully and HET percentage is below the |
7119 |
19 Apr 23 |
nicklas |
limit. |
7119 |
19 Apr 23 |
nicklas |
69 |
*/ |
7119 |
19 Apr 23 |
nicklas |
70 |
@Override |
7119 |
19 Apr 23 |
nicklas |
71 |
public boolean checkRules(DbControl dc, AutoConfirmManager manager) |
7119 |
19 Apr 23 |
nicklas |
72 |
{ |
7119 |
19 Apr 23 |
nicklas |
73 |
DerivedBioAssay aligned = item(dc); |
7119 |
19 Apr 23 |
nicklas |
74 |
Job job = aligned.getJob(); |
7119 |
19 Apr 23 |
nicklas |
75 |
|
7119 |
19 Apr 23 |
nicklas |
76 |
if (job.getStatus() != Job.Status.DONE) return false; |
7119 |
19 Apr 23 |
nicklas |
77 |
boolean debug = Boolean.TRUE.equals(job.getParameterValue("debug")); |
7119 |
19 Apr 23 |
nicklas |
78 |
if (debug) return true; // All numbers will be messed up so we continue in all cases |
7119 |
19 Apr 23 |
nicklas |
79 |
|
7119 |
19 Apr 23 |
nicklas |
80 |
Long alignedPairs = (Long)Annotationtype.ALIGNED_PAIRS.getAnnotationValue(dc, aligned); |
7119 |
19 Apr 23 |
nicklas |
81 |
Float hetPercentage = (Float)Annotationtype.QC_GENOTYPE_HET_PCT.getAnnotationValue(dc, aligned); |
7119 |
19 Apr 23 |
nicklas |
82 |
Float duplicates = (Float)Annotationtype.FRACTION_DUPLICATION.getAnnotationValue(dc, aligned); |
7119 |
19 Apr 23 |
nicklas |
83 |
Float meanCoverage = (Float)Annotationtype.MEAN_COVERAGE.getAnnotationValue(dc, aligned); |
7119 |
19 Apr 23 |
nicklas |
84 |
|
7119 |
19 Apr 23 |
nicklas |
85 |
if (alignedPairs == null || alignedPairs < MIN_ALIGNED_PAIRS) return false; |
7119 |
19 Apr 23 |
nicklas |
86 |
if (hetPercentage == null || hetPercentage > MAX_HET_PERCENTAGE) return false; |
7119 |
19 Apr 23 |
nicklas |
87 |
if (duplicates == null || duplicates > MAX_FRACTION_DUPLICATION) return false; |
7119 |
19 Apr 23 |
nicklas |
88 |
if (meanCoverage == null || meanCoverage < MIN_MEAN_COVERAGE) return false; |
7119 |
19 Apr 23 |
nicklas |
89 |
|
7119 |
19 Apr 23 |
nicklas |
90 |
return true; |
7119 |
19 Apr 23 |
nicklas |
91 |
} |
7119 |
19 Apr 23 |
nicklas |
92 |
|
7119 |
19 Apr 23 |
nicklas |
93 |
/** |
7119 |
19 Apr 23 |
nicklas |
If we get here, the result should be accepted |
7119 |
19 Apr 23 |
nicklas |
95 |
*/ |
7119 |
19 Apr 23 |
nicklas |
96 |
@Override |
7119 |
19 Apr 23 |
nicklas |
97 |
public boolean autoConfirm(DbControl dc, AutoConfirmManager manager) |
7119 |
19 Apr 23 |
nicklas |
98 |
{ |
7119 |
19 Apr 23 |
nicklas |
99 |
DerivedBioAssay aligned = item(dc); |
7119 |
19 Apr 23 |
nicklas |
100 |
Annotationtype.ANALYSIS_RESULT.setAnnotationValue(dc, aligned, AlignedSequences.ALIGN_SUCCESSFUL); |
7119 |
19 Apr 23 |
nicklas |
101 |
ActivityDef.BWA_MEM2_AUTOCONFIRMED.merge(dc, 1).setUser("Auto-confirm"); |
7294 |
23 Aug 23 |
nicklas |
// Pipelines for further processing |
7294 |
23 Aug 23 |
nicklas |
// If it is a tumor item add it to the ASCAT pipeline |
7294 |
23 Aug 23 |
nicklas |
// but note that ASCAT also requires a paired normal so we can't auto-start it |
7294 |
23 Aug 23 |
nicklas |
105 |
Pipeline pipeline = Pipeline.getByName((String)Annotationtype.PIPELINE.getAnnotationValue(dc, aligned)); |
7294 |
23 Aug 23 |
nicklas |
106 |
if (pipeline == Pipeline.DNA_TUMOR_WGS) |
7294 |
23 Aug 23 |
nicklas |
107 |
{ |
7294 |
23 Aug 23 |
nicklas |
108 |
BiomaterialList.ASCAT_PIPELINE.load(dc).add(aligned); |
7398 |
07 Nov 23 |
nicklas |
109 |
BiomaterialList.WGS_VARIANT_CALLING_PIPELINE.load(dc).add(aligned); |
7294 |
23 Aug 23 |
nicklas |
110 |
} |
7119 |
19 Apr 23 |
nicklas |
111 |
return true; |
7119 |
19 Apr 23 |
nicklas |
112 |
} |
7119 |
19 Apr 23 |
nicklas |
113 |
|
7119 |
19 Apr 23 |
nicklas |
114 |
/** |
7119 |
19 Apr 23 |
nicklas |
There is nothing to do (yet). |
7119 |
19 Apr 23 |
nicklas |
116 |
*/ |
7119 |
19 Apr 23 |
nicklas |
117 |
@Override |
7119 |
19 Apr 23 |
nicklas |
118 |
public boolean startNextStep(SessionControl sc, AutoConfirmManager manager) |
7119 |
19 Apr 23 |
nicklas |
119 |
{ |
7119 |
19 Apr 23 |
nicklas |
120 |
return false; |
7119 |
19 Apr 23 |
nicklas |
121 |
} |
7119 |
19 Apr 23 |
nicklas |
122 |
} |