6185 |
26 Mar 21 |
nicklas |
1 |
package net.sf.basedb.reggie.autoconfirm; |
6185 |
26 Mar 21 |
nicklas |
2 |
|
6185 |
26 Mar 21 |
nicklas |
3 |
import java.util.Collections; |
6185 |
26 Mar 21 |
nicklas |
4 |
import java.util.List; |
6185 |
26 Mar 21 |
nicklas |
5 |
|
6185 |
26 Mar 21 |
nicklas |
6 |
import org.slf4j.LoggerFactory; |
6185 |
26 Mar 21 |
nicklas |
7 |
|
6185 |
26 Mar 21 |
nicklas |
8 |
import net.sf.basedb.core.DbControl; |
6185 |
26 Mar 21 |
nicklas |
9 |
import net.sf.basedb.core.DerivedBioAssay; |
6185 |
26 Mar 21 |
nicklas |
10 |
import net.sf.basedb.core.ItemList; |
6185 |
26 Mar 21 |
nicklas |
11 |
import net.sf.basedb.core.Job; |
6185 |
26 Mar 21 |
nicklas |
12 |
import net.sf.basedb.core.SessionControl; |
6185 |
26 Mar 21 |
nicklas |
13 |
import net.sf.basedb.opengrid.OpenGridCluster; |
6674 |
11 Apr 22 |
nicklas |
14 |
import net.sf.basedb.opengrid.config.BatchConfig; |
6185 |
26 Mar 21 |
nicklas |
15 |
import net.sf.basedb.opengrid.service.OpenGridService; |
6185 |
26 Mar 21 |
nicklas |
16 |
import net.sf.basedb.reggie.activity.ActivityDef; |
6185 |
26 Mar 21 |
nicklas |
17 |
import net.sf.basedb.reggie.dao.Annotationtype; |
6185 |
26 Mar 21 |
nicklas |
18 |
import net.sf.basedb.reggie.dao.BiomaterialList; |
6185 |
26 Mar 21 |
nicklas |
19 |
import net.sf.basedb.reggie.dao.MergedSequences; |
6637 |
09 Mar 22 |
nicklas |
20 |
import net.sf.basedb.reggie.grid.ScriptUtil; |
7077 |
27 Mar 23 |
nicklas |
21 |
import net.sf.basedb.util.extensions.logging.ExtensionsLog; |
7077 |
27 Mar 23 |
nicklas |
22 |
import net.sf.basedb.util.extensions.logging.ExtensionsLogger; |
6185 |
26 Mar 21 |
nicklas |
23 |
|
6185 |
26 Mar 21 |
nicklas |
24 |
/** |
6185 |
26 Mar 21 |
nicklas |
Auto-confirm implementation for sequences imported from FASTQ files. |
6185 |
26 Mar 21 |
nicklas |
The rules are simple: |
6185 |
26 Mar 21 |
nicklas |
27 |
|
6185 |
26 Mar 21 |
nicklas |
The rules are: |
6185 |
26 Mar 21 |
nicklas |
29 |
|
6185 |
26 Mar 21 |
nicklas |
* {@link Job#getStatus()} job status must be {@link net.sf.basedb.core.Job.Status#DONE}. |
6185 |
26 Mar 21 |
nicklas |
* {@link Annotationtype#PT_READS} must be >10M |
6185 |
26 Mar 21 |
nicklas |
* {@link Annotationtype#FRAGMENT_SIZE_AVG} and {@link Annotationtype#FRAGMENT_SIZE_STDEV} |
6185 |
26 Mar 21 |
nicklas |
must be >0 |
6185 |
26 Mar 21 |
nicklas |
34 |
|
6185 |
26 Mar 21 |
nicklas |
Auto-confirmation will then continue with the Legacy pipeline and Hisat alignment. |
6185 |
26 Mar 21 |
nicklas |
36 |
|
6185 |
26 Mar 21 |
nicklas |
@author nicklas |
6185 |
26 Mar 21 |
nicklas |
@since 4.32 |
6185 |
26 Mar 21 |
nicklas |
39 |
*/ |
6185 |
26 Mar 21 |
nicklas |
40 |
public class FastqImportAutoConfirmer |
6185 |
26 Mar 21 |
nicklas |
41 |
extends AutoConfirmer<DerivedBioAssay> |
6185 |
26 Mar 21 |
nicklas |
42 |
{ |
6185 |
26 Mar 21 |
nicklas |
43 |
|
7077 |
27 Mar 23 |
nicklas |
44 |
private static final ExtensionsLogger logger = |
7077 |
27 Mar 23 |
nicklas |
45 |
ExtensionsLog.getLogger(AutoConfirmService.ID, true).wrap(LoggerFactory.getLogger(FastqImportAutoConfirmer.class)); |
6185 |
26 Mar 21 |
nicklas |
46 |
|
6185 |
26 Mar 21 |
nicklas |
47 |
public FastqImportAutoConfirmer(DerivedBioAssay aligned) |
6185 |
26 Mar 21 |
nicklas |
48 |
{ |
6185 |
26 Mar 21 |
nicklas |
49 |
super(aligned, aligned.getJob()); |
6185 |
26 Mar 21 |
nicklas |
50 |
} |
6185 |
26 Mar 21 |
nicklas |
51 |
|
6185 |
26 Mar 21 |
nicklas |
52 |
/** |
6185 |
26 Mar 21 |
nicklas |
Passes if the job ended successfully and PT_READS and fragment size values are |
6185 |
26 Mar 21 |
nicklas |
within limits. |
6185 |
26 Mar 21 |
nicklas |
55 |
*/ |
6185 |
26 Mar 21 |
nicklas |
56 |
@Override |
6185 |
26 Mar 21 |
nicklas |
57 |
public boolean checkRules(DbControl dc, AutoConfirmManager manager) |
6185 |
26 Mar 21 |
nicklas |
58 |
{ |
6185 |
26 Mar 21 |
nicklas |
59 |
DerivedBioAssay merged = item(dc); |
6185 |
26 Mar 21 |
nicklas |
60 |
Job job = merged.getJob(); |
6185 |
26 Mar 21 |
nicklas |
61 |
|
6185 |
26 Mar 21 |
nicklas |
62 |
if (job.getStatus() != Job.Status.DONE) return false; |
6185 |
26 Mar 21 |
nicklas |
63 |
|
6185 |
26 Mar 21 |
nicklas |
64 |
boolean debug = Boolean.TRUE.equals(job.getParameterValue("debug")); |
6185 |
26 Mar 21 |
nicklas |
65 |
long minPtReads = debug ? DemuxAutoConfirmer.MIN_PT_READS_DEBUG : DemuxAutoConfirmer.MIN_PT_READS; |
6185 |
26 Mar 21 |
nicklas |
66 |
|
6185 |
26 Mar 21 |
nicklas |
67 |
Long ptReads = (Long)Annotationtype.PT_READS.getAnnotationValue(dc, merged); |
6185 |
26 Mar 21 |
nicklas |
68 |
Integer fragmentSizeAvg = (Integer)Annotationtype.FRAGMENT_SIZE_AVG.getAnnotationValue(dc, merged); |
6185 |
26 Mar 21 |
nicklas |
69 |
Integer fragmentSizeStd = (Integer)Annotationtype.FRAGMENT_SIZE_STDEV.getAnnotationValue(dc, merged); |
6185 |
26 Mar 21 |
nicklas |
70 |
|
6185 |
26 Mar 21 |
nicklas |
71 |
boolean passes = ptReads != null && fragmentSizeAvg != null && fragmentSizeStd != null; |
6185 |
26 Mar 21 |
nicklas |
72 |
passes &= ptReads > minPtReads; |
6185 |
26 Mar 21 |
nicklas |
73 |
passes &= fragmentSizeAvg > 0; |
6185 |
26 Mar 21 |
nicklas |
74 |
passes &= fragmentSizeStd > 0; |
6185 |
26 Mar 21 |
nicklas |
75 |
|
6185 |
26 Mar 21 |
nicklas |
76 |
return passes; |
6185 |
26 Mar 21 |
nicklas |
77 |
} |
6185 |
26 Mar 21 |
nicklas |
78 |
|
6185 |
26 Mar 21 |
nicklas |
79 |
/** |
6185 |
26 Mar 21 |
nicklas |
Auto-confirm and add to next steps in the pipeline. |
6185 |
26 Mar 21 |
nicklas |
81 |
*/ |
6185 |
26 Mar 21 |
nicklas |
82 |
@Override |
6185 |
26 Mar 21 |
nicklas |
83 |
public boolean autoConfirm(DbControl dc, AutoConfirmManager manager) |
6185 |
26 Mar 21 |
nicklas |
84 |
{ |
6185 |
26 Mar 21 |
nicklas |
85 |
DerivedBioAssay merged = item(dc); |
6185 |
26 Mar 21 |
nicklas |
86 |
|
6185 |
26 Mar 21 |
nicklas |
87 |
Annotationtype.ANALYSIS_RESULT.setAnnotationValue(dc, merged, MergedSequences.MERGE_SUCCESSFUL); |
6185 |
26 Mar 21 |
nicklas |
// Add to pipeline processing lists |
6185 |
26 Mar 21 |
nicklas |
// Not really needed for auto-confirm, but in case startNextStep() fails |
6185 |
26 Mar 21 |
nicklas |
// we want the bioassays to show up in the manual lists |
6185 |
26 Mar 21 |
nicklas |
91 |
BiomaterialList.LEGACY_PIPELINE.load(dc).add(merged); |
6185 |
26 Mar 21 |
nicklas |
92 |
BiomaterialList.HISAT_PIPELINE.load(dc).add(merged); |
6811 |
25 Aug 22 |
nicklas |
93 |
BiomaterialList.HISAT_2023_PIPELINE.load(dc).add(merged); |
6809 |
24 Aug 22 |
nicklas |
94 |
|
6185 |
26 Mar 21 |
nicklas |
95 |
ActivityDef.FASTQ_IMPORT_AUTOCONFIRMED.merge(dc, 1).setUser("Auto-confirm"); |
6185 |
26 Mar 21 |
nicklas |
96 |
return true; |
6185 |
26 Mar 21 |
nicklas |
97 |
} |
6185 |
26 Mar 21 |
nicklas |
98 |
|
6185 |
26 Mar 21 |
nicklas |
99 |
/** |
6185 |
26 Mar 21 |
nicklas |
Schedule Legacy pipeline and Hisat alignment to run on the cluster. |
6185 |
26 Mar 21 |
nicklas |
101 |
*/ |
6185 |
26 Mar 21 |
nicklas |
102 |
@Override |
6185 |
26 Mar 21 |
nicklas |
103 |
public boolean startNextStep(SessionControl sc, AutoConfirmManager manager) |
6185 |
26 Mar 21 |
nicklas |
104 |
{ |
6185 |
26 Mar 21 |
nicklas |
105 |
DbControl dc = null; |
6185 |
26 Mar 21 |
nicklas |
106 |
try |
6185 |
26 Mar 21 |
nicklas |
107 |
{ |
6599 |
22 Feb 22 |
nicklas |
108 |
dc = sc.newDbControl("Reggie: Auto-confirm FASTQ import"); |
6185 |
26 Mar 21 |
nicklas |
109 |
|
6185 |
26 Mar 21 |
nicklas |
110 |
DerivedBioAssay merged = item(dc); |
6185 |
26 Mar 21 |
nicklas |
111 |
Job job = merged.getJob(); |
6185 |
26 Mar 21 |
nicklas |
112 |
|
6185 |
26 Mar 21 |
nicklas |
113 |
boolean debug = Boolean.TRUE.equals(job.getParameterValue("debug")); |
6185 |
26 Mar 21 |
nicklas |
114 |
Integer priority = (Integer)job.getParameterValue("priority"); |
6982 |
17 Jan 23 |
nicklas |
115 |
String partition = job.getParameterValue("partition"); |
6185 |
26 Mar 21 |
nicklas |
116 |
|
6185 |
26 Mar 21 |
nicklas |
117 |
String clusterId = job.getServer(); |
6185 |
26 Mar 21 |
nicklas |
118 |
OpenGridCluster cluster = OpenGridService.getInstance().getClusterById(dc, clusterId); |
6185 |
26 Mar 21 |
nicklas |
119 |
if (cluster == null) |
6185 |
26 Mar 21 |
nicklas |
120 |
{ |
6185 |
26 Mar 21 |
nicklas |
// If we don't have required items, abort and revert to manual start |
6185 |
26 Mar 21 |
nicklas |
122 |
return false; |
6185 |
26 Mar 21 |
nicklas |
123 |
} |
6185 |
26 Mar 21 |
nicklas |
124 |
|
6185 |
26 Mar 21 |
nicklas |
// Pipelines for further processing |
6185 |
26 Mar 21 |
nicklas |
126 |
ItemList hisatPipeline = BiomaterialList.HISAT_PIPELINE.load(dc); |
6185 |
26 Mar 21 |
nicklas |
127 |
boolean hisatDisabled = "Disable".equals(Annotationtype.AUTO_PROCESSING.getAnnotationValue(dc, hisatPipeline)); |
6811 |
25 Aug 22 |
nicklas |
128 |
ItemList hisat2023Pipeline = BiomaterialList.HISAT_2023_PIPELINE.load(dc); |
6809 |
24 Aug 22 |
nicklas |
129 |
boolean hisat2023Disabled = "Disable".equals(Annotationtype.AUTO_PROCESSING.getAnnotationValue(dc, hisat2023Pipeline)); |
6185 |
26 Mar 21 |
nicklas |
130 |
ItemList legacyPipeline = BiomaterialList.LEGACY_PIPELINE.load(dc); |
6185 |
26 Mar 21 |
nicklas |
131 |
boolean legacyDisabled = "Disable".equals(Annotationtype.AUTO_PROCESSING.getAnnotationValue(dc, legacyPipeline)); |
6185 |
26 Mar 21 |
nicklas |
132 |
|
6809 |
24 Aug 22 |
nicklas |
133 |
if (legacyDisabled && hisatDisabled && hisat2023Disabled) return false; |
6185 |
26 Mar 21 |
nicklas |
134 |
|
6185 |
26 Mar 21 |
nicklas |
135 |
List<MergedSequences> mergedSequences = Collections.singletonList(MergedSequences.getById(dc, merged.getId())); |
6185 |
26 Mar 21 |
nicklas |
136 |
dc.close(); |
6185 |
26 Mar 21 |
nicklas |
137 |
|
6674 |
11 Apr 22 |
nicklas |
138 |
BatchConfig batchConfig = new BatchConfig(); |
6636 |
09 Mar 22 |
nicklas |
139 |
if (!hisatDisabled) |
6185 |
26 Mar 21 |
nicklas |
140 |
{ |
6636 |
09 Mar 22 |
nicklas |
// Create Hisat jobs |
6185 |
26 Mar 21 |
nicklas |
142 |
try |
6185 |
26 Mar 21 |
nicklas |
143 |
{ |
6636 |
09 Mar 22 |
nicklas |
144 |
dc = sc.newDbControl("Reggie: Auto-confirm FASTQ import - start Hisat pipeline"); |
6637 |
09 Mar 22 |
nicklas |
// Select a cluster that has been configured for Hisat |
6637 |
09 Mar 22 |
nicklas |
146 |
OpenGridCluster hisatCluster = ScriptUtil.autoSelectClusterWithConfig(dc, "align-hisat", cluster); |
6982 |
17 Jan 23 |
nicklas |
147 |
DemuxAutoConfirmer.submitHisatJobs(dc, mergedSequences, hisatCluster, batchConfig, debug, priority, cluster == hisatCluster ? partition : null); |
6185 |
26 Mar 21 |
nicklas |
148 |
dc.commit(); |
6185 |
26 Mar 21 |
nicklas |
149 |
} |
6185 |
26 Mar 21 |
nicklas |
150 |
catch (RuntimeException ex) |
6185 |
26 Mar 21 |
nicklas |
151 |
{ |
6636 |
09 Mar 22 |
nicklas |
152 |
logger.error("Could not create Hisat jobs", ex); |
6185 |
26 Mar 21 |
nicklas |
153 |
} |
6185 |
26 Mar 21 |
nicklas |
154 |
} |
6809 |
24 Aug 22 |
nicklas |
155 |
if (!hisat2023Disabled) |
6809 |
24 Aug 22 |
nicklas |
156 |
{ |
6809 |
24 Aug 22 |
nicklas |
// Create Hisat2023 jobs |
6809 |
24 Aug 22 |
nicklas |
158 |
try |
6809 |
24 Aug 22 |
nicklas |
159 |
{ |
6809 |
24 Aug 22 |
nicklas |
160 |
dc = sc.newDbControl("Reggie: Auto-confirm FASTQ import - start Hisat/2023 pipeline"); |
6809 |
24 Aug 22 |
nicklas |
// Select a cluster that has been configured for Hisat |
6811 |
25 Aug 22 |
nicklas |
162 |
OpenGridCluster hisatCluster = ScriptUtil.autoSelectClusterWithConfig(dc, "align-hisat-2023", cluster); |
6982 |
17 Jan 23 |
nicklas |
163 |
DemuxAutoConfirmer.submitHisat2023Jobs(dc, mergedSequences, hisatCluster, batchConfig, debug, priority, cluster == hisatCluster ? partition : null); |
6809 |
24 Aug 22 |
nicklas |
164 |
dc.commit(); |
6809 |
24 Aug 22 |
nicklas |
165 |
} |
6809 |
24 Aug 22 |
nicklas |
166 |
catch (RuntimeException ex) |
6809 |
24 Aug 22 |
nicklas |
167 |
{ |
6809 |
24 Aug 22 |
nicklas |
168 |
logger.error("Could not create Hisat/2023 jobs", ex); |
6809 |
24 Aug 22 |
nicklas |
169 |
} |
6809 |
24 Aug 22 |
nicklas |
170 |
} |
6636 |
09 Mar 22 |
nicklas |
// Create Tophat/Cufflinks jobs |
6636 |
09 Mar 22 |
nicklas |
// We use separate transactions so that if one pipeline can fail without affecting the other |
6636 |
09 Mar 22 |
nicklas |
173 |
if (!legacyDisabled) |
6185 |
26 Mar 21 |
nicklas |
174 |
{ |
6185 |
26 Mar 21 |
nicklas |
175 |
try |
6185 |
26 Mar 21 |
nicklas |
176 |
{ |
6636 |
09 Mar 22 |
nicklas |
177 |
dc = sc.newDbControl("Reggie: Auto-confirm FASTQ import - start Legacy pipeline"); |
6637 |
09 Mar 22 |
nicklas |
178 |
OpenGridCluster legacyCluster = ScriptUtil.autoSelectClusterWithConfig(dc, "cufflinks", cluster); |
6982 |
17 Jan 23 |
nicklas |
179 |
DemuxAutoConfirmer.submitLegacyJobs(dc, mergedSequences, legacyCluster, batchConfig, debug, -1023, cluster == legacyCluster ? partition : null); |
6185 |
26 Mar 21 |
nicklas |
180 |
dc.commit(); |
6185 |
26 Mar 21 |
nicklas |
181 |
} |
6185 |
26 Mar 21 |
nicklas |
182 |
catch (RuntimeException ex) |
6185 |
26 Mar 21 |
nicklas |
183 |
{ |
6636 |
09 Mar 22 |
nicklas |
184 |
logger.error("Could not create Tophat/Cufflinks jobs", ex); |
6185 |
26 Mar 21 |
nicklas |
185 |
} |
6185 |
26 Mar 21 |
nicklas |
186 |
} |
6185 |
26 Mar 21 |
nicklas |
187 |
} |
6185 |
26 Mar 21 |
nicklas |
188 |
finally |
6185 |
26 Mar 21 |
nicklas |
189 |
{ |
6185 |
26 Mar 21 |
nicklas |
190 |
if (dc != null) dc.close(); |
6185 |
26 Mar 21 |
nicklas |
191 |
} |
6185 |
26 Mar 21 |
nicklas |
192 |
return true; |
6185 |
26 Mar 21 |
nicklas |
193 |
} |
6185 |
26 Mar 21 |
nicklas |
194 |
|
6185 |
26 Mar 21 |
nicklas |
195 |
|
6185 |
26 Mar 21 |
nicklas |
196 |
} |