6816 |
26 Aug 22 |
nicklas |
1 |
package net.sf.basedb.reggie.autoconfirm; |
6816 |
26 Aug 22 |
nicklas |
2 |
|
6823 |
29 Aug 22 |
nicklas |
3 |
import java.util.Collections; |
6823 |
29 Aug 22 |
nicklas |
4 |
import java.util.List; |
6823 |
29 Aug 22 |
nicklas |
5 |
|
6816 |
26 Aug 22 |
nicklas |
6 |
import org.slf4j.LoggerFactory; |
6816 |
26 Aug 22 |
nicklas |
7 |
|
6816 |
26 Aug 22 |
nicklas |
8 |
import net.sf.basedb.core.Annotatable; |
6816 |
26 Aug 22 |
nicklas |
9 |
import net.sf.basedb.core.DbControl; |
6816 |
26 Aug 22 |
nicklas |
10 |
import net.sf.basedb.core.DerivedBioAssay; |
6816 |
26 Aug 22 |
nicklas |
11 |
import net.sf.basedb.core.ItemList; |
6816 |
26 Aug 22 |
nicklas |
12 |
import net.sf.basedb.core.Job; |
6816 |
26 Aug 22 |
nicklas |
13 |
import net.sf.basedb.core.Protocol; |
6816 |
26 Aug 22 |
nicklas |
14 |
import net.sf.basedb.core.SessionControl; |
6816 |
26 Aug 22 |
nicklas |
15 |
import net.sf.basedb.core.Software; |
6823 |
29 Aug 22 |
nicklas |
16 |
import net.sf.basedb.opengrid.JobDefinition; |
6816 |
26 Aug 22 |
nicklas |
17 |
import net.sf.basedb.opengrid.OpenGridCluster; |
6816 |
26 Aug 22 |
nicklas |
18 |
import net.sf.basedb.opengrid.config.BatchConfig; |
6816 |
26 Aug 22 |
nicklas |
19 |
import net.sf.basedb.opengrid.service.OpenGridService; |
6816 |
26 Aug 22 |
nicklas |
20 |
import net.sf.basedb.reggie.activity.ActivityDef; |
6816 |
26 Aug 22 |
nicklas |
21 |
import net.sf.basedb.reggie.dao.AlignedSequences; |
6816 |
26 Aug 22 |
nicklas |
22 |
import net.sf.basedb.reggie.dao.Annotationtype; |
6816 |
26 Aug 22 |
nicklas |
23 |
import net.sf.basedb.reggie.dao.Arraydesign; |
6816 |
26 Aug 22 |
nicklas |
24 |
import net.sf.basedb.reggie.dao.BiomaterialList; |
6816 |
26 Aug 22 |
nicklas |
25 |
import net.sf.basedb.reggie.dao.Pipeline; |
6816 |
26 Aug 22 |
nicklas |
26 |
import net.sf.basedb.reggie.dao.Rawdatatype; |
6816 |
26 Aug 22 |
nicklas |
27 |
import net.sf.basedb.reggie.dao.Rna; |
6816 |
26 Aug 22 |
nicklas |
28 |
import net.sf.basedb.reggie.dao.Subtype; |
6823 |
29 Aug 22 |
nicklas |
29 |
import net.sf.basedb.reggie.grid.ScriptUtil; |
6823 |
29 Aug 22 |
nicklas |
30 |
import net.sf.basedb.reggie.grid.StringTie2023JobCreator; |
7077 |
27 Mar 23 |
nicklas |
31 |
import net.sf.basedb.util.extensions.logging.ExtensionsLog; |
7077 |
27 Mar 23 |
nicklas |
32 |
import net.sf.basedb.util.extensions.logging.ExtensionsLogger; |
6816 |
26 Aug 22 |
nicklas |
33 |
import net.sf.basedb.util.filter.Filter; |
6816 |
26 Aug 22 |
nicklas |
34 |
|
6816 |
26 Aug 22 |
nicklas |
35 |
/** |
6816 |
26 Aug 22 |
nicklas |
Auto-confirm implementation for sequences aligned with Hisat/2023. |
6816 |
26 Aug 22 |
nicklas |
The rules are simple: |
6816 |
26 Aug 22 |
nicklas |
38 |
|
6816 |
26 Aug 22 |
nicklas |
* {@link DerivedBioAssay#getJob()} job status must be {@link net.sf.basedb.core.Job.Status#DONE}. |
6816 |
26 Aug 22 |
nicklas |
40 |
|
6816 |
26 Aug 22 |
nicklas |
Auto-confirmation will then continue with StringTie if the number of aligned |
6816 |
26 Aug 22 |
nicklas |
pairs is higher than 1 million. If the number of aligned pairs is less than |
6816 |
26 Aug 22 |
nicklas |
5 millions the RNA is flagged. |
6816 |
26 Aug 22 |
nicklas |
44 |
|
6816 |
26 Aug 22 |
nicklas |
@author nicklas |
6816 |
26 Aug 22 |
nicklas |
46 |
*/ |
6816 |
26 Aug 22 |
nicklas |
47 |
public class Hisat2023AutoConfirmer |
6816 |
26 Aug 22 |
nicklas |
48 |
extends AutoConfirmer<DerivedBioAssay> |
6816 |
26 Aug 22 |
nicklas |
49 |
{ |
6816 |
26 Aug 22 |
nicklas |
50 |
|
7077 |
27 Mar 23 |
nicklas |
51 |
private static final ExtensionsLogger logger = |
7077 |
27 Mar 23 |
nicklas |
52 |
ExtensionsLog.getLogger(AutoConfirmService.ID, true).wrap(LoggerFactory.getLogger(Hisat2023AutoConfirmer.class)); |
6816 |
26 Aug 22 |
nicklas |
53 |
|
6816 |
26 Aug 22 |
nicklas |
54 |
|
6816 |
26 Aug 22 |
nicklas |
55 |
/** |
6816 |
26 Aug 22 |
nicklas |
At least 5 million aligned pairs is required for |
6816 |
26 Aug 22 |
nicklas |
alignment to pass without flagging. |
6816 |
26 Aug 22 |
nicklas |
58 |
*/ |
6816 |
26 Aug 22 |
nicklas |
59 |
public static final long MIN_ALIGNED_PAIRS_FLAG = 5*M; |
6816 |
26 Aug 22 |
nicklas |
60 |
|
6816 |
26 Aug 22 |
nicklas |
61 |
/** |
6816 |
26 Aug 22 |
nicklas |
At least 1 million aligned pairs is required for |
6816 |
26 Aug 22 |
nicklas |
alignment to pass but with flagging RNA. |
6816 |
26 Aug 22 |
nicklas |
64 |
*/ |
6816 |
26 Aug 22 |
nicklas |
65 |
public static final long MIN_ALIGNED_PAIRS_STOP = 1*M; |
6816 |
26 Aug 22 |
nicklas |
66 |
|
6816 |
26 Aug 22 |
nicklas |
67 |
public static final long MIN_ALIGNED_PAIRS_FLAG_DEBUG = 200*k; |
6816 |
26 Aug 22 |
nicklas |
68 |
public static final long MIN_ALIGNED_PAIRS_STOP_DEBUG = 150*k; |
6816 |
26 Aug 22 |
nicklas |
69 |
|
6816 |
26 Aug 22 |
nicklas |
70 |
public Hisat2023AutoConfirmer(DerivedBioAssay aligned) |
6816 |
26 Aug 22 |
nicklas |
71 |
{ |
6816 |
26 Aug 22 |
nicklas |
72 |
super(aligned, aligned.getJob()); |
6816 |
26 Aug 22 |
nicklas |
73 |
} |
6816 |
26 Aug 22 |
nicklas |
74 |
|
6816 |
26 Aug 22 |
nicklas |
75 |
/** |
6816 |
26 Aug 22 |
nicklas |
Passes if the job ended successfully and HET percentage is below the |
6816 |
26 Aug 22 |
nicklas |
limit. |
6816 |
26 Aug 22 |
nicklas |
78 |
*/ |
6816 |
26 Aug 22 |
nicklas |
79 |
@Override |
6816 |
26 Aug 22 |
nicklas |
80 |
public boolean checkRules(DbControl dc, AutoConfirmManager manager) |
6816 |
26 Aug 22 |
nicklas |
81 |
{ |
6816 |
26 Aug 22 |
nicklas |
82 |
DerivedBioAssay aligned = item(dc); |
6816 |
26 Aug 22 |
nicklas |
83 |
Job job = aligned.getJob(); |
6816 |
26 Aug 22 |
nicklas |
84 |
|
6816 |
26 Aug 22 |
nicklas |
85 |
if (job.getStatus() != Job.Status.DONE) return false; |
6816 |
26 Aug 22 |
nicklas |
86 |
|
6816 |
26 Aug 22 |
nicklas |
87 |
return true; |
6816 |
26 Aug 22 |
nicklas |
88 |
} |
6816 |
26 Aug 22 |
nicklas |
89 |
|
6816 |
26 Aug 22 |
nicklas |
90 |
/** |
6816 |
26 Aug 22 |
nicklas |
Flag RNA if aligned pairs is less than 5 million or HET percentage is |
6816 |
26 Aug 22 |
nicklas |
higher than the limit. Continue with StringTie if aligned pairs is more |
6816 |
26 Aug 22 |
nicklas |
than 1 million. |
7122 |
21 Apr 23 |
nicklas |
94 |
|
7122 |
21 Apr 23 |
nicklas |
NOTE! As long as we have the old Hisat/StringTie pipeline as the main |
7122 |
21 Apr 23 |
nicklas |
pipeline, we do not flag RNA here. But, we leave most of the code and |
7122 |
21 Apr 23 |
nicklas |
logic in case we swith in the future. |
6816 |
26 Aug 22 |
nicklas |
98 |
*/ |
6816 |
26 Aug 22 |
nicklas |
99 |
@Override |
6816 |
26 Aug 22 |
nicklas |
100 |
public boolean autoConfirm(DbControl dc, AutoConfirmManager manager) |
6816 |
26 Aug 22 |
nicklas |
101 |
{ |
6816 |
26 Aug 22 |
nicklas |
102 |
DerivedBioAssay aligned = item(dc); |
6816 |
26 Aug 22 |
nicklas |
103 |
Job job = aligned.getJob(); |
6816 |
26 Aug 22 |
nicklas |
104 |
boolean debug = Boolean.TRUE.equals(job.getParameterValue("debug")); |
6816 |
26 Aug 22 |
nicklas |
105 |
|
6816 |
26 Aug 22 |
nicklas |
106 |
long minAlignedPairsFlag = debug ? MIN_ALIGNED_PAIRS_FLAG_DEBUG : MIN_ALIGNED_PAIRS_FLAG; |
6816 |
26 Aug 22 |
nicklas |
107 |
long minAlignedPairsStop = debug ? MIN_ALIGNED_PAIRS_STOP_DEBUG : MIN_ALIGNED_PAIRS_STOP; |
6816 |
26 Aug 22 |
nicklas |
108 |
|
6816 |
26 Aug 22 |
nicklas |
// Reset auto-processing so that it shows up in the manual wizard if |
6816 |
26 Aug 22 |
nicklas |
// starting the next step fails |
6816 |
26 Aug 22 |
nicklas |
111 |
Annotationtype.AUTO_PROCESSING.setAnnotationValue(dc, aligned, null); |
6816 |
26 Aug 22 |
nicklas |
112 |
Long alignedPairs = (Long)Annotationtype.ALIGNED_PAIRS.getAnnotationValue(dc, aligned); |
6816 |
26 Aug 22 |
nicklas |
113 |
|
6816 |
26 Aug 22 |
nicklas |
114 |
String flagRNA = null; |
6816 |
26 Aug 22 |
nicklas |
115 |
boolean stop = false; |
6816 |
26 Aug 22 |
nicklas |
116 |
if (alignedPairs == null || alignedPairs < minAlignedPairsStop) |
6816 |
26 Aug 22 |
nicklas |
117 |
{ |
6816 |
26 Aug 22 |
nicklas |
// Flag and stop |
6816 |
26 Aug 22 |
nicklas |
119 |
flagRNA = Rna.FLAG_ALIGN_FAILED; |
6816 |
26 Aug 22 |
nicklas |
120 |
stop = true; |
6816 |
26 Aug 22 |
nicklas |
121 |
} |
6816 |
26 Aug 22 |
nicklas |
122 |
else if (alignedPairs < minAlignedPairsFlag) |
6816 |
26 Aug 22 |
nicklas |
123 |
{ |
6816 |
26 Aug 22 |
nicklas |
// Flag and continue |
6816 |
26 Aug 22 |
nicklas |
125 |
flagRNA = Rna.FLAG_ALIGN_FAILED; |
6816 |
26 Aug 22 |
nicklas |
126 |
} |
6816 |
26 Aug 22 |
nicklas |
127 |
|
6816 |
26 Aug 22 |
nicklas |
128 |
if (flagRNA != null) |
6816 |
26 Aug 22 |
nicklas |
129 |
{ |
7122 |
21 Apr 23 |
nicklas |
// We do not actually flag in this pipeline (see comment above) |
7122 |
21 Apr 23 |
nicklas |
// manager.flagRna(dc, aligned.getExtract(), flagRNA); |
6816 |
26 Aug 22 |
nicklas |
132 |
} |
6816 |
26 Aug 22 |
nicklas |
133 |
|
6816 |
26 Aug 22 |
nicklas |
134 |
if (stop) |
6816 |
26 Aug 22 |
nicklas |
135 |
{ |
6816 |
26 Aug 22 |
nicklas |
136 |
Annotationtype.ANALYSIS_RESULT.setAnnotationValue(dc, aligned, AlignedSequences.ALIGN_FAILED); |
6816 |
26 Aug 22 |
nicklas |
137 |
} |
6816 |
26 Aug 22 |
nicklas |
138 |
else |
6816 |
26 Aug 22 |
nicklas |
139 |
{ |
6816 |
26 Aug 22 |
nicklas |
140 |
Annotationtype.ANALYSIS_RESULT.setAnnotationValue(dc, aligned, AlignedSequences.ALIGN_SUCCESSFUL); |
6816 |
26 Aug 22 |
nicklas |
141 |
|
6816 |
26 Aug 22 |
nicklas |
// Pipelines for further processing |
6816 |
26 Aug 22 |
nicklas |
143 |
BiomaterialList.STRINGTIE_2023_PIPELINE.load(dc).add(aligned); |
6816 |
26 Aug 22 |
nicklas |
144 |
ActivityDef.HISAT_AUTOCONFIRMED.merge(dc, 1).setUser("Auto-confirm"); |
6816 |
26 Aug 22 |
nicklas |
145 |
} |
6816 |
26 Aug 22 |
nicklas |
146 |
|
6816 |
26 Aug 22 |
nicklas |
147 |
return !stop; |
6816 |
26 Aug 22 |
nicklas |
148 |
} |
6816 |
26 Aug 22 |
nicklas |
149 |
|
6816 |
26 Aug 22 |
nicklas |
150 |
/** |
6816 |
26 Aug 22 |
nicklas |
Schedule StringTie/2023 analysis to run on the cluster. |
6816 |
26 Aug 22 |
nicklas |
152 |
*/ |
6816 |
26 Aug 22 |
nicklas |
153 |
@Override |
6816 |
26 Aug 22 |
nicklas |
154 |
public boolean startNextStep(SessionControl sc, AutoConfirmManager manager) |
6816 |
26 Aug 22 |
nicklas |
155 |
{ |
6816 |
26 Aug 22 |
nicklas |
156 |
DbControl dc = null; |
6816 |
26 Aug 22 |
nicklas |
157 |
try |
6816 |
26 Aug 22 |
nicklas |
158 |
{ |
6816 |
26 Aug 22 |
nicklas |
159 |
dc = sc.newDbControl("Reggie: Auto-confirm Hisat/2023"); |
6816 |
26 Aug 22 |
nicklas |
160 |
|
6816 |
26 Aug 22 |
nicklas |
161 |
DerivedBioAssay aligned = item(dc); |
6816 |
26 Aug 22 |
nicklas |
162 |
Job job = aligned.getJob(); |
6816 |
26 Aug 22 |
nicklas |
163 |
|
6816 |
26 Aug 22 |
nicklas |
164 |
boolean debug = Boolean.TRUE.equals(job.getParameterValue("debug")); |
6816 |
26 Aug 22 |
nicklas |
165 |
Integer priority = (Integer)job.getParameterValue("priority"); |
6982 |
17 Jan 23 |
nicklas |
166 |
String partition = job.getParameterValue("partition"); |
6816 |
26 Aug 22 |
nicklas |
167 |
|
6816 |
26 Aug 22 |
nicklas |
168 |
String clusterId = job.getServer(); |
6816 |
26 Aug 22 |
nicklas |
169 |
OpenGridCluster cluster = OpenGridService.getInstance().getClusterById(dc, clusterId); |
6816 |
26 Aug 22 |
nicklas |
170 |
if (cluster == null) |
6816 |
26 Aug 22 |
nicklas |
171 |
{ |
6816 |
26 Aug 22 |
nicklas |
// If we don't have required items, abort and revert to manual start |
6816 |
26 Aug 22 |
nicklas |
173 |
return false; |
6816 |
26 Aug 22 |
nicklas |
174 |
} |
6816 |
26 Aug 22 |
nicklas |
175 |
|
6816 |
26 Aug 22 |
nicklas |
// Pipelines for further processing |
6816 |
26 Aug 22 |
nicklas |
177 |
ItemList stringtiePipeline = BiomaterialList.STRINGTIE_2023_PIPELINE.load(dc); |
6816 |
26 Aug 22 |
nicklas |
178 |
boolean stringtieDisabled = "Disable".equals(Annotationtype.AUTO_PROCESSING.getAnnotationValue(dc, stringtiePipeline)); |
6816 |
26 Aug 22 |
nicklas |
179 |
|
6816 |
26 Aug 22 |
nicklas |
180 |
if (stringtieDisabled) return false; |
6816 |
26 Aug 22 |
nicklas |
181 |
dc.close(); |
6816 |
26 Aug 22 |
nicklas |
182 |
|
6816 |
26 Aug 22 |
nicklas |
183 |
BatchConfig batchConfig = new BatchConfig(); |
6816 |
26 Aug 22 |
nicklas |
// Create StringTie job |
6816 |
26 Aug 22 |
nicklas |
185 |
if (!stringtieDisabled) |
6816 |
26 Aug 22 |
nicklas |
186 |
{ |
6816 |
26 Aug 22 |
nicklas |
187 |
try |
6816 |
26 Aug 22 |
nicklas |
188 |
{ |
6816 |
26 Aug 22 |
nicklas |
189 |
dc = sc.newDbControl("Reggie: Auto-confirm Hisat/2023 - start StringTie/2023"); |
6816 |
26 Aug 22 |
nicklas |
190 |
|
6819 |
26 Aug 22 |
nicklas |
191 |
Filter<Annotatable> pipelineFilter = Annotationtype.PIPELINE.createFilter(Pipeline.RNASEQ_STRINGTIE_2023.getName()); |
6819 |
26 Aug 22 |
nicklas |
192 |
Arraydesign design = Arraydesign.getLatestProjectDefault(dc, Rawdatatype.STRINGTIE.getVariantId(), pipelineFilter); |
6819 |
26 Aug 22 |
nicklas |
193 |
Protocol protocol = (Protocol)Subtype.FEATURE_EXTRACTION_PROTOCOL.getLatestProjectDefault(dc, pipelineFilter); |
6819 |
26 Aug 22 |
nicklas |
194 |
Software software = (Software)Subtype.FEATURE_EXTRACTION_SOFTWARE.getLatestProjectDefault(dc, pipelineFilter); |
6816 |
26 Aug 22 |
nicklas |
195 |
|
6816 |
26 Aug 22 |
nicklas |
196 |
if (logger.isDebugEnabled()) |
6816 |
26 Aug 22 |
nicklas |
197 |
{ |
6816 |
26 Aug 22 |
nicklas |
198 |
logger.debug("Starting StringTie/2023 jobs on cluster: " + clusterId); |
6816 |
26 Aug 22 |
nicklas |
199 |
logger.debug("StringTie Software/protocol: " + software + "/" + protocol); |
6816 |
26 Aug 22 |
nicklas |
200 |
logger.debug("Array design: " + design); |
6816 |
26 Aug 22 |
nicklas |
201 |
} |
6823 |
29 Aug 22 |
nicklas |
202 |
|
6982 |
17 Jan 23 |
nicklas |
203 |
OpenGridCluster stringtieCluster = ScriptUtil.autoSelectClusterWithConfig(dc, "stringtie-2023", cluster); |
6823 |
29 Aug 22 |
nicklas |
204 |
StringTie2023JobCreator jobCreator = new StringTie2023JobCreator(); |
6816 |
26 Aug 22 |
nicklas |
205 |
jobCreator.setArrayDesign(design == null ? null : design.getItem()); |
6816 |
26 Aug 22 |
nicklas |
206 |
jobCreator.setProtocol(protocol); |
6816 |
26 Aug 22 |
nicklas |
207 |
jobCreator.setSoftware(software); |
6816 |
26 Aug 22 |
nicklas |
208 |
jobCreator.setAutoConfirm(true); |
6816 |
26 Aug 22 |
nicklas |
209 |
jobCreator.setDebug(debug); |
6816 |
26 Aug 22 |
nicklas |
210 |
jobCreator.setPriority(priority); |
6982 |
17 Jan 23 |
nicklas |
211 |
if (cluster == stringtieCluster) jobCreator.setPartition(partition); |
6816 |
26 Aug 22 |
nicklas |
212 |
jobCreator.setBatchConfig(batchConfig); |
6816 |
26 Aug 22 |
nicklas |
213 |
|
6816 |
26 Aug 22 |
nicklas |
214 |
List<JobDefinition> jobDefs = jobCreator.createStringTieJobs(dc, stringtieCluster, Collections.singletonList(AlignedSequences.getById(dc, aligned.getId()))); |
6816 |
26 Aug 22 |
nicklas |
215 |
ScriptUtil.submitJobs(dc, stringtieCluster, jobDefs); |
6823 |
29 Aug 22 |
nicklas |
216 |
|
6816 |
26 Aug 22 |
nicklas |
217 |
dc.commit(); |
6816 |
26 Aug 22 |
nicklas |
218 |
} |
6816 |
26 Aug 22 |
nicklas |
219 |
catch (RuntimeException ex) |
6816 |
26 Aug 22 |
nicklas |
220 |
{ |
6816 |
26 Aug 22 |
nicklas |
221 |
logger.error("Could not create StringTie/2023 job", ex); |
6816 |
26 Aug 22 |
nicklas |
222 |
} |
6816 |
26 Aug 22 |
nicklas |
223 |
} |
6816 |
26 Aug 22 |
nicklas |
224 |
} |
6816 |
26 Aug 22 |
nicklas |
225 |
finally |
6816 |
26 Aug 22 |
nicklas |
226 |
{ |
6816 |
26 Aug 22 |
nicklas |
227 |
if (dc != null) dc.close(); |
6816 |
26 Aug 22 |
nicklas |
228 |
} |
6816 |
26 Aug 22 |
nicklas |
229 |
return true; |
6816 |
26 Aug 22 |
nicklas |
230 |
} |
6816 |
26 Aug 22 |
nicklas |
231 |
} |