6357 |
26 Aug 21 |
nicklas |
1 |
package net.sf.basedb.reggie.grid; |
6357 |
26 Aug 21 |
nicklas |
2 |
|
6361 |
30 Aug 21 |
nicklas |
3 |
import java.io.InputStream; |
6657 |
30 Mar 22 |
nicklas |
4 |
import java.io.StringWriter; |
6359 |
27 Aug 21 |
nicklas |
5 |
import java.util.ArrayList; |
6357 |
26 Aug 21 |
nicklas |
6 |
import java.util.List; |
6357 |
26 Aug 21 |
nicklas |
7 |
|
6360 |
27 Aug 21 |
nicklas |
8 |
import org.jdom2.Element; |
6360 |
27 Aug 21 |
nicklas |
9 |
import org.json.simple.JSONObject; |
6357 |
26 Aug 21 |
nicklas |
10 |
|
6361 |
30 Aug 21 |
nicklas |
11 |
import net.sf.basedb.core.AnyToAny; |
6361 |
30 Aug 21 |
nicklas |
12 |
import net.sf.basedb.core.BaseException; |
6357 |
26 Aug 21 |
nicklas |
13 |
import net.sf.basedb.core.DbControl; |
6359 |
27 Aug 21 |
nicklas |
14 |
import net.sf.basedb.core.DerivedBioAssay; |
6361 |
30 Aug 21 |
nicklas |
15 |
import net.sf.basedb.core.Directory; |
6359 |
27 Aug 21 |
nicklas |
16 |
import net.sf.basedb.core.File; |
6361 |
30 Aug 21 |
nicklas |
17 |
import net.sf.basedb.core.FileServer; |
6359 |
27 Aug 21 |
nicklas |
18 |
import net.sf.basedb.core.ItemList; |
6359 |
27 Aug 21 |
nicklas |
19 |
import net.sf.basedb.core.ItemNotFoundException; |
6359 |
27 Aug 21 |
nicklas |
20 |
import net.sf.basedb.core.ItemParameterType; |
6361 |
30 Aug 21 |
nicklas |
21 |
import net.sf.basedb.core.ItemSubtype; |
6357 |
26 Aug 21 |
nicklas |
22 |
import net.sf.basedb.core.Job; |
6361 |
30 Aug 21 |
nicklas |
23 |
import net.sf.basedb.core.Path; |
6359 |
27 Aug 21 |
nicklas |
24 |
import net.sf.basedb.core.RawBioAssay; |
6357 |
26 Aug 21 |
nicklas |
25 |
import net.sf.basedb.core.SessionControl; |
6357 |
26 Aug 21 |
nicklas |
26 |
import net.sf.basedb.core.Software; |
6359 |
27 Aug 21 |
nicklas |
27 |
import net.sf.basedb.core.StringParameterType; |
6357 |
26 Aug 21 |
nicklas |
28 |
import net.sf.basedb.opengrid.JobDefinition; |
6357 |
26 Aug 21 |
nicklas |
29 |
import net.sf.basedb.opengrid.JobStatus; |
6357 |
26 Aug 21 |
nicklas |
30 |
import net.sf.basedb.opengrid.OpenGridCluster; |
6357 |
26 Aug 21 |
nicklas |
31 |
import net.sf.basedb.opengrid.OpenGridSession; |
6359 |
27 Aug 21 |
nicklas |
32 |
import net.sf.basedb.opengrid.ScriptBuilder; |
6359 |
27 Aug 21 |
nicklas |
33 |
import net.sf.basedb.opengrid.config.ClusterConfig; |
6359 |
27 Aug 21 |
nicklas |
34 |
import net.sf.basedb.opengrid.config.JobConfig; |
6657 |
30 Mar 22 |
nicklas |
35 |
import net.sf.basedb.opengrid.filetransfer.StringUploadSource; |
6357 |
26 Aug 21 |
nicklas |
36 |
import net.sf.basedb.opengrid.service.JobCompletionHandler; |
6359 |
27 Aug 21 |
nicklas |
37 |
import net.sf.basedb.reggie.Reggie; |
6359 |
27 Aug 21 |
nicklas |
38 |
import net.sf.basedb.reggie.XmlConfig; |
6359 |
27 Aug 21 |
nicklas |
39 |
import net.sf.basedb.reggie.dao.Annotationtype; |
6359 |
27 Aug 21 |
nicklas |
40 |
import net.sf.basedb.reggie.dao.BiomaterialList; |
6359 |
27 Aug 21 |
nicklas |
41 |
import net.sf.basedb.reggie.dao.Datafiletype; |
6361 |
30 Aug 21 |
nicklas |
42 |
import net.sf.basedb.reggie.dao.Fileserver; |
6359 |
27 Aug 21 |
nicklas |
43 |
import net.sf.basedb.reggie.dao.Library; |
6357 |
26 Aug 21 |
nicklas |
44 |
import net.sf.basedb.reggie.dao.Rawbioassay; |
6359 |
27 Aug 21 |
nicklas |
45 |
import net.sf.basedb.reggie.dao.Subtype; |
6361 |
30 Aug 21 |
nicklas |
46 |
import net.sf.basedb.reggie.vcf.VcfData; |
6361 |
30 Aug 21 |
nicklas |
47 |
import net.sf.basedb.reggie.vcf.VcfParser; |
6361 |
30 Aug 21 |
nicklas |
48 |
import net.sf.basedb.util.FileUtil; |
6359 |
27 Aug 21 |
nicklas |
49 |
import net.sf.basedb.util.Values; |
6657 |
30 Mar 22 |
nicklas |
50 |
import net.sf.basedb.util.export.TableWriter; |
6357 |
26 Aug 21 |
nicklas |
51 |
|
6357 |
26 Aug 21 |
nicklas |
52 |
/** |
6357 |
26 Aug 21 |
nicklas |
Helper class for creating the targeted genotyping script
and sending it to the cluster for execution.
6357 |
26 Aug 21 |
nicklas |
55 |
|
6357 |
26 Aug 21 |
nicklas |
@author nicklas |
6389 |
15 Sep 21 |
nicklas |
@since 4.32 |
6357 |
26 Aug 21 |
nicklas |
58 |
*/ |
6357 |
26 Aug 21 |
nicklas |
59 |
public class TargetedGenotypeJobCreator |
6674 |
11 Apr 22 |
nicklas |
60 |
extends AbstractJobCreator |
6357 |
26 Aug 21 |
nicklas |
61 |
{ |
6360 |
27 Aug 21 |
nicklas |
62 |
/** |
6360 |
27 Aug 21 |
nicklas |
Get a list with all targeted genotype definitions that has been configured in reggie-config.xml |
6360 |
27 Aug 21 |
nicklas |
64 |
*/ |
6360 |
27 Aug 21 |
nicklas |
65 |
public static List<TargetedGenotypeDefinition> getConfiguredTargets() |
6360 |
27 Aug 21 |
nicklas |
66 |
{ |
6360 |
27 Aug 21 |
nicklas |
67 |
XmlConfig cfg = Reggie.getConfig(); |
6360 |
27 Aug 21 |
nicklas |
68 |
List<Element> elements = cfg.getElements("targeted-genotyping/target"); |
6360 |
27 Aug 21 |
nicklas |
69 |
List<TargetedGenotypeDefinition> models = new ArrayList<>(); |
6360 |
27 Aug 21 |
nicklas |
70 |
for (Element e : elements) |
6360 |
27 Aug 21 |
nicklas |
71 |
{ |
6360 |
27 Aug 21 |
nicklas |
72 |
models.add(new TargetedGenotypeDefinition(e)); |
6360 |
27 Aug 21 |
nicklas |
73 |
} |
6360 |
27 Aug 21 |
nicklas |
74 |
return models; |
6360 |
27 Aug 21 |
nicklas |
75 |
} |
6360 |
27 Aug 21 |
nicklas |
76 |
|
6360 |
27 Aug 21 |
nicklas |
77 |
/** |
6365 |
02 Sep 21 |
nicklas |
Get an array with the names of all targeted genotype definitions. |
6365 |
02 Sep 21 |
nicklas |
79 |
*/ |
6365 |
02 Sep 21 |
nicklas |
80 |
public static String[] getConfiguredTargetNames() |
6365 |
02 Sep 21 |
nicklas |
81 |
{ |
6365 |
02 Sep 21 |
nicklas |
82 |
XmlConfig cfg = Reggie.getConfig(); |
6365 |
02 Sep 21 |
nicklas |
83 |
List<Element> elements = cfg.getElements("targeted-genotyping/target"); |
6365 |
02 Sep 21 |
nicklas |
84 |
String[] names = new String[elements.size()]; |
6365 |
02 Sep 21 |
nicklas |
85 |
int i = 0; |
6365 |
02 Sep 21 |
nicklas |
86 |
for (Element e : elements) |
6365 |
02 Sep 21 |
nicklas |
87 |
{ |
6365 |
02 Sep 21 |
nicklas |
88 |
names[i] = e.getAttributeValue("name"); |
6365 |
02 Sep 21 |
nicklas |
89 |
i++; |
6365 |
02 Sep 21 |
nicklas |
90 |
} |
6365 |
02 Sep 21 |
nicklas |
91 |
return names; |
6365 |
02 Sep 21 |
nicklas |
92 |
|
6365 |
02 Sep 21 |
nicklas |
93 |
} |
6365 |
02 Sep 21 |
nicklas |
94 |
|
6365 |
02 Sep 21 |
nicklas |
95 |
/** |
6360 |
27 Aug 21 |
nicklas |
Get the targeted genotype definition with the given name. Return null |
6360 |
27 Aug 21 |
nicklas |
if no target with the given name can be found. |
6360 |
27 Aug 21 |
nicklas |
98 |
*/ |
6360 |
27 Aug 21 |
nicklas |
99 |
public static TargetedGenotypeDefinition getTargetByName(String name) |
6360 |
27 Aug 21 |
nicklas |
100 |
{ |
6365 |
02 Sep 21 |
nicklas |
101 |
if (name == null) return null; |
6360 |
27 Aug 21 |
nicklas |
102 |
XmlConfig cfg = Reggie.getConfig(); |
6360 |
27 Aug 21 |
nicklas |
103 |
Element e = cfg.getElement("targeted-genotyping/target[@name='"+name+"']"); |
6360 |
27 Aug 21 |
nicklas |
104 |
return e == null ? null : new TargetedGenotypeDefinition(e); |
6360 |
27 Aug 21 |
nicklas |
105 |
} |
6360 |
27 Aug 21 |
nicklas |
106 |
|
6357 |
26 Aug 21 |
nicklas |
107 |
/**
  The software item assigned with {@link #setSoftware(Software)}.
  May be null.
*/
private Software software;

/**
  Create a new job creator. No software item is set initially.
*/
public TargetedGenotypeJobCreator()
{
  super();
}
6357 |
26 Aug 21 |
nicklas |
111 |
|
6357 |
26 Aug 21 |
nicklas |
112 |
/** |
6357 |
26 Aug 21 |
nicklas |
Set the software item to set on created VCF files. |
6357 |
26 Aug 21 |
nicklas |
114 |
*/ |
6357 |
26 Aug 21 |
nicklas |
115 |
public void setSoftware(Software software) |
6357 |
26 Aug 21 |
nicklas |
116 |
{ |
6357 |
26 Aug 21 |
nicklas |
117 |
this.software = software; |
6357 |
26 Aug 21 |
nicklas |
118 |
} |
6357 |
26 Aug 21 |
nicklas |
119 |
|
6357 |
26 Aug 21 |
nicklas |
120 |
/** |
6357 |
26 Aug 21 |
nicklas |
Schedule jobs on the given cluster for running targeted genotype analysis. |
6357 |
26 Aug 21 |
nicklas |
@return A list with the corresponding jobs in BASE |
6357 |
26 Aug 21 |
nicklas |
123 |
*/ |
6360 |
27 Aug 21 |
nicklas |
124 |
public List<JobDefinition> createGenotypeJobs(DbControl dc, OpenGridCluster cluster, List<Rawbioassay> variantCalls, List<TargetedGenotypeDefinition> targets) |
6357 |
26 Aug 21 |
nicklas |
125 |
{ |
6359 |
27 Aug 21 |
nicklas |
126 |
SessionControl sc = dc.getSessionControl(); |
6359 |
27 Aug 21 |
nicklas |
127 |
|
6359 |
27 Aug 21 |
nicklas |
128 |
ClusterConfig clusterCfg = cluster.getConfig(); |
6359 |
27 Aug 21 |
nicklas |
129 |
XmlConfig cfg = Reggie.getConfig(cluster.getId()); |
6359 |
27 Aug 21 |
nicklas |
130 |
if (cfg == null) |
6359 |
27 Aug 21 |
nicklas |
131 |
{ |
6359 |
27 Aug 21 |
nicklas |
132 |
throw new ItemNotFoundException("No configuration in reggie-config.xml for cluster: " + cluster.getId()); |
6359 |
27 Aug 21 |
nicklas |
133 |
} |
6359 |
27 Aug 21 |
nicklas |
134 |
String parameterSet = (String)Annotationtype.PARAMETER_SET.getAnnotationValue(dc, software); |
6359 |
27 Aug 21 |
nicklas |
135 |
|
6359 |
27 Aug 21 |
nicklas |
// Get global options |
6693 |
22 Apr 22 |
nicklas |
137 |
String global_env = ScriptUtil.multilineIndent(cfg.getConfig("global-env")); |
6657 |
30 Mar 22 |
nicklas |
138 |
String projectArchive = cfg.getRequiredConfig("project-archive", null); |
6657 |
30 Mar 22 |
nicklas |
139 |
String externalArchive = cfg.getConfig("external-archive", null, projectArchive); |
6359 |
27 Aug 21 |
nicklas |
140 |
|
6359 |
27 Aug 21 |
nicklas |
// Options for the programs |
7372 |
06 Oct 23 |
nicklas |
142 |
String targeted_submit = cfg.getConfig("targeted-genotyping/submit", parameterSet, null); |
7372 |
06 Oct 23 |
nicklas |
143 |
String targeted_submit_debug = cfg.getConfig("targeted-genotyping/submit-debug", parameterSet, null); |
6657 |
30 Mar 22 |
nicklas |
144 |
String targeted_env = ScriptUtil.multilineIndent(cfg.getRequiredConfig("targeted-genotyping/env", parameterSet)); |
6657 |
30 Mar 22 |
nicklas |
145 |
String targeted_envdebug = ScriptUtil.multilineIndent(cfg.getConfig("targeted-genotyping/env-debug", parameterSet, null)); |
6657 |
30 Mar 22 |
nicklas |
146 |
String targeted_execute = ScriptUtil.multilineIndent(cfg.getConfig("targeted-genotyping/execute", parameterSet, "./targeted-genotyping.sh")); |
6359 |
27 Aug 21 |
nicklas |
147 |
|
6359 |
27 Aug 21 |
nicklas |
// Selected items must be removed from this list |
6359 |
27 Aug 21 |
nicklas |
149 |
ItemList tgtPipeline = BiomaterialList.TARGETED_GENOTYPE_PIPELINE.load(dc); |
6359 |
27 Aug 21 |
nicklas |
150 |
|
6359 |
27 Aug 21 |
nicklas |
// Options common for all jobs |
6359 |
27 Aug 21 |
nicklas |
152 |
JobConfig jobConfig = new JobConfig(); |
6359 |
27 Aug 21 |
nicklas |
153 |
if (priority != null) jobConfig.setPriority(priority); |
7372 |
06 Oct 23 |
nicklas |
154 |
if (partition != null) jobConfig.setSbatchOption("partition", ScriptUtil.checkValidScriptParameter(partition)); |
7372 |
06 Oct 23 |
nicklas |
155 |
jobConfig.convertOptionsTo(clusterCfg.getType()); |
7372 |
06 Oct 23 |
nicklas |
156 |
if (submitOptionsOverride != null) |
7372 |
06 Oct 23 |
nicklas |
157 |
{ |
7372 |
06 Oct 23 |
nicklas |
158 |
ScriptUtil.addSubmitOptions(jobConfig, submitOptionsOverride, clusterCfg.getType()); |
7372 |
06 Oct 23 |
nicklas |
159 |
} |
7372 |
06 Oct 23 |
nicklas |
160 |
else |
7372 |
06 Oct 23 |
nicklas |
161 |
{ |
7372 |
06 Oct 23 |
nicklas |
162 |
ScriptUtil.addSubmitOptions(jobConfig, targeted_submit, clusterCfg.getType()); |
7372 |
06 Oct 23 |
nicklas |
163 |
if (debug) ScriptUtil.addSubmitOptions(jobConfig, targeted_submit_debug, clusterCfg.getType()); |
7372 |
06 Oct 23 |
nicklas |
164 |
} |
6359 |
27 Aug 21 |
nicklas |
165 |
|
6359 |
27 Aug 21 |
nicklas |
// We submit one job for each raw bioassay to the cluster |
6359 |
27 Aug 21 |
nicklas |
167 |
List<JobDefinition> jobDefs = new ArrayList<JobDefinition>(variantCalls.size()); |
6359 |
27 Aug 21 |
nicklas |
168 |
|
6359 |
27 Aug 21 |
nicklas |
169 |
for (Rawbioassay rba : variantCalls) |
6359 |
27 Aug 21 |
nicklas |
170 |
{ |
6359 |
27 Aug 21 |
nicklas |
171 |
rba = Rawbioassay.getById(dc, rba.getId()); // Ensure item is loaded in this transaction |
6359 |
27 Aug 21 |
nicklas |
172 |
|
6359 |
27 Aug 21 |
nicklas |
// Get some information about the aligned data that we need |
6359 |
27 Aug 21 |
nicklas |
174 |
DerivedBioAssay aligned = rba.getAlignedSequences(dc).getDerivedBioAssay(); |
6359 |
27 Aug 21 |
nicklas |
175 |
RawBioAssay raw = rba.getRawBioAssay(); |
6359 |
27 Aug 21 |
nicklas |
176 |
tgtPipeline.removeItem(raw); |
6359 |
27 Aug 21 |
nicklas |
177 |
|
6359 |
27 Aug 21 |
nicklas |
178 |
Library lib = rba.getLibrary(dc); |
6359 |
27 Aug 21 |
nicklas |
179 |
boolean isExternal = Reggie.isExternalItem(rba.getName()); |
6657 |
30 Mar 22 |
nicklas |
180 |
String archiveFolder = isExternal ? externalArchive : projectArchive; |
6359 |
27 Aug 21 |
nicklas |
181 |
String bamFolder = ScriptUtil.checkValidPath((String)Annotationtype.DATA_FILES_FOLDER.getAnnotationValue(dc, aligned), true, false); |
6359 |
27 Aug 21 |
nicklas |
182 |
File bamFile = Datafiletype.BAM.getFile(dc, aligned); |
6359 |
27 Aug 21 |
nicklas |
183 |
String bamName = ScriptUtil.checkValidScriptParameter(bamFile.getName()); |
6359 |
27 Aug 21 |
nicklas |
184 |
String vcfFolder = ScriptUtil.checkValidPath((String)Annotationtype.DATA_FILES_FOLDER.getAnnotationValue(dc, raw), true, false); |
6359 |
27 Aug 21 |
nicklas |
185 |
|
6359 |
27 Aug 21 |
nicklas |
// Create job |
6359 |
27 Aug 21 |
nicklas |
187 |
Job gtJob = Job.getNew(dc, null, null, null); |
6359 |
27 Aug 21 |
nicklas |
188 |
gtJob.setItemSubtype(Subtype.TARGETED_GENOTYPING_JOB.get(dc)); |
6359 |
27 Aug 21 |
nicklas |
189 |
gtJob.setPluginVersion("reggie-"+Reggie.VERSION); |
6359 |
27 Aug 21 |
nicklas |
190 |
gtJob.setSendMessage(Values.getBoolean(sc.getUserClientSetting("plugins.sendmessage"), false)); |
6359 |
27 Aug 21 |
nicklas |
191 |
gtJob.setName("Run targeted genotyping " + rba.getName()); |
6359 |
27 Aug 21 |
nicklas |
192 |
gtJob.setParameterValue("variantCall", new ItemParameterType<RawBioAssay>(RawBioAssay.class, null), raw); |
6361 |
30 Aug 21 |
nicklas |
193 |
List<String> targetNames = new ArrayList<>(); |
6361 |
30 Aug 21 |
nicklas |
194 |
for (TargetedGenotypeDefinition target : targets) |
6361 |
30 Aug 21 |
nicklas |
195 |
{ |
6361 |
30 Aug 21 |
nicklas |
196 |
targetNames.add(target.getName()); |
6361 |
30 Aug 21 |
nicklas |
197 |
} |
6367 |
06 Sep 21 |
nicklas |
198 |
gtJob.setParameterValues("targets", new StringParameterType(255, null, true, 0, 0, 0), targetNames); |
6359 |
27 Aug 21 |
nicklas |
199 |
if (software != null) |
6359 |
27 Aug 21 |
nicklas |
200 |
{ |
6359 |
27 Aug 21 |
nicklas |
201 |
gtJob.setParameterValue("software", new ItemParameterType<Software>(Software.class, null), software); |
6359 |
27 Aug 21 |
nicklas |
202 |
} |
6359 |
27 Aug 21 |
nicklas |
203 |
|
6359 |
27 Aug 21 |
nicklas |
204 |
if (debug) gtJob.setName(gtJob.getName() + " (debug)"); |
6981 |
17 Jan 23 |
nicklas |
205 |
if (partition != null) gtJob.setParameterValue("partition", new StringParameterType(), partition); |
7372 |
06 Oct 23 |
nicklas |
206 |
if (submitOptionsOverride != null) gtJob.setParameterValue("jobOptions", new StringParameterType(), submitOptionsOverride); |
6359 |
27 Aug 21 |
nicklas |
// Register a handler for auto-confirmation (TargetedGenotypeAutoConfirmer) |
6359 |
27 Aug 21 |
nicklas |
208 |
if (autoConfirm) |
6359 |
27 Aug 21 |
nicklas |
209 |
{ |
6359 |
27 Aug 21 |
nicklas |
210 |
gtJob.setParameterValue("AutoConfirmHandler", new StringParameterType(), "TargetedGenotypeAutoConfirmer"); |
6359 |
27 Aug 21 |
nicklas |
211 |
} |
6359 |
27 Aug 21 |
nicklas |
212 |
dc.saveItem(gtJob); |
6359 |
27 Aug 21 |
nicklas |
213 |
|
6657 |
30 Mar 22 |
nicklas |
// Write information needed for each target to 'target_info.txt' |
6657 |
30 Mar 22 |
nicklas |
215 |
StringWriter target_info = new StringWriter(); |
6657 |
30 Mar 22 |
nicklas |
216 |
TableWriter target_info_writer = new TableWriter(target_info); |
6657 |
30 Mar 22 |
nicklas |
217 |
target_info_writer.setDataSeparator(","); |
6657 |
30 Mar 22 |
nicklas |
218 |
for (TargetedGenotypeDefinition target : targets) |
6657 |
30 Mar 22 |
nicklas |
219 |
{ |
6657 |
30 Mar 22 |
nicklas |
220 |
target_info_writer.tablePrintData(target.getName(), target.getVcfFile()); |
6657 |
30 Mar 22 |
nicklas |
221 |
} |
6657 |
30 Mar 22 |
nicklas |
222 |
|
6359 |
27 Aug 21 |
nicklas |
223 |
ScriptBuilder script = new ScriptBuilder(); |
6665 |
05 Apr 22 |
nicklas |
224 |
script.cmd(debug ? "set -eox pipefail" : "set -eo pipefail"); // 'set -e' is not enough to catch failures if a command is piped |
6657 |
30 Mar 22 |
nicklas |
225 |
|
6359 |
27 Aug 21 |
nicklas |
// Set file permissions based on consent or external group! |
6359 |
27 Aug 21 |
nicklas |
227 |
String externalGroup = isExternal ? Reggie.getExternalGroup(aligned.getName()) : null; |
6657 |
30 Mar 22 |
nicklas |
228 |
ScriptUtil.setUmaskForItem(dc, lib, externalGroup, script); |
6693 |
22 Apr 22 |
nicklas |
229 |
script.newLine(); |
6657 |
30 Mar 22 |
nicklas |
230 |
|
6693 |
22 Apr 22 |
nicklas |
231 |
script.cmd(global_env); |
6657 |
30 Mar 22 |
nicklas |
232 |
script.export("ArchiveFolder", archiveFolder); |
6657 |
30 Mar 22 |
nicklas |
233 |
script.export("BamFile", "${ArchiveFolder}"+bamFolder+"/"+bamName); |
6657 |
30 Mar 22 |
nicklas |
234 |
script.export("VcfFolder", "${ArchiveFolder}"+vcfFolder); |
6359 |
27 Aug 21 |
nicklas |
235 |
script.newLine(); |
6657 |
30 Mar 22 |
nicklas |
236 |
script.cmd(targeted_env); |
6657 |
30 Mar 22 |
nicklas |
237 |
if (debug) script.cmd(targeted_envdebug); |
6657 |
30 Mar 22 |
nicklas |
238 |
script.cmd(targeted_execute); |
6359 |
27 Aug 21 |
nicklas |
239 |
|
6359 |
27 Aug 21 |
nicklas |
240 |
if (externalGroup != null) |
6359 |
27 Aug 21 |
nicklas |
241 |
{ |
6364 |
31 Aug 21 |
nicklas |
242 |
ScriptUtil.addChgrp(externalGroup, "${VcfFolder}/genotype_*.vcf", raw.getName(), null, script); |
6359 |
27 Aug 21 |
nicklas |
243 |
} |
6359 |
27 Aug 21 |
nicklas |
244 |
|
6674 |
11 Apr 22 |
nicklas |
245 |
JobDefinition jobDef = new JobDefinition("TargetedGenotypeAnalysis", jobConfig, batchConfig, gtJob); |
6657 |
30 Mar 22 |
nicklas |
246 |
jobDef.addFile(new StringUploadSource("target_info.txt", target_info.toString())); |
6657 |
30 Mar 22 |
nicklas |
247 |
jobDef.addFile(ScriptUtil.upload("targeted-genotyping.sh")); |
6657 |
30 Mar 22 |
nicklas |
248 |
jobDef.addFile(ScriptUtil.upload("variantcall-utils.sh")); |
6657 |
30 Mar 22 |
nicklas |
249 |
jobDef.addFile(ScriptUtil.upload("reggie-utils.sh")); |
6657 |
30 Mar 22 |
nicklas |
250 |
jobDef.addFile(ScriptUtil.upload("stdwrap.sh")); |
6657 |
30 Mar 22 |
nicklas |
251 |
jobDef.addFile(ScriptUtil.upload("stderrwrap.sh")); |
6359 |
27 Aug 21 |
nicklas |
252 |
jobDef.setDebug(debug); |
6359 |
27 Aug 21 |
nicklas |
253 |
jobDef.setCmd(script.toString()); |
6359 |
27 Aug 21 |
nicklas |
254 |
jobDefs.add(jobDef); |
6359 |
27 Aug 21 |
nicklas |
255 |
} |
6359 |
27 Aug 21 |
nicklas |
256 |
|
6359 |
27 Aug 21 |
nicklas |
257 |
return jobDefs; |
6357 |
26 Aug 21 |
nicklas |
258 |
} |
6357 |
26 Aug 21 |
nicklas |
259 |
|
6357 |
26 Aug 21 |
nicklas |
260 |
|
6357 |
26 Aug 21 |
nicklas |
261 |
/** |
6357 |
26 Aug 21 |
nicklas |
Job completion handler for genotype analysis jobs. The handler downloads the |
6357 |
26 Aug 21 |
nicklas |
'files.out' file and create links to the files. |
6357 |
26 Aug 21 |
nicklas |
264 |
*/ |
6357 |
26 Aug 21 |
nicklas |
265 |
public static class TargetedGenotypeJobCompletionHandler |
6357 |
26 Aug 21 |
nicklas |
266 |
implements JobCompletionHandler |
6357 |
26 Aug 21 |
nicklas |
267 |
{ |
6357 |
26 Aug 21 |
nicklas |
268 |
|
6357 |
26 Aug 21 |
nicklas |
269 |
public TargetedGenotypeJobCompletionHandler() |
6357 |
26 Aug 21 |
nicklas |
270 |
{} |
6357 |
26 Aug 21 |
nicklas |
271 |
|
6357 |
26 Aug 21 |
nicklas |
272 |
@Override |
6357 |
26 Aug 21 |
nicklas |
273 |
public String jobCompleted(SessionControl sc, OpenGridSession session, Job job, JobStatus status) |
6357 |
26 Aug 21 |
nicklas |
274 |
{ |
6361 |
30 Aug 21 |
nicklas |
275 |
String jobName = status.getName(); |
6361 |
30 Aug 21 |
nicklas |
276 |
String files = session.getJobFileAsString(jobName, "files.out", "UTF-8"); |
6361 |
30 Aug 21 |
nicklas |
277 |
String msg = parseFiles(sc, job, files); |
6361 |
30 Aug 21 |
nicklas |
278 |
return msg == null ? "Targeted genotyping completed." : msg; |
6357 |
26 Aug 21 |
nicklas |
279 |
} |
6361 |
30 Aug 21 |
nicklas |
280 |
|
6361 |
30 Aug 21 |
nicklas |
281 |
private String parseFiles(SessionControl sc, Job job, String filesOut) |
6361 |
30 Aug 21 |
nicklas |
282 |
{ |
6361 |
30 Aug 21 |
nicklas |
283 |
|
6361 |
30 Aug 21 |
nicklas |
284 |
DbControl dc = null; |
6361 |
30 Aug 21 |
nicklas |
285 |
String msg = null; |
6361 |
30 Aug 21 |
nicklas |
286 |
try |
6361 |
30 Aug 21 |
nicklas |
287 |
{ |
6599 |
22 Feb 22 |
nicklas |
288 |
dc = sc.newDbControl("Reggie: Target genotyping completed handler"); |
6361 |
30 Aug 21 |
nicklas |
289 |
|
6361 |
30 Aug 21 |
nicklas |
290 |
RawBioAssay vCall = (RawBioAssay)job.getParameterValue("variantCall"); |
6361 |
30 Aug 21 |
nicklas |
291 |
Software software = (Software)job.getParameterValue("software"); |
6361 |
30 Aug 21 |
nicklas |
292 |
|
6361 |
30 Aug 21 |
nicklas |
// Create file links |
6361 |
30 Aug 21 |
nicklas |
294 |
boolean useExternalProjectArchive = Reggie.isExternalItem(vCall.getName()); |
6361 |
30 Aug 21 |
nicklas |
295 |
FileServer fileArchive = useExternalProjectArchive ? Fileserver.EXTERNAL_ARCHIVE.load(dc) : Fileserver.PROJECT_ARCHIVE.load(dc); |
6361 |
30 Aug 21 |
nicklas |
296 |
String analysisDir = useExternalProjectArchive ? Reggie.EXTERNAL_ANALYSIS_DIR : Reggie.SECONDARY_ANALYSIS_DIR; |
6361 |
30 Aug 21 |
nicklas |
297 |
|
6361 |
30 Aug 21 |
nicklas |
298 |
String dataFilesFolder = (String)Annotationtype.DATA_FILES_FOLDER.getAnnotationValue(dc, vCall); |
6361 |
30 Aug 21 |
nicklas |
299 |
String baseFolder = Reggie.convertDataFilesFolderToBaseFolder(dataFilesFolder); |
6361 |
30 Aug 21 |
nicklas |
300 |
Directory localDataDir = Directory.getNew(dc, new Path(analysisDir+baseFolder, Path.Type.DIRECTORY)); |
6361 |
30 Aug 21 |
nicklas |
301 |
ItemSubtype vcfType = Subtype.VARIANT_CALL_FORMAT.load(dc); |
6361 |
30 Aug 21 |
nicklas |
302 |
|
6361 |
30 Aug 21 |
nicklas |
303 |
int lineNo = 0; |
6361 |
30 Aug 21 |
nicklas |
304 |
int numVcfFiles = 0; |
6361 |
30 Aug 21 |
nicklas |
305 |
int numGenotypes = 0; |
6388 |
15 Sep 21 |
nicklas |
306 |
int numVariants = 0; |
6365 |
02 Sep 21 |
nicklas |
307 |
String currentTarget = null; |
6388 |
15 Sep 21 |
nicklas |
308 |
TargetedGenotypeDefinition currentTargetDef = null; |
6361 |
30 Aug 21 |
nicklas |
309 |
for (String line : filesOut.split("\n")) |
6361 |
30 Aug 21 |
nicklas |
310 |
{ |
6361 |
30 Aug 21 |
nicklas |
311 |
lineNo++; |
6365 |
02 Sep 21 |
nicklas |
312 |
if (line.startsWith("[") && line.endsWith("]")) |
6365 |
02 Sep 21 |
nicklas |
313 |
{ |
6365 |
02 Sep 21 |
nicklas |
314 |
currentTarget = line.substring(1, line.length()-1); |
6388 |
15 Sep 21 |
nicklas |
315 |
currentTargetDef = getTargetByName(currentTarget); |
6365 |
02 Sep 21 |
nicklas |
316 |
continue; |
6365 |
02 Sep 21 |
nicklas |
317 |
} |
6361 |
30 Aug 21 |
nicklas |
318 |
|
6361 |
30 Aug 21 |
nicklas |
319 |
File f = File.getFile(dc, localDataDir, line.substring(line.lastIndexOf("/")+1), true); |
6361 |
30 Aug 21 |
nicklas |
320 |
f.setFileServer(fileArchive); |
6361 |
30 Aug 21 |
nicklas |
321 |
String fileUrl = "sftp://" + fileArchive.getHost() + dataFilesFolder + "/" + f.getName(); |
6361 |
30 Aug 21 |
nicklas |
322 |
try |
6361 |
30 Aug 21 |
nicklas |
323 |
{ |
6361 |
30 Aug 21 |
nicklas |
324 |
f.setUrl(fileUrl, true); |
6361 |
30 Aug 21 |
nicklas |
325 |
} |
6361 |
30 Aug 21 |
nicklas |
326 |
catch (RuntimeException ex) |
6361 |
30 Aug 21 |
nicklas |
327 |
{ |
6361 |
30 Aug 21 |
nicklas |
328 |
f.setUrl(fileUrl, false); |
6365 |
02 Sep 21 |
nicklas |
329 |
} |
6365 |
02 Sep 21 |
nicklas |
330 |
if (currentTarget != null) |
6365 |
02 Sep 21 |
nicklas |
331 |
{ |
6365 |
02 Sep 21 |
nicklas |
332 |
Annotationtype.TARGETED_GENOTYPE.setAnnotationValue(dc, f, currentTarget); |
6365 |
02 Sep 21 |
nicklas |
333 |
} |
6361 |
30 Aug 21 |
nicklas |
334 |
if (!f.isInDatabase()) dc.saveItem(f); |
6361 |
30 Aug 21 |
nicklas |
335 |
|
6361 |
30 Aug 21 |
nicklas |
336 |
AnyToAny link = AnyToAny.getNewOrExisting(dc, vCall, f.getName(), f, false); |
6361 |
30 Aug 21 |
nicklas |
337 |
if (!link.isInDatabase()) dc.saveItem(link); |
6410 |
20 Sep 21 |
nicklas |
338 |
|
6410 |
20 Sep 21 |
nicklas |
339 |
List<String> description = new ArrayList<String>(); |
6388 |
15 Sep 21 |
nicklas |
340 |
|
6361 |
30 Aug 21 |
nicklas |
341 |
if (f.getName().endsWith(".vcf")) |
6361 |
30 Aug 21 |
nicklas |
342 |
{ |
6361 |
30 Aug 21 |
nicklas |
343 |
numVcfFiles++; |
6361 |
30 Aug 21 |
nicklas |
344 |
InputStream vcfIn = null; |
6361 |
30 Aug 21 |
nicklas |
345 |
try |
6361 |
30 Aug 21 |
nicklas |
346 |
{ |
6368 |
06 Sep 21 |
nicklas |
347 |
VcfParser vcfParser = new VcfParser(); |
6368 |
06 Sep 21 |
nicklas |
348 |
vcfParser.setUseLineNoAsId(true); |
6361 |
30 Aug 21 |
nicklas |
349 |
vcfIn = f.getDownloadStream(0); |
6361 |
30 Aug 21 |
nicklas |
350 |
VcfData vcf = vcfParser.parse(vcfIn, f.getName()); |
6388 |
15 Sep 21 |
nicklas |
351 |
int numTargetGenotypes = vcfParser.getSnpCount(); |
6388 |
15 Sep 21 |
nicklas |
352 |
int numTargetVariants = vcf.getHomAltCount()+vcf.getHetCount(); |
6388 |
15 Sep 21 |
nicklas |
353 |
numGenotypes += numTargetGenotypes; |
6388 |
15 Sep 21 |
nicklas |
354 |
numVariants += numTargetVariants; |
6388 |
15 Sep 21 |
nicklas |
355 |
Annotationtype.NUM_TARGETED_GENOTYPES.setAnnotationValue(dc, f, numTargetGenotypes); |
6388 |
15 Sep 21 |
nicklas |
356 |
Annotationtype.NUM_TARGETED_VARIANTS.setAnnotationValue(dc, f, numTargetVariants); |
6410 |
20 Sep 21 |
nicklas |
357 |
description.add( |
6410 |
20 Sep 21 |
nicklas |
358 |
"Found "+(numTargetVariants==1?"1 variant":numTargetVariants+" variants")+" in " + |
6410 |
20 Sep 21 |
nicklas |
359 |
(numTargetGenotypes==1?"1 genotyped target.":numTargetGenotypes+ " genotyped targets.") |
6410 |
20 Sep 21 |
nicklas |
360 |
); |
6361 |
30 Aug 21 |
nicklas |
361 |
} |
6361 |
30 Aug 21 |
nicklas |
362 |
catch (Exception ex) |
6361 |
30 Aug 21 |
nicklas |
363 |
{ |
6361 |
30 Aug 21 |
nicklas |
364 |
throw new BaseException("Could not parse '" + f.getName() + "'", ex); |
6361 |
30 Aug 21 |
nicklas |
365 |
} |
6361 |
30 Aug 21 |
nicklas |
366 |
finally |
6361 |
30 Aug 21 |
nicklas |
367 |
{ |
6361 |
30 Aug 21 |
nicklas |
368 |
FileUtil.close(vcfIn); |
6361 |
30 Aug 21 |
nicklas |
369 |
} |
6361 |
30 Aug 21 |
nicklas |
370 |
} |
6410 |
20 Sep 21 |
nicklas |
371 |
|
6411 |
20 Sep 21 |
nicklas |
372 |
if (currentTargetDef != null) |
6411 |
20 Sep 21 |
nicklas |
373 |
{ |
6411 |
20 Sep 21 |
nicklas |
374 |
description.add(currentTargetDef.getDescription()); |
6411 |
20 Sep 21 |
nicklas |
375 |
} |
6410 |
20 Sep 21 |
nicklas |
376 |
f.setDescription(Reggie.joinTexts(description)); |
6410 |
20 Sep 21 |
nicklas |
377 |
if (software != null) |
6410 |
20 Sep 21 |
nicklas |
378 |
{ |
6411 |
20 Sep 21 |
nicklas |
379 |
description.add("Created with " + software.getName()); |
6410 |
20 Sep 21 |
nicklas |
380 |
AnyToAny swLink = AnyToAny.getNewOrExisting(dc, f, "software", software, false); |
6410 |
20 Sep 21 |
nicklas |
381 |
if (!swLink.isInDatabase()) dc.saveItem(swLink); |
6410 |
20 Sep 21 |
nicklas |
382 |
} |
6411 |
20 Sep 21 |
nicklas |
383 |
link.setDescription(Reggie.joinTexts(description)); |
6361 |
30 Aug 21 |
nicklas |
384 |
} |
6388 |
15 Sep 21 |
nicklas |
385 |
msg = "Genotyped " + numGenotypes + " targets in " + (numVcfFiles==1?"1 files":numVcfFiles+" files") + |
6388 |
15 Sep 21 |
nicklas |
386 |
". Found " + (numVariants==1?"1 variant":numVariants+" variants")+"."; |
6361 |
30 Aug 21 |
nicklas |
387 |
dc.commit(); |
6361 |
30 Aug 21 |
nicklas |
388 |
} |
6361 |
30 Aug 21 |
nicklas |
389 |
finally |
6361 |
30 Aug 21 |
nicklas |
390 |
{ |
6361 |
30 Aug 21 |
nicklas |
391 |
if (dc != null) dc.close(); |
6361 |
30 Aug 21 |
nicklas |
392 |
} |
6361 |
30 Aug 21 |
nicklas |
393 |
|
6361 |
30 Aug 21 |
nicklas |
394 |
return msg == null ? "" : msg; |
6361 |
30 Aug 21 |
nicklas |
395 |
} |
6361 |
30 Aug 21 |
nicklas |
396 |
|
6360 |
27 Aug 21 |
nicklas |
397 |
} |
6360 |
27 Aug 21 |
nicklas |
398 |
|
6360 |
27 Aug 21 |
nicklas |
399 |
public static class TargetedGenotypeDefinition |
6360 |
27 Aug 21 |
nicklas |
400 |
{ |
6360 |
27 Aug 21 |
nicklas |
401 |
private final String name; |
6360 |
27 Aug 21 |
nicklas |
402 |
private final String description; |
6360 |
27 Aug 21 |
nicklas |
403 |
private final String vcfFile; |
6357 |
26 Aug 21 |
nicklas |
404 |
|
6360 |
27 Aug 21 |
nicklas |
405 |
TargetedGenotypeDefinition(Element e) |
6360 |
27 Aug 21 |
nicklas |
406 |
{ |
6360 |
27 Aug 21 |
nicklas |
407 |
this.name = e.getAttributeValue("name"); |
6360 |
27 Aug 21 |
nicklas |
408 |
this.vcfFile = e.getTextTrim(); |
6360 |
27 Aug 21 |
nicklas |
409 |
this.description = e.getAttributeValue("description"); |
6360 |
27 Aug 21 |
nicklas |
410 |
} |
6360 |
27 Aug 21 |
nicklas |
411 |
|
6360 |
27 Aug 21 |
nicklas |
412 |
/** |
6360 |
27 Aug 21 |
nicklas |
Get the name of the target. Configured in reggie-config.xml, "name" attribute of the "target" tag. |
6360 |
27 Aug 21 |
nicklas |
414 |
*/ |
6360 |
27 Aug 21 |
nicklas |
415 |
public String getName() |
6360 |
27 Aug 21 |
nicklas |
416 |
{ |
6360 |
27 Aug 21 |
nicklas |
417 |
return name; |
6360 |
27 Aug 21 |
nicklas |
418 |
} |
6360 |
27 Aug 21 |
nicklas |
419 |
|
6360 |
27 Aug 21 |
nicklas |
420 |
/** |
6360 |
27 Aug 21 |
nicklas |
Get a description of the target definition. |
6360 |
27 Aug 21 |
nicklas |
422 |
*/ |
6360 |
27 Aug 21 |
nicklas |
423 |
public String getDescription() |
6360 |
27 Aug 21 |
nicklas |
424 |
{ |
6360 |
27 Aug 21 |
nicklas |
425 |
return description; |
6360 |
27 Aug 21 |
nicklas |
426 |
} |
6360 |
27 Aug 21 |
nicklas |
427 |
|
6360 |
27 Aug 21 |
nicklas |
428 |
/** |
6360 |
27 Aug 21 |
nicklas |
Get the name of the VCF file with variant definitions. Configured in reggie-config.xml, |
6360 |
27 Aug 21 |
nicklas |
value of the "target" tag. |
6360 |
27 Aug 21 |
nicklas |
431 |
*/ |
6360 |
27 Aug 21 |
nicklas |
432 |
public String getVcfFile() |
6360 |
27 Aug 21 |
nicklas |
433 |
{ |
6360 |
27 Aug 21 |
nicklas |
434 |
return vcfFile; |
6360 |
27 Aug 21 |
nicklas |
435 |
} |
6360 |
27 Aug 21 |
nicklas |
436 |
|
6360 |
27 Aug 21 |
nicklas |
437 |
public JSONObject asJSONObject() |
6360 |
27 Aug 21 |
nicklas |
438 |
{ |
6360 |
27 Aug 21 |
nicklas |
439 |
JSONObject json = new JSONObject(); |
6360 |
27 Aug 21 |
nicklas |
440 |
json.put("name", name); |
6360 |
27 Aug 21 |
nicklas |
441 |
json.put("description", description); |
6360 |
27 Aug 21 |
nicklas |
442 |
json.put("vcfFile", vcfFile); |
6360 |
27 Aug 21 |
nicklas |
443 |
return json; |
6360 |
27 Aug 21 |
nicklas |
444 |
} |
6357 |
26 Aug 21 |
nicklas |
445 |
|
6357 |
26 Aug 21 |
nicklas |
446 |
} |
6360 |
27 Aug 21 |
nicklas |
447 |
|
6357 |
26 Aug 21 |
nicklas |
448 |
} |