5030 |
16 Oct 18 |
nicklas |
1 |
package net.sf.basedb.reggie.grid; |
5030 |
16 Oct 18 |
nicklas |
2 |
|
5049 |
23 Oct 18 |
nicklas |
3 |
import java.io.IOException; |
5039 |
19 Oct 18 |
nicklas |
4 |
import java.io.InputStream; |
5032 |
16 Oct 18 |
nicklas |
5 |
import java.util.ArrayList; |
5030 |
16 Oct 18 |
nicklas |
6 |
import java.util.List; |
5046 |
22 Oct 18 |
nicklas |
7 |
import java.util.Locale; |
5030 |
16 Oct 18 |
nicklas |
8 |
|
5030 |
16 Oct 18 |
nicklas |
9 |
import org.slf4j.LoggerFactory; |
5030 |
16 Oct 18 |
nicklas |
10 |
|
5033 |
17 Oct 18 |
nicklas |
11 |
import net.sf.basedb.core.AnyToAny; |
5030 |
16 Oct 18 |
nicklas |
12 |
import net.sf.basedb.core.DbControl; |
5032 |
16 Oct 18 |
nicklas |
13 |
import net.sf.basedb.core.DerivedBioAssay; |
5033 |
17 Oct 18 |
nicklas |
14 |
import net.sf.basedb.core.Directory; |
5032 |
16 Oct 18 |
nicklas |
15 |
import net.sf.basedb.core.File; |
5033 |
17 Oct 18 |
nicklas |
16 |
import net.sf.basedb.core.FileServer; |
5032 |
16 Oct 18 |
nicklas |
17 |
import net.sf.basedb.core.ItemList; |
5032 |
16 Oct 18 |
nicklas |
18 |
import net.sf.basedb.core.ItemNotFoundException; |
5033 |
17 Oct 18 |
nicklas |
19 |
import net.sf.basedb.core.ItemParameterType; |
5039 |
19 Oct 18 |
nicklas |
20 |
import net.sf.basedb.core.ItemSubtype; |
5030 |
16 Oct 18 |
nicklas |
21 |
import net.sf.basedb.core.Job; |
5033 |
17 Oct 18 |
nicklas |
22 |
import net.sf.basedb.core.Path; |
5030 |
16 Oct 18 |
nicklas |
23 |
import net.sf.basedb.core.SessionControl; |
5030 |
16 Oct 18 |
nicklas |
24 |
import net.sf.basedb.core.Software; |
6021 |
23 Oct 20 |
nicklas |
25 |
import net.sf.basedb.core.StringParameterType; |
5030 |
16 Oct 18 |
nicklas |
26 |
import net.sf.basedb.opengrid.JobDefinition; |
5030 |
16 Oct 18 |
nicklas |
27 |
import net.sf.basedb.opengrid.JobStatus; |
5030 |
16 Oct 18 |
nicklas |
28 |
import net.sf.basedb.opengrid.OpenGridCluster; |
5030 |
16 Oct 18 |
nicklas |
29 |
import net.sf.basedb.opengrid.OpenGridSession; |
5032 |
16 Oct 18 |
nicklas |
30 |
import net.sf.basedb.opengrid.ScriptBuilder; |
5032 |
16 Oct 18 |
nicklas |
31 |
import net.sf.basedb.opengrid.config.ClusterConfig; |
5032 |
16 Oct 18 |
nicklas |
32 |
import net.sf.basedb.opengrid.config.JobConfig; |
5030 |
16 Oct 18 |
nicklas |
33 |
import net.sf.basedb.opengrid.service.JobCompletionHandler; |
5032 |
16 Oct 18 |
nicklas |
34 |
import net.sf.basedb.reggie.Reggie; |
5032 |
16 Oct 18 |
nicklas |
35 |
import net.sf.basedb.reggie.XmlConfig; |
5040 |
19 Oct 18 |
nicklas |
36 |
import net.sf.basedb.reggie.baf.BafData; |
5050 |
24 Oct 18 |
nicklas |
37 |
import net.sf.basedb.reggie.baf.MBafOptions; |
5040 |
19 Oct 18 |
nicklas |
38 |
import net.sf.basedb.reggie.baf.MBafParser; |
5049 |
23 Oct 18 |
nicklas |
39 |
import net.sf.basedb.reggie.baf.MBafPlot; |
5050 |
24 Oct 18 |
nicklas |
40 |
import net.sf.basedb.reggie.baf.Region; |
5049 |
23 Oct 18 |
nicklas |
41 |
import net.sf.basedb.reggie.baf.MBafPlot.Result; |
5030 |
16 Oct 18 |
nicklas |
42 |
import net.sf.basedb.reggie.dao.AlignedSequences; |
5032 |
16 Oct 18 |
nicklas |
43 |
import net.sf.basedb.reggie.dao.Annotationtype; |
5032 |
16 Oct 18 |
nicklas |
44 |
import net.sf.basedb.reggie.dao.BiomaterialList; |
5032 |
16 Oct 18 |
nicklas |
45 |
import net.sf.basedb.reggie.dao.Datafiletype; |
5033 |
17 Oct 18 |
nicklas |
46 |
import net.sf.basedb.reggie.dao.Fileserver; |
5032 |
16 Oct 18 |
nicklas |
47 |
import net.sf.basedb.reggie.dao.Library; |
5032 |
16 Oct 18 |
nicklas |
48 |
import net.sf.basedb.reggie.dao.Subtype; |
5039 |
19 Oct 18 |
nicklas |
49 |
import net.sf.basedb.util.FileUtil; |
5032 |
16 Oct 18 |
nicklas |
50 |
import net.sf.basedb.util.Values; |
7079 |
27 Mar 23 |
nicklas |
51 |
import net.sf.basedb.util.extensions.logging.ExtensionsLog; |
7079 |
27 Mar 23 |
nicklas |
52 |
import net.sf.basedb.util.extensions.logging.ExtensionsLogger; |
5030 |
16 Oct 18 |
nicklas |
53 |
|
5030 |
16 Oct 18 |
nicklas |
54 |
/**
	Helper class for creating the items needed for generating an mBAF
	analysis script and sending it to the cluster for execution.

	@author nicklas
	@since 4.20
*/
5030 |
16 Oct 18 |
nicklas |
61 |
public class MBafJobCreator |
6674 |
11 Apr 22 |
nicklas |
62 |
extends AbstractJobCreator |
5030 |
16 Oct 18 |
nicklas |
63 |
{ |
5030 |
16 Oct 18 |
nicklas |
64 |
private Software software; |
5030 |
16 Oct 18 |
nicklas |
65 |
|
5030 |
16 Oct 18 |
nicklas |
66 |
/**
	Create a new job creator. No software item is set until
	{@link #setSoftware(Software)} is called.
*/
public MBafJobCreator()
{
	super();
}
5030 |
16 Oct 18 |
nicklas |
68 |
|
5030 |
16 Oct 18 |
nicklas |
69 |
/** |
5030 |
16 Oct 18 |
nicklas |
Set the software item to set on created VCF files. |
5030 |
16 Oct 18 |
nicklas |
71 |
*/ |
5030 |
16 Oct 18 |
nicklas |
72 |
public void setSoftware(Software software) |
5030 |
16 Oct 18 |
nicklas |
73 |
{ |
5030 |
16 Oct 18 |
nicklas |
74 |
this.software = software; |
5030 |
16 Oct 18 |
nicklas |
75 |
} |
5030 |
16 Oct 18 |
nicklas |
76 |
|
5030 |
16 Oct 18 |
nicklas |
77 |
/** |
5030 |
16 Oct 18 |
nicklas |
Schedule jobs on the given cluster for running mBAF analysis. |
5030 |
16 Oct 18 |
nicklas |
@return A list with the corresponding jobs in BASE |
5030 |
16 Oct 18 |
nicklas |
80 |
*/ |
5030 |
16 Oct 18 |
nicklas |
81 |
public List<JobDefinition> createMBafJobs(DbControl dc, OpenGridCluster cluster, List<AlignedSequences> alignedSequences) |
5030 |
16 Oct 18 |
nicklas |
82 |
{ |
5032 |
16 Oct 18 |
nicklas |
83 |
SessionControl sc = dc.getSessionControl(); |
5032 |
16 Oct 18 |
nicklas |
84 |
|
5032 |
16 Oct 18 |
nicklas |
85 |
ClusterConfig clusterCfg = cluster.getConfig(); |
5032 |
16 Oct 18 |
nicklas |
86 |
XmlConfig cfg = Reggie.getConfig(cluster.getId()); |
5032 |
16 Oct 18 |
nicklas |
87 |
if (cfg == null) |
5032 |
16 Oct 18 |
nicklas |
88 |
{ |
5032 |
16 Oct 18 |
nicklas |
89 |
throw new ItemNotFoundException("No configuration in reggie-config.xml for cluster: " + cluster.getId()); |
5032 |
16 Oct 18 |
nicklas |
90 |
} |
5032 |
16 Oct 18 |
nicklas |
91 |
String parameterSet = (String)Annotationtype.PARAMETER_SET.getAnnotationValue(dc, software); |
5032 |
16 Oct 18 |
nicklas |
92 |
|
5032 |
16 Oct 18 |
nicklas |
// Get global options |
6693 |
22 Apr 22 |
nicklas |
94 |
String global_env = ScriptUtil.multilineIndent(cfg.getConfig("global-env")); |
6653 |
23 Mar 22 |
nicklas |
95 |
String projectArchive = cfg.getRequiredConfig("project-archive", null); |
6653 |
23 Mar 22 |
nicklas |
96 |
String externalArchive = cfg.getConfig("external-archive", null, projectArchive); |
5032 |
16 Oct 18 |
nicklas |
97 |
|
6653 |
23 Mar 22 |
nicklas |
// HaplotypeCaller |
7372 |
06 Oct 23 |
nicklas |
99 |
String mbaf_submit = cfg.getConfig("mbaf/submit", parameterSet, null); |
7372 |
06 Oct 23 |
nicklas |
100 |
String mbaf_submit_debug = cfg.getConfig("mbaf/submit-debug", parameterSet, null); |
6653 |
23 Mar 22 |
nicklas |
101 |
String mbaf_env = ScriptUtil.multilineIndent(cfg.getRequiredConfig("mbaf/env", parameterSet)); |
6665 |
05 Apr 22 |
nicklas |
102 |
String mbaf_envdebug = ScriptUtil.multilineIndent(cfg.getConfig("mbaf/env-debug", parameterSet, null)); |
6653 |
23 Mar 22 |
nicklas |
103 |
String mbaf_execute = ScriptUtil.multilineIndent(cfg.getConfig("mbaf/execute", parameterSet, "./mbaf.sh")); |
5032 |
16 Oct 18 |
nicklas |
104 |
|
5032 |
16 Oct 18 |
nicklas |
// Selected items must be removed from this list |
5032 |
16 Oct 18 |
nicklas |
106 |
ItemList mbafPipeline = BiomaterialList.MBAF_PIPELINE.load(dc); |
5032 |
16 Oct 18 |
nicklas |
107 |
|
5032 |
16 Oct 18 |
nicklas |
// Options common for all jobs |
5032 |
16 Oct 18 |
nicklas |
109 |
JobConfig jobConfig = new JobConfig(); |
5032 |
16 Oct 18 |
nicklas |
110 |
if (priority != null) jobConfig.setPriority(priority); |
7372 |
06 Oct 23 |
nicklas |
111 |
if (partition != null) jobConfig.setSbatchOption("partition", ScriptUtil.checkValidScriptParameter(partition)); |
7372 |
06 Oct 23 |
nicklas |
112 |
jobConfig.convertOptionsTo(clusterCfg.getType()); |
7372 |
06 Oct 23 |
nicklas |
113 |
if (submitOptionsOverride != null) |
7372 |
06 Oct 23 |
nicklas |
114 |
{ |
7372 |
06 Oct 23 |
nicklas |
115 |
ScriptUtil.addSubmitOptions(jobConfig, submitOptionsOverride, clusterCfg.getType()); |
7372 |
06 Oct 23 |
nicklas |
116 |
} |
7372 |
06 Oct 23 |
nicklas |
117 |
else |
7372 |
06 Oct 23 |
nicklas |
118 |
{ |
7372 |
06 Oct 23 |
nicklas |
119 |
ScriptUtil.addSubmitOptions(jobConfig, mbaf_submit, clusterCfg.getType()); |
7372 |
06 Oct 23 |
nicklas |
120 |
if (debug) ScriptUtil.addSubmitOptions(jobConfig, mbaf_submit_debug, clusterCfg.getType()); |
7372 |
06 Oct 23 |
nicklas |
121 |
} |
5032 |
16 Oct 18 |
nicklas |
122 |
|
5032 |
16 Oct 18 |
nicklas |
// We submit one job for each raw bioassay to the cluster |
5032 |
16 Oct 18 |
nicklas |
124 |
List<JobDefinition> jobDefs = new ArrayList<JobDefinition>(alignedSequences.size()); |
5032 |
16 Oct 18 |
nicklas |
125 |
|
5032 |
16 Oct 18 |
nicklas |
126 |
for (AlignedSequences as : alignedSequences) |
5032 |
16 Oct 18 |
nicklas |
127 |
{ |
5032 |
16 Oct 18 |
nicklas |
128 |
as = AlignedSequences.getById(dc, as.getId()); // Ensure item is loaded in this transaction |
5032 |
16 Oct 18 |
nicklas |
129 |
|
5032 |
16 Oct 18 |
nicklas |
// Get some information about the aligned data that we need |
5032 |
16 Oct 18 |
nicklas |
131 |
DerivedBioAssay aligned = as.getDerivedBioAssay(); |
5364 |
16 Apr 19 |
nicklas |
132 |
mbafPipeline.removeItem(aligned); |
5032 |
16 Oct 18 |
nicklas |
133 |
|
5032 |
16 Oct 18 |
nicklas |
134 |
Library lib = Library.get(aligned.getExtract()); |
5596 |
11 Sep 19 |
nicklas |
135 |
boolean isExternal = Reggie.isExternalItem(aligned.getName()); |
6653 |
23 Mar 22 |
nicklas |
136 |
String archiveFolder = isExternal ? externalArchive : projectArchive; |
5032 |
16 Oct 18 |
nicklas |
137 |
String bamFolder = (String)Annotationtype.DATA_FILES_FOLDER.getAnnotationValue(dc, aligned); |
5032 |
16 Oct 18 |
nicklas |
138 |
File bamFile = Datafiletype.BAM.getFile(dc, aligned); |
5032 |
16 Oct 18 |
nicklas |
139 |
|
5032 |
16 Oct 18 |
nicklas |
// Create job |
5032 |
16 Oct 18 |
nicklas |
141 |
Job mBafJob = Job.getNew(dc, null, null, null); |
5032 |
16 Oct 18 |
nicklas |
142 |
mBafJob.setItemSubtype(Subtype.MBAF_JOB.get(dc)); |
5032 |
16 Oct 18 |
nicklas |
143 |
mBafJob.setPluginVersion("reggie-"+Reggie.VERSION); |
5032 |
16 Oct 18 |
nicklas |
144 |
mBafJob.setSendMessage(Values.getBoolean(sc.getUserClientSetting("plugins.sendmessage"), false)); |
5032 |
16 Oct 18 |
nicklas |
145 |
mBafJob.setName("Run mBAF analysis " + aligned.getName()); |
5033 |
17 Oct 18 |
nicklas |
146 |
mBafJob.setParameterValue("alignment", new ItemParameterType<DerivedBioAssay>(DerivedBioAssay.class, null), aligned); |
5057 |
29 Oct 18 |
nicklas |
147 |
if (software != null) |
5057 |
29 Oct 18 |
nicklas |
148 |
{ |
5057 |
29 Oct 18 |
nicklas |
149 |
mBafJob.setParameterValue("software", new ItemParameterType<Software>(Software.class, null), software); |
5057 |
29 Oct 18 |
nicklas |
150 |
} |
5057 |
29 Oct 18 |
nicklas |
151 |
|
5032 |
16 Oct 18 |
nicklas |
152 |
if (debug) mBafJob.setName(mBafJob.getName() + " (debug)"); |
6981 |
17 Jan 23 |
nicklas |
153 |
if (partition != null) mBafJob.setParameterValue("partition", new StringParameterType(), partition); |
6022 |
26 Oct 20 |
nicklas |
// Register a handler for auto-confirmation (MBafAutoConfirmer) |
5035 |
17 Oct 18 |
nicklas |
155 |
if (autoConfirm) |
5035 |
17 Oct 18 |
nicklas |
156 |
{ |
6022 |
26 Oct 20 |
nicklas |
157 |
mBafJob.setParameterValue("AutoConfirmHandler", new StringParameterType(), "MBafAutoConfirmer"); |
5035 |
17 Oct 18 |
nicklas |
158 |
} |
7372 |
06 Oct 23 |
nicklas |
159 |
if (submitOptionsOverride != null) mBafJob.setParameterValue("jobOptions", new StringParameterType(), submitOptionsOverride); |
6022 |
26 Oct 20 |
nicklas |
160 |
dc.saveItem(mBafJob); |
5035 |
17 Oct 18 |
nicklas |
161 |
|
5032 |
16 Oct 18 |
nicklas |
// Checks to make sure no bad things are included in script file |
5032 |
16 Oct 18 |
nicklas |
163 |
String bamName = ScriptUtil.checkValidScriptParameter(bamFile.getName()); |
5032 |
16 Oct 18 |
nicklas |
164 |
|
5032 |
16 Oct 18 |
nicklas |
165 |
ScriptBuilder script = new ScriptBuilder(); |
6665 |
05 Apr 22 |
nicklas |
166 |
script.cmd(debug ? "set -ex" : "set -e"); |
5596 |
11 Sep 19 |
nicklas |
// Set file permissions based on consent or external group! |
5596 |
11 Sep 19 |
nicklas |
168 |
String externalGroup = isExternal ? Reggie.getExternalGroup(aligned.getName()) : null; |
6626 |
04 Mar 22 |
nicklas |
169 |
ScriptUtil.setUmaskForItem(dc, lib, externalGroup, script); |
6631 |
08 Mar 22 |
nicklas |
170 |
script.newLine(); |
6693 |
22 Apr 22 |
nicklas |
171 |
script.cmd(global_env); |
6653 |
23 Mar 22 |
nicklas |
172 |
script.export("ArchiveFolder", archiveFolder); |
6653 |
23 Mar 22 |
nicklas |
173 |
script.export("BamFolder", "${ArchiveFolder}"+bamFolder); |
6631 |
08 Mar 22 |
nicklas |
174 |
script.export("BamName", bamName.replace(".bam", "")); |
6631 |
08 Mar 22 |
nicklas |
175 |
script.newLine(); |
6653 |
23 Mar 22 |
nicklas |
176 |
script.cmd(mbaf_env); |
6665 |
05 Apr 22 |
nicklas |
177 |
if (debug) script.cmd(mbaf_envdebug); |
6628 |
07 Mar 22 |
nicklas |
178 |
script.cmd(mbaf_execute); |
5596 |
11 Sep 19 |
nicklas |
179 |
if (externalGroup != null) |
5596 |
11 Sep 19 |
nicklas |
180 |
{ |
5930 |
06 May 20 |
nicklas |
181 |
ScriptUtil.addChgrp(externalGroup, "${BamFolder}/mbaf*", aligned.getName(), null, script); |
5596 |
11 Sep 19 |
nicklas |
182 |
} |
5596 |
11 Sep 19 |
nicklas |
183 |
|
6674 |
11 Apr 22 |
nicklas |
184 |
JobDefinition jobDef = new JobDefinition("mBafAnalysis", jobConfig, batchConfig, mBafJob); |
6628 |
07 Mar 22 |
nicklas |
185 |
jobDef.addFile(ScriptUtil.upload("mbaf.sh")); |
6628 |
07 Mar 22 |
nicklas |
186 |
jobDef.addFile(ScriptUtil.upload("reggie-utils.sh")); |
6628 |
07 Mar 22 |
nicklas |
187 |
jobDef.addFile(ScriptUtil.upload("stdwrap.sh")); |
5032 |
16 Oct 18 |
nicklas |
188 |
jobDef.setDebug(debug); |
6628 |
07 Mar 22 |
nicklas |
189 |
jobDef.setCmd(script.toString()); |
5032 |
16 Oct 18 |
nicklas |
190 |
jobDefs.add(jobDef); |
5032 |
16 Oct 18 |
nicklas |
191 |
} |
5032 |
16 Oct 18 |
nicklas |
192 |
|
5032 |
16 Oct 18 |
nicklas |
193 |
return jobDefs; |
5030 |
16 Oct 18 |
nicklas |
194 |
} |
5030 |
16 Oct 18 |
nicklas |
195 |
|
5030 |
16 Oct 18 |
nicklas |
196 |
|
5030 |
16 Oct 18 |
nicklas |
197 |
/**
	Job completion handler for mBAF analysis jobs. The handler downloads the
	'files.out' file and creates links to the files listed in it.
*/
5030 |
16 Oct 18 |
nicklas |
201 |
public static class MBafJobCompletionHandler |
5030 |
16 Oct 18 |
nicklas |
202 |
implements JobCompletionHandler |
5030 |
16 Oct 18 |
nicklas |
203 |
{ |
7079 |
27 Mar 23 |
nicklas |
204 |
private static final ExtensionsLogger logger = |
7079 |
27 Mar 23 |
nicklas |
205 |
ExtensionsLog.getLogger(JobCompletionHandlerFactory.ID, true).wrap(LoggerFactory.getLogger(MBafJobCompletionHandler.class)); |
5030 |
16 Oct 18 |
nicklas |
206 |
|
5030 |
16 Oct 18 |
nicklas |
207 |
/**
	Create a new completion handler. The handler keeps no state of
	its own.
*/
public MBafJobCompletionHandler()
{
	super();
}
5030 |
16 Oct 18 |
nicklas |
209 |
|
5030 |
16 Oct 18 |
nicklas |
210 |
@Override |
5030 |
16 Oct 18 |
nicklas |
211 |
public String jobCompleted(SessionControl sc, OpenGridSession session, Job job, JobStatus status) |
5030 |
16 Oct 18 |
nicklas |
212 |
{ |
5033 |
17 Oct 18 |
nicklas |
213 |
String jobName = status.getName(); |
5033 |
17 Oct 18 |
nicklas |
214 |
String files = session.getJobFileAsString(jobName, "files.out", "UTF-8"); |
5033 |
17 Oct 18 |
nicklas |
215 |
String msg = parseFiles(sc, job, files); |
5033 |
17 Oct 18 |
nicklas |
216 |
return "mBAF analysis completed. " + msg; |
5030 |
16 Oct 18 |
nicklas |
217 |
} |
5030 |
16 Oct 18 |
nicklas |
218 |
|
5033 |
17 Oct 18 |
nicklas |
219 |
private String parseFiles(SessionControl sc, Job job, String filesOut) |
5033 |
17 Oct 18 |
nicklas |
220 |
{ |
5033 |
17 Oct 18 |
nicklas |
221 |
|
5033 |
17 Oct 18 |
nicklas |
222 |
DbControl dc = null; |
5033 |
17 Oct 18 |
nicklas |
223 |
String msg = null; |
5033 |
17 Oct 18 |
nicklas |
224 |
try |
5033 |
17 Oct 18 |
nicklas |
225 |
{ |
6599 |
22 Feb 22 |
nicklas |
226 |
dc = sc.newDbControl("Reggie: mBAF analysis completed handler"); |
5033 |
17 Oct 18 |
nicklas |
227 |
|
5033 |
17 Oct 18 |
nicklas |
228 |
DerivedBioAssay aligned = (DerivedBioAssay)job.getParameterValue("alignment"); |
5033 |
17 Oct 18 |
nicklas |
229 |
aligned = DerivedBioAssay.getById(dc, aligned.getId()); |
5057 |
29 Oct 18 |
nicklas |
230 |
Software software = (Software)job.getParameterValue("software"); |
5033 |
17 Oct 18 |
nicklas |
231 |
|
5033 |
17 Oct 18 |
nicklas |
// Create file links |
5553 |
12 Aug 19 |
nicklas |
233 |
boolean useExternalProjectArchive = Reggie.isExternalItem(aligned.getName()); |
5033 |
17 Oct 18 |
nicklas |
234 |
FileServer fileArchive = useExternalProjectArchive ? Fileserver.EXTERNAL_ARCHIVE.load(dc) : Fileserver.PROJECT_ARCHIVE.load(dc); |
5033 |
17 Oct 18 |
nicklas |
235 |
String analysisDir = useExternalProjectArchive ? Reggie.EXTERNAL_ANALYSIS_DIR : Reggie.SECONDARY_ANALYSIS_DIR; |
5033 |
17 Oct 18 |
nicklas |
236 |
|
5033 |
17 Oct 18 |
nicklas |
237 |
String dataFilesFolder = (String)Annotationtype.DATA_FILES_FOLDER.getAnnotationValue(dc, aligned); |
5033 |
17 Oct 18 |
nicklas |
238 |
String baseFolder = Reggie.convertDataFilesFolderToBaseFolder(dataFilesFolder); |
5033 |
17 Oct 18 |
nicklas |
239 |
Directory localDataDir = Directory.getNew(dc, new Path(analysisDir+baseFolder, Path.Type.DIRECTORY)); |
5039 |
19 Oct 18 |
nicklas |
240 |
ItemSubtype vcfType = Subtype.VARIANT_CALL_FORMAT.load(dc); |
5033 |
17 Oct 18 |
nicklas |
241 |
|
5033 |
17 Oct 18 |
nicklas |
242 |
int lineNo = 0; |
5033 |
17 Oct 18 |
nicklas |
243 |
for (String line : filesOut.split("\n")) |
5033 |
17 Oct 18 |
nicklas |
244 |
{ |
5033 |
17 Oct 18 |
nicklas |
245 |
lineNo++; |
5033 |
17 Oct 18 |
nicklas |
246 |
|
5033 |
17 Oct 18 |
nicklas |
247 |
File f = File.getFile(dc, localDataDir, line.substring(line.lastIndexOf("/")+1), true); |
5033 |
17 Oct 18 |
nicklas |
248 |
f.setFileServer(fileArchive); |
5033 |
17 Oct 18 |
nicklas |
249 |
String fileUrl = "sftp://" + fileArchive.getHost() + dataFilesFolder + "/" + f.getName(); |
5033 |
17 Oct 18 |
nicklas |
250 |
try |
5033 |
17 Oct 18 |
nicklas |
251 |
{ |
5033 |
17 Oct 18 |
nicklas |
252 |
f.setUrl(fileUrl, true); |
5033 |
17 Oct 18 |
nicklas |
253 |
} |
5033 |
17 Oct 18 |
nicklas |
254 |
catch (RuntimeException ex) |
5033 |
17 Oct 18 |
nicklas |
255 |
{ |
5033 |
17 Oct 18 |
nicklas |
256 |
f.setUrl(fileUrl, false); |
5033 |
17 Oct 18 |
nicklas |
257 |
} |
5033 |
17 Oct 18 |
nicklas |
258 |
|
5033 |
17 Oct 18 |
nicklas |
259 |
if (!f.isInDatabase()) |
5033 |
17 Oct 18 |
nicklas |
260 |
{ |
5033 |
17 Oct 18 |
nicklas |
261 |
dc.saveItem(f); |
5033 |
17 Oct 18 |
nicklas |
262 |
} |
5033 |
17 Oct 18 |
nicklas |
263 |
|
5064 |
31 Oct 18 |
nicklas |
264 |
AnyToAny link = AnyToAny.getNewOrExisting(dc, aligned, f.getName(), f, false); |
5033 |
17 Oct 18 |
nicklas |
265 |
if (!link.isInDatabase()) dc.saveItem(link); |
5039 |
19 Oct 18 |
nicklas |
266 |
|
5039 |
19 Oct 18 |
nicklas |
267 |
if ("mbaf_genotype.vcf".equals(f.getName())) |
5039 |
19 Oct 18 |
nicklas |
268 |
{ |
5039 |
19 Oct 18 |
nicklas |
269 |
f.setMimeTypeAuto("text/plain", vcfType); |
5057 |
29 Oct 18 |
nicklas |
270 |
if (software != null) |
5057 |
29 Oct 18 |
nicklas |
271 |
{ |
5057 |
29 Oct 18 |
nicklas |
272 |
link.setDescription("Created with " + software.getName()); |
5064 |
31 Oct 18 |
nicklas |
273 |
AnyToAny swLink = AnyToAny.getNewOrExisting(dc, f, "software", software, false); |
5057 |
29 Oct 18 |
nicklas |
274 |
if (!swLink.isInDatabase()) dc.saveItem(swLink); |
5057 |
29 Oct 18 |
nicklas |
275 |
} |
5057 |
29 Oct 18 |
nicklas |
276 |
|
5050 |
24 Oct 18 |
nicklas |
277 |
MBafOptions options = new MBafOptions(); |
5050 |
24 Oct 18 |
nicklas |
278 |
MBafParser parser = new MBafParser(Region.defaultRegions(), options); |
5040 |
19 Oct 18 |
nicklas |
279 |
BafData bafData = parseVcf(parser, f); |
5040 |
19 Oct 18 |
nicklas |
280 |
if (bafData != null) |
5039 |
19 Oct 18 |
nicklas |
281 |
{ |
5054 |
26 Oct 18 |
nicklas |
282 |
BafData.Metrics metrics = bafData.collectMetrics(); |
5054 |
26 Oct 18 |
nicklas |
283 |
metrics.updateAnnotations(dc, aligned); |
5054 |
26 Oct 18 |
nicklas |
284 |
msg = "Found " + metrics.snpCount + " SNPs"; |
5054 |
26 Oct 18 |
nicklas |
285 |
msg += ", " + metrics.filteredCount + " passed filter"; |
5054 |
26 Oct 18 |
nicklas |
286 |
if (metrics.numRegions > 0) |
5046 |
22 Oct 18 |
nicklas |
287 |
{ |
5054 |
26 Oct 18 |
nicklas |
288 |
msg += ", " + metrics.numSignificantRegions + " of " + metrics.numRegions + " regions have significant mBAF (p<"+options.getSignificantPVal()+")"; |
5054 |
26 Oct 18 |
nicklas |
289 |
msg += ", " + metrics.minRegion.getRegion().getName() + " has p-value " + String.format(Locale.ENGLISH, "%.3e", metrics.minP); |
5046 |
22 Oct 18 |
nicklas |
290 |
} |
5051 |
24 Oct 18 |
nicklas |
291 |
else |
5051 |
24 Oct 18 |
nicklas |
292 |
{ |
5051 |
24 Oct 18 |
nicklas |
293 |
msg += ", no regions with enough SNPs for mBAF calculations"; |
5051 |
24 Oct 18 |
nicklas |
294 |
} |
5050 |
24 Oct 18 |
nicklas |
295 |
|
5431 |
16 May 19 |
nicklas |
296 |
MBafPlot plot = null; |
5431 |
16 May 19 |
nicklas |
297 |
try |
5049 |
23 Oct 18 |
nicklas |
298 |
{ |
5431 |
16 May 19 |
nicklas |
299 |
plot = new MBafPlot(options); |
5431 |
16 May 19 |
nicklas |
300 |
Result result = plot.run(dc, aligned, bafData); |
5431 |
16 May 19 |
nicklas |
301 |
if (result.getExitStatus() != 0) |
5051 |
24 Oct 18 |
nicklas |
302 |
{ |
5431 |
16 May 19 |
nicklas |
303 |
throw new RuntimeException(result.getStderr(), result.getException()); |
5051 |
24 Oct 18 |
nicklas |
304 |
} |
5431 |
16 May 19 |
nicklas |
305 |
else if (result.resultFileExists("mbafplot.png")) |
5051 |
24 Oct 18 |
nicklas |
306 |
{ |
5431 |
16 May 19 |
nicklas |
307 |
File png = File.getFile(dc, localDataDir, "mbaf_genotype.png", true); |
5431 |
16 May 19 |
nicklas |
308 |
png.setMimeTypeAuto("image/png", null); |
5431 |
16 May 19 |
nicklas |
309 |
if (!png.isInDatabase()) dc.saveItem(png); |
5431 |
16 May 19 |
nicklas |
310 |
AnyToAny pngLink = AnyToAny.getNewOrExisting(dc, aligned, png.getName(), png, false); |
5431 |
16 May 19 |
nicklas |
311 |
if (!pngLink.isInDatabase()) dc.saveItem(pngLink); |
5431 |
16 May 19 |
nicklas |
312 |
|
5431 |
16 May 19 |
nicklas |
313 |
try |
5431 |
16 May 19 |
nicklas |
314 |
{ |
5431 |
16 May 19 |
nicklas |
315 |
result.saveResultFile("mbafplot.png", png.getUploadStream(false), true); |
5431 |
16 May 19 |
nicklas |
316 |
} |
5431 |
16 May 19 |
nicklas |
317 |
catch (IOException ex) |
5431 |
16 May 19 |
nicklas |
318 |
{ |
5431 |
16 May 19 |
nicklas |
319 |
throw new RuntimeException(ex); |
5431 |
16 May 19 |
nicklas |
320 |
} |
5051 |
24 Oct 18 |
nicklas |
321 |
} |
5049 |
23 Oct 18 |
nicklas |
322 |
} |
5431 |
16 May 19 |
nicklas |
323 |
finally |
5431 |
16 May 19 |
nicklas |
324 |
{ |
5431 |
16 May 19 |
nicklas |
325 |
if (plot != null) plot.removeWorkDir(); |
5431 |
16 May 19 |
nicklas |
326 |
} |
5039 |
19 Oct 18 |
nicklas |
327 |
} |
5039 |
19 Oct 18 |
nicklas |
328 |
} |
5033 |
17 Oct 18 |
nicklas |
329 |
} |
5033 |
17 Oct 18 |
nicklas |
330 |
|
5033 |
17 Oct 18 |
nicklas |
331 |
dc.commit(); |
5033 |
17 Oct 18 |
nicklas |
332 |
} |
5033 |
17 Oct 18 |
nicklas |
333 |
finally |
5033 |
17 Oct 18 |
nicklas |
334 |
{ |
5033 |
17 Oct 18 |
nicklas |
335 |
if (dc != null) dc.close(); |
5033 |
17 Oct 18 |
nicklas |
336 |
} |
5033 |
17 Oct 18 |
nicklas |
337 |
|
5033 |
17 Oct 18 |
nicklas |
338 |
return msg == null ? "" : msg; |
5033 |
17 Oct 18 |
nicklas |
339 |
} |
5039 |
19 Oct 18 |
nicklas |
340 |
|
5039 |
19 Oct 18 |
nicklas |
341 |
/** |
5039 |
19 Oct 18 |
nicklas |
Helper method for copying the VCF file from the file server |
5039 |
19 Oct 18 |
nicklas |
while at the same time parsing it and extracting genotype |
5039 |
19 Oct 18 |
nicklas |
information and statistics. |
5039 |
19 Oct 18 |
nicklas |
345 |
*/ |
5040 |
19 Oct 18 |
nicklas |
346 |
private BafData parseVcf(MBafParser parser, File vcfFile) |
5039 |
19 Oct 18 |
nicklas |
347 |
{ |
5039 |
19 Oct 18 |
nicklas |
// Stream for copying the vcfFile |
5039 |
19 Oct 18 |
nicklas |
349 |
InputStream fromFileServer = null; |
5039 |
19 Oct 18 |
nicklas |
350 |
|
5040 |
19 Oct 18 |
nicklas |
351 |
BafData bafData = null; |
5039 |
19 Oct 18 |
nicklas |
352 |
try |
5039 |
19 Oct 18 |
nicklas |
353 |
{ |
5039 |
19 Oct 18 |
nicklas |
354 |
fromFileServer = vcfFile.getDownloadStream(0); |
5040 |
19 Oct 18 |
nicklas |
355 |
bafData = parser.parse(fromFileServer, vcfFile.getName()); |
5039 |
19 Oct 18 |
nicklas |
356 |
} |
5039 |
19 Oct 18 |
nicklas |
357 |
catch (Exception ex) |
5039 |
19 Oct 18 |
nicklas |
358 |
{ |
5039 |
19 Oct 18 |
nicklas |
359 |
logger.warn("Could not parse VCF file: "+ vcfFile, ex); |
5039 |
19 Oct 18 |
nicklas |
360 |
} |
5039 |
19 Oct 18 |
nicklas |
361 |
finally |
5039 |
19 Oct 18 |
nicklas |
362 |
{ |
5039 |
19 Oct 18 |
nicklas |
363 |
FileUtil.close(fromFileServer); |
5039 |
19 Oct 18 |
nicklas |
364 |
} |
5040 |
19 Oct 18 |
nicklas |
365 |
return bafData; |
5039 |
19 Oct 18 |
nicklas |
366 |
} |
5030 |
16 Oct 18 |
nicklas |
367 |
} |
5030 |
16 Oct 18 |
nicklas |
368 |
} |