4661 |
29 Jan 18 |
nicklas |
1 |
package net.sf.basedb.reggie.grid; |
4661 |
29 Jan 18 |
nicklas |
2 |
|
4669 |
02 Feb 18 |
nicklas |
3 |
import java.io.IOException; |
4669 |
02 Feb 18 |
nicklas |
4 |
import java.io.InputStream; |
4664 |
30 Jan 18 |
nicklas |
5 |
import java.util.ArrayList; |
4669 |
02 Feb 18 |
nicklas |
6 |
import java.util.HashSet; |
4661 |
29 Jan 18 |
nicklas |
7 |
import java.util.List; |
4669 |
02 Feb 18 |
nicklas |
8 |
import java.util.Set; |
6010 |
18 Sep 20 |
nicklas |
9 |
import java.util.regex.Matcher; |
4669 |
02 Feb 18 |
nicklas |
10 |
import java.util.regex.Pattern; |
4661 |
29 Jan 18 |
nicklas |
11 |
|
4661 |
29 Jan 18 |
nicklas |
12 |
import org.slf4j.LoggerFactory; |
4661 |
29 Jan 18 |
nicklas |
13 |
|
4664 |
30 Jan 18 |
nicklas |
14 |
import net.sf.basedb.core.AnyToAny; |
4661 |
29 Jan 18 |
nicklas |
15 |
import net.sf.basedb.core.ArrayDesign; |
4669 |
02 Feb 18 |
nicklas |
16 |
import net.sf.basedb.core.DataFileType; |
4661 |
29 Jan 18 |
nicklas |
17 |
import net.sf.basedb.core.DbControl; |
4664 |
30 Jan 18 |
nicklas |
18 |
import net.sf.basedb.core.DerivedBioAssay; |
4664 |
30 Jan 18 |
nicklas |
19 |
import net.sf.basedb.core.Directory; |
4664 |
30 Jan 18 |
nicklas |
20 |
import net.sf.basedb.core.File; |
4664 |
30 Jan 18 |
nicklas |
21 |
import net.sf.basedb.core.FileServer; |
4669 |
02 Feb 18 |
nicklas |
22 |
import net.sf.basedb.core.FileSetMember; |
4664 |
30 Jan 18 |
nicklas |
23 |
import net.sf.basedb.core.ItemList; |
4661 |
29 Jan 18 |
nicklas |
24 |
import net.sf.basedb.core.ItemNotFoundException; |
4669 |
02 Feb 18 |
nicklas |
25 |
import net.sf.basedb.core.ItemSubtype; |
4664 |
30 Jan 18 |
nicklas |
26 |
import net.sf.basedb.core.Job; |
4664 |
30 Jan 18 |
nicklas |
27 |
import net.sf.basedb.core.Path; |
4661 |
29 Jan 18 |
nicklas |
28 |
import net.sf.basedb.core.Protocol; |
4661 |
29 Jan 18 |
nicklas |
29 |
import net.sf.basedb.core.RawBioAssay; |
6010 |
18 Sep 20 |
nicklas |
30 |
import net.sf.basedb.core.Sample; |
4661 |
29 Jan 18 |
nicklas |
31 |
import net.sf.basedb.core.SessionControl; |
4661 |
29 Jan 18 |
nicklas |
32 |
import net.sf.basedb.core.Software; |
5547 |
07 Aug 19 |
nicklas |
33 |
import net.sf.basedb.core.StringParameterType; |
4661 |
29 Jan 18 |
nicklas |
34 |
import net.sf.basedb.opengrid.JobDefinition; |
4664 |
30 Jan 18 |
nicklas |
35 |
import net.sf.basedb.opengrid.JobStatus; |
4661 |
29 Jan 18 |
nicklas |
36 |
import net.sf.basedb.opengrid.OpenGridCluster; |
4664 |
30 Jan 18 |
nicklas |
37 |
import net.sf.basedb.opengrid.OpenGridSession; |
4664 |
30 Jan 18 |
nicklas |
38 |
import net.sf.basedb.opengrid.ScriptBuilder; |
4661 |
29 Jan 18 |
nicklas |
39 |
import net.sf.basedb.opengrid.config.ClusterConfig; |
4664 |
30 Jan 18 |
nicklas |
40 |
import net.sf.basedb.opengrid.config.JobConfig; |
4664 |
30 Jan 18 |
nicklas |
41 |
import net.sf.basedb.opengrid.service.JobCompletionHandler; |
4661 |
29 Jan 18 |
nicklas |
42 |
import net.sf.basedb.reggie.Reggie; |
4661 |
29 Jan 18 |
nicklas |
43 |
import net.sf.basedb.reggie.XmlConfig; |
4661 |
29 Jan 18 |
nicklas |
44 |
import net.sf.basedb.reggie.dao.AlignedSequences; |
4661 |
29 Jan 18 |
nicklas |
45 |
import net.sf.basedb.reggie.dao.Annotationtype; |
4664 |
30 Jan 18 |
nicklas |
46 |
import net.sf.basedb.reggie.dao.BiomaterialList; |
4664 |
30 Jan 18 |
nicklas |
47 |
import net.sf.basedb.reggie.dao.Datafiletype; |
6010 |
18 Sep 20 |
nicklas |
48 |
import net.sf.basedb.reggie.dao.DemuxedSequences; |
5790 |
13 Dec 19 |
nicklas |
49 |
import net.sf.basedb.reggie.dao.DoNotUse; |
4664 |
30 Jan 18 |
nicklas |
50 |
import net.sf.basedb.reggie.dao.Fileserver; |
4664 |
30 Jan 18 |
nicklas |
51 |
import net.sf.basedb.reggie.dao.Library; |
6010 |
18 Sep 20 |
nicklas |
52 |
import net.sf.basedb.reggie.dao.MaskedSequences; |
6010 |
18 Sep 20 |
nicklas |
53 |
import net.sf.basedb.reggie.dao.MergedSequences; |
5547 |
07 Aug 19 |
nicklas |
54 |
import net.sf.basedb.reggie.dao.Pipeline; |
4664 |
30 Jan 18 |
nicklas |
55 |
import net.sf.basedb.reggie.dao.Rawbioassay; |
4665 |
31 Jan 18 |
nicklas |
56 |
import net.sf.basedb.reggie.dao.Rawdatatype; |
4664 |
30 Jan 18 |
nicklas |
57 |
import net.sf.basedb.reggie.dao.Subtype; |
4669 |
02 Feb 18 |
nicklas |
58 |
import net.sf.basedb.util.FileUtil; |
4664 |
30 Jan 18 |
nicklas |
59 |
import net.sf.basedb.util.Values; |
7079 |
27 Mar 23 |
nicklas |
60 |
import net.sf.basedb.util.extensions.logging.ExtensionsLog; |
7079 |
27 Mar 23 |
nicklas |
61 |
import net.sf.basedb.util.extensions.logging.ExtensionsLogger; |
4669 |
02 Feb 18 |
nicklas |
62 |
import net.sf.basedb.util.parser.FlatFileParser; |
4669 |
02 Feb 18 |
nicklas |
63 |
import net.sf.basedb.util.parser.FlatFileParser.LineType; |
4661 |
29 Jan 18 |
nicklas |
64 |
|
4661 |
29 Jan 18 |
nicklas |
65 |
/** |
4663 |
29 Jan 18 |
nicklas |
Helper class for creating items needed for executing StringTie as |
4663 |
29 Jan 18 |
nicklas |
well as generating the StringTie script and send it to the cluster for |
4661 |
29 Jan 18 |
nicklas |
execution. |
4661 |
29 Jan 18 |
nicklas |
69 |
|
4661 |
29 Jan 18 |
nicklas |
@author nicklas |
4661 |
29 Jan 18 |
nicklas |
@since 4.15 |
4661 |
29 Jan 18 |
nicklas |
72 |
*/ |
4663 |
29 Jan 18 |
nicklas |
73 |
public class StringTieJobCreator |
6674 |
11 Apr 22 |
nicklas |
74 |
extends AbstractJobCreator |
4661 |
29 Jan 18 |
nicklas |
75 |
{ |
6010 |
18 Sep 20 |
nicklas |
76 |
/** |
6010 |
18 Sep 20 |
nicklas |
Sum all parts of the read string that generate |
6010 |
18 Sep 20 |
nicklas |
an ouput read (eg. all T) |
6010 |
18 Sep 20 |
nicklas |
@since 4.27.4 |
6010 |
18 Sep 20 |
nicklas |
80 |
*/ |
6010 |
18 Sep 20 |
nicklas |
81 |
public static int getTotalReadSize(String readString) |
6010 |
18 Sep 20 |
nicklas |
82 |
{ |
6010 |
18 Sep 20 |
nicklas |
83 |
Pattern p = Pattern.compile("(\\d+)T"); |
6010 |
18 Sep 20 |
nicklas |
84 |
Matcher m = p.matcher(readString); |
6010 |
18 Sep 20 |
nicklas |
85 |
int totalReadSize = 0; |
6010 |
18 Sep 20 |
nicklas |
86 |
while (m.find()) |
6010 |
18 Sep 20 |
nicklas |
87 |
{ |
6010 |
18 Sep 20 |
nicklas |
88 |
totalReadSize += Values.getInt(m.group(1)); |
6010 |
18 Sep 20 |
nicklas |
89 |
} |
6010 |
18 Sep 20 |
nicklas |
90 |
return totalReadSize; |
6010 |
18 Sep 20 |
nicklas |
91 |
} |
6010 |
18 Sep 20 |
nicklas |
92 |
|
6010 |
18 Sep 20 |
nicklas |
93 |
/** |
6010 |
18 Sep 20 |
nicklas |
Helper method for getting the read string from all demuxed sequences and |
6010 |
18 Sep 20 |
nicklas |
calculating the average length. |
6010 |
18 Sep 20 |
nicklas |
@since 4.27.4 |
6010 |
18 Sep 20 |
nicklas |
97 |
*/ |
6010 |
18 Sep 20 |
nicklas |
98 |
public static int getAverageReadSize(DbControl dc, AlignedSequences aligned) |
6010 |
18 Sep 20 |
nicklas |
99 |
{ |
6010 |
18 Sep 20 |
nicklas |
100 |
MaskedSequences masked = aligned.getMaskedSequences(dc); |
6010 |
18 Sep 20 |
nicklas |
101 |
MergedSequences merged = masked.getMergedSequences(dc); |
6010 |
18 Sep 20 |
nicklas |
102 |
List<DemuxedSequences> dxList = merged.getDemuxedSequences(dc); |
6010 |
18 Sep 20 |
nicklas |
103 |
|
6010 |
18 Sep 20 |
nicklas |
104 |
int totalReadSize = 0; |
6010 |
18 Sep 20 |
nicklas |
105 |
for (DemuxedSequences dx : dxList) |
6010 |
18 Sep 20 |
nicklas |
106 |
{ |
6010 |
18 Sep 20 |
nicklas |
107 |
String readString = (String)Annotationtype.READ_STRING.getAnnotationValue(dc, dx.getItem()); |
6010 |
18 Sep 20 |
nicklas |
108 |
totalReadSize += getTotalReadSize(readString); |
6010 |
18 Sep 20 |
nicklas |
109 |
} |
6010 |
18 Sep 20 |
nicklas |
110 |
|
6010 |
18 Sep 20 |
nicklas |
111 |
return totalReadSize / dxList.size(); |
6010 |
18 Sep 20 |
nicklas |
112 |
} |
6010 |
18 Sep 20 |
nicklas |
113 |
|
6010 |
18 Sep 20 |
nicklas |
114 |
|
4661 |
29 Jan 18 |
nicklas |
115 |
/**
	Software item that is set on created raw bioassays. It is also the item
	that the PARAMETER_SET annotation is read from when creating jobs.
*/
private Software software;

/**
	Protocol item that is set on created raw bioassays.
*/
private Protocol protocol;

/**
	Array design item that is set on created raw bioassays.
*/
private ArrayDesign arrayDesign;

/**
	Create a new job creator. Software, protocol and array design
	are not set until the corresponding setter methods are called.
*/
public StringTieJobCreator()
{
	super();
}
4661 |
29 Jan 18 |
nicklas |
121 |
|
4661 |
29 Jan 18 |
nicklas |
122 |
/** |
4661 |
29 Jan 18 |
nicklas |
Set the software item to set on created RawBioAssay:s. |
4661 |
29 Jan 18 |
nicklas |
@see RawBioAssay#setSoftware(Software) |
4661 |
29 Jan 18 |
nicklas |
125 |
*/ |
4661 |
29 Jan 18 |
nicklas |
126 |
public void setSoftware(Software software) |
4661 |
29 Jan 18 |
nicklas |
127 |
{ |
4661 |
29 Jan 18 |
nicklas |
128 |
this.software = software; |
4661 |
29 Jan 18 |
nicklas |
129 |
} |
4661 |
29 Jan 18 |
nicklas |
130 |
|
4661 |
29 Jan 18 |
nicklas |
131 |
/** |
4661 |
29 Jan 18 |
nicklas |
Set the protocol item to set on created RawBioAssay:s |
4661 |
29 Jan 18 |
nicklas |
@see RawBioAssay#setProtocol(Protocol) |
4661 |
29 Jan 18 |
nicklas |
134 |
*/ |
4661 |
29 Jan 18 |
nicklas |
135 |
public void setProtocol(Protocol protocol) |
4661 |
29 Jan 18 |
nicklas |
136 |
{ |
4661 |
29 Jan 18 |
nicklas |
137 |
this.protocol = protocol; |
4661 |
29 Jan 18 |
nicklas |
138 |
} |
4661 |
29 Jan 18 |
nicklas |
139 |
|
4661 |
29 Jan 18 |
nicklas |
140 |
/** |
4661 |
29 Jan 18 |
nicklas |
Set the array design item to set on created RawBioAssay:s |
4661 |
29 Jan 18 |
nicklas |
@see RawBioAssay#setArrayDesign(ArrayDesign) |
4661 |
29 Jan 18 |
nicklas |
143 |
*/ |
4661 |
29 Jan 18 |
nicklas |
144 |
public void setArrayDesign(ArrayDesign design) |
4661 |
29 Jan 18 |
nicklas |
145 |
{ |
4661 |
29 Jan 18 |
nicklas |
146 |
this.arrayDesign = design; |
4661 |
29 Jan 18 |
nicklas |
147 |
} |
4661 |
29 Jan 18 |
nicklas |
148 |
|
4661 |
29 Jan 18 |
nicklas |
149 |
/** |
4661 |
29 Jan 18 |
nicklas |
Create a child items for all given aligneded sequences and schedule |
4663 |
29 Jan 18 |
nicklas |
jobs on the given cluster for running StringTie. |
4661 |
29 Jan 18 |
nicklas |
@return A list with the corresponding jobs in BASE |
4661 |
29 Jan 18 |
nicklas |
153 |
*/ |
4663 |
29 Jan 18 |
nicklas |
154 |
public List<JobDefinition> createStringTieJobs(DbControl dc, OpenGridCluster cluster, List<AlignedSequences> alignedSequences) |
4661 |
29 Jan 18 |
nicklas |
155 |
{ |
4661 |
29 Jan 18 |
nicklas |
156 |
/* |
4663 |
29 Jan 18 |
nicklas |
System.out.println("createStringTieJobs:"); |
4661 |
29 Jan 18 |
nicklas |
System.out.println("software:" + software); |
4661 |
29 Jan 18 |
nicklas |
System.out.println("protocol:" + protocol); |
4661 |
29 Jan 18 |
nicklas |
System.out.println("design:" + arrayDesign); |
4661 |
29 Jan 18 |
nicklas |
161 |
*/ |
4661 |
29 Jan 18 |
nicklas |
162 |
|
4661 |
29 Jan 18 |
nicklas |
163 |
SessionControl sc = dc.getSessionControl(); |
4661 |
29 Jan 18 |
nicklas |
164 |
|
4661 |
29 Jan 18 |
nicklas |
165 |
ClusterConfig clusterCfg = cluster.getConfig(); |
4661 |
29 Jan 18 |
nicklas |
166 |
XmlConfig cfg = Reggie.getConfig(cluster.getId()); |
4661 |
29 Jan 18 |
nicklas |
167 |
if (cfg == null) |
4661 |
29 Jan 18 |
nicklas |
168 |
{ |
4661 |
29 Jan 18 |
nicklas |
169 |
throw new ItemNotFoundException("No configuration in reggie-config.xml for cluster: " + cluster.getId()); |
4661 |
29 Jan 18 |
nicklas |
170 |
} |
4661 |
29 Jan 18 |
nicklas |
171 |
|
4661 |
29 Jan 18 |
nicklas |
172 |
String parameterSet = (String)Annotationtype.PARAMETER_SET.getAnnotationValue(dc, software); |
4661 |
29 Jan 18 |
nicklas |
173 |
|
4664 |
30 Jan 18 |
nicklas |
// Get global options |
6693 |
22 Apr 22 |
nicklas |
175 |
String global_env = ScriptUtil.multilineIndent(cfg.getConfig("global-env")); |
6653 |
23 Mar 22 |
nicklas |
176 |
String projectArchive = cfg.getRequiredConfig("project-archive", null); |
6653 |
23 Mar 22 |
nicklas |
177 |
String externalArchive = cfg.getConfig("external-archive", null, projectArchive); |
4664 |
30 Jan 18 |
nicklas |
178 |
|
4664 |
30 Jan 18 |
nicklas |
// Options for the programs |
7372 |
06 Oct 23 |
nicklas |
180 |
String stringtie_submit = cfg.getConfig("stringtie/submit", parameterSet, null); |
7372 |
06 Oct 23 |
nicklas |
181 |
String stringtie_submit_debug = cfg.getConfig("stringtie/submit-debug", parameterSet, null); |
6653 |
23 Mar 22 |
nicklas |
182 |
String stringtie_env = ScriptUtil.multilineIndent(cfg.getRequiredConfig("stringtie/env", parameterSet)); |
6673 |
11 Apr 22 |
nicklas |
183 |
String stringtie_envdebug = ScriptUtil.multilineIndent(cfg.getConfig("stringtie/env-debug", parameterSet, null)); |
6653 |
23 Mar 22 |
nicklas |
184 |
String stringtie_execute = ScriptUtil.multilineIndent(cfg.getConfig("stringtie/execute", parameterSet, "./stringtie.sh")); |
4664 |
30 Jan 18 |
nicklas |
185 |
|
4664 |
30 Jan 18 |
nicklas |
// Selected items must be removed from this list |
4664 |
30 Jan 18 |
nicklas |
187 |
ItemList stringtiePipeline = BiomaterialList.STRINGTIE_PIPELINE.load(dc); |
4664 |
30 Jan 18 |
nicklas |
188 |
|
4667 |
01 Feb 18 |
nicklas |
// Create StringTie raw bioassays |
4667 |
01 Feb 18 |
nicklas |
190 |
Rawdatatype stringTieType = Rawdatatype.STRINGTIE; |
4664 |
30 Jan 18 |
nicklas |
191 |
|
4664 |
30 Jan 18 |
nicklas |
// Options common for all jobs |
4664 |
30 Jan 18 |
nicklas |
193 |
JobConfig jobConfig = new JobConfig(); |
4664 |
30 Jan 18 |
nicklas |
194 |
if (priority != null) jobConfig.setPriority(priority); |
7372 |
06 Oct 23 |
nicklas |
195 |
if (partition != null) jobConfig.setSbatchOption("partition", ScriptUtil.checkValidScriptParameter(partition)); |
7372 |
06 Oct 23 |
nicklas |
196 |
jobConfig.convertOptionsTo(clusterCfg.getType()); |
7372 |
06 Oct 23 |
nicklas |
197 |
if (submitOptionsOverride != null) |
7372 |
06 Oct 23 |
nicklas |
198 |
{ |
7372 |
06 Oct 23 |
nicklas |
199 |
ScriptUtil.addSubmitOptions(jobConfig, submitOptionsOverride, clusterCfg.getType()); |
7372 |
06 Oct 23 |
nicklas |
200 |
} |
7372 |
06 Oct 23 |
nicklas |
201 |
else |
7372 |
06 Oct 23 |
nicklas |
202 |
{ |
7372 |
06 Oct 23 |
nicklas |
203 |
ScriptUtil.addSubmitOptions(jobConfig, stringtie_submit, clusterCfg.getType()); |
7372 |
06 Oct 23 |
nicklas |
204 |
if (debug) ScriptUtil.addSubmitOptions(jobConfig, stringtie_submit_debug, clusterCfg.getType()); |
7372 |
06 Oct 23 |
nicklas |
205 |
} |
4661 |
29 Jan 18 |
nicklas |
206 |
|
4664 |
30 Jan 18 |
nicklas |
// We submit one job for each raw bioassay to the cluster |
4664 |
30 Jan 18 |
nicklas |
208 |
List<JobDefinition> jobDefs = new ArrayList<JobDefinition>(alignedSequences.size()); |
4664 |
30 Jan 18 |
nicklas |
209 |
|
4664 |
30 Jan 18 |
nicklas |
210 |
for (AlignedSequences as : alignedSequences) |
4664 |
30 Jan 18 |
nicklas |
211 |
{ |
4664 |
30 Jan 18 |
nicklas |
212 |
as = AlignedSequences.getById(dc, as.getId()); // Ensure item is loaded in this transaction |
4664 |
30 Jan 18 |
nicklas |
213 |
|
4664 |
30 Jan 18 |
nicklas |
// Get some information about the aligned data that we need |
4664 |
30 Jan 18 |
nicklas |
215 |
DerivedBioAssay aligned = as.getDerivedBioAssay(); |
5364 |
16 Apr 19 |
nicklas |
216 |
stringtiePipeline.removeItem(aligned); |
4664 |
30 Jan 18 |
nicklas |
217 |
|
4664 |
30 Jan 18 |
nicklas |
218 |
Library lib = Library.get(aligned.getExtract()); |
6010 |
18 Sep 20 |
nicklas |
219 |
Sample specimen = (Sample)lib.findSingleParent(dc, Subtype.SPECIMEN); |
5596 |
11 Sep 19 |
nicklas |
220 |
boolean isExternal = Reggie.isExternalItem(aligned.getName()); |
6653 |
23 Mar 22 |
nicklas |
221 |
String archiveFolder = isExternal ? externalArchive : projectArchive; |
4664 |
30 Jan 18 |
nicklas |
222 |
String bamFolder = (String)Annotationtype.DATA_FILES_FOLDER.getAnnotationValue(dc, aligned); |
4664 |
30 Jan 18 |
nicklas |
223 |
File bamFile = Datafiletype.BAM.getFile(dc, aligned); |
6631 |
08 Mar 22 |
nicklas |
224 |
String bamName = ScriptUtil.checkValidScriptParameter(bamFile.getName()); |
6010 |
18 Sep 20 |
nicklas |
225 |
|
6010 |
18 Sep 20 |
nicklas |
226 |
int readLength = getAverageReadSize(dc, as); |
6010 |
18 Sep 20 |
nicklas |
227 |
|
4664 |
30 Jan 18 |
nicklas |
// Create job |
4664 |
30 Jan 18 |
nicklas |
229 |
Job stringTieJob = Job.getNew(dc, null, null, null); |
4664 |
30 Jan 18 |
nicklas |
230 |
stringTieJob.setItemSubtype(Subtype.STRINGTIE_JOB.get(dc)); |
4664 |
30 Jan 18 |
nicklas |
231 |
stringTieJob.setPluginVersion("reggie-"+Reggie.VERSION); |
4664 |
30 Jan 18 |
nicklas |
232 |
stringTieJob.setSendMessage(Values.getBoolean(sc.getUserClientSetting("plugins.sendmessage"), false)); |
4664 |
30 Jan 18 |
nicklas |
233 |
stringTieJob.setName("Run StringTie " + aligned.getName()); |
5547 |
07 Aug 19 |
nicklas |
234 |
stringTieJob.setParameterValue("pipeline", new StringParameterType(), Pipeline.RNASEQ_HISAT_STRINGTIE.getId()); |
4664 |
30 Jan 18 |
nicklas |
235 |
if (debug) stringTieJob.setName(stringTieJob.getName() + " (debug)"); |
6981 |
17 Jan 23 |
nicklas |
236 |
if (partition != null) stringTieJob.setParameterValue("partition", new StringParameterType(), partition); |
7372 |
06 Oct 23 |
nicklas |
237 |
if (submitOptionsOverride != null) stringTieJob.setParameterValue("jobOptions", new StringParameterType(), submitOptionsOverride); |
4664 |
30 Jan 18 |
nicklas |
238 |
dc.saveItem(stringTieJob); |
4664 |
30 Jan 18 |
nicklas |
239 |
|
4664 |
30 Jan 18 |
nicklas |
// Create raw bioassay |
6653 |
23 Mar 22 |
nicklas |
241 |
String stringTieName = ScriptUtil.checkValidScriptParameter(as.getNextRawBioAssayName(dc, stringTieType)); |
4667 |
01 Feb 18 |
nicklas |
242 |
RawBioAssay raw = stringTieType.createRawBioAssay(dc); |
5547 |
07 Aug 19 |
nicklas |
243 |
Pipeline.RNASEQ_HISAT_STRINGTIE.setAnnotation(dc, raw); |
4664 |
30 Jan 18 |
nicklas |
244 |
raw.setArrayDesign(arrayDesign); |
4667 |
01 Feb 18 |
nicklas |
245 |
raw.setJob(stringTieJob); |
6010 |
18 Sep 20 |
nicklas |
246 |
raw.setName(stringTieName); |
4664 |
30 Jan 18 |
nicklas |
247 |
raw.setParentExtract(lib.getExtract()); |
4664 |
30 Jan 18 |
nicklas |
248 |
raw.setSoftware(software); |
4664 |
30 Jan 18 |
nicklas |
249 |
raw.setProtocol(protocol); |
4664 |
30 Jan 18 |
nicklas |
250 |
raw.setParentBioAssay(aligned); |
5790 |
13 Dec 19 |
nicklas |
251 |
DoNotUse.copyDoNotUseAnnotations(dc, aligned, raw, false); |
4664 |
30 Jan 18 |
nicklas |
252 |
dc.saveItem(raw); |
4664 |
30 Jan 18 |
nicklas |
253 |
|
4664 |
30 Jan 18 |
nicklas |
254 |
String stringTieFolder = bamFolder + "/"+raw.getName().substring(aligned.getName().length()+1); |
4664 |
30 Jan 18 |
nicklas |
255 |
if (debug && !stringTieFolder.startsWith("/debug")) |
4664 |
30 Jan 18 |
nicklas |
256 |
{ |
4664 |
30 Jan 18 |
nicklas |
257 |
stringTieFolder = "/debug" + stringTieFolder; |
4664 |
30 Jan 18 |
nicklas |
258 |
} |
4664 |
30 Jan 18 |
nicklas |
259 |
Annotationtype.DATA_FILES_FOLDER.setAnnotationValue(dc, raw, stringTieFolder); |
4664 |
30 Jan 18 |
nicklas |
260 |
if (autoConfirm) |
4664 |
30 Jan 18 |
nicklas |
261 |
{ |
4664 |
30 Jan 18 |
nicklas |
262 |
Annotationtype.AUTO_PROCESSING.setAnnotationValue(dc, raw, "AutoConfirm"); |
4664 |
30 Jan 18 |
nicklas |
263 |
} |
6653 |
23 Mar 22 |
nicklas |
264 |
String externalStringTieName = stringTieName; |
6653 |
23 Mar 22 |
nicklas |
265 |
if (specimen != null && specimen.getExternalId() != null) |
6653 |
23 Mar 22 |
nicklas |
266 |
{ |
6653 |
23 Mar 22 |
nicklas |
// Replace SCANB-ID with Sample.externalId |
6653 |
23 Mar 22 |
nicklas |
268 |
externalStringTieName = externalStringTieName.replace(specimen.getName(), specimen.getExternalId()); |
6653 |
23 Mar 22 |
nicklas |
269 |
} |
6653 |
23 Mar 22 |
nicklas |
270 |
|
4664 |
30 Jan 18 |
nicklas |
// Checks to make sure no bad things are included in script file |
4664 |
30 Jan 18 |
nicklas |
272 |
ScriptUtil.checkValidPath(stringTieFolder, true, true); |
4664 |
30 Jan 18 |
nicklas |
273 |
ScriptUtil.checkValidScriptParameter(raw.getName()); |
4664 |
30 Jan 18 |
nicklas |
274 |
ScriptUtil.checkValidScriptParameter(bamFile.getName()); |
4664 |
30 Jan 18 |
nicklas |
275 |
|
4664 |
30 Jan 18 |
nicklas |
276 |
ScriptBuilder script = new ScriptBuilder(); |
6665 |
05 Apr 22 |
nicklas |
277 |
script.cmd(debug ? "set -ex" : "set -e"); |
5596 |
11 Sep 19 |
nicklas |
// Set file permissions based on consent or external group! |
5596 |
11 Sep 19 |
nicklas |
279 |
String externalGroup = isExternal ? Reggie.getExternalGroup(aligned.getName()) : null; |
5596 |
11 Sep 19 |
nicklas |
280 |
ScriptUtil.setUmaskForItem(dc, lib, externalGroup, script); |
4664 |
30 Jan 18 |
nicklas |
281 |
script.newLine(); |
6693 |
22 Apr 22 |
nicklas |
282 |
script.cmd(global_env); |
6653 |
23 Mar 22 |
nicklas |
283 |
script.export("ArchiveFolder", archiveFolder); |
6653 |
23 Mar 22 |
nicklas |
284 |
script.export("BamFolder", "${ArchiveFolder}"+bamFolder); |
6631 |
08 Mar 22 |
nicklas |
285 |
script.export("BamName", bamName.replace(".bam", "")); |
6653 |
23 Mar 22 |
nicklas |
286 |
script.export("StringTieFolder", "${ArchiveFolder}"+stringTieFolder); |
6653 |
23 Mar 22 |
nicklas |
287 |
script.export("ExternalStringTieName", externalStringTieName); |
6631 |
08 Mar 22 |
nicklas |
288 |
script.export("AvgReadLength", Integer.toString(readLength)); |
4664 |
30 Jan 18 |
nicklas |
289 |
script.newLine(); |
6653 |
23 Mar 22 |
nicklas |
290 |
script.cmd(stringtie_env); |
6665 |
05 Apr 22 |
nicklas |
291 |
if (debug) script.cmd(stringtie_envdebug); |
6631 |
08 Mar 22 |
nicklas |
292 |
script.cmd(stringtie_execute); |
5596 |
11 Sep 19 |
nicklas |
293 |
if (externalGroup != null) |
5596 |
11 Sep 19 |
nicklas |
294 |
{ |
6653 |
23 Mar 22 |
nicklas |
295 |
ScriptUtil.addChgrp(externalGroup, "${StringTieFolder}", stringTieName, null, script); |
5596 |
11 Sep 19 |
nicklas |
296 |
} |
4664 |
30 Jan 18 |
nicklas |
297 |
|
6674 |
11 Apr 22 |
nicklas |
298 |
JobDefinition jobDef = new JobDefinition("StringTie", jobConfig, batchConfig, stringTieJob); |
6631 |
08 Mar 22 |
nicklas |
299 |
jobDef.addFile(ScriptUtil.upload("stringtie.sh")); |
6631 |
08 Mar 22 |
nicklas |
300 |
jobDef.addFile(ScriptUtil.upload("reggie-utils.sh")); |
6631 |
08 Mar 22 |
nicklas |
301 |
jobDef.addFile(ScriptUtil.upload("stdwrap.sh")); |
4664 |
30 Jan 18 |
nicklas |
302 |
jobDef.setDebug(debug); |
4664 |
30 Jan 18 |
nicklas |
303 |
jobDef.setCmd(script.toString()); |
4664 |
30 Jan 18 |
nicklas |
304 |
jobDefs.add(jobDef); |
4664 |
30 Jan 18 |
nicklas |
305 |
} |
4664 |
30 Jan 18 |
nicklas |
306 |
|
4664 |
30 Jan 18 |
nicklas |
307 |
return jobDefs; |
4661 |
29 Jan 18 |
nicklas |
308 |
} |
4661 |
29 Jan 18 |
nicklas |
309 |
|
4664 |
30 Jan 18 |
nicklas |
310 |
/** |
4664 |
30 Jan 18 |
nicklas |
Job completion handler for mask/align jobs. The handler downloads the |
4664 |
30 Jan 18 |
nicklas |
'filter.out' file from the job folder and parses out number of |
4664 |
30 Jan 18 |
nicklas |
reads remaining. The information is stored as {@link Annotationtype#READS} |
4664 |
30 Jan 18 |
nicklas |
annotation on FilteredSequences items. |
4664 |
30 Jan 18 |
nicklas |
315 |
*/ |
4664 |
30 Jan 18 |
nicklas |
316 |
public static class StringTieJobCompletionHandler |
4664 |
30 Jan 18 |
nicklas |
317 |
implements JobCompletionHandler |
4664 |
30 Jan 18 |
nicklas |
318 |
{ |
7079 |
27 Mar 23 |
nicklas |
319 |
private static final ExtensionsLogger logger = |
7079 |
27 Mar 23 |
nicklas |
320 |
ExtensionsLog.getLogger(JobCompletionHandlerFactory.ID, true).wrap(LoggerFactory.getLogger(StringTieJobCompletionHandler.class)); |
4664 |
30 Jan 18 |
nicklas |
321 |
|
4664 |
30 Jan 18 |
nicklas |
322 |
public StringTieJobCompletionHandler() |
4664 |
30 Jan 18 |
nicklas |
323 |
{} |
4664 |
30 Jan 18 |
nicklas |
324 |
|
4664 |
30 Jan 18 |
nicklas |
325 |
@Override |
4664 |
30 Jan 18 |
nicklas |
326 |
public String jobCompleted(SessionControl sc, OpenGridSession session, Job job, JobStatus status) |
4664 |
30 Jan 18 |
nicklas |
327 |
{ |
4664 |
30 Jan 18 |
nicklas |
328 |
String jobName = status.getName(); |
4664 |
30 Jan 18 |
nicklas |
329 |
String files = session.getJobFileAsString(jobName, "files.out", "UTF-8"); |
4670 |
05 Feb 18 |
nicklas |
330 |
String msg = parseFiles(sc, job, files); |
4670 |
05 Feb 18 |
nicklas |
331 |
return "StringTie completed. " + msg; |
4664 |
30 Jan 18 |
nicklas |
332 |
} |
4664 |
30 Jan 18 |
nicklas |
333 |
|
4670 |
05 Feb 18 |
nicklas |
334 |
private String parseFiles(SessionControl sc, Job job, String filesOut) |
4664 |
30 Jan 18 |
nicklas |
335 |
{ |
4664 |
30 Jan 18 |
nicklas |
336 |
|
4664 |
30 Jan 18 |
nicklas |
337 |
DbControl dc = null; |
4670 |
05 Feb 18 |
nicklas |
338 |
String msg = null; |
4664 |
30 Jan 18 |
nicklas |
339 |
try |
4664 |
30 Jan 18 |
nicklas |
340 |
{ |
6599 |
22 Feb 22 |
nicklas |
341 |
dc = sc.newDbControl("Reggie: StringTie completed handler"); |
4664 |
30 Jan 18 |
nicklas |
342 |
|
4664 |
30 Jan 18 |
nicklas |
343 |
Rawbioassay rawStringTie = Rawbioassay.getByJob(dc, job); |
4664 |
30 Jan 18 |
nicklas |
344 |
RawBioAssay raw = rawStringTie.getItem(); |
4664 |
30 Jan 18 |
nicklas |
345 |
|
4664 |
30 Jan 18 |
nicklas |
// Create file links |
5553 |
12 Aug 19 |
nicklas |
347 |
boolean useExternalProjectArchive = Reggie.isExternalItem(raw.getName()); |
4664 |
30 Jan 18 |
nicklas |
348 |
FileServer fileArchive = useExternalProjectArchive ? Fileserver.EXTERNAL_ARCHIVE.load(dc) : Fileserver.PROJECT_ARCHIVE.load(dc); |
4664 |
30 Jan 18 |
nicklas |
349 |
String analysisDir = useExternalProjectArchive ? Reggie.EXTERNAL_ANALYSIS_DIR : Reggie.SECONDARY_ANALYSIS_DIR; |
4664 |
30 Jan 18 |
nicklas |
350 |
|
4664 |
30 Jan 18 |
nicklas |
351 |
String dataFilesFolder = (String)Annotationtype.DATA_FILES_FOLDER.getAnnotationValue(dc, raw); |
4664 |
30 Jan 18 |
nicklas |
352 |
String baseFolder = Reggie.convertDataFilesFolderToBaseFolder(dataFilesFolder); |
4664 |
30 Jan 18 |
nicklas |
353 |
Directory localDataDir = Directory.getNew(dc, new Path(analysisDir+baseFolder, Path.Type.DIRECTORY)); |
4664 |
30 Jan 18 |
nicklas |
354 |
|
4669 |
02 Feb 18 |
nicklas |
355 |
DataFileType rawData = Datafiletype.GENERIC_RAWDATA.load(dc); |
4669 |
02 Feb 18 |
nicklas |
356 |
ItemSubtype rawType = rawData.getGenericType(); |
4669 |
02 Feb 18 |
nicklas |
357 |
|
4664 |
30 Jan 18 |
nicklas |
358 |
int lineNo = 0; |
4664 |
30 Jan 18 |
nicklas |
359 |
for (String line : filesOut.split("\n")) |
4664 |
30 Jan 18 |
nicklas |
360 |
{ |
4664 |
30 Jan 18 |
nicklas |
361 |
lineNo++; |
4664 |
30 Jan 18 |
nicklas |
362 |
|
4664 |
30 Jan 18 |
nicklas |
363 |
File f = File.getFile(dc, localDataDir, line.substring(line.lastIndexOf("/")+1), true); |
4664 |
30 Jan 18 |
nicklas |
364 |
f.setFileServer(fileArchive); |
4664 |
30 Jan 18 |
nicklas |
365 |
String fileUrl = "sftp://" + fileArchive.getHost() + dataFilesFolder + "/" + f.getName(); |
4664 |
30 Jan 18 |
nicklas |
366 |
try |
4664 |
30 Jan 18 |
nicklas |
367 |
{ |
4664 |
30 Jan 18 |
nicklas |
368 |
f.setUrl(fileUrl, true); |
4664 |
30 Jan 18 |
nicklas |
369 |
} |
4664 |
30 Jan 18 |
nicklas |
370 |
catch (RuntimeException ex) |
4664 |
30 Jan 18 |
nicklas |
371 |
{ |
4664 |
30 Jan 18 |
nicklas |
372 |
f.setUrl(fileUrl, false); |
4664 |
30 Jan 18 |
nicklas |
373 |
} |
4664 |
30 Jan 18 |
nicklas |
374 |
|
4664 |
30 Jan 18 |
nicklas |
375 |
if (!f.isInDatabase()) |
4664 |
30 Jan 18 |
nicklas |
376 |
{ |
4664 |
30 Jan 18 |
nicklas |
377 |
dc.saveItem(f); |
4664 |
30 Jan 18 |
nicklas |
378 |
} |
4669 |
02 Feb 18 |
nicklas |
379 |
|
4669 |
02 Feb 18 |
nicklas |
380 |
if (f.getName().equals("gene.tsv")) |
4669 |
02 Feb 18 |
nicklas |
381 |
{ |
4669 |
02 Feb 18 |
nicklas |
382 |
f.setItemSubtype(rawType); |
4669 |
02 Feb 18 |
nicklas |
383 |
f.setMimeType("text/plain"); |
4669 |
02 Feb 18 |
nicklas |
384 |
f.setCharacterSet("UTF-8"); |
4669 |
02 Feb 18 |
nicklas |
385 |
FileSetMember member = raw.getFileSet().addMember(f, rawData); |
4669 |
02 Feb 18 |
nicklas |
386 |
|
4669 |
02 Feb 18 |
nicklas |
387 |
try |
4669 |
02 Feb 18 |
nicklas |
388 |
{ |
4669 |
02 Feb 18 |
nicklas |
389 |
int numUniqueGenes = parseAndCountGenesInGeneTsv(f); |
4669 |
02 Feb 18 |
nicklas |
390 |
member.setValid(true, null); |
4669 |
02 Feb 18 |
nicklas |
391 |
raw.setNumFileSpots(numUniqueGenes); |
4670 |
05 Feb 18 |
nicklas |
392 |
msg = numUniqueGenes + " genes found in gene.tsv"; |
4669 |
02 Feb 18 |
nicklas |
393 |
} |
4669 |
02 Feb 18 |
nicklas |
394 |
catch (IOException | RuntimeException ex) |
4669 |
02 Feb 18 |
nicklas |
395 |
{ |
4670 |
05 Feb 18 |
nicklas |
396 |
msg = "Could not parse gene.tsv (" + ex.getMessage() + ")"; |
4669 |
02 Feb 18 |
nicklas |
397 |
member.setValid(false, ex.getMessage()); |
4669 |
02 Feb 18 |
nicklas |
398 |
logger.warn("Could not parse file: " + f, ex); |
4669 |
02 Feb 18 |
nicklas |
399 |
} |
4669 |
02 Feb 18 |
nicklas |
400 |
} |
4669 |
02 Feb 18 |
nicklas |
401 |
else |
4669 |
02 Feb 18 |
nicklas |
402 |
{ |
4669 |
02 Feb 18 |
nicklas |
403 |
AnyToAny link = AnyToAny.getNewOrExisting(dc, raw, f.getName(), f, true); |
4669 |
02 Feb 18 |
nicklas |
404 |
if (!link.isInDatabase()) dc.saveItem(link); |
4669 |
02 Feb 18 |
nicklas |
405 |
} |
4664 |
30 Jan 18 |
nicklas |
406 |
|
4664 |
30 Jan 18 |
nicklas |
407 |
} |
4664 |
30 Jan 18 |
nicklas |
408 |
|
4664 |
30 Jan 18 |
nicklas |
409 |
dc.commit(); |
4664 |
30 Jan 18 |
nicklas |
410 |
} |
4664 |
30 Jan 18 |
nicklas |
411 |
finally |
4664 |
30 Jan 18 |
nicklas |
412 |
{ |
4664 |
30 Jan 18 |
nicklas |
413 |
if (dc != null) dc.close(); |
4664 |
30 Jan 18 |
nicklas |
414 |
} |
4670 |
05 Feb 18 |
nicklas |
415 |
|
4670 |
05 Feb 18 |
nicklas |
416 |
return msg == null ? "Could not find gene.tsv" : msg; |
4664 |
30 Jan 18 |
nicklas |
417 |
} |
4669 |
02 Feb 18 |
nicklas |
418 |
|
4669 |
02 Feb 18 |
nicklas |
419 |
/** |
4669 |
02 Feb 18 |
nicklas |
Counts the number of unique "Gene ID" values in the |
4669 |
02 Feb 18 |
nicklas |
gene.tsv file. |
4669 |
02 Feb 18 |
nicklas |
422 |
*/ |
4669 |
02 Feb 18 |
nicklas |
423 |
private int parseAndCountGenesInGeneTsv(File f) |
4669 |
02 Feb 18 |
nicklas |
424 |
throws IOException |
4669 |
02 Feb 18 |
nicklas |
425 |
{ |
4669 |
02 Feb 18 |
nicklas |
426 |
InputStream in = null; |
4669 |
02 Feb 18 |
nicklas |
427 |
Set<String> genes = new HashSet<>(); |
4669 |
02 Feb 18 |
nicklas |
428 |
try |
4669 |
02 Feb 18 |
nicklas |
429 |
{ |
4669 |
02 Feb 18 |
nicklas |
430 |
in = f.getDownloadStream(0); |
4669 |
02 Feb 18 |
nicklas |
431 |
|
4669 |
02 Feb 18 |
nicklas |
432 |
FlatFileParser ffp = new FlatFileParser(); |
4669 |
02 Feb 18 |
nicklas |
433 |
ffp.setDataHeaderRegexp(Pattern.compile("Gene ID\\t.*")); |
4669 |
02 Feb 18 |
nicklas |
434 |
ffp.setDataSplitterRegexp(Pattern.compile("\\t")); |
4669 |
02 Feb 18 |
nicklas |
435 |
|
4669 |
02 Feb 18 |
nicklas |
436 |
ffp.setInputStream(in, "UTF-8"); |
4669 |
02 Feb 18 |
nicklas |
437 |
|
4669 |
02 Feb 18 |
nicklas |
438 |
LineType headerLine = ffp.parseHeaders(); |
4669 |
02 Feb 18 |
nicklas |
439 |
int lineNo = ffp.getParsedLines(); |
4669 |
02 Feb 18 |
nicklas |
440 |
if (headerLine != LineType.DATA_HEADER) |
4669 |
02 Feb 18 |
nicklas |
441 |
{ |
4669 |
02 Feb 18 |
nicklas |
442 |
throw new IOException("File '" + f.getName() + "' line " + lineNo + ": Could not find header line starting with 'Gene ID{tab}...'"); |
4669 |
02 Feb 18 |
nicklas |
443 |
} |
4669 |
02 Feb 18 |
nicklas |
444 |
|
4669 |
02 Feb 18 |
nicklas |
445 |
while (ffp.hasMoreData()) |
4669 |
02 Feb 18 |
nicklas |
446 |
{ |
4669 |
02 Feb 18 |
nicklas |
447 |
FlatFileParser.Data line = ffp.nextData(); |
5364 |
16 Apr 19 |
nicklas |
448 |
String geneId = line.getString(0); |
4669 |
02 Feb 18 |
nicklas |
449 |
genes.add(geneId); |
4669 |
02 Feb 18 |
nicklas |
450 |
} |
4669 |
02 Feb 18 |
nicklas |
451 |
} |
4669 |
02 Feb 18 |
nicklas |
452 |
finally |
4669 |
02 Feb 18 |
nicklas |
453 |
{ |
4669 |
02 Feb 18 |
nicklas |
454 |
FileUtil.close(in); |
4669 |
02 Feb 18 |
nicklas |
455 |
} |
4669 |
02 Feb 18 |
nicklas |
456 |
return genes.size(); |
4669 |
02 Feb 18 |
nicklas |
457 |
} |
4664 |
30 Jan 18 |
nicklas |
458 |
} |
4661 |
29 Jan 18 |
nicklas |
459 |
} |