4288 |
09 Jan 17 |
nicklas |
1 |
package net.sf.basedb.reggie.grid; |
3069 |
09 Jan 15 |
nicklas |
2 |
|
3069 |
09 Jan 15 |
nicklas |
3 |
import java.util.ArrayList; |
4545 |
28 Jun 17 |
nicklas |
4 |
import java.util.Arrays; |
3069 |
09 Jan 15 |
nicklas |
5 |
import java.util.List; |
4543 |
27 Jun 17 |
nicklas |
6 |
import java.util.regex.Matcher; |
4543 |
27 Jun 17 |
nicklas |
7 |
import java.util.regex.Pattern; |
3069 |
09 Jan 15 |
nicklas |
8 |
|
4545 |
28 Jun 17 |
nicklas |
9 |
import org.slf4j.LoggerFactory; |
4545 |
28 Jun 17 |
nicklas |
10 |
|
3069 |
09 Jan 15 |
nicklas |
11 |
import net.sf.basedb.core.AnyToAny; |
3069 |
09 Jan 15 |
nicklas |
12 |
import net.sf.basedb.core.ArrayDesign; |
3069 |
09 Jan 15 |
nicklas |
13 |
import net.sf.basedb.core.DataFileType; |
3069 |
09 Jan 15 |
nicklas |
14 |
import net.sf.basedb.core.DbControl; |
3069 |
09 Jan 15 |
nicklas |
15 |
import net.sf.basedb.core.DerivedBioAssay; |
3069 |
09 Jan 15 |
nicklas |
16 |
import net.sf.basedb.core.Directory; |
3069 |
09 Jan 15 |
nicklas |
17 |
import net.sf.basedb.core.FeatureIdentificationMethod; |
3069 |
09 Jan 15 |
nicklas |
18 |
import net.sf.basedb.core.File; |
3069 |
09 Jan 15 |
nicklas |
19 |
import net.sf.basedb.core.FileServer; |
3069 |
09 Jan 15 |
nicklas |
20 |
import net.sf.basedb.core.FileSetMember; |
6423 |
28 Sep 21 |
nicklas |
21 |
import net.sf.basedb.core.IntegerParameterType; |
4598 |
27 Sep 17 |
nicklas |
22 |
import net.sf.basedb.core.ItemList; |
4272 |
16 Dec 16 |
nicklas |
23 |
import net.sf.basedb.core.ItemNotFoundException; |
3069 |
09 Jan 15 |
nicklas |
24 |
import net.sf.basedb.core.ItemParameterType; |
3069 |
09 Jan 15 |
nicklas |
25 |
import net.sf.basedb.core.ItemSubtype; |
3069 |
09 Jan 15 |
nicklas |
26 |
import net.sf.basedb.core.Job; |
3069 |
09 Jan 15 |
nicklas |
27 |
import net.sf.basedb.core.Path; |
3069 |
09 Jan 15 |
nicklas |
28 |
import net.sf.basedb.core.PluginConfiguration; |
3069 |
09 Jan 15 |
nicklas |
29 |
import net.sf.basedb.core.Protocol; |
3069 |
09 Jan 15 |
nicklas |
30 |
import net.sf.basedb.core.RawBioAssay; |
4543 |
27 Jun 17 |
nicklas |
31 |
import net.sf.basedb.core.Sample; |
3069 |
09 Jan 15 |
nicklas |
32 |
import net.sf.basedb.core.SessionControl; |
3069 |
09 Jan 15 |
nicklas |
33 |
import net.sf.basedb.core.Software; |
5544 |
06 Aug 19 |
nicklas |
34 |
import net.sf.basedb.core.StringParameterType; |
4272 |
16 Dec 16 |
nicklas |
35 |
import net.sf.basedb.opengrid.JobDefinition; |
4272 |
16 Dec 16 |
nicklas |
36 |
import net.sf.basedb.opengrid.JobStatus; |
4272 |
16 Dec 16 |
nicklas |
37 |
import net.sf.basedb.opengrid.OpenGridCluster; |
4272 |
16 Dec 16 |
nicklas |
38 |
import net.sf.basedb.opengrid.OpenGridSession; |
4272 |
16 Dec 16 |
nicklas |
39 |
import net.sf.basedb.opengrid.ScriptBuilder; |
4272 |
16 Dec 16 |
nicklas |
40 |
import net.sf.basedb.opengrid.config.ClusterConfig; |
4272 |
16 Dec 16 |
nicklas |
41 |
import net.sf.basedb.opengrid.config.JobConfig; |
4272 |
16 Dec 16 |
nicklas |
42 |
import net.sf.basedb.opengrid.service.JobCompletionHandler; |
3069 |
09 Jan 15 |
nicklas |
43 |
import net.sf.basedb.reggie.Reggie; |
3118 |
06 Feb 15 |
nicklas |
44 |
import net.sf.basedb.reggie.XmlConfig; |
3069 |
09 Jan 15 |
nicklas |
45 |
import net.sf.basedb.reggie.dao.AlignedSequences; |
3069 |
09 Jan 15 |
nicklas |
46 |
import net.sf.basedb.reggie.dao.Annotationtype; |
4598 |
27 Sep 17 |
nicklas |
47 |
import net.sf.basedb.reggie.dao.BiomaterialList; |
3069 |
09 Jan 15 |
nicklas |
48 |
import net.sf.basedb.reggie.dao.Datafiletype; |
4543 |
27 Jun 17 |
nicklas |
49 |
import net.sf.basedb.reggie.dao.DemuxedSequences; |
5790 |
13 Dec 19 |
nicklas |
50 |
import net.sf.basedb.reggie.dao.DoNotUse; |
3069 |
09 Jan 15 |
nicklas |
51 |
import net.sf.basedb.reggie.dao.Fileserver; |
3793 |
18 Mar 16 |
nicklas |
52 |
import net.sf.basedb.reggie.dao.Library; |
4545 |
28 Jun 17 |
nicklas |
53 |
import net.sf.basedb.reggie.dao.MaskedSequences; |
4543 |
27 Jun 17 |
nicklas |
54 |
import net.sf.basedb.reggie.dao.MergedSequences; |
5543 |
06 Aug 19 |
nicklas |
55 |
import net.sf.basedb.reggie.dao.Pipeline; |
4545 |
28 Jun 17 |
nicklas |
56 |
import net.sf.basedb.reggie.dao.Rawbioassay; |
4665 |
31 Jan 18 |
nicklas |
57 |
import net.sf.basedb.reggie.dao.Rawdatatype; |
4325 |
30 Jan 17 |
nicklas |
58 |
import net.sf.basedb.reggie.dao.Subtype; |
3069 |
09 Jan 15 |
nicklas |
59 |
import net.sf.basedb.util.Values; |
7079 |
27 Mar 23 |
nicklas |
60 |
import net.sf.basedb.util.extensions.logging.ExtensionsLog; |
7079 |
27 Mar 23 |
nicklas |
61 |
import net.sf.basedb.util.extensions.logging.ExtensionsLogger; |
3069 |
09 Jan 15 |
nicklas |
62 |
|
3069 |
09 Jan 15 |
nicklas |
63 |
/**
	Helper class for creating the items needed for executing cufflinks, as
	well as generating the cufflinks script and sending it to the cluster for
	execution.

	@author nicklas
	@since 3.0
*/
3069 |
09 Jan 15 |
nicklas |
71 |
public class CufflinksJobCreator |
6674 |
11 Apr 22 |
nicklas |
72 |
extends AbstractJobCreator |
3069 |
09 Jan 15 |
nicklas |
73 |
{ |
3069 |
09 Jan 15 |
nicklas |
74 |
|
4543 |
27 Jun 17 |
nicklas |
75 |
private Software maskingSoftware; |
4543 |
27 Jun 17 |
nicklas |
76 |
private Protocol maskingProtocol; |
4543 |
27 Jun 17 |
nicklas |
77 |
|
4543 |
27 Jun 17 |
nicklas |
78 |
private Software alignSoftware; |
4543 |
27 Jun 17 |
nicklas |
79 |
private Protocol alignProtocol; |
4543 |
27 Jun 17 |
nicklas |
80 |
|
3069 |
09 Jan 15 |
nicklas |
81 |
private Software software; |
3069 |
09 Jan 15 |
nicklas |
82 |
private Protocol protocol; |
3069 |
09 Jan 15 |
nicklas |
83 |
private ArrayDesign arrayDesign; |
3069 |
09 Jan 15 |
nicklas |
84 |
|
3069 |
09 Jan 15 |
nicklas |
85 |
public CufflinksJobCreator() |
3069 |
09 Jan 15 |
nicklas |
86 |
{} |
3069 |
09 Jan 15 |
nicklas |
87 |
|
4543 |
27 Jun 17 |
nicklas |
88 |
/** |
4543 |
27 Jun 17 |
nicklas |
Set the software item to set on created MaskedSequences. |
4543 |
27 Jun 17 |
nicklas |
@see DerivedBioAssay#setSoftware(Software) |
4543 |
27 Jun 17 |
nicklas |
91 |
*/ |
4543 |
27 Jun 17 |
nicklas |
92 |
public void setMaskingSoftware(Software software) |
4543 |
27 Jun 17 |
nicklas |
93 |
{ |
4543 |
27 Jun 17 |
nicklas |
94 |
this.maskingSoftware = software; |
4543 |
27 Jun 17 |
nicklas |
95 |
} |
3069 |
09 Jan 15 |
nicklas |
96 |
|
3069 |
09 Jan 15 |
nicklas |
97 |
/** |
4543 |
27 Jun 17 |
nicklas |
Set the protocol item to set on created MaskedSequences. |
4543 |
27 Jun 17 |
nicklas |
@see DerivedBioAssay#setProtocol(Protocol) |
4543 |
27 Jun 17 |
nicklas |
100 |
*/ |
4543 |
27 Jun 17 |
nicklas |
101 |
public void setMaskingProtocol(Protocol protocol) |
4543 |
27 Jun 17 |
nicklas |
102 |
{ |
4543 |
27 Jun 17 |
nicklas |
103 |
this.maskingProtocol = protocol; |
4543 |
27 Jun 17 |
nicklas |
104 |
} |
4543 |
27 Jun 17 |
nicklas |
105 |
|
4543 |
27 Jun 17 |
nicklas |
106 |
/** |
4543 |
27 Jun 17 |
nicklas |
Set the software item to set on created AlignedSequences. |
4543 |
27 Jun 17 |
nicklas |
@see DerivedBioAssay#setSoftware(Software) |
4543 |
27 Jun 17 |
nicklas |
109 |
*/ |
4543 |
27 Jun 17 |
nicklas |
110 |
public void setAlignSoftware(Software software) |
4543 |
27 Jun 17 |
nicklas |
111 |
{ |
4543 |
27 Jun 17 |
nicklas |
112 |
this.alignSoftware = software; |
4543 |
27 Jun 17 |
nicklas |
113 |
} |
4543 |
27 Jun 17 |
nicklas |
114 |
|
4543 |
27 Jun 17 |
nicklas |
115 |
/** |
4543 |
27 Jun 17 |
nicklas |
Set the protocol item to set on created AlignedSequences. |
4543 |
27 Jun 17 |
nicklas |
@see DerivedBioAssay#setProtocol(Protocol) |
4543 |
27 Jun 17 |
nicklas |
118 |
*/ |
4543 |
27 Jun 17 |
nicklas |
119 |
public void setAlignProtocol(Protocol protocol) |
4543 |
27 Jun 17 |
nicklas |
120 |
{ |
4543 |
27 Jun 17 |
nicklas |
121 |
this.alignProtocol = protocol; |
4543 |
27 Jun 17 |
nicklas |
122 |
} |
4543 |
27 Jun 17 |
nicklas |
123 |
|
4543 |
27 Jun 17 |
nicklas |
124 |
/** |
3069 |
09 Jan 15 |
nicklas |
Set the software item to set on created RawBioAssay:s. |
3069 |
09 Jan 15 |
nicklas |
@see RawBioAssay#setSoftware(Software) |
3069 |
09 Jan 15 |
nicklas |
127 |
*/ |
3069 |
09 Jan 15 |
nicklas |
128 |
public void setSoftware(Software software) |
3069 |
09 Jan 15 |
nicklas |
129 |
{ |
3069 |
09 Jan 15 |
nicklas |
130 |
this.software = software; |
3069 |
09 Jan 15 |
nicklas |
131 |
} |
3069 |
09 Jan 15 |
nicklas |
132 |
|
3069 |
09 Jan 15 |
nicklas |
133 |
/** |
3069 |
09 Jan 15 |
nicklas |
Set the protocol item to set on created RawBioAssay:s |
3069 |
09 Jan 15 |
nicklas |
@see RawBioAssay#setProtocol(Protocol) |
3069 |
09 Jan 15 |
nicklas |
136 |
*/ |
3069 |
09 Jan 15 |
nicklas |
137 |
public void setProtocol(Protocol protocol) |
3069 |
09 Jan 15 |
nicklas |
138 |
{ |
3069 |
09 Jan 15 |
nicklas |
139 |
this.protocol = protocol; |
3069 |
09 Jan 15 |
nicklas |
140 |
} |
3069 |
09 Jan 15 |
nicklas |
141 |
|
3069 |
09 Jan 15 |
nicklas |
142 |
/** |
3069 |
09 Jan 15 |
nicklas |
Set the array design item to set on created RawBioAssay:s |
3069 |
09 Jan 15 |
nicklas |
@see RawBioAssay#setArrayDesign(ArrayDesign) |
3069 |
09 Jan 15 |
nicklas |
145 |
*/ |
3069 |
09 Jan 15 |
nicklas |
146 |
public void setArrayDesign(ArrayDesign design) |
3069 |
09 Jan 15 |
nicklas |
147 |
{ |
3069 |
09 Jan 15 |
nicklas |
148 |
this.arrayDesign = design; |
3069 |
09 Jan 15 |
nicklas |
149 |
} |
3069 |
09 Jan 15 |
nicklas |
150 |
|
3069 |
09 Jan 15 |
nicklas |
151 |
/** |
4543 |
27 Jun 17 |
nicklas |
Create a child items for all given merged sequences and schedule |
4543 |
27 Jun 17 |
nicklas |
jobs on the given cluster for running tophat and cufflinks. |
3069 |
09 Jan 15 |
nicklas |
@return A list with the corresponding jobs in BASE |
3069 |
09 Jan 15 |
nicklas |
155 |
*/ |
4599 |
28 Sep 17 |
nicklas |
156 |
public List<JobDefinition> createTophatCufflinkJobs(DbControl dc, OpenGridCluster cluster, List<MergedSequences> mergedSequences) |
3069 |
09 Jan 15 |
nicklas |
157 |
{ |
3069 |
09 Jan 15 |
nicklas |
158 |
/* |
3069 |
09 Jan 15 |
nicklas |
System.out.println("createCufflinkJobs:"); |
3069 |
09 Jan 15 |
nicklas |
System.out.println("software:" + software); |
3069 |
09 Jan 15 |
nicklas |
System.out.println("protocol:" + protocol); |
3069 |
09 Jan 15 |
nicklas |
System.out.println("design:" + arrayDesign); |
3069 |
09 Jan 15 |
nicklas |
System.out.println("importer:" + rawImporter); |
3069 |
09 Jan 15 |
nicklas |
164 |
*/ |
3069 |
09 Jan 15 |
nicklas |
165 |
|
3069 |
09 Jan 15 |
nicklas |
166 |
SessionControl sc = dc.getSessionControl(); |
4272 |
16 Dec 16 |
nicklas |
167 |
|
4272 |
16 Dec 16 |
nicklas |
168 |
ClusterConfig clusterCfg = cluster.getConfig(); |
4272 |
16 Dec 16 |
nicklas |
169 |
XmlConfig cfg = Reggie.getConfig(cluster.getId()); |
4272 |
16 Dec 16 |
nicklas |
170 |
if (cfg == null) |
4272 |
16 Dec 16 |
nicklas |
171 |
{ |
4272 |
16 Dec 16 |
nicklas |
172 |
throw new ItemNotFoundException("No configuration in reggie-config.xml for cluster: " + cluster.getId()); |
4272 |
16 Dec 16 |
nicklas |
173 |
} |
3118 |
06 Feb 15 |
nicklas |
174 |
|
4543 |
27 Jun 17 |
nicklas |
175 |
String maskParameterSet = (String)Annotationtype.PARAMETER_SET.getAnnotationValue(dc, maskingSoftware); |
4543 |
27 Jun 17 |
nicklas |
176 |
String alignParameterSet = (String)Annotationtype.PARAMETER_SET.getAnnotationValue(dc, alignSoftware); |
3118 |
06 Feb 15 |
nicklas |
177 |
String parameterSet = (String)Annotationtype.PARAMETER_SET.getAnnotationValue(dc, software); |
3118 |
06 Feb 15 |
nicklas |
178 |
|
3069 |
09 Jan 15 |
nicklas |
// Get global options |
6693 |
22 Apr 22 |
nicklas |
180 |
String global_env = ScriptUtil.multilineIndent(cfg.getConfig("global-env")); |
6675 |
13 Apr 22 |
nicklas |
181 |
String projectArchive = cfg.getRequiredConfig("project-archive", null); |
6675 |
13 Apr 22 |
nicklas |
182 |
String externalArchive = cfg.getConfig("external-archive", null, projectArchive); |
3069 |
09 Jan 15 |
nicklas |
183 |
|
3069 |
09 Jan 15 |
nicklas |
// Options for the programs |
7372 |
06 Oct 23 |
nicklas |
185 |
String legacy_submit = cfg.getConfig("legacy/submit", alignParameterSet, null); |
7372 |
06 Oct 23 |
nicklas |
186 |
String legacy_submit_debug = cfg.getConfig("legacy/submit-debug", alignParameterSet, null); |
6675 |
13 Apr 22 |
nicklas |
187 |
String legacy_env = ScriptUtil.multilineIndent(cfg.getRequiredConfig("legacy/env", alignParameterSet)); |
6675 |
13 Apr 22 |
nicklas |
188 |
String legacy_envdebug = ScriptUtil.multilineIndent(cfg.getConfig("legacy/env-debug", alignParameterSet, null)); |
6675 |
13 Apr 22 |
nicklas |
189 |
String legacy_execute = ScriptUtil.multilineIndent(cfg.getConfig("legacy/execute", alignParameterSet, "./legacy.sh")); |
4543 |
27 Jun 17 |
nicklas |
190 |
|
6675 |
13 Apr 22 |
nicklas |
191 |
int align_adjust_mate_inner_dist = Values.getInt(cfg.getConfig("legacy/adjust-mate-inner-dist", alignParameterSet, "0")); |
6675 |
13 Apr 22 |
nicklas |
192 |
int align_adjust_mate_std_dev = Values.getInt(cfg.getConfig("legacy/adjust-mate-std-dev", alignParameterSet, "0")); |
3069 |
09 Jan 15 |
nicklas |
193 |
|
4598 |
27 Sep 17 |
nicklas |
// Selected items must be removed from this list |
4598 |
27 Sep 17 |
nicklas |
195 |
ItemList legacyPipeline = BiomaterialList.LEGACY_PIPELINE.load(dc); |
4598 |
27 Sep 17 |
nicklas |
196 |
|
4543 |
27 Jun 17 |
nicklas |
// Load common items |
4543 |
27 Jun 17 |
nicklas |
198 |
ItemSubtype maskedType = Subtype.MASKED_SEQUENCES.get(dc); |
4543 |
27 Jun 17 |
nicklas |
199 |
ItemSubtype alignType = Subtype.ALIGNED_SEQUENCES.get(dc); |
4543 |
27 Jun 17 |
nicklas |
200 |
|
4667 |
01 Feb 18 |
nicklas |
201 |
Rawdatatype cufflinksType = Rawdatatype.CUFFLINKS; |
3069 |
09 Jan 15 |
nicklas |
202 |
|
3069 |
09 Jan 15 |
nicklas |
// Job parameter types |
3069 |
09 Jan 15 |
nicklas |
204 |
ItemParameterType<PluginConfiguration> configType = new ItemParameterType<PluginConfiguration>(PluginConfiguration.class, null); |
3069 |
09 Jan 15 |
nicklas |
205 |
ItemParameterType<RawBioAssay> rawType = new ItemParameterType<RawBioAssay>(RawBioAssay.class, null); |
3069 |
09 Jan 15 |
nicklas |
206 |
|
4272 |
16 Dec 16 |
nicklas |
// Options common for all jobs |
4272 |
16 Dec 16 |
nicklas |
208 |
JobConfig jobConfig = new JobConfig(); |
4272 |
16 Dec 16 |
nicklas |
209 |
if (priority != null) jobConfig.setPriority(priority); |
7372 |
06 Oct 23 |
nicklas |
210 |
if (partition != null) jobConfig.setSbatchOption("partition", ScriptUtil.checkValidScriptParameter(partition)); |
7372 |
06 Oct 23 |
nicklas |
211 |
jobConfig.convertOptionsTo(clusterCfg.getType()); |
7372 |
06 Oct 23 |
nicklas |
212 |
if (submitOptionsOverride != null) |
7372 |
06 Oct 23 |
nicklas |
213 |
{ |
7372 |
06 Oct 23 |
nicklas |
214 |
ScriptUtil.addSubmitOptions(jobConfig, submitOptionsOverride, clusterCfg.getType()); |
7372 |
06 Oct 23 |
nicklas |
215 |
} |
7372 |
06 Oct 23 |
nicklas |
216 |
else |
7372 |
06 Oct 23 |
nicklas |
217 |
{ |
7372 |
06 Oct 23 |
nicklas |
218 |
ScriptUtil.addSubmitOptions(jobConfig, legacy_submit, clusterCfg.getType()); |
7372 |
06 Oct 23 |
nicklas |
219 |
if (debug) ScriptUtil.addSubmitOptions(jobConfig, legacy_submit_debug, clusterCfg.getType()); |
7372 |
06 Oct 23 |
nicklas |
220 |
} |
4272 |
16 Dec 16 |
nicklas |
221 |
|
3069 |
09 Jan 15 |
nicklas |
// We submit one job for each raw bioassay to the cluster |
4543 |
27 Jun 17 |
nicklas |
223 |
List<JobDefinition> jobDefs = new ArrayList<JobDefinition>(mergedSequences.size()); |
3069 |
09 Jan 15 |
nicklas |
224 |
|
4599 |
28 Sep 17 |
nicklas |
225 |
for (MergedSequences ms : mergedSequences) |
3069 |
09 Jan 15 |
nicklas |
226 |
{ |
4623 |
17 Nov 17 |
nicklas |
227 |
ms = MergedSequences.getById(dc, ms.getId()); // Ensure item is loaded in this transaction |
4623 |
17 Nov 17 |
nicklas |
228 |
|
4599 |
28 Sep 17 |
nicklas |
// Get some information about the aligned data that we need |
4599 |
28 Sep 17 |
nicklas |
230 |
DerivedBioAssay merged = ms.getDerivedBioAssay(); |
5364 |
16 Apr 19 |
nicklas |
231 |
legacyPipeline.removeItem(merged); |
3069 |
09 Jan 15 |
nicklas |
232 |
|
4599 |
28 Sep 17 |
nicklas |
233 |
Library lib = Library.get(merged.getExtract()); |
5596 |
11 Sep 19 |
nicklas |
234 |
boolean isExternal = Reggie.isExternalItem(merged.getName()); |
6675 |
13 Apr 22 |
nicklas |
235 |
String archiveFolder = isExternal ? externalArchive : projectArchive; |
4599 |
28 Sep 17 |
nicklas |
236 |
|
4599 |
28 Sep 17 |
nicklas |
237 |
Sample specimen = (Sample)lib.findSingleParent(dc, Subtype.SPECIMEN); |
4599 |
28 Sep 17 |
nicklas |
238 |
|
4599 |
28 Sep 17 |
nicklas |
239 |
String fastQFolder = (String)Annotationtype.DATA_FILES_FOLDER.getAnnotationValue(dc, merged); |
4599 |
28 Sep 17 |
nicklas |
240 |
int fragment_size_avg = ((Number)Annotationtype.FRAGMENT_SIZE_AVG.getAnnotationValue(dc, merged)).intValue(); |
6422 |
28 Sep 21 |
nicklas |
241 |
int fragment_size_std = ((Number)Annotationtype.FRAGMENT_SIZE_STDEV.getAnnotationValue(dc, merged)).intValue(); |
6422 |
28 Sep 21 |
nicklas |
// Get read length from FASTQ files or from the READ_STRING annotation if not available. |
4599 |
28 Sep 17 |
nicklas |
243 |
int totalReadSize = 0; |
6422 |
28 Sep 21 |
nicklas |
244 |
Integer readlength_avg_r1 = (Integer)Annotationtype.READLENGTH_AVG_R1.getAnnotationValue(dc, merged); |
6422 |
28 Sep 21 |
nicklas |
245 |
Integer readlength_avg_r2 = (Integer)Annotationtype.READLENGTH_AVG_R2.getAnnotationValue(dc, merged); |
6422 |
28 Sep 21 |
nicklas |
246 |
if (readlength_avg_r1 != null && readlength_avg_r2 != null) |
3069 |
09 Jan 15 |
nicklas |
247 |
{ |
6422 |
28 Sep 21 |
nicklas |
248 |
totalReadSize = readlength_avg_r1+readlength_avg_r2; |
6422 |
28 Sep 21 |
nicklas |
249 |
} |
6422 |
28 Sep 21 |
nicklas |
250 |
else |
6422 |
28 Sep 21 |
nicklas |
251 |
{ |
6422 |
28 Sep 21 |
nicklas |
252 |
List<DemuxedSequences> dxList = ms.getDemuxedSequences(dc); |
6422 |
28 Sep 21 |
nicklas |
253 |
for (DemuxedSequences dx : dxList) |
4543 |
27 Jun 17 |
nicklas |
254 |
{ |
6422 |
28 Sep 21 |
nicklas |
255 |
String readString = (String)Annotationtype.READ_STRING.getAnnotationValue(dc, dx.getItem()); |
6422 |
28 Sep 21 |
nicklas |
256 |
int rs = getTotalReadSize(readString); |
6422 |
28 Sep 21 |
nicklas |
257 |
if (totalReadSize == 0 || rs < totalReadSize) |
6422 |
28 Sep 21 |
nicklas |
258 |
{ |
6422 |
28 Sep 21 |
nicklas |
259 |
totalReadSize = rs; |
6422 |
28 Sep 21 |
nicklas |
260 |
} |
4543 |
27 Jun 17 |
nicklas |
261 |
} |
4599 |
28 Sep 17 |
nicklas |
262 |
} |
6423 |
28 Sep 21 |
nicklas |
263 |
int mateStdDev = fragment_size_std + align_adjust_mate_std_dev; |
6423 |
28 Sep 21 |
nicklas |
264 |
int mateInnerDist = Math.max(fragment_size_avg - totalReadSize + align_adjust_mate_inner_dist, 1-mateStdDev); |
6423 |
28 Sep 21 |
nicklas |
265 |
|
4599 |
28 Sep 17 |
nicklas |
// Create job |
4599 |
28 Sep 17 |
nicklas |
267 |
Job cufflinksJob = Job.getNew(dc, null, null, null); |
4599 |
28 Sep 17 |
nicklas |
268 |
cufflinksJob.setItemSubtype(Subtype.TOPHAT_CUFFLINKS_JOB.get(dc)); |
4599 |
28 Sep 17 |
nicklas |
269 |
cufflinksJob.setPluginVersion("reggie-"+Reggie.VERSION); |
4599 |
28 Sep 17 |
nicklas |
270 |
cufflinksJob.setSendMessage(Values.getBoolean(sc.getUserClientSetting("plugins.sendmessage"), false)); |
4599 |
28 Sep 17 |
nicklas |
271 |
cufflinksJob.setName("Run Tophat and Cufflinks " + merged.getName()); |
5546 |
07 Aug 19 |
nicklas |
272 |
cufflinksJob.setParameterValue("pipeline", new StringParameterType(), Pipeline.RNASEQ_LEGACY.getId()); |
6423 |
28 Sep 21 |
nicklas |
273 |
cufflinksJob.setParameterValue("--mate-inner-dist", new IntegerParameterType(), mateInnerDist); |
6423 |
28 Sep 21 |
nicklas |
274 |
cufflinksJob.setParameterValue("--mate-std-dev", new IntegerParameterType(), mateStdDev); |
4599 |
28 Sep 17 |
nicklas |
275 |
if (debug) cufflinksJob.setName(cufflinksJob.getName() + " (debug)"); |
6981 |
17 Jan 23 |
nicklas |
276 |
if (partition != null) cufflinksJob.setParameterValue("partition", new StringParameterType(), partition); |
7372 |
06 Oct 23 |
nicklas |
277 |
if (submitOptionsOverride != null) cufflinksJob.setParameterValue("jobOptions", new StringParameterType(), submitOptionsOverride); |
4599 |
28 Sep 17 |
nicklas |
278 |
dc.saveItem(cufflinksJob); |
4599 |
28 Sep 17 |
nicklas |
279 |
|
4599 |
28 Sep 17 |
nicklas |
// Created MASKED derived bioassay set |
4599 |
28 Sep 17 |
nicklas |
281 |
String maskedName = ms.getNextMaskedSequencesName(dc); |
4599 |
28 Sep 17 |
nicklas |
282 |
DerivedBioAssay masked = DerivedBioAssay.getNew(dc, false, cufflinksJob); |
4599 |
28 Sep 17 |
nicklas |
283 |
masked.setItemSubtype(maskedType); |
5546 |
07 Aug 19 |
nicklas |
284 |
Pipeline.RNASEQ_LEGACY.setAnnotation(dc, masked); |
4881 |
03 Jul 18 |
nicklas |
285 |
masked.setName(maskedName); |
4599 |
28 Sep 17 |
nicklas |
286 |
masked.setExtract(merged.getExtract()); |
4599 |
28 Sep 17 |
nicklas |
287 |
masked.setSoftware(maskingSoftware); |
4599 |
28 Sep 17 |
nicklas |
288 |
masked.setProtocol(maskingProtocol); |
4599 |
28 Sep 17 |
nicklas |
289 |
masked.addParent(merged); |
5790 |
13 Dec 19 |
nicklas |
290 |
DoNotUse.copyDoNotUseAnnotations(dc, merged, masked, false); |
4599 |
28 Sep 17 |
nicklas |
291 |
dc.saveItem(masked); |
4599 |
28 Sep 17 |
nicklas |
292 |
|
4599 |
28 Sep 17 |
nicklas |
// Create ALIGNED derived bioassay set |
5705 |
06 Nov 19 |
nicklas |
294 |
String alignedName = masked.getName()+"." + Subtype.ALIGNED_SEQUENCES.getItemSuffix(); // This is safe since the masked item is a new item |
4599 |
28 Sep 17 |
nicklas |
295 |
DerivedBioAssay aligned = DerivedBioAssay.getNew(dc, false, cufflinksJob); |
4599 |
28 Sep 17 |
nicklas |
296 |
aligned.setItemSubtype(alignType); |
5546 |
07 Aug 19 |
nicklas |
297 |
Pipeline.RNASEQ_LEGACY.setAnnotation(dc, aligned); |
4881 |
03 Jul 18 |
nicklas |
298 |
aligned.setName(alignedName); |
4599 |
28 Sep 17 |
nicklas |
299 |
aligned.setExtract(masked.getExtract()); |
4599 |
28 Sep 17 |
nicklas |
300 |
aligned.setSoftware(alignSoftware); |
4599 |
28 Sep 17 |
nicklas |
301 |
aligned.setProtocol(alignProtocol); |
4599 |
28 Sep 17 |
nicklas |
302 |
aligned.addParent(masked); |
5790 |
13 Dec 19 |
nicklas |
303 |
DoNotUse.copyDoNotUseAnnotations(dc, merged, aligned, false); |
4599 |
28 Sep 17 |
nicklas |
304 |
dc.saveItem(aligned); |
4543 |
27 Jun 17 |
nicklas |
305 |
|
4599 |
28 Sep 17 |
nicklas |
// Create raw bioassay |
5705 |
06 Nov 19 |
nicklas |
307 |
String rawName = aligned.getName() + "." + cufflinksType.getItemSuffix(); // This is safe since the aligned item is a new item |
4667 |
01 Feb 18 |
nicklas |
308 |
RawBioAssay raw = cufflinksType.createRawBioAssay(dc); |
5546 |
07 Aug 19 |
nicklas |
309 |
Pipeline.RNASEQ_LEGACY.setAnnotation(dc, raw); |
4599 |
28 Sep 17 |
nicklas |
310 |
raw.setArrayDesign(arrayDesign); |
4599 |
28 Sep 17 |
nicklas |
311 |
raw.getRawDataBatcher(FeatureIdentificationMethod.FEATURE_ID, cufflinksJob); |
4881 |
03 Jul 18 |
nicklas |
312 |
raw.setName(rawName); |
4599 |
28 Sep 17 |
nicklas |
313 |
raw.setParentExtract(lib.getExtract()); |
4599 |
28 Sep 17 |
nicklas |
314 |
raw.setSoftware(software); |
4599 |
28 Sep 17 |
nicklas |
315 |
raw.setProtocol(protocol); |
4599 |
28 Sep 17 |
nicklas |
316 |
raw.setParentBioAssay(aligned); |
5790 |
13 Dec 19 |
nicklas |
317 |
DoNotUse.copyDoNotUseAnnotations(dc, merged, raw, false); |
4599 |
28 Sep 17 |
nicklas |
318 |
dc.saveItem(raw); |
4599 |
28 Sep 17 |
nicklas |
319 |
|
4599 |
28 Sep 17 |
nicklas |
320 |
String cufflinksFolder = fastQFolder + "/"+raw.getName().substring(merged.getName().length()+1); |
4599 |
28 Sep 17 |
nicklas |
321 |
if (debug && !cufflinksFolder.startsWith("/debug")) |
4599 |
28 Sep 17 |
nicklas |
322 |
{ |
4599 |
28 Sep 17 |
nicklas |
323 |
cufflinksFolder = "/debug" + cufflinksFolder; |
4599 |
28 Sep 17 |
nicklas |
324 |
} |
4599 |
28 Sep 17 |
nicklas |
325 |
Annotationtype.DATA_FILES_FOLDER.setAnnotationValue(dc, raw, cufflinksFolder); |
4599 |
28 Sep 17 |
nicklas |
326 |
if (autoConfirm) |
4599 |
28 Sep 17 |
nicklas |
327 |
{ |
4599 |
28 Sep 17 |
nicklas |
328 |
Annotationtype.AUTO_PROCESSING.setAnnotationValue(dc, raw, "AutoConfirm"); |
4599 |
28 Sep 17 |
nicklas |
329 |
} |
4599 |
28 Sep 17 |
nicklas |
330 |
cufflinksJob.setParameterValue("rawBioAssay", rawType, raw); |
3755 |
18 Feb 16 |
nicklas |
331 |
|
4599 |
28 Sep 17 |
nicklas |
// Checks to make sure no bad things are included in script file |
4599 |
28 Sep 17 |
nicklas |
333 |
ScriptUtil.checkValidPath(cufflinksFolder, true, true); |
4599 |
28 Sep 17 |
nicklas |
334 |
ScriptUtil.checkValidScriptParameter(raw.getName()); |
4599 |
28 Sep 17 |
nicklas |
335 |
|
4599 |
28 Sep 17 |
nicklas |
336 |
ScriptBuilder script = new ScriptBuilder(); |
6665 |
05 Apr 22 |
nicklas |
337 |
script.cmd(debug ? "set -ex" : "set -e"); |
5596 |
11 Sep 19 |
nicklas |
// Set file permissions based on consent or external group! |
5596 |
11 Sep 19 |
nicklas |
339 |
String externalGroup = isExternal ? Reggie.getExternalGroup(merged.getName()) : null; |
5596 |
11 Sep 19 |
nicklas |
340 |
ScriptUtil.setUmaskForItem(dc, lib, externalGroup, script); |
4599 |
28 Sep 17 |
nicklas |
341 |
script.newLine(); |
6693 |
22 Apr 22 |
nicklas |
342 |
script.cmd(global_env); |
6675 |
13 Apr 22 |
nicklas |
343 |
script.export("ArchiveFolder", archiveFolder); |
6675 |
13 Apr 22 |
nicklas |
344 |
script.export("FastqFolder", "${ArchiveFolder}"+fastQFolder); |
6675 |
13 Apr 22 |
nicklas |
345 |
script.export("CufflinksFolder", "${ArchiveFolder}"+cufflinksFolder); |
4599 |
28 Sep 17 |
nicklas |
346 |
script.newLine(); |
6675 |
13 Apr 22 |
nicklas |
347 |
script.cmd(legacy_env); |
6675 |
13 Apr 22 |
nicklas |
348 |
if (debug) script.cmd(legacy_envdebug); |
6675 |
13 Apr 22 |
nicklas |
349 |
script.export("TophatOptions", "${TophatOptions} --mate-inner-dist "+mateInnerDist+" --mate-std-dev "+mateStdDev); |
4599 |
28 Sep 17 |
nicklas |
350 |
script.newLine(); |
6675 |
13 Apr 22 |
nicklas |
351 |
script.cmd(legacy_execute); |
4599 |
28 Sep 17 |
nicklas |
352 |
script.newLine(); |
5596 |
11 Sep 19 |
nicklas |
353 |
if (externalGroup != null) |
5596 |
11 Sep 19 |
nicklas |
354 |
{ |
5930 |
06 May 20 |
nicklas |
355 |
ScriptUtil.addChgrp(externalGroup, "${CufflinksFolder}", rawName, null, script); |
5596 |
11 Sep 19 |
nicklas |
356 |
} |
6675 |
13 Apr 22 |
nicklas |
357 |
|
6674 |
11 Apr 22 |
nicklas |
358 |
JobDefinition jobDef = new JobDefinition("Cufflinks", jobConfig, batchConfig, cufflinksJob); |
6675 |
13 Apr 22 |
nicklas |
359 |
jobDef.addFile(ScriptUtil.upload("legacy.sh")); |
6675 |
13 Apr 22 |
nicklas |
360 |
jobDef.addFile(ScriptUtil.upload("reggie-utils.sh")); |
6675 |
13 Apr 22 |
nicklas |
361 |
jobDef.addFile(ScriptUtil.upload("stdwrap.sh")); |
6675 |
13 Apr 22 |
nicklas |
362 |
jobDef.addFile(ScriptUtil.upload("alignment_statistics.sh")); |
6675 |
13 Apr 22 |
nicklas |
363 |
jobDef.addFile(ScriptUtil.upload("singlecolumnaverager.awk")); |
6675 |
13 Apr 22 |
nicklas |
364 |
jobDef.addFile(ScriptUtil.upload("fix_cufflinks_tracking_id.sh")); |
4599 |
28 Sep 17 |
nicklas |
365 |
jobDef.setDebug(debug); |
4599 |
28 Sep 17 |
nicklas |
366 |
jobDef.setCmd(script.toString()); |
4599 |
28 Sep 17 |
nicklas |
367 |
jobDefs.add(jobDef); |
3069 |
09 Jan 15 |
nicklas |
368 |
} |
4599 |
28 Sep 17 |
nicklas |
369 |
|
3069 |
09 Jan 15 |
nicklas |
370 |
|
4599 |
28 Sep 17 |
nicklas |
371 |
return jobDefs; |
3069 |
09 Jan 15 |
nicklas |
372 |
} |
3069 |
09 Jan 15 |
nicklas |
373 |
|
3069 |
09 Jan 15 |
nicklas |
374 |
/** |
4543 |
27 Jun 17 |
nicklas |
Sum all parts of the read string that generate |
4543 |
27 Jun 17 |
nicklas |
an ouput read (eg. all T and S) |
4543 |
27 Jun 17 |
nicklas |
377 |
*/ |
4543 |
27 Jun 17 |
nicklas |
378 |
public int getTotalReadSize(String readString) |
4543 |
27 Jun 17 |
nicklas |
379 |
{ |
4543 |
27 Jun 17 |
nicklas |
380 |
Pattern p = Pattern.compile("(\\d+)(T|S)"); |
4543 |
27 Jun 17 |
nicklas |
381 |
Matcher m = p.matcher(readString); |
4543 |
27 Jun 17 |
nicklas |
382 |
int totalReadSize = 0; |
4543 |
27 Jun 17 |
nicklas |
383 |
while (m.find()) |
4543 |
27 Jun 17 |
nicklas |
384 |
{ |
4543 |
27 Jun 17 |
nicklas |
385 |
totalReadSize += Values.getInt(m.group(1)); |
4543 |
27 Jun 17 |
nicklas |
386 |
} |
4543 |
27 Jun 17 |
nicklas |
387 |
return totalReadSize; |
4543 |
27 Jun 17 |
nicklas |
388 |
} |
4543 |
27 Jun 17 |
nicklas |
389 |
|
4543 |
27 Jun 17 |
nicklas |
390 |
/** |
3069 |
09 Jan 15 |
nicklas |
Job completion handler for cufflinks jobs. |
3069 |
09 Jan 15 |
nicklas |
392 |
*/ |
3069 |
09 Jan 15 |
nicklas |
393 |
public static class CufflinksJobCompletionHandler |
3069 |
09 Jan 15 |
nicklas |
394 |
implements JobCompletionHandler |
3069 |
09 Jan 15 |
nicklas |
395 |
{ |
7079 |
27 Mar 23 |
nicklas |
396 |
private static final ExtensionsLogger logger = |
7079 |
27 Mar 23 |
nicklas |
397 |
ExtensionsLog.getLogger(JobCompletionHandlerFactory.ID, true).wrap(LoggerFactory.getLogger(CufflinksJobCompletionHandler.class)); |
3069 |
09 Jan 15 |
nicklas |
398 |
|
3069 |
09 Jan 15 |
nicklas |
399 |
public CufflinksJobCompletionHandler() |
3069 |
09 Jan 15 |
nicklas |
400 |
{} |
3069 |
09 Jan 15 |
nicklas |
401 |
|
3069 |
09 Jan 15 |
nicklas |
402 |
@Override |
4272 |
16 Dec 16 |
nicklas |
403 |
public String jobCompleted(SessionControl sc, OpenGridSession session, Job job, JobStatus status) |
3069 |
09 Jan 15 |
nicklas |
404 |
{ |
4272 |
16 Dec 16 |
nicklas |
405 |
|
4272 |
16 Dec 16 |
nicklas |
406 |
String jobName = status.getName(); |
4545 |
28 Jun 17 |
nicklas |
407 |
String masked = session.getJobFileAsString(jobName, "masked.out", "UTF-8"); |
4545 |
28 Jun 17 |
nicklas |
408 |
String alignStatistics = session.getJobFileAsString(jobName, "alignment_statistics.out", "UTF-8"); |
4545 |
28 Jun 17 |
nicklas |
409 |
String picardMetrics = session.getJobFileAsString(jobName, "accepted_hits_picardmetrics.csv", "UTF-8"); |
4545 |
28 Jun 17 |
nicklas |
410 |
String fragments = session.getJobFileAsString(jobName, "fragments.out", "UTF-8"); |
4545 |
28 Jun 17 |
nicklas |
411 |
String files = session.getJobFileAsString(jobName, "files.out", "UTF-8"); |
3069 |
09 Jan 15 |
nicklas |
412 |
|
4545 |
28 Jun 17 |
nicklas |
413 |
Metrics metrics = parseCufflinksOut(sc, job, masked, alignStatistics, picardMetrics, fragments, files); |
4545 |
28 Jun 17 |
nicklas |
414 |
String msg = Values.formatNumber(metrics.numReadsAfterMask/1000000f, 1) + "M reads after mask; "; |
4545 |
28 Jun 17 |
nicklas |
415 |
msg += Values.formatNumber(metrics.numReadsAfterAlign/1000000f, 1) + "M reads after alignment; "; |
4676 |
08 Feb 18 |
nicklas |
416 |
msg += Values.formatNumber(metrics.fractionDuplication * 100, 1) + "% duplicates"; |
4545 |
28 Jun 17 |
nicklas |
417 |
msg += "; Cufflinks completed."; |
3069 |
09 Jan 15 |
nicklas |
418 |
return msg; |
3069 |
09 Jan 15 |
nicklas |
419 |
} |
3069 |
09 Jan 15 |
nicklas |
420 |
|
4545 |
28 Jun 17 |
nicklas |
421 |
private Metrics parseCufflinksOut(SessionControl sc, Job job, String maskedOut, String alignOut, String picardMetrics, String fragments, String filesOut) |
3069 |
09 Jan 15 |
nicklas |
422 |
{ |
4545 |
28 Jun 17 |
nicklas |
423 |
Metrics metrics = new Metrics(); |
3069 |
09 Jan 15 |
nicklas |
424 |
|
4545 |
28 Jun 17 |
nicklas |
425 |
Pattern p = Pattern.compile("\\s+(\\d+).*aligned concordantly 0 times.*"); |
4545 |
28 Jun 17 |
nicklas |
426 |
for (String line : maskedOut.split("\n")) |
4545 |
28 Jun 17 |
nicklas |
427 |
{ |
4545 |
28 Jun 17 |
nicklas |
428 |
Matcher m = p.matcher(line); |
4545 |
28 Jun 17 |
nicklas |
429 |
if (m.matches()) |
4545 |
28 Jun 17 |
nicklas |
430 |
{ |
4545 |
28 Jun 17 |
nicklas |
431 |
metrics.numReadsAfterMask = Values.getLong(m.group(1), null); |
4545 |
28 Jun 17 |
nicklas |
432 |
if (logger.isDebugEnabled()) |
4545 |
28 Jun 17 |
nicklas |
433 |
{ |
4545 |
28 Jun 17 |
nicklas |
434 |
logger.debug("Found match: " + line + "; numReadsAfterMask="+metrics.numReadsAfterMask); |
4545 |
28 Jun 17 |
nicklas |
435 |
} |
4545 |
28 Jun 17 |
nicklas |
436 |
break; |
4545 |
28 Jun 17 |
nicklas |
437 |
} |
4545 |
28 Jun 17 |
nicklas |
438 |
} |
4545 |
28 Jun 17 |
nicklas |
439 |
|
4545 |
28 Jun 17 |
nicklas |
440 |
p = Pattern.compile("(\\d+)"); |
4545 |
28 Jun 17 |
nicklas |
441 |
for (String line : alignOut.split("\n")) |
4545 |
28 Jun 17 |
nicklas |
442 |
{ |
4545 |
28 Jun 17 |
nicklas |
443 |
Matcher m = p.matcher(line); |
4545 |
28 Jun 17 |
nicklas |
444 |
if (m.matches()) |
4545 |
28 Jun 17 |
nicklas |
445 |
{ |
4545 |
28 Jun 17 |
nicklas |
446 |
metrics.numReadsAfterAlign = Values.getLong(m.group(1), null); |
4545 |
28 Jun 17 |
nicklas |
447 |
if (logger.isDebugEnabled()) |
4545 |
28 Jun 17 |
nicklas |
448 |
{ |
4545 |
28 Jun 17 |
nicklas |
449 |
logger.debug("Found match: " + line + "; numReadsAfterAlign="+metrics.numReadsAfterAlign); |
4545 |
28 Jun 17 |
nicklas |
450 |
} |
4545 |
28 Jun 17 |
nicklas |
451 |
break; |
4545 |
28 Jun 17 |
nicklas |
452 |
} |
4545 |
28 Jun 17 |
nicklas |
453 |
} |
4545 |
28 Jun 17 |
nicklas |
454 |
|
4545 |
28 Jun 17 |
nicklas |
455 |
int readPairsExaminedIndex = -1; |
4545 |
28 Jun 17 |
nicklas |
456 |
int readPairDuplicatesIndex = -1; |
4545 |
28 Jun 17 |
nicklas |
457 |
int percentDuplicationIndex = -1; |
4545 |
28 Jun 17 |
nicklas |
458 |
|
4545 |
28 Jun 17 |
nicklas |
459 |
for (String line : picardMetrics.split("\n")) |
4545 |
28 Jun 17 |
nicklas |
460 |
{ |
4545 |
28 Jun 17 |
nicklas |
461 |
String[] cols = line.split("\t"); |
4545 |
28 Jun 17 |
nicklas |
462 |
if (cols.length >= 9) |
4545 |
28 Jun 17 |
nicklas |
463 |
{ |
4545 |
28 Jun 17 |
nicklas |
464 |
if (readPairsExaminedIndex == -1) |
4545 |
28 Jun 17 |
nicklas |
465 |
{ |
4545 |
28 Jun 17 |
nicklas |
466 |
List<String> colsA = Arrays.asList(cols); |
4545 |
28 Jun 17 |
nicklas |
467 |
readPairsExaminedIndex = colsA.indexOf("READ_PAIRS_EXAMINED"); |
4545 |
28 Jun 17 |
nicklas |
468 |
readPairDuplicatesIndex = colsA.indexOf("READ_PAIR_DUPLICATES"); |
4545 |
28 Jun 17 |
nicklas |
469 |
percentDuplicationIndex = colsA.indexOf("PERCENT_DUPLICATION"); |
4545 |
28 Jun 17 |
nicklas |
470 |
} |
4545 |
28 Jun 17 |
nicklas |
471 |
else |
4545 |
28 Jun 17 |
nicklas |
472 |
{ |
4545 |
28 Jun 17 |
nicklas |
473 |
metrics.readPairsExamined = Values.getLong(cols[readPairsExaminedIndex], null); |
4545 |
28 Jun 17 |
nicklas |
474 |
metrics.readPairDuplicates = Values.getLong(cols[readPairDuplicatesIndex], null); |
4545 |
28 Jun 17 |
nicklas |
475 |
metrics.fractionDuplication = Values.getFloat(cols[percentDuplicationIndex], null); |
4545 |
28 Jun 17 |
nicklas |
476 |
} |
4545 |
28 Jun 17 |
nicklas |
477 |
} |
4545 |
28 Jun 17 |
nicklas |
478 |
} |
4545 |
28 Jun 17 |
nicklas |
479 |
|
4545 |
28 Jun 17 |
nicklas |
// Fragments |
4545 |
28 Jun 17 |
nicklas |
481 |
p = Pattern.compile("(\\d+)\\t(\\d+\\.?\\d*)\\t(\\d+\\.?\\d*)"); |
4545 |
28 Jun 17 |
nicklas |
482 |
for (String line : fragments.split("\n")) |
4545 |
28 Jun 17 |
nicklas |
483 |
{ |
4545 |
28 Jun 17 |
nicklas |
484 |
Matcher m = p.matcher(line); |
4545 |
28 Jun 17 |
nicklas |
485 |
if (m.matches()) |
4545 |
28 Jun 17 |
nicklas |
486 |
{ |
4545 |
28 Jun 17 |
nicklas |
487 |
metrics.fragmentSizeCount = Values.getInt(m.group(1), -1); |
4545 |
28 Jun 17 |
nicklas |
488 |
metrics.fragmentSizeAvg = Values.getInt(m.group(2), -1); |
4545 |
28 Jun 17 |
nicklas |
489 |
metrics.fragmentSizeStd = Values.getInt(m.group(3), -1); |
4545 |
28 Jun 17 |
nicklas |
490 |
} |
4545 |
28 Jun 17 |
nicklas |
491 |
} |
4545 |
28 Jun 17 |
nicklas |
492 |
|
3069 |
09 Jan 15 |
nicklas |
493 |
DbControl dc = null; |
3069 |
09 Jan 15 |
nicklas |
494 |
try |
3069 |
09 Jan 15 |
nicklas |
495 |
{ |
6599 |
22 Feb 22 |
nicklas |
496 |
dc = sc.newDbControl("Reggie: Cufflinks completed handler"); |
3069 |
09 Jan 15 |
nicklas |
497 |
|
4545 |
28 Jun 17 |
nicklas |
498 |
Rawbioassay rawCufflinks = Rawbioassay.getByJob(dc, job); |
4545 |
28 Jun 17 |
nicklas |
499 |
AlignedSequences alignedSequences = rawCufflinks.getAlignedSequences(dc); |
4545 |
28 Jun 17 |
nicklas |
500 |
MaskedSequences maskedSequences = alignedSequences.getMaskedSequences(dc); |
3069 |
09 Jan 15 |
nicklas |
501 |
|
4545 |
28 Jun 17 |
nicklas |
502 |
RawBioAssay raw = rawCufflinks.getItem(); |
4545 |
28 Jun 17 |
nicklas |
503 |
DerivedBioAssay aligned = alignedSequences.getItem(); |
4545 |
28 Jun 17 |
nicklas |
504 |
DerivedBioAssay masked = maskedSequences.getItem(); |
4545 |
28 Jun 17 |
nicklas |
505 |
|
4545 |
28 Jun 17 |
nicklas |
506 |
Annotationtype.PM_READS.setAnnotationValue(dc, masked, metrics.numReadsAfterMask); |
4545 |
28 Jun 17 |
nicklas |
507 |
Annotationtype.ALIGNED_PAIRS.setAnnotationValue(dc, aligned, metrics.numReadsAfterAlign); |
4545 |
28 Jun 17 |
nicklas |
508 |
Annotationtype.READ_PAIRS_EXAMINED.setAnnotationValue(dc, aligned, metrics.readPairsExamined); |
4545 |
28 Jun 17 |
nicklas |
509 |
Annotationtype.READ_PAIR_DUPLICATES.setAnnotationValue(dc, aligned, metrics.readPairDuplicates); |
4545 |
28 Jun 17 |
nicklas |
510 |
Annotationtype.FRACTION_DUPLICATION.setAnnotationValue(dc, aligned, metrics.fractionDuplication); |
4545 |
28 Jun 17 |
nicklas |
511 |
Annotationtype.FRAGMENT_SIZE_AVG.setAnnotationValue(dc, aligned, metrics.fragmentSizeAvg); |
4545 |
28 Jun 17 |
nicklas |
512 |
Annotationtype.FRAGMENT_SIZE_STDEV.setAnnotationValue(dc, aligned, metrics.fragmentSizeStd); |
4545 |
28 Jun 17 |
nicklas |
513 |
|
3069 |
09 Jan 15 |
nicklas |
// Create file links |
5553 |
12 Aug 19 |
nicklas |
515 |
boolean useExternalProjectArchive = Reggie.isExternalItem(raw.getName()); |
3606 |
16 Nov 15 |
nicklas |
516 |
FileServer fileArchive = useExternalProjectArchive ? Fileserver.EXTERNAL_ARCHIVE.load(dc) : Fileserver.PROJECT_ARCHIVE.load(dc); |
3606 |
16 Nov 15 |
nicklas |
517 |
String analysisDir = useExternalProjectArchive ? Reggie.EXTERNAL_ANALYSIS_DIR : Reggie.SECONDARY_ANALYSIS_DIR; |
3606 |
16 Nov 15 |
nicklas |
518 |
|
3069 |
09 Jan 15 |
nicklas |
519 |
String dataFilesFolder = (String)Annotationtype.DATA_FILES_FOLDER.getAnnotationValue(dc, raw); |
3069 |
09 Jan 15 |
nicklas |
520 |
String baseFolder = Reggie.convertDataFilesFolderToBaseFolder(dataFilesFolder); |
3606 |
16 Nov 15 |
nicklas |
521 |
Directory localDataDir = Directory.getNew(dc, new Path(analysisDir+baseFolder, Path.Type.DIRECTORY)); |
3069 |
09 Jan 15 |
nicklas |
522 |
|
3069 |
09 Jan 15 |
nicklas |
523 |
DataFileType fpkmData = Datafiletype.FPKM.load(dc); |
3069 |
09 Jan 15 |
nicklas |
524 |
ItemSubtype fpkmType = fpkmData.getGenericType(); |
3069 |
09 Jan 15 |
nicklas |
525 |
|
4545 |
28 Jun 17 |
nicklas |
526 |
DataFileType bamData = Datafiletype.BAM.load(dc); |
4545 |
28 Jun 17 |
nicklas |
527 |
ItemSubtype bamType = bamData.getGenericType(); |
4545 |
28 Jun 17 |
nicklas |
528 |
|
3069 |
09 Jan 15 |
nicklas |
529 |
int lineNo = 0; |
3069 |
09 Jan 15 |
nicklas |
530 |
File fpkmFile = null; |
3069 |
09 Jan 15 |
nicklas |
531 |
for (String line : filesOut.split("\n")) |
3069 |
09 Jan 15 |
nicklas |
532 |
{ |
3069 |
09 Jan 15 |
nicklas |
533 |
lineNo++; |
3069 |
09 Jan 15 |
nicklas |
534 |
|
3069 |
09 Jan 15 |
nicklas |
535 |
File f = File.getFile(dc, localDataDir, line.substring(line.lastIndexOf("/")+1), true); |
3606 |
16 Nov 15 |
nicklas |
536 |
f.setFileServer(fileArchive); |
3606 |
16 Nov 15 |
nicklas |
537 |
String fileUrl = "sftp://" + fileArchive.getHost() + dataFilesFolder + "/" + f.getName(); |
3069 |
09 Jan 15 |
nicklas |
538 |
try |
3069 |
09 Jan 15 |
nicklas |
539 |
{ |
3069 |
09 Jan 15 |
nicklas |
540 |
f.setUrl(fileUrl, true); |
3069 |
09 Jan 15 |
nicklas |
541 |
} |
3069 |
09 Jan 15 |
nicklas |
542 |
catch (RuntimeException ex) |
3069 |
09 Jan 15 |
nicklas |
543 |
{ |
3069 |
09 Jan 15 |
nicklas |
544 |
f.setUrl(fileUrl, false); |
3069 |
09 Jan 15 |
nicklas |
545 |
} |
3069 |
09 Jan 15 |
nicklas |
546 |
if (!f.isInDatabase()) |
3069 |
09 Jan 15 |
nicklas |
547 |
{ |
3069 |
09 Jan 15 |
nicklas |
548 |
dc.saveItem(f); |
3069 |
09 Jan 15 |
nicklas |
549 |
} |
4545 |
28 Jun 17 |
nicklas |
550 |
if (f.getName().equals("accepted_hits.bam")) |
3069 |
09 Jan 15 |
nicklas |
551 |
{ |
4545 |
28 Jun 17 |
nicklas |
552 |
f.setDescription(metrics.numReadsAfterAlign + " ALIGNED PAIRS"); |
4545 |
28 Jun 17 |
nicklas |
553 |
f.setItemSubtype(bamType); |
4545 |
28 Jun 17 |
nicklas |
554 |
FileSetMember member = aligned.getFileSet().addMember(f, bamData); |
4545 |
28 Jun 17 |
nicklas |
555 |
} |
4545 |
28 Jun 17 |
nicklas |
556 |
else if (f.getName().equals("isoforms.fpkm_tracking")) |
4545 |
28 Jun 17 |
nicklas |
557 |
{ |
3069 |
09 Jan 15 |
nicklas |
558 |
f.setItemSubtype(fpkmType); |
3069 |
09 Jan 15 |
nicklas |
559 |
FileSetMember member = raw.getFileSet().addMember(f, fpkmData); |
3069 |
09 Jan 15 |
nicklas |
560 |
fpkmFile = f; |
3069 |
09 Jan 15 |
nicklas |
561 |
} |
3069 |
09 Jan 15 |
nicklas |
562 |
else |
3069 |
09 Jan 15 |
nicklas |
563 |
{ |
3069 |
09 Jan 15 |
nicklas |
564 |
AnyToAny link = AnyToAny.getNewOrExisting(dc, raw, f.getName(), f, true); |
3069 |
09 Jan 15 |
nicklas |
565 |
if (!link.isInDatabase()) dc.saveItem(link); |
3069 |
09 Jan 15 |
nicklas |
566 |
} |
3069 |
09 Jan 15 |
nicklas |
567 |
} |
3069 |
09 Jan 15 |
nicklas |
568 |
dc.commit(); |
3069 |
09 Jan 15 |
nicklas |
569 |
} |
3069 |
09 Jan 15 |
nicklas |
570 |
finally |
3069 |
09 Jan 15 |
nicklas |
571 |
{ |
3069 |
09 Jan 15 |
nicklas |
572 |
if (dc != null) dc.close(); |
3069 |
09 Jan 15 |
nicklas |
573 |
} |
4545 |
28 Jun 17 |
nicklas |
574 |
|
4545 |
28 Jun 17 |
nicklas |
575 |
return metrics; |
3069 |
09 Jan 15 |
nicklas |
576 |
} |
3069 |
09 Jan 15 |
nicklas |
577 |
} |
4545 |
28 Jun 17 |
nicklas |
578 |
|
4545 |
28 Jun 17 |
nicklas |
579 |
static class Metrics |
4545 |
28 Jun 17 |
nicklas |
580 |
{ |
4545 |
28 Jun 17 |
nicklas |
581 |
Long numReadsAfterMask = null; |
4545 |
28 Jun 17 |
nicklas |
582 |
Long numReadsAfterAlign = null; |
4545 |
28 Jun 17 |
nicklas |
583 |
Long readPairsExamined = null; |
4545 |
28 Jun 17 |
nicklas |
584 |
Long readPairDuplicates = null; |
4545 |
28 Jun 17 |
nicklas |
585 |
Float fractionDuplication = null; |
4545 |
28 Jun 17 |
nicklas |
586 |
int fragmentSizeAvg = -1; |
4545 |
28 Jun 17 |
nicklas |
587 |
int fragmentSizeStd = -1; |
4545 |
28 Jun 17 |
nicklas |
588 |
int fragmentSizeCount = -1; |
4545 |
28 Jun 17 |
nicklas |
589 |
} |
3069 |
09 Jan 15 |
nicklas |
590 |
|
3069 |
09 Jan 15 |
nicklas |
591 |
} |