5826 |
18 Feb 20 |
nicklas |
1 |
package net.sf.basedb.reggie.grid; |
5826 |
18 Feb 20 |
nicklas |
2 |
|
5826 |
18 Feb 20 |
nicklas |
3 |
import java.util.ArrayList; |
5843 |
25 Feb 20 |
nicklas |
4 |
import java.util.Arrays; |
5826 |
18 Feb 20 |
nicklas |
5 |
import java.util.List; |
5827 |
19 Feb 20 |
nicklas |
6 |
import java.util.Set; |
5827 |
19 Feb 20 |
nicklas |
7 |
import java.util.TreeSet; |
5826 |
18 Feb 20 |
nicklas |
8 |
|
5841 |
25 Feb 20 |
nicklas |
9 |
import net.sf.basedb.core.AnyToAny; |
5841 |
25 Feb 20 |
nicklas |
10 |
import net.sf.basedb.core.DataFileType; |
5826 |
18 Feb 20 |
nicklas |
11 |
import net.sf.basedb.core.DbControl; |
5826 |
18 Feb 20 |
nicklas |
12 |
import net.sf.basedb.core.DerivedBioAssay; |
5841 |
25 Feb 20 |
nicklas |
13 |
import net.sf.basedb.core.Directory; |
5827 |
19 Feb 20 |
nicklas |
14 |
import net.sf.basedb.core.File; |
5841 |
25 Feb 20 |
nicklas |
15 |
import net.sf.basedb.core.FileServer; |
5841 |
25 Feb 20 |
nicklas |
16 |
import net.sf.basedb.core.FileSetMember; |
5830 |
21 Feb 20 |
nicklas |
17 |
import net.sf.basedb.core.InvalidDataException; |
5826 |
18 Feb 20 |
nicklas |
18 |
import net.sf.basedb.core.ItemList; |
5826 |
18 Feb 20 |
nicklas |
19 |
import net.sf.basedb.core.ItemNotFoundException; |
5826 |
18 Feb 20 |
nicklas |
20 |
import net.sf.basedb.core.ItemSubtype; |
5826 |
18 Feb 20 |
nicklas |
21 |
import net.sf.basedb.core.Job; |
5841 |
25 Feb 20 |
nicklas |
22 |
import net.sf.basedb.core.Path; |
5826 |
18 Feb 20 |
nicklas |
23 |
import net.sf.basedb.core.Protocol; |
5826 |
18 Feb 20 |
nicklas |
24 |
import net.sf.basedb.core.Sample; |
5826 |
18 Feb 20 |
nicklas |
25 |
import net.sf.basedb.core.SessionControl; |
5826 |
18 Feb 20 |
nicklas |
26 |
import net.sf.basedb.core.Software; |
5826 |
18 Feb 20 |
nicklas |
27 |
import net.sf.basedb.core.StringParameterType; |
5826 |
18 Feb 20 |
nicklas |
28 |
import net.sf.basedb.opengrid.JobDefinition; |
5826 |
18 Feb 20 |
nicklas |
29 |
import net.sf.basedb.opengrid.JobStatus; |
5826 |
18 Feb 20 |
nicklas |
30 |
import net.sf.basedb.opengrid.OpenGridCluster; |
5826 |
18 Feb 20 |
nicklas |
31 |
import net.sf.basedb.opengrid.OpenGridSession; |
5826 |
18 Feb 20 |
nicklas |
32 |
import net.sf.basedb.opengrid.ScriptBuilder; |
5826 |
18 Feb 20 |
nicklas |
33 |
import net.sf.basedb.opengrid.config.ClusterConfig; |
5826 |
18 Feb 20 |
nicklas |
34 |
import net.sf.basedb.opengrid.config.JobConfig; |
5826 |
18 Feb 20 |
nicklas |
35 |
import net.sf.basedb.opengrid.service.JobCompletionHandler; |
5826 |
18 Feb 20 |
nicklas |
36 |
import net.sf.basedb.reggie.Reggie; |
5826 |
18 Feb 20 |
nicklas |
37 |
import net.sf.basedb.reggie.XmlConfig; |
5841 |
25 Feb 20 |
nicklas |
38 |
import net.sf.basedb.reggie.dao.AlignedSequences; |
5826 |
18 Feb 20 |
nicklas |
39 |
import net.sf.basedb.reggie.dao.Annotationtype; |
5826 |
18 Feb 20 |
nicklas |
40 |
import net.sf.basedb.reggie.dao.BiomaterialList; |
5827 |
19 Feb 20 |
nicklas |
41 |
import net.sf.basedb.reggie.dao.Datafiletype; |
5830 |
21 Feb 20 |
nicklas |
42 |
import net.sf.basedb.reggie.dao.Dna; |
5826 |
18 Feb 20 |
nicklas |
43 |
import net.sf.basedb.reggie.dao.DoNotUse; |
5841 |
25 Feb 20 |
nicklas |
44 |
import net.sf.basedb.reggie.dao.Fileserver; |
5826 |
18 Feb 20 |
nicklas |
45 |
import net.sf.basedb.reggie.dao.Library; |
5826 |
18 Feb 20 |
nicklas |
46 |
import net.sf.basedb.reggie.dao.MergedSequences; |
5826 |
18 Feb 20 |
nicklas |
47 |
import net.sf.basedb.reggie.dao.Pipeline; |
5826 |
18 Feb 20 |
nicklas |
48 |
import net.sf.basedb.reggie.dao.Subtype; |
5826 |
18 Feb 20 |
nicklas |
49 |
import net.sf.basedb.util.Values; |
5826 |
18 Feb 20 |
nicklas |
50 |
|
5826 |
18 Feb 20 |
nicklas |
51 |
/** |
5826 |
18 Feb 20 |
nicklas |
Helper class for creating items needed for aligning MIPs |
5826 |
18 Feb 20 |
nicklas |
libraries as well as generating the script and send it to the cluster for |
5826 |
18 Feb 20 |
nicklas |
execution. |
5826 |
18 Feb 20 |
nicklas |
55 |
|
5826 |
18 Feb 20 |
nicklas |
@author nicklas |
5826 |
18 Feb 20 |
nicklas |
@since 4.26 |
5826 |
18 Feb 20 |
nicklas |
58 |
*/ |
5826 |
18 Feb 20 |
nicklas |
59 |
public class MipsAlignJobCreator |
6674 |
11 Apr 22 |
nicklas |
60 |
extends AbstractJobCreator |
5826 |
18 Feb 20 |
nicklas |
61 |
{ |
5826 |
18 Feb 20 |
nicklas |
62 |
private Software alignSoftware; |
5826 |
18 Feb 20 |
nicklas |
63 |
private Protocol alignProtocol; |
5826 |
18 Feb 20 |
nicklas |
64 |
|
5826 |
18 Feb 20 |
nicklas |
65 |
public MipsAlignJobCreator() |
5826 |
18 Feb 20 |
nicklas |
66 |
{} |
5826 |
18 Feb 20 |
nicklas |
67 |
|
5826 |
18 Feb 20 |
nicklas |
68 |
/** |
5826 |
18 Feb 20 |
nicklas |
Set the software item to set on created AlignedSequences. |
5826 |
18 Feb 20 |
nicklas |
@see DerivedBioAssay#setSoftware(Software) |
5826 |
18 Feb 20 |
nicklas |
71 |
*/ |
5826 |
18 Feb 20 |
nicklas |
72 |
public void setAlignSoftware(Software software) |
5826 |
18 Feb 20 |
nicklas |
73 |
{ |
5826 |
18 Feb 20 |
nicklas |
74 |
this.alignSoftware = software; |
5826 |
18 Feb 20 |
nicklas |
75 |
} |
5826 |
18 Feb 20 |
nicklas |
76 |
|
5826 |
18 Feb 20 |
nicklas |
77 |
/** |
5826 |
18 Feb 20 |
nicklas |
Set the protocol item to set on created AlignedSequences. |
5826 |
18 Feb 20 |
nicklas |
@see DerivedBioAssay#setProtocol(Protocol) |
5826 |
18 Feb 20 |
nicklas |
80 |
*/ |
5826 |
18 Feb 20 |
nicklas |
81 |
public void setAlignProtocol(Protocol protocol) |
5826 |
18 Feb 20 |
nicklas |
82 |
{ |
5826 |
18 Feb 20 |
nicklas |
83 |
this.alignProtocol = protocol; |
5826 |
18 Feb 20 |
nicklas |
84 |
} |
5826 |
18 Feb 20 |
nicklas |
85 |
|
5826 |
18 Feb 20 |
nicklas |
86 |
/** |
5826 |
18 Feb 20 |
nicklas |
Create a child bioassays for all given merged sequences and schedule |
5826 |
18 Feb 20 |
nicklas |
jobs on the given cluster for running MIPs alignment. |
5826 |
18 Feb 20 |
nicklas |
@return A list with the corresponding jobs in BASE |
5826 |
18 Feb 20 |
nicklas |
90 |
*/ |
5826 |
18 Feb 20 |
nicklas |
91 |
public List<JobDefinition> createMipsAlignJobs(DbControl dc, OpenGridCluster cluster, List<MergedSequences> mergedSequences) |
5826 |
18 Feb 20 |
nicklas |
92 |
{ |
5826 |
18 Feb 20 |
nicklas |
93 |
SessionControl sc = dc.getSessionControl(); |
5826 |
18 Feb 20 |
nicklas |
94 |
|
5826 |
18 Feb 20 |
nicklas |
95 |
ClusterConfig clusterCfg = cluster.getConfig(); |
5826 |
18 Feb 20 |
nicklas |
96 |
XmlConfig cfg = Reggie.getConfig(cluster.getId()); |
5826 |
18 Feb 20 |
nicklas |
97 |
if (cfg == null) |
5826 |
18 Feb 20 |
nicklas |
98 |
{ |
5826 |
18 Feb 20 |
nicklas |
99 |
throw new ItemNotFoundException("No configuration in reggie-config.xml for cluster: " + cluster.getId()); |
5826 |
18 Feb 20 |
nicklas |
100 |
} |
5826 |
18 Feb 20 |
nicklas |
101 |
|
5826 |
18 Feb 20 |
nicklas |
102 |
String alignParameterSet = (String)Annotationtype.PARAMETER_SET.getAnnotationValue(dc, alignSoftware); |
5826 |
18 Feb 20 |
nicklas |
103 |
|
5826 |
18 Feb 20 |
nicklas |
// Get global options |
6693 |
22 Apr 22 |
nicklas |
105 |
String global_env = ScriptUtil.multilineIndent(cfg.getConfig("global-env")); |
5826 |
18 Feb 20 |
nicklas |
106 |
String projectRoot = cfg.getRequiredConfig("project-archive", null); |
5826 |
18 Feb 20 |
nicklas |
107 |
String externalRoot = cfg.getConfig("external-archive", null, projectRoot); |
5826 |
18 Feb 20 |
nicklas |
108 |
|
5826 |
18 Feb 20 |
nicklas |
// Paths to programs used (bowtie , hisat, picards, and more...) |
5826 |
18 Feb 20 |
nicklas |
110 |
String pipeline_scripts_path = cfg.getRequiredConfig("programs/pipeline-scripts/path", null); |
5827 |
19 Feb 20 |
nicklas |
111 |
String java_path = cfg.getRequiredConfig("programs/java/path", alignParameterSet); |
5827 |
19 Feb 20 |
nicklas |
112 |
String trimmomatic_path = cfg.getConfig("align-mips/trimmomatic/path", alignParameterSet, null); |
5827 |
19 Feb 20 |
nicklas |
113 |
if (trimmomatic_path == null) |
5827 |
19 Feb 20 |
nicklas |
114 |
{ |
5827 |
19 Feb 20 |
nicklas |
115 |
trimmomatic_path = cfg.getRequiredConfig("programs/trimmomatic/path", alignParameterSet); |
5827 |
19 Feb 20 |
nicklas |
116 |
} |
5828 |
19 Feb 20 |
nicklas |
117 |
String picard_path = cfg.getRequiredConfig("programs/picard/path", alignParameterSet); |
5829 |
19 Feb 20 |
nicklas |
118 |
String fgbio_path = cfg.getRequiredConfig("programs/fgbio/path", alignParameterSet); |
5830 |
21 Feb 20 |
nicklas |
119 |
String novoalign_path = cfg.getRequiredConfig("programs/novoalign/path", alignParameterSet); |
5830 |
21 Feb 20 |
nicklas |
120 |
String novosort_path = cfg.getRequiredConfig("programs/novosort/path", alignParameterSet); |
5831 |
21 Feb 20 |
nicklas |
121 |
String samtools_path = cfg.getRequiredConfig("programs/samtools/path", alignParameterSet); |
5826 |
18 Feb 20 |
nicklas |
122 |
|
5826 |
18 Feb 20 |
nicklas |
// Options for the programs |
7372 |
06 Oct 23 |
nicklas |
124 |
String align_submit = cfg.getConfig("align-mips/submit", alignParameterSet, null); |
7372 |
06 Oct 23 |
nicklas |
125 |
String align_submit_debug = cfg.getConfig("align-mips/submit-debug", alignParameterSet, null); |
5827 |
19 Feb 20 |
nicklas |
126 |
String align_trimmomaticOptions1 = cfg.getRequiredConfig("align-mips/trimmomatic/step-1", alignParameterSet); |
5827 |
19 Feb 20 |
nicklas |
127 |
String align_trimmomaticOptions2 = cfg.getRequiredConfig("align-mips/trimmomatic/step-2", alignParameterSet); |
5830 |
21 Feb 20 |
nicklas |
128 |
String ampliconsRoot = cfg.getRequiredConfig("align-mips/amplicons/main-dir", alignParameterSet); |
5830 |
21 Feb 20 |
nicklas |
129 |
String novoalign_index = cfg.getRequiredConfig("align-mips/novoalign/index", alignParameterSet); |
5830 |
21 Feb 20 |
nicklas |
130 |
String novoalign_options = cfg.getRequiredConfig("align-mips/novoalign/options", alignParameterSet); |
5831 |
21 Feb 20 |
nicklas |
131 |
String genome_dict = cfg.getRequiredConfig("align-mips/genome-dict", alignParameterSet); |
5831 |
21 Feb 20 |
nicklas |
132 |
String genome_fasta = cfg.getRequiredConfig("align-mips/genome-fasta", alignParameterSet); |
5841 |
25 Feb 20 |
nicklas |
133 |
String mark_duplicates_options = cfg.getRequiredConfig("align-mips/mark-duplicates-options", alignParameterSet); |
5841 |
25 Feb 20 |
nicklas |
134 |
String pcr_metrics_options = cfg.getRequiredConfig("align-mips/pcr-metrics-options", alignParameterSet); |
5826 |
18 Feb 20 |
nicklas |
135 |
|
5826 |
18 Feb 20 |
nicklas |
// Load common items |
5826 |
18 Feb 20 |
nicklas |
137 |
ItemSubtype alignType = Subtype.ALIGNED_SEQUENCES.get(dc); |
5826 |
18 Feb 20 |
nicklas |
138 |
ItemSubtype mipsAlignJobType = Subtype.MIPS_ALIGN_JOB.get(dc); |
5826 |
18 Feb 20 |
nicklas |
139 |
|
5826 |
18 Feb 20 |
nicklas |
// Selected items must be removed from this list |
5826 |
18 Feb 20 |
nicklas |
141 |
ItemList mipsAlignPipeline = BiomaterialList.MIPS_ALIGN_PIPELINE.load(dc); |
5826 |
18 Feb 20 |
nicklas |
142 |
|
5826 |
18 Feb 20 |
nicklas |
// Options common for all jobs |
5826 |
18 Feb 20 |
nicklas |
144 |
JobConfig jobConfig = new JobConfig(); |
5826 |
18 Feb 20 |
nicklas |
145 |
if (priority != null) jobConfig.setPriority(priority); |
7372 |
06 Oct 23 |
nicklas |
146 |
if (partition != null) jobConfig.setSbatchOption("partition", ScriptUtil.checkValidScriptParameter(partition)); |
7372 |
06 Oct 23 |
nicklas |
147 |
jobConfig.convertOptionsTo(clusterCfg.getType()); |
7372 |
06 Oct 23 |
nicklas |
148 |
if (submitOptionsOverride != null) |
7372 |
06 Oct 23 |
nicklas |
149 |
{ |
7372 |
06 Oct 23 |
nicklas |
150 |
ScriptUtil.addSubmitOptions(jobConfig, submitOptionsOverride, clusterCfg.getType()); |
7372 |
06 Oct 23 |
nicklas |
151 |
} |
7372 |
06 Oct 23 |
nicklas |
152 |
else |
7372 |
06 Oct 23 |
nicklas |
153 |
{ |
7372 |
06 Oct 23 |
nicklas |
154 |
ScriptUtil.addSubmitOptions(jobConfig, align_submit, clusterCfg.getType()); |
7372 |
06 Oct 23 |
nicklas |
155 |
if (debug) ScriptUtil.addSubmitOptions(jobConfig, align_submit_debug, clusterCfg.getType()); |
7372 |
06 Oct 23 |
nicklas |
156 |
} |
5826 |
18 Feb 20 |
nicklas |
157 |
|
5826 |
18 Feb 20 |
nicklas |
// We submit one job for each raw bioassay to the cluster |
5826 |
18 Feb 20 |
nicklas |
159 |
List<JobDefinition> jobDefs = new ArrayList<JobDefinition>(mergedSequences.size()); |
5826 |
18 Feb 20 |
nicklas |
160 |
|
5826 |
18 Feb 20 |
nicklas |
161 |
for (MergedSequences ms : mergedSequences) |
5826 |
18 Feb 20 |
nicklas |
162 |
{ |
5826 |
18 Feb 20 |
nicklas |
163 |
ms = MergedSequences.getById(dc, ms.getId()); // Ensure item is loaded in this transaction |
5826 |
18 Feb 20 |
nicklas |
// Get some information about the merged data that we need |
5826 |
18 Feb 20 |
nicklas |
165 |
DerivedBioAssay merged = ms.getDerivedBioAssay(); |
5826 |
18 Feb 20 |
nicklas |
166 |
mipsAlignPipeline.removeItem(merged); |
5826 |
18 Feb 20 |
nicklas |
167 |
|
5826 |
18 Feb 20 |
nicklas |
168 |
Library lib = Library.get(merged.getExtract()); |
5826 |
18 Feb 20 |
nicklas |
169 |
boolean isExternal = Reggie.isExternalItem(merged.getName()); |
5826 |
18 Feb 20 |
nicklas |
170 |
String archiveRoot = isExternal ? externalRoot : projectRoot; |
5826 |
18 Feb 20 |
nicklas |
171 |
|
5826 |
18 Feb 20 |
nicklas |
172 |
Sample specimen = (Sample)lib.findSingleParent(dc, Subtype.SPECIMEN); |
5826 |
18 Feb 20 |
nicklas |
173 |
String fastQFolder = (String)Annotationtype.DATA_FILES_FOLDER.getAnnotationValue(dc, merged); |
5826 |
18 Feb 20 |
nicklas |
174 |
|
5830 |
21 Feb 20 |
nicklas |
175 |
Dna dna = lib.getDna(dc, true); |
5830 |
21 Feb 20 |
nicklas |
176 |
String mipsPanel = (String)Annotationtype.MIPS_PANEL.getAnnotationValue(dc, dna.getItem()); |
5830 |
21 Feb 20 |
nicklas |
177 |
if (mipsPanel == null) |
5830 |
21 Feb 20 |
nicklas |
178 |
{ |
5830 |
21 Feb 20 |
nicklas |
179 |
throw new InvalidDataException("No value for '"+Annotationtype.MIPS_PANEL.getName() + "' on DNA: " + dna.getName()); |
5830 |
21 Feb 20 |
nicklas |
180 |
} |
5830 |
21 Feb 20 |
nicklas |
181 |
|
5830 |
21 Feb 20 |
nicklas |
182 |
String amplicons_bed = cfg.getRequiredConfig("align-mips/amplicons/bed[@panel='"+mipsPanel+"']", alignParameterSet); |
5830 |
21 Feb 20 |
nicklas |
183 |
|
5827 |
19 Feb 20 |
nicklas |
// Get all FASTQ files |
5827 |
19 Feb 20 |
nicklas |
185 |
List<File> fastqFiles = Datafiletype.FASTQ.getAllFiles(dc, merged); |
5827 |
19 Feb 20 |
nicklas |
186 |
Set<String> fastqPrefix = new TreeSet<>(); |
5827 |
19 Feb 20 |
nicklas |
187 |
String R1_suffix = "_R1.fastq.gz"; |
5827 |
19 Feb 20 |
nicklas |
188 |
for (File f : fastqFiles) |
5827 |
19 Feb 20 |
nicklas |
189 |
{ |
5827 |
19 Feb 20 |
nicklas |
190 |
String name = ScriptUtil.checkValidFilename(f.getName()); |
5827 |
19 Feb 20 |
nicklas |
191 |
if (name.endsWith(R1_suffix)) |
5827 |
19 Feb 20 |
nicklas |
192 |
{ |
5827 |
19 Feb 20 |
nicklas |
193 |
String prefix = name.substring(0, name.length()-R1_suffix.length()); |
5827 |
19 Feb 20 |
nicklas |
194 |
fastqPrefix.add(prefix); |
5827 |
19 Feb 20 |
nicklas |
195 |
} |
5827 |
19 Feb 20 |
nicklas |
196 |
} |
5827 |
19 Feb 20 |
nicklas |
197 |
|
5826 |
18 Feb 20 |
nicklas |
198 |
/* |
5826 |
18 Feb 20 |
nicklas |
List<FlowCell> flowCells = ms.getFlowCells(dc); |
5826 |
18 Feb 20 |
nicklas |
List<String> flowCellIds = new ArrayList<>(); |
5826 |
18 Feb 20 |
nicklas |
for (FlowCell fc : flowCells) |
5826 |
18 Feb 20 |
nicklas |
202 |
{ |
5826 |
18 Feb 20 |
nicklas |
String fcId = (String)Annotationtype.FLOWCELL_ID.getAnnotationValue(dc, fc.getItem()); |
5826 |
18 Feb 20 |
nicklas |
if (fcId != null) flowCellIds.add(ScriptUtil.checkValidScriptParameter(fcId)); |
5826 |
18 Feb 20 |
nicklas |
205 |
} |
5826 |
18 Feb 20 |
nicklas |
206 |
*/ |
5826 |
18 Feb 20 |
nicklas |
207 |
|
5826 |
18 Feb 20 |
nicklas |
// Create job |
5826 |
18 Feb 20 |
nicklas |
209 |
Job alignJob = Job.getNew(dc, null, null, null); |
5826 |
18 Feb 20 |
nicklas |
210 |
alignJob.setItemSubtype(mipsAlignJobType); |
5826 |
18 Feb 20 |
nicklas |
211 |
alignJob.setPluginVersion("reggie-"+Reggie.VERSION); |
5826 |
18 Feb 20 |
nicklas |
212 |
alignJob.setSendMessage(Values.getBoolean(sc.getUserClientSetting("plugins.sendmessage"), false)); |
5826 |
18 Feb 20 |
nicklas |
213 |
alignJob.setName("MIPsAlign " + merged.getName()); |
5826 |
18 Feb 20 |
nicklas |
214 |
alignJob.setParameterValue("pipeline", new StringParameterType(), Pipeline.MIPS.getId()); |
5826 |
18 Feb 20 |
nicklas |
215 |
if (debug) alignJob.setName(alignJob.getName() + " (debug)"); |
6981 |
17 Jan 23 |
nicklas |
216 |
if (partition != null) alignJob.setParameterValue("partition", new StringParameterType(), partition); |
7372 |
06 Oct 23 |
nicklas |
217 |
if (submitOptionsOverride != null) alignJob.setParameterValue("jobOptions", new StringParameterType(), submitOptionsOverride); |
5826 |
18 Feb 20 |
nicklas |
218 |
dc.saveItem(alignJob); |
5826 |
18 Feb 20 |
nicklas |
219 |
|
5826 |
18 Feb 20 |
nicklas |
// Create ALIGNED derived bioassay set |
5826 |
18 Feb 20 |
nicklas |
221 |
String alignedName = ms.getNextAlignedSequencesName(dc); |
5826 |
18 Feb 20 |
nicklas |
222 |
DerivedBioAssay aligned = DerivedBioAssay.getNew(dc, false, alignJob); |
5826 |
18 Feb 20 |
nicklas |
223 |
aligned.setItemSubtype(alignType); |
5826 |
18 Feb 20 |
nicklas |
224 |
Pipeline.MIPS.setAnnotation(dc, aligned); |
5826 |
18 Feb 20 |
nicklas |
225 |
aligned.setName(alignedName); |
5826 |
18 Feb 20 |
nicklas |
226 |
aligned.setExtract(merged.getExtract()); |
5826 |
18 Feb 20 |
nicklas |
227 |
aligned.setSoftware(alignSoftware); |
5826 |
18 Feb 20 |
nicklas |
228 |
aligned.setProtocol(alignProtocol); |
5826 |
18 Feb 20 |
nicklas |
229 |
aligned.addParent(merged); |
5826 |
18 Feb 20 |
nicklas |
230 |
DoNotUse.copyDoNotUseAnnotations(dc, merged, aligned, false); |
5826 |
18 Feb 20 |
nicklas |
231 |
dc.saveItem(aligned); |
5826 |
18 Feb 20 |
nicklas |
232 |
|
5826 |
18 Feb 20 |
nicklas |
233 |
String alignFolder = fastQFolder + "/"+aligned.getName().substring(merged.getName().length()+1); |
5826 |
18 Feb 20 |
nicklas |
234 |
if (debug && !alignFolder.startsWith("/debug")) |
5826 |
18 Feb 20 |
nicklas |
235 |
{ |
5826 |
18 Feb 20 |
nicklas |
236 |
alignFolder = "/debug" + alignFolder; |
5826 |
18 Feb 20 |
nicklas |
237 |
} |
5826 |
18 Feb 20 |
nicklas |
238 |
Annotationtype.DATA_FILES_FOLDER.setAnnotationValue(dc, aligned, alignFolder); |
5826 |
18 Feb 20 |
nicklas |
239 |
if (autoConfirm) |
5826 |
18 Feb 20 |
nicklas |
240 |
{ |
5826 |
18 Feb 20 |
nicklas |
241 |
Annotationtype.AUTO_PROCESSING.setAnnotationValue(dc, aligned, "AutoConfirm"); |
5826 |
18 Feb 20 |
nicklas |
242 |
} |
5826 |
18 Feb 20 |
nicklas |
243 |
|
5826 |
18 Feb 20 |
nicklas |
// Checks to make sure no bad things are included in script file |
5826 |
18 Feb 20 |
nicklas |
245 |
ScriptUtil.checkValidPath(alignFolder, true, true); |
5826 |
18 Feb 20 |
nicklas |
246 |
String mergedName = ScriptUtil.checkValidScriptParameter(merged.getName()); |
5826 |
18 Feb 20 |
nicklas |
247 |
String libName = ScriptUtil.checkValidScriptParameter(lib.getName()); |
5826 |
18 Feb 20 |
nicklas |
// Remove all after the first dot not followed by a number |
5826 |
18 Feb 20 |
nicklas |
249 |
String sampleName = libName.replaceFirst("\\.(?!\\d).*", ""); |
5826 |
18 Feb 20 |
nicklas |
250 |
if (specimen != null && specimen.getExternalId() != null) |
5826 |
18 Feb 20 |
nicklas |
251 |
{ |
5826 |
18 Feb 20 |
nicklas |
// Replace SCANB-ID in libName/mergedName/sampleName with Sample.externalId |
5826 |
18 Feb 20 |
nicklas |
253 |
sampleName = ScriptUtil.checkValidScriptParameter(specimen.getExternalId()); |
5826 |
18 Feb 20 |
nicklas |
254 |
libName = libName.replace(specimen.getName(), specimen.getExternalId()); |
5826 |
18 Feb 20 |
nicklas |
255 |
mergedName = mergedName.replace(specimen.getName(), specimen.getExternalId()); |
5826 |
18 Feb 20 |
nicklas |
256 |
} |
5826 |
18 Feb 20 |
nicklas |
257 |
|
5826 |
18 Feb 20 |
nicklas |
258 |
ScriptBuilder script = new ScriptBuilder(); |
6665 |
05 Apr 22 |
nicklas |
259 |
script.cmd(debug ? "set -ex" : "set -e"); |
5826 |
18 Feb 20 |
nicklas |
260 |
script.comment("Setting up scripting environment and copying script to tmp folder"); |
6693 |
22 Apr 22 |
nicklas |
261 |
script.cmd(global_env); |
5826 |
18 Feb 20 |
nicklas |
262 |
script.cmd("ScriptDir=" + pipeline_scripts_path); |
5828 |
19 Feb 20 |
nicklas |
263 |
script.cmd("PicardDir="+picard_path); |
5827 |
19 Feb 20 |
nicklas |
264 |
script.cmd("JAVA="+java_path); |
5827 |
19 Feb 20 |
nicklas |
265 |
script.cmd("TrimmomaticJAR="+trimmomatic_path); |
5829 |
19 Feb 20 |
nicklas |
266 |
script.cmd("FgBioJAR="+fgbio_path); |
5830 |
21 Feb 20 |
nicklas |
267 |
script.cmd("NovoAlign="+novoalign_path); |
5830 |
21 Feb 20 |
nicklas |
268 |
script.cmd("NovoSort="+novosort_path); |
5831 |
21 Feb 20 |
nicklas |
269 |
script.cmd("samtools="+samtools_path); |
5830 |
21 Feb 20 |
nicklas |
270 |
script.cmd("AmpliconsDir=" + ampliconsRoot); |
5830 |
21 Feb 20 |
nicklas |
271 |
script.cmd("AmpliconsBed=${AmpliconsDir}/"+amplicons_bed); |
5830 |
21 Feb 20 |
nicklas |
272 |
script.cmd("NovoIndex="+novoalign_index); |
5831 |
21 Feb 20 |
nicklas |
273 |
script.cmd("GenomeDict="+genome_dict); |
5831 |
21 Feb 20 |
nicklas |
274 |
script.cmd("GenomeFasta="+genome_fasta); |
5826 |
18 Feb 20 |
nicklas |
275 |
script.cmd("ArchiveRoot="+archiveRoot); |
5826 |
18 Feb 20 |
nicklas |
276 |
script.cmd("FastqFolder=${ArchiveRoot}"+fastQFolder); |
5833 |
24 Feb 20 |
nicklas |
277 |
script.cmd("FastqPrefix=( "); |
5833 |
24 Feb 20 |
nicklas |
278 |
script.cmd(" " + Values.getString(fastqPrefix, "\n ", true)); |
5833 |
24 Feb 20 |
nicklas |
279 |
script.cmd(")"); |
5841 |
25 Feb 20 |
nicklas |
280 |
script.cmd("AlignFolder=${ArchiveRoot}"+alignFolder); |
5833 |
24 Feb 20 |
nicklas |
281 |
script.cmd("TrimOptionsAdapter=\""+align_trimmomaticOptions1+"\""); |
5833 |
24 Feb 20 |
nicklas |
282 |
script.cmd("TrimOptionsQual=\""+align_trimmomaticOptions2+"\""); |
5837 |
24 Feb 20 |
nicklas |
283 |
script.cmd("NovoAlignOptions=\""+novoalign_options+"\""); |
5841 |
25 Feb 20 |
nicklas |
284 |
script.cmd("MarkDuplicatesOptions=\""+mark_duplicates_options+"\""); |
5841 |
25 Feb 20 |
nicklas |
285 |
script.cmd("PcrMetricsOptions=\""+pcr_metrics_options+"\""); |
5827 |
19 Feb 20 |
nicklas |
286 |
|
5826 |
18 Feb 20 |
nicklas |
287 |
script.newLine(); |
5826 |
18 Feb 20 |
nicklas |
288 |
|
5826 |
18 Feb 20 |
nicklas |
289 |
script.comment("Use 1 thread/core but not more than slots assigned by the queue system"); |
5826 |
18 Feb 20 |
nicklas |
290 |
script.cmd("NumThreads=`nproc`"); |
5826 |
18 Feb 20 |
nicklas |
291 |
script.cmd("NumThreads=$(( ${NSLOTS} < ${NumThreads} ? ${NSLOTS} : ${NumThreads} ))"); |
5826 |
18 Feb 20 |
nicklas |
292 |
script.newLine(); |
5826 |
18 Feb 20 |
nicklas |
293 |
|
5826 |
18 Feb 20 |
nicklas |
// Set file permissions based on consent or external group! |
5826 |
18 Feb 20 |
nicklas |
295 |
String externalGroup = isExternal ? Reggie.getExternalGroup(merged.getName()) : null; |
5826 |
18 Feb 20 |
nicklas |
296 |
ScriptUtil.setUmaskForItem(dc, lib, externalGroup, script); |
5826 |
18 Feb 20 |
nicklas |
297 |
|
5826 |
18 Feb 20 |
nicklas |
298 |
script.comment("Move to the temporary working directory and copy the pipeline scripts"); |
5826 |
18 Feb 20 |
nicklas |
299 |
script.cmd("cd ${TMPDIR}"); |
5827 |
19 Feb 20 |
nicklas |
300 |
script.cmd("mkdir fastq"); |
5833 |
24 Feb 20 |
nicklas |
301 |
script.cmd("mkdir tmp"); |
5833 |
24 Feb 20 |
nicklas |
302 |
script.cmd("mkdir out"); |
5826 |
18 Feb 20 |
nicklas |
303 |
script.cmd("cp ${ScriptDir}/stdwrap.sh ."); |
5830 |
21 Feb 20 |
nicklas |
304 |
script.cmd("cp ${ScriptDir}/stderrwrap.sh ."); |
5828 |
19 Feb 20 |
nicklas |
305 |
script.cmd("cp ${ScriptDir}/picard2 ."); |
5833 |
24 Feb 20 |
nicklas |
306 |
script.cmd("cp ${ScriptDir}/mips_functions.sh ."); |
5826 |
18 Feb 20 |
nicklas |
307 |
script.newLine(); |
5826 |
18 Feb 20 |
nicklas |
308 |
|
5833 |
24 Feb 20 |
nicklas |
309 |
script.comment("Load script with helper functions"); |
5833 |
24 Feb 20 |
nicklas |
310 |
script.cmd(". ./mips_functions.sh"); |
5827 |
19 Feb 20 |
nicklas |
311 |
script.newLine(); |
5827 |
19 Feb 20 |
nicklas |
312 |
|
5828 |
19 Feb 20 |
nicklas |
313 |
script.comment("Copy FASTQ and .rg files to tmp folder"); |
5826 |
18 Feb 20 |
nicklas |
314 |
script.progress(10, "Copying FASTQ files"); |
5833 |
24 Feb 20 |
nicklas |
315 |
script.cmd("copy_fastq_and_rg \"${FastqFolder}\" \"fastq\""); |
5826 |
18 Feb 20 |
nicklas |
316 |
script.newLine(); |
5833 |
24 Feb 20 |
nicklas |
317 |
|
5827 |
19 Feb 20 |
nicklas |
318 |
script.comment("Run Trimmomatic"); |
5833 |
24 Feb 20 |
nicklas |
319 |
script.progress(20, "Running Trimmomatic"); |
5833 |
24 Feb 20 |
nicklas |
320 |
script.comment("Create adapter.fa for use with trimmomatic"); |
5833 |
24 Feb 20 |
nicklas |
321 |
script.cmd("create_adapter_fa \"adapter.fa\""); |
5833 |
24 Feb 20 |
nicklas |
322 |
script.newLine(); |
5833 |
24 Feb 20 |
nicklas |
323 |
|
5833 |
24 Feb 20 |
nicklas |
324 |
script.cmd("n=0"); |
5833 |
24 Feb 20 |
nicklas |
325 |
script.cmd("nAfterTrim=0"); |
5827 |
19 Feb 20 |
nicklas |
326 |
script.cmd("for prefix in ${FastqPrefix[@]} ; do"); |
5833 |
24 Feb 20 |
nicklas |
327 |
script.cmd(" n=$((n + 1))"); |
5833 |
24 Feb 20 |
nicklas |
328 |
script.cmd(" run_trimmomatic \"${prefix}\" $n"); |
5827 |
19 Feb 20 |
nicklas |
329 |
script.cmd("done"); |
5835 |
24 Feb 20 |
nicklas |
330 |
script.newLine(); |
5833 |
24 Feb 20 |
nicklas |
331 |
script.cmd("trimlog_stats \"tmp/*.adaptrim.log\" \"out/adaptrim.hist.txt\""); |
5833 |
24 Feb 20 |
nicklas |
script.cmd("trimlog_stats \"tmp/*.qualtrim.log\" \"out/qualtrim.hist.txt\""); |
5835 |
24 Feb 20 |
nicklas |
script.newLine(); |
5833 |
24 Feb 20 |
nicklas |
script.cmd("if [ ${nAfterTrim} -eq 0 ]; then"); |
5833 |
24 Feb 20 |
nicklas |
script.cmd(" echo \"No paired end reads surviving adapter and quality trimming for any fastq prefix\" 1>&2"); |
5833 |
24 Feb 20 |
nicklas |
script.cmd("exit 1"); |
5833 |
24 Feb 20 |
nicklas |
script.cmd("fi"); |
5835 |
24 Feb 20 |
nicklas |
script.newLine(); |
5827 |
19 Feb 20 |
nicklas |
339 |
|
5835 |
24 Feb 20 |
nicklas |
script.comment("Run Picard FastqToSam and annotate the BAM files with UMI"); |
5835 |
24 Feb 20 |
nicklas |
script.progress(30, "Converting FASTQ to BAM and annotating with UMI"); |
5835 |
24 Feb 20 |
nicklas |
script.cmd("n=0"); |
5828 |
19 Feb 20 |
nicklas |
script.cmd("for prefix in ${FastqPrefix[@]} ; do"); |
5835 |
24 Feb 20 |
nicklas |
script.cmd(" n=$((n + 1))"); |
5835 |
24 Feb 20 |
nicklas |
script.cmd(" fastq_to_bam \"${prefix}\" $n"); |
5835 |
24 Feb 20 |
nicklas |
script.cmd(" annotate_umi \"${prefix}\" $n"); |
5828 |
19 Feb 20 |
nicklas |
script.cmd("done"); |
5830 |
21 Feb 20 |
nicklas |
script.newLine(); |
5835 |
24 Feb 20 |
nicklas |
349 |
|
5837 |
24 Feb 20 |
nicklas |
script.comment("Run novoalign"); |
5837 |
24 Feb 20 |
nicklas |
script.progress(40, "Aligning with novoalign"); |
5841 |
25 Feb 20 |
nicklas |
script.cmd("get_min_max_insert"); |
5837 |
24 Feb 20 |
nicklas |
script.cmd("n=0"); |
5830 |
21 Feb 20 |
nicklas |
script.cmd("AlignedBams=''"); |
5830 |
21 Feb 20 |
nicklas |
script.cmd("for prefix in ${FastqPrefix[@]} ; do"); |
5837 |
24 Feb 20 |
nicklas |
script.cmd(" n=$((n + 1))"); |
5837 |
24 Feb 20 |
nicklas |
script.cmd(" novoalign \"${prefix}\" $n"); |
5830 |
21 Feb 20 |
nicklas |
script.cmd("done"); |
5830 |
21 Feb 20 |
nicklas |
script.newLine(); |
5830 |
21 Feb 20 |
nicklas |
script.comment("Merge BAM files"); |
5837 |
24 Feb 20 |
nicklas |
script.cmd("merge_bam"); |
5831 |
21 Feb 20 |
nicklas |
script.newLine(); |
5839 |
25 Feb 20 |
nicklas |
363 |
|
5841 |
25 Feb 20 |
nicklas |
script.comment("Re-header and split BAM file"); |
5839 |
25 Feb 20 |
nicklas |
script.progress(70, "Post-processing aligned BAM file"); |
5839 |
25 Feb 20 |
nicklas |
script.cmd("reheader_bam"); |
5839 |
25 Feb 20 |
nicklas |
script.cmd("split_bam"); |
5831 |
21 Feb 20 |
nicklas |
script.newLine(); |
5831 |
21 Feb 20 |
nicklas |
369 |
|
5839 |
25 Feb 20 |
nicklas |
script.comment("Mark duplicates"); |
5839 |
25 Feb 20 |
nicklas |
script.progress(80, "Marking duplicates"); |
5839 |
25 Feb 20 |
nicklas |
script.cmd("mark_duplicates \"concordant\""); |
5839 |
25 Feb 20 |
nicklas |
script.cmd("mark_duplicates \"discordant\""); |
5839 |
25 Feb 20 |
nicklas |
script.newLine(); |
5839 |
25 Feb 20 |
nicklas |
375 |
|
5841 |
25 Feb 20 |
nicklas |
script.comment("Collect metrics"); |
5841 |
25 Feb 20 |
nicklas |
script.progress(90, "Collecting PCR and alignment metrics"); |
5841 |
25 Feb 20 |
nicklas |
script.cmd("collect_pcr_metrics"); |
5841 |
25 Feb 20 |
nicklas |
script.cmd("collect_alignment_metrics"); |
5841 |
25 Feb 20 |
nicklas |
script.newLine(); |
5841 |
25 Feb 20 |
nicklas |
381 |
|
5826 |
18 Feb 20 |
nicklas |
script.progress(95, "Copying result files to project archive"); |
5841 |
25 Feb 20 |
nicklas |
script.cmd("mkdir -p ${AlignFolder}"); |
5841 |
25 Feb 20 |
nicklas |
script.cmd("rm -rf ${AlignFolder}/*"); |
5841 |
25 Feb 20 |
nicklas |
script.cmd("cp out/* ${AlignFolder}"); |
5843 |
25 Feb 20 |
nicklas |
script.cmd("cp out/concordant.*_metrics.txt ${WD}"); |
5826 |
18 Feb 20 |
nicklas |
if (externalGroup != null) |
5826 |
18 Feb 20 |
nicklas |
388 |
{ |
5930 |
06 May 20 |
nicklas |
ScriptUtil.addChgrp(externalGroup, "${AlignFolder}", alignedName, null, script); |
5826 |
18 Feb 20 |
nicklas |
390 |
} |
5841 |
25 Feb 20 |
nicklas |
script.cmd("ls -1 ${AlignFolder}/* >> ${WD}/files.out"); |
5826 |
18 Feb 20 |
nicklas |
script.newLine(); |
5826 |
18 Feb 20 |
nicklas |
393 |
|
5826 |
18 Feb 20 |
nicklas |
script.progress(99, "Cleaning up temporary folders"); |
5826 |
18 Feb 20 |
nicklas |
395 |
|
6674 |
11 Apr 22 |
nicklas |
JobDefinition jobDef = new JobDefinition("MipsAlign", jobConfig, batchConfig, alignJob); |
5826 |
18 Feb 20 |
nicklas |
jobDef.setDebug(debug); |
5826 |
18 Feb 20 |
nicklas |
jobDef.setCmd(script.toString()); |
5826 |
18 Feb 20 |
nicklas |
jobDefs.add(jobDef); |
5826 |
18 Feb 20 |
nicklas |
400 |
} |
5826 |
18 Feb 20 |
nicklas |
401 |
|
5826 |
18 Feb 20 |
nicklas |
return jobDefs; |
5826 |
18 Feb 20 |
nicklas |
403 |
} |
5826 |
18 Feb 20 |
nicklas |
404 |
|
5826 |
18 Feb 20 |
nicklas |
405 |
/** |
5826 |
18 Feb 20 |
nicklas |
Job completion handler for mask/align jobs. The handler downloads the |
5826 |
18 Feb 20 |
nicklas |
'filter.out' file from the job folder and parses out number of |
5826 |
18 Feb 20 |
nicklas |
reads remaining. The information is stored as {@link Annotationtype#READS} |
5826 |
18 Feb 20 |
nicklas |
annotation on FilteredSequences items. |
5826 |
18 Feb 20 |
nicklas |
410 |
*/ |
5826 |
18 Feb 20 |
nicklas |
411 |
public static class AlignJobCompletionHandler |
5826 |
18 Feb 20 |
nicklas |
412 |
implements JobCompletionHandler |
5826 |
18 Feb 20 |
nicklas |
413 |
{ |
5826 |
18 Feb 20 |
nicklas |
414 |
|
5826 |
18 Feb 20 |
nicklas |
415 |
public AlignJobCompletionHandler() |
5826 |
18 Feb 20 |
nicklas |
416 |
{} |
5826 |
18 Feb 20 |
nicklas |
417 |
|
5826 |
18 Feb 20 |
nicklas |
418 |
@Override |
5826 |
18 Feb 20 |
nicklas |
419 |
public String jobCompleted(SessionControl sc, OpenGridSession session, Job job, JobStatus status) |
5826 |
18 Feb 20 |
nicklas |
420 |
{ |
5841 |
25 Feb 20 |
nicklas |
421 |
String jobName = status.getName(); |
5841 |
25 Feb 20 |
nicklas |
422 |
String files = session.getJobFileAsString(jobName, "files.out", "UTF-8"); |
5843 |
25 Feb 20 |
nicklas |
423 |
String alignmentMetrics = session.getJobFileAsString(jobName, "concordant.alignment_summary_metrics.txt", "UTF-8"); |
5843 |
25 Feb 20 |
nicklas |
424 |
String duplicateMetrics = session.getJobFileAsString(jobName, "concordant.dedup_metrics.txt", "UTF-8"); |
5841 |
25 Feb 20 |
nicklas |
425 |
|
5843 |
25 Feb 20 |
nicklas |
426 |
Metrics metrics = parseAlignedOut(sc, job, files, alignmentMetrics, duplicateMetrics); |
5843 |
25 Feb 20 |
nicklas |
427 |
String msg = Reggie.formatCount(metrics.alignedReadPairs) + " reads after alignment; "; |
5843 |
25 Feb 20 |
nicklas |
428 |
msg += Values.formatNumber(metrics.fractionDuplication * 100, 1) + "% duplicates; "; |
5843 |
25 Feb 20 |
nicklas |
429 |
msg += Reggie.formatCount(metrics.alignedBases) + " aligned bases"; |
5843 |
25 Feb 20 |
nicklas |
430 |
return msg; |
5826 |
18 Feb 20 |
nicklas |
431 |
} |
5826 |
18 Feb 20 |
nicklas |
432 |
|
5843 |
25 Feb 20 |
nicklas |
433 |
private Metrics parseAlignedOut(SessionControl sc, Job job, String filesOut, String alignmentMetrics, String duplicateMetrics) |
5841 |
25 Feb 20 |
nicklas |
434 |
{ |
5841 |
25 Feb 20 |
nicklas |
435 |
Metrics metrics = new Metrics(); |
5843 |
25 Feb 20 |
nicklas |
436 |
|
5843 |
25 Feb 20 |
nicklas |
437 |
int categoryIndex = -1; |
5843 |
25 Feb 20 |
nicklas |
438 |
int alignedPairsIndex = -1; |
5843 |
25 Feb 20 |
nicklas |
439 |
int alignedBasesIndex = -1; |
5843 |
25 Feb 20 |
nicklas |
440 |
for (String line : alignmentMetrics.split("\n")) |
5843 |
25 Feb 20 |
nicklas |
441 |
{ |
5843 |
25 Feb 20 |
nicklas |
442 |
String[] cols = line.split("\t"); |
5843 |
25 Feb 20 |
nicklas |
443 |
if (cols.length >= 20) |
5843 |
25 Feb 20 |
nicklas |
444 |
{ |
5843 |
25 Feb 20 |
nicklas |
445 |
if (categoryIndex == -1) |
5843 |
25 Feb 20 |
nicklas |
446 |
{ |
5843 |
25 Feb 20 |
nicklas |
447 |
List<String> colsA = Arrays.asList(cols); |
5843 |
25 Feb 20 |
nicklas |
448 |
categoryIndex = colsA.indexOf("CATEGORY"); |
5843 |
25 Feb 20 |
nicklas |
449 |
alignedPairsIndex = colsA.indexOf("READS_ALIGNED_IN_PAIRS"); |
5843 |
25 Feb 20 |
nicklas |
450 |
alignedBasesIndex = colsA.indexOf("PF_ALIGNED_BASES"); |
5843 |
25 Feb 20 |
nicklas |
451 |
} |
5843 |
25 Feb 20 |
nicklas |
452 |
else if ("PAIR".equals(cols[categoryIndex])) |
5843 |
25 Feb 20 |
nicklas |
453 |
{ |
5843 |
25 Feb 20 |
nicklas |
454 |
Long tmp = Values.getLong(cols[alignedPairsIndex], null); |
5843 |
25 Feb 20 |
nicklas |
455 |
if (tmp != null) metrics.alignedReadPairs = tmp / 2; |
5843 |
25 Feb 20 |
nicklas |
456 |
metrics.alignedBases = Values.getLong(cols[alignedBasesIndex], null); |
5843 |
25 Feb 20 |
nicklas |
457 |
break; |
5843 |
25 Feb 20 |
nicklas |
458 |
} |
5843 |
25 Feb 20 |
nicklas |
459 |
} |
5843 |
25 Feb 20 |
nicklas |
460 |
} |
5843 |
25 Feb 20 |
nicklas |
461 |
|
5841 |
25 Feb 20 |
nicklas |
462 |
|
5843 |
25 Feb 20 |
nicklas |
463 |
int readPairsExaminedIndex = -1; |
5843 |
25 Feb 20 |
nicklas |
464 |
int readPairDuplicatesIndex = -1; |
5843 |
25 Feb 20 |
nicklas |
465 |
int percentDuplicationIndex = -1; |
5843 |
25 Feb 20 |
nicklas |
466 |
for (String line : duplicateMetrics.split("\n")) |
5843 |
25 Feb 20 |
nicklas |
467 |
{ |
5843 |
25 Feb 20 |
nicklas |
468 |
String[] cols = line.split("\t"); |
5843 |
25 Feb 20 |
nicklas |
469 |
if (cols.length >= 9) |
5843 |
25 Feb 20 |
nicklas |
470 |
{ |
5843 |
25 Feb 20 |
nicklas |
471 |
if (readPairsExaminedIndex == -1) |
5843 |
25 Feb 20 |
nicklas |
472 |
{ |
5843 |
25 Feb 20 |
nicklas |
473 |
List<String> colsA = Arrays.asList(cols); |
5843 |
25 Feb 20 |
nicklas |
474 |
readPairsExaminedIndex = colsA.indexOf("READ_PAIRS_EXAMINED"); |
5843 |
25 Feb 20 |
nicklas |
475 |
readPairDuplicatesIndex = colsA.indexOf("READ_PAIR_DUPLICATES"); |
5843 |
25 Feb 20 |
nicklas |
476 |
percentDuplicationIndex = colsA.indexOf("PERCENT_DUPLICATION"); |
5843 |
25 Feb 20 |
nicklas |
477 |
} |
5843 |
25 Feb 20 |
nicklas |
478 |
else |
5843 |
25 Feb 20 |
nicklas |
479 |
{ |
5843 |
25 Feb 20 |
nicklas |
480 |
metrics.readPairsExamined = Values.getLong(cols[readPairsExaminedIndex], null); |
5843 |
25 Feb 20 |
nicklas |
481 |
metrics.readPairDuplicates = Values.getLong(cols[readPairDuplicatesIndex], null); |
5843 |
25 Feb 20 |
nicklas |
482 |
metrics.fractionDuplication = Values.getFloat(cols[percentDuplicationIndex], null); |
5843 |
25 Feb 20 |
nicklas |
483 |
break; |
5843 |
25 Feb 20 |
nicklas |
484 |
} |
5843 |
25 Feb 20 |
nicklas |
485 |
} |
5843 |
25 Feb 20 |
nicklas |
486 |
} |
5843 |
25 Feb 20 |
nicklas |
487 |
|
5841 |
25 Feb 20 |
nicklas |
488 |
DbControl dc = null; |
5841 |
25 Feb 20 |
nicklas |
489 |
try |
5841 |
25 Feb 20 |
nicklas |
490 |
{ |
6599 |
22 Feb 22 |
nicklas |
491 |
dc = sc.newDbControl("Reggie: MIPS alignment completed handler"); |
5841 |
25 Feb 20 |
nicklas |
492 |
|
5841 |
25 Feb 20 |
nicklas |
493 |
AlignedSequences alignedSequences = AlignedSequences.getByJob(dc, job); |
5841 |
25 Feb 20 |
nicklas |
494 |
DerivedBioAssay aligned = alignedSequences.getItem(); |
5841 |
25 Feb 20 |
nicklas |
495 |
|
5843 |
25 Feb 20 |
nicklas |
496 |
Annotationtype.ALIGNED_PAIRS.setAnnotationValue(dc, aligned, metrics.alignedReadPairs); |
5843 |
25 Feb 20 |
nicklas |
497 |
Annotationtype.ALIGNED_BASES.setAnnotationValue(dc, aligned, metrics.alignedBases); |
5843 |
25 Feb 20 |
nicklas |
498 |
Annotationtype.READ_PAIRS_EXAMINED.setAnnotationValue(dc, aligned, metrics.readPairsExamined); |
5843 |
25 Feb 20 |
nicklas |
499 |
Annotationtype.READ_PAIR_DUPLICATES.setAnnotationValue(dc, aligned, metrics.readPairDuplicates); |
5843 |
25 Feb 20 |
nicklas |
500 |
Annotationtype.FRACTION_DUPLICATION.setAnnotationValue(dc, aligned, metrics.fractionDuplication); |
5843 |
25 Feb 20 |
nicklas |
501 |
|
5841 |
25 Feb 20 |
nicklas |
// Create file links |
5841 |
25 Feb 20 |
nicklas |
503 |
boolean useExternalProjectArchive = Reggie.isExternalItem(aligned.getName()); |
5841 |
25 Feb 20 |
nicklas |
504 |
FileServer fileArchive = useExternalProjectArchive ? Fileserver.EXTERNAL_ARCHIVE.load(dc) : Fileserver.PROJECT_ARCHIVE.load(dc); |
5841 |
25 Feb 20 |
nicklas |
505 |
String analysisDir = useExternalProjectArchive ? Reggie.EXTERNAL_ANALYSIS_DIR : Reggie.SECONDARY_ANALYSIS_DIR; |
5841 |
25 Feb 20 |
nicklas |
506 |
|
5841 |
25 Feb 20 |
nicklas |
507 |
String dataFilesFolder = (String)Annotationtype.DATA_FILES_FOLDER.getAnnotationValue(dc, aligned); |
5841 |
25 Feb 20 |
nicklas |
508 |
String baseFolder = Reggie.convertDataFilesFolderToBaseFolder(dataFilesFolder); |
5841 |
25 Feb 20 |
nicklas |
509 |
Directory localDataDir = Directory.getNew(dc, new Path(analysisDir+baseFolder, Path.Type.DIRECTORY)); |
5841 |
25 Feb 20 |
nicklas |
510 |
DataFileType bamData = Datafiletype.BAM.load(dc); |
5841 |
25 Feb 20 |
nicklas |
511 |
ItemSubtype bamType = bamData.getGenericType(); |
5841 |
25 Feb 20 |
nicklas |
512 |
|
5841 |
25 Feb 20 |
nicklas |
513 |
int lineNo = 0; |
5841 |
25 Feb 20 |
nicklas |
514 |
for (String line : filesOut.split("\n")) |
5841 |
25 Feb 20 |
nicklas |
515 |
{ |
5841 |
25 Feb 20 |
nicklas |
516 |
lineNo++; |
5841 |
25 Feb 20 |
nicklas |
517 |
|
5841 |
25 Feb 20 |
nicklas |
518 |
File f = File.getFile(dc, localDataDir, line.substring(line.lastIndexOf("/")+1), true); |
5841 |
25 Feb 20 |
nicklas |
519 |
f.setFileServer(fileArchive); |
5841 |
25 Feb 20 |
nicklas |
520 |
String fileUrl = "sftp://" + fileArchive.getHost() + dataFilesFolder + "/" + f.getName(); |
5841 |
25 Feb 20 |
nicklas |
521 |
try |
5841 |
25 Feb 20 |
nicklas |
522 |
{ |
5841 |
25 Feb 20 |
nicklas |
523 |
f.setUrl(fileUrl, true); |
5841 |
25 Feb 20 |
nicklas |
524 |
} |
5841 |
25 Feb 20 |
nicklas |
525 |
catch (RuntimeException ex) |
5841 |
25 Feb 20 |
nicklas |
526 |
{ |
5841 |
25 Feb 20 |
nicklas |
527 |
f.setUrl(fileUrl, false); |
5841 |
25 Feb 20 |
nicklas |
528 |
} |
5841 |
25 Feb 20 |
nicklas |
529 |
|
5841 |
25 Feb 20 |
nicklas |
530 |
if (!f.isInDatabase()) |
5841 |
25 Feb 20 |
nicklas |
531 |
{ |
5841 |
25 Feb 20 |
nicklas |
532 |
dc.saveItem(f); |
5841 |
25 Feb 20 |
nicklas |
533 |
} |
5841 |
25 Feb 20 |
nicklas |
534 |
if (f.getName().equals("concordant.bam")) |
5841 |
25 Feb 20 |
nicklas |
535 |
{ |
5843 |
25 Feb 20 |
nicklas |
536 |
f.setDescription(metrics.alignedReadPairs + " ALIGNED PAIRS; " + metrics.alignedBases + " ALIGNED BASES"); |
5841 |
25 Feb 20 |
nicklas |
537 |
f.setItemSubtype(bamType); |
5841 |
25 Feb 20 |
nicklas |
538 |
FileSetMember member = aligned.getFileSet().addMember(f, bamData); |
5841 |
25 Feb 20 |
nicklas |
539 |
} |
5841 |
25 Feb 20 |
nicklas |
540 |
else |
5841 |
25 Feb 20 |
nicklas |
541 |
{ |
5841 |
25 Feb 20 |
nicklas |
542 |
AnyToAny link = AnyToAny.getNewOrExisting(dc, aligned, f.getName(), f, true); |
5841 |
25 Feb 20 |
nicklas |
543 |
if (!link.isInDatabase()) dc.saveItem(link); |
5841 |
25 Feb 20 |
nicklas |
544 |
} |
5841 |
25 Feb 20 |
nicklas |
545 |
} |
5841 |
25 Feb 20 |
nicklas |
546 |
|
5841 |
25 Feb 20 |
nicklas |
547 |
dc.commit(); |
5841 |
25 Feb 20 |
nicklas |
548 |
} |
5841 |
25 Feb 20 |
nicklas |
549 |
finally |
5841 |
25 Feb 20 |
nicklas |
550 |
{ |
5841 |
25 Feb 20 |
nicklas |
551 |
if (dc != null) dc.close(); |
5841 |
25 Feb 20 |
nicklas |
552 |
} |
5841 |
25 Feb 20 |
nicklas |
553 |
|
5841 |
25 Feb 20 |
nicklas |
554 |
return metrics; |
5841 |
25 Feb 20 |
nicklas |
555 |
} |
5841 |
25 Feb 20 |
nicklas |
556 |
|
5826 |
18 Feb 20 |
nicklas |
557 |
} |
5826 |
18 Feb 20 |
nicklas |
558 |
|
5841 |
25 Feb 20 |
nicklas |
559 |
static class Metrics |
5841 |
25 Feb 20 |
nicklas |
560 |
{ |
5843 |
25 Feb 20 |
nicklas |
561 |
Long alignedReadPairs = null; |
5843 |
25 Feb 20 |
nicklas |
562 |
Long alignedBases = null; |
5843 |
25 Feb 20 |
nicklas |
563 |
Long readPairsExamined = null; |
5843 |
25 Feb 20 |
nicklas |
564 |
Long readPairDuplicates = null; |
5843 |
25 Feb 20 |
nicklas |
565 |
Float fractionDuplication = null; |
5841 |
25 Feb 20 |
nicklas |
566 |
} |
5841 |
25 Feb 20 |
nicklas |
567 |
|
5826 |
18 Feb 20 |
nicklas |
568 |
} |