5486 |
12 Jun 19 |
nicklas |
1 |
package net.sf.basedb.reggie.grid; |
5486 |
12 Jun 19 |
nicklas |
2 |
|
5490 |
13 Jun 19 |
nicklas |
3 |
import java.io.ByteArrayInputStream; |
5490 |
13 Jun 19 |
nicklas |
4 |
import java.io.IOException; |
5490 |
13 Jun 19 |
nicklas |
5 |
import java.nio.charset.Charset; |
5487 |
12 Jun 19 |
nicklas |
6 |
import java.util.ArrayList; |
5487 |
12 Jun 19 |
nicklas |
7 |
import java.util.Arrays; |
5587 |
30 Aug 19 |
nicklas |
8 |
import java.util.Date; |
5490 |
13 Jun 19 |
nicklas |
9 |
import java.util.HashMap; |
5487 |
12 Jun 19 |
nicklas |
10 |
import java.util.HashSet; |
5486 |
12 Jun 19 |
nicklas |
11 |
import java.util.List; |
5487 |
12 Jun 19 |
nicklas |
12 |
import java.util.Map; |
5487 |
12 Jun 19 |
nicklas |
13 |
import java.util.Set; |
5487 |
12 Jun 19 |
nicklas |
14 |
import java.util.TreeMap; |
5490 |
13 Jun 19 |
nicklas |
15 |
import java.util.TreeSet; |
5490 |
13 Jun 19 |
nicklas |
16 |
import java.util.regex.Matcher; |
5490 |
13 Jun 19 |
nicklas |
17 |
import java.util.regex.Pattern; |
5486 |
12 Jun 19 |
nicklas |
18 |
|
5486 |
12 Jun 19 |
nicklas |
19 |
import org.json.simple.JSONArray; |
7079 |
27 Mar 23 |
nicklas |
20 |
import org.slf4j.LoggerFactory; |
5486 |
12 Jun 19 |
nicklas |
21 |
|
5490 |
13 Jun 19 |
nicklas |
22 |
import net.sf.basedb.core.AnyToAny; |
5490 |
13 Jun 19 |
nicklas |
23 |
import net.sf.basedb.core.DataFileType; |
5486 |
12 Jun 19 |
nicklas |
24 |
import net.sf.basedb.core.DbControl; |
5487 |
12 Jun 19 |
nicklas |
25 |
import net.sf.basedb.core.DerivedBioAssay; |
5490 |
13 Jun 19 |
nicklas |
26 |
import net.sf.basedb.core.Directory; |
5487 |
12 Jun 19 |
nicklas |
27 |
import net.sf.basedb.core.Extract; |
5490 |
13 Jun 19 |
nicklas |
28 |
import net.sf.basedb.core.File; |
5490 |
13 Jun 19 |
nicklas |
29 |
import net.sf.basedb.core.FileServer; |
5490 |
13 Jun 19 |
nicklas |
30 |
import net.sf.basedb.core.FileSetMember; |
5487 |
12 Jun 19 |
nicklas |
31 |
import net.sf.basedb.core.ItemNotFoundException; |
5584 |
26 Aug 19 |
nicklas |
32 |
import net.sf.basedb.core.ItemQuery; |
5487 |
12 Jun 19 |
nicklas |
33 |
import net.sf.basedb.core.ItemSubtype; |
5486 |
12 Jun 19 |
nicklas |
34 |
import net.sf.basedb.core.Job; |
5490 |
13 Jun 19 |
nicklas |
35 |
import net.sf.basedb.core.Path; |
5487 |
12 Jun 19 |
nicklas |
36 |
import net.sf.basedb.core.PhysicalBioAssay; |
5487 |
12 Jun 19 |
nicklas |
37 |
import net.sf.basedb.core.SessionControl; |
5490 |
13 Jun 19 |
nicklas |
38 |
import net.sf.basedb.core.Software; |
5487 |
12 Jun 19 |
nicklas |
39 |
import net.sf.basedb.core.StringParameterType; |
5487 |
12 Jun 19 |
nicklas |
40 |
import net.sf.basedb.core.Tag; |
5487 |
12 Jun 19 |
nicklas |
41 |
import net.sf.basedb.opengrid.CmdResult; |
5487 |
12 Jun 19 |
nicklas |
42 |
import net.sf.basedb.opengrid.JobDefinition; |
5487 |
12 Jun 19 |
nicklas |
43 |
import net.sf.basedb.opengrid.JobStatus; |
5487 |
12 Jun 19 |
nicklas |
44 |
import net.sf.basedb.opengrid.OpenGrid; |
5486 |
12 Jun 19 |
nicklas |
45 |
import net.sf.basedb.opengrid.OpenGridCluster; |
5487 |
12 Jun 19 |
nicklas |
46 |
import net.sf.basedb.opengrid.OpenGridSession; |
5487 |
12 Jun 19 |
nicklas |
47 |
import net.sf.basedb.opengrid.ScriptBuilder; |
5487 |
12 Jun 19 |
nicklas |
48 |
import net.sf.basedb.opengrid.config.ClusterConfig; |
5487 |
12 Jun 19 |
nicklas |
49 |
import net.sf.basedb.opengrid.config.JobConfig; |
5490 |
13 Jun 19 |
nicklas |
50 |
import net.sf.basedb.opengrid.service.JobCompletionHandler; |
5487 |
12 Jun 19 |
nicklas |
51 |
import net.sf.basedb.reggie.Reggie; |
5487 |
12 Jun 19 |
nicklas |
52 |
import net.sf.basedb.reggie.XmlConfig; |
5487 |
12 Jun 19 |
nicklas |
53 |
import net.sf.basedb.reggie.dao.Annotationtype; |
5490 |
13 Jun 19 |
nicklas |
54 |
import net.sf.basedb.reggie.dao.Datafiletype; |
5490 |
13 Jun 19 |
nicklas |
55 |
import net.sf.basedb.reggie.dao.DemuxedSequences; |
5490 |
13 Jun 19 |
nicklas |
56 |
import net.sf.basedb.reggie.dao.Fileserver; |
5487 |
12 Jun 19 |
nicklas |
57 |
import net.sf.basedb.reggie.dao.FlowCell; |
5487 |
12 Jun 19 |
nicklas |
58 |
import net.sf.basedb.reggie.dao.Library; |
5487 |
12 Jun 19 |
nicklas |
59 |
import net.sf.basedb.reggie.dao.MergedSequences; |
5490 |
13 Jun 19 |
nicklas |
60 |
import net.sf.basedb.reggie.dao.Pipeline; |
5487 |
12 Jun 19 |
nicklas |
61 |
import net.sf.basedb.reggie.dao.Subtype; |
5487 |
12 Jun 19 |
nicklas |
62 |
import net.sf.basedb.reggie.plugins.BarcodeFilesForDemuxExporter; |
5587 |
30 Aug 19 |
nicklas |
63 |
import net.sf.basedb.reggie.plugins.BarcodeFilesForDemuxExporter.OutputFileData; |
5487 |
12 Jun 19 |
nicklas |
64 |
import net.sf.basedb.util.NameableComparator; |
5487 |
12 Jun 19 |
nicklas |
65 |
import net.sf.basedb.util.Values; |
7079 |
27 Mar 23 |
nicklas |
66 |
import net.sf.basedb.util.extensions.logging.ExtensionsLog; |
7079 |
27 Mar 23 |
nicklas |
67 |
import net.sf.basedb.util.extensions.logging.ExtensionsLogger; |
5490 |
13 Jun 19 |
nicklas |
68 |
import net.sf.basedb.util.parser.FlatFileParser; |
5486 |
12 Jun 19 |
nicklas |
69 |
|
5486 |
12 Jun 19 |
nicklas |
70 |
/** |
5486 |
12 Jun 19 |
nicklas |
Helper class for creating items needed for demuxing and merging |
5486 |
12 Jun 19 |
nicklas |
MIPs sequencing data. It will generate the demux script and send it |
5486 |
12 Jun 19 |
nicklas |
to the cluster for execution. |
5486 |
12 Jun 19 |
nicklas |
74 |
|
5486 |
12 Jun 19 |
nicklas |
@author nicklas |
5486 |
12 Jun 19 |
nicklas |
@since 4.23 |
5486 |
12 Jun 19 |
nicklas |
77 |
*/ |
5486 |
12 Jun 19 |
nicklas |
78 |
public class MipsDemuxJobCreator |
5486 |
12 Jun 19 |
nicklas |
79 |
extends DemuxJobCreator |
5486 |
12 Jun 19 |
nicklas |
80 |
{ |
5486 |
12 Jun 19 |
nicklas |
81 |
/**
  Create a new job creator. The pipeline passed to the
  superclass is always {@link Pipeline#MIPS}.
*/
public MipsDemuxJobCreator()
{
  super(Pipeline.MIPS);
}
5486 |
12 Jun 19 |
nicklas |
85 |
|
5486 |
12 Jun 19 |
nicklas |
86 |
@Override |
5486 |
12 Jun 19 |
nicklas |
87 |
public Job createDemuxJob(DbControl dc, OpenGridCluster cluster, List<DemuxDefinition> demuxDefs, JSONArray jsonMessages) |
5486 |
12 Jun 19 |
nicklas |
88 |
{ |
5487 |
12 Jun 19 |
nicklas |
// Add 'rm' in script to remove files that we no longer need (to preserve disk space) |
5487 |
12 Jun 19 |
nicklas |
90 |
boolean earlyCleanup = !debug; |
5487 |
12 Jun 19 |
nicklas |
91 |
|
5487 |
12 Jun 19 |
nicklas |
92 |
SessionControl sc = dc.getSessionControl(); |
5487 |
12 Jun 19 |
nicklas |
93 |
|
5487 |
12 Jun 19 |
nicklas |
94 |
String demuxParameterSet = (String)Annotationtype.PARAMETER_SET.getAnnotationValue(dc, demuxSoftware); |
5487 |
12 Jun 19 |
nicklas |
95 |
String mergeParameterSet = (String)Annotationtype.PARAMETER_SET.getAnnotationValue(dc, mergeSoftware); |
5487 |
12 Jun 19 |
nicklas |
96 |
|
5487 |
12 Jun 19 |
nicklas |
97 |
ClusterConfig clusterCfg = cluster.getConfig(); |
5487 |
12 Jun 19 |
nicklas |
98 |
XmlConfig cfg = Reggie.getConfig(cluster.getId()); |
5487 |
12 Jun 19 |
nicklas |
99 |
if (cfg == null) |
5487 |
12 Jun 19 |
nicklas |
100 |
{ |
5487 |
12 Jun 19 |
nicklas |
101 |
throw new ItemNotFoundException("No configuration in reggie-config.xml for cluster: " + cluster.getId()); |
5487 |
12 Jun 19 |
nicklas |
102 |
} |
5487 |
12 Jun 19 |
nicklas |
103 |
|
5487 |
12 Jun 19 |
nicklas |
// Get global options |
5487 |
12 Jun 19 |
nicklas |
105 |
String runArchive = cfg.getRequiredConfig("run-archive", null); |
5487 |
12 Jun 19 |
nicklas |
106 |
List<String> allRunArchives = new ArrayList<>(); |
5487 |
12 Jun 19 |
nicklas |
107 |
allRunArchives.add(runArchive); |
5487 |
12 Jun 19 |
nicklas |
108 |
allRunArchives.addAll(cfg.getConfigList("run-archive", 2)); |
5487 |
12 Jun 19 |
nicklas |
109 |
String projectRoot = cfg.getRequiredConfig("project-archive", null); |
5487 |
12 Jun 19 |
nicklas |
110 |
String externalRoot = cfg.getConfig("external-archive", null, projectRoot); |
5487 |
12 Jun 19 |
nicklas |
111 |
|
5487 |
12 Jun 19 |
nicklas |
// Paths to programs used (picard, ....) |
5487 |
12 Jun 19 |
nicklas |
113 |
String pipeline_scripts_path = cfg.getRequiredConfig("programs/pipeline-scripts/path", null); |
5487 |
12 Jun 19 |
nicklas |
114 |
String picard_path = cfg.getRequiredConfig("programs/picard/path", demuxParameterSet); |
5487 |
12 Jun 19 |
nicklas |
115 |
|
5487 |
12 Jun 19 |
nicklas |
// Options for the programs when demuxing |
7372 |
06 Oct 23 |
nicklas |
117 |
String demux_submit = cfg.getConfig("demux-mips/submit", demuxParameterSet, null); |
7372 |
06 Oct 23 |
nicklas |
118 |
String demux_submit_debug = cfg.getConfig("demux-mips/submit-debug", demuxParameterSet, null); |
5487 |
12 Jun 19 |
nicklas |
119 |
String demux_picardMemory = cfg.getConfig("demux-mips/picard-memory", demuxParameterSet, "50g"); |
5487 |
12 Jun 19 |
nicklas |
120 |
String demux_extractOptions = cfg.getConfig("demux-mips/extract-options", demuxParameterSet, null); |
5487 |
12 Jun 19 |
nicklas |
121 |
String demux_fastqOptions = cfg.getConfig("demux-mips/fastq-options", demuxParameterSet, null); |
5587 |
30 Aug 19 |
nicklas |
122 |
String demux_readGroupOptions = cfg.getConfig("demux-mips/readgroup-options", demuxParameterSet, ""); |
5487 |
12 Jun 19 |
nicklas |
123 |
String debug_tileLimitHiSeq = cfg.getConfig("demux-mips/debug-tile-limit-hiseq", demuxParameterSet, "2"); |
5487 |
12 Jun 19 |
nicklas |
124 |
String debug_tileLimitNextSeq = cfg.getConfig("demux-mips/debug-tile-limit-nextseq", demuxParameterSet, "16"); |
5487 |
12 Jun 19 |
nicklas |
125 |
String pigzOptions = cfg.getConfig("demux-mips/pigz-options", mergeParameterSet, "-5"); |
5487 |
12 Jun 19 |
nicklas |
126 |
|
5487 |
12 Jun 19 |
nicklas |
// Load common items |
5487 |
12 Jun 19 |
nicklas |
128 |
ItemSubtype demuxedType = Subtype.DEMUXED_SEQUENCES.get(dc); |
5487 |
12 Jun 19 |
nicklas |
129 |
ItemSubtype mergedType = Subtype.MERGED_SEQUENCES.get(dc); |
5487 |
12 Jun 19 |
nicklas |
130 |
|
5487 |
12 Jun 19 |
nicklas |
// We need all possible barcodes to create the |
5487 |
12 Jun 19 |
nicklas |
// demux template files |
5487 |
12 Jun 19 |
nicklas |
133 |
ItemQuery<Tag> barcodeQuery = Tag.getQuery(); |
5487 |
12 Jun 19 |
nicklas |
134 |
Subtype.BARCODE.addFilter(dc, barcodeQuery); |
5487 |
12 Jun 19 |
nicklas |
135 |
barcodeQuery.include(Reggie.INCLUDE_IN_CURRENT_PROJECT); |
5487 |
12 Jun 19 |
nicklas |
136 |
Pipeline.MIPS.addFilter(dc, barcodeQuery); |
5487 |
12 Jun 19 |
nicklas |
137 |
List<Tag> allTags = barcodeQuery.list(dc); |
5487 |
12 Jun 19 |
nicklas |
138 |
|
5487 |
12 Jun 19 |
nicklas |
// Options common for all jobs |
5487 |
12 Jun 19 |
nicklas |
140 |
JobConfig jobConfig = new JobConfig(); |
5487 |
12 Jun 19 |
nicklas |
141 |
if (priority != null) jobConfig.setPriority(priority); |
7372 |
06 Oct 23 |
nicklas |
142 |
if (partition != null) jobConfig.setSbatchOption("partition", ScriptUtil.checkValidScriptParameter(partition)); |
7372 |
06 Oct 23 |
nicklas |
143 |
jobConfig.convertOptionsTo(clusterCfg.getType()); |
7372 |
06 Oct 23 |
nicklas |
144 |
if (submitOptionsOverride != null) |
7372 |
06 Oct 23 |
nicklas |
145 |
{ |
7372 |
06 Oct 23 |
nicklas |
146 |
ScriptUtil.addSubmitOptions(jobConfig, submitOptionsOverride, clusterCfg.getType()); |
7372 |
06 Oct 23 |
nicklas |
147 |
} |
7372 |
06 Oct 23 |
nicklas |
148 |
else |
7372 |
06 Oct 23 |
nicklas |
149 |
{ |
7372 |
06 Oct 23 |
nicklas |
150 |
ScriptUtil.addSubmitOptions(jobConfig, demux_submit, clusterCfg.getType()); |
7372 |
06 Oct 23 |
nicklas |
151 |
if (debug) ScriptUtil.addSubmitOptions(jobConfig, demux_submit_debug, clusterCfg.getType()); |
7372 |
06 Oct 23 |
nicklas |
152 |
} |
5487 |
12 Jun 19 |
nicklas |
153 |
|
5487 |
12 Jun 19 |
nicklas |
// Create job |
5492 |
13 Jun 19 |
nicklas |
155 |
Job demuxJob = createJobItem(dc, "MIPs demux", demuxDefs); |
5487 |
12 Jun 19 |
nicklas |
156 |
|
5487 |
12 Jun 19 |
nicklas |
// Files and folder to be used for storing data and other information |
5487 |
12 Jun 19 |
nicklas |
158 |
String totalMetricsFile = "${WD}/demultiplex_metrics.txt"; |
5487 |
12 Jun 19 |
nicklas |
159 |
String skippedTilesFile = "${WD}/skipped_tiles.txt"; |
5487 |
12 Jun 19 |
nicklas |
160 |
String filesOut = "${WD}/files.out"; |
5487 |
12 Jun 19 |
nicklas |
161 |
|
5487 |
12 Jun 19 |
nicklas |
162 |
OpenGridSession ogSession = null; |
5487 |
12 Jun 19 |
nicklas |
163 |
try |
5487 |
12 Jun 19 |
nicklas |
164 |
{ |
5487 |
12 Jun 19 |
nicklas |
165 |
ogSession = cluster.connect(5); |
5487 |
12 Jun 19 |
nicklas |
166 |
|
5487 |
12 Jun 19 |
nicklas |
// Generated script for demuxing and merging the selected sequencing runs |
5487 |
12 Jun 19 |
nicklas |
168 |
ScriptBuilder script = new ScriptBuilder(); |
6665 |
05 Apr 22 |
nicklas |
169 |
script.cmd(debug ? "set -ex" : "set -e"); |
6674 |
11 Apr 22 |
nicklas |
170 |
JobDefinition jobDef = new JobDefinition("MIPsDemux", jobConfig, batchConfig, demuxJob); |
5487 |
12 Jun 19 |
nicklas |
171 |
jobDef.setDebug(debug); |
5487 |
12 Jun 19 |
nicklas |
172 |
|
5487 |
12 Jun 19 |
nicklas |
173 |
script.comment("Setting up scripting environment and copying script to tmp folder"); |
5487 |
12 Jun 19 |
nicklas |
174 |
script.cmd("export ScriptDir=" + pipeline_scripts_path); |
5487 |
12 Jun 19 |
nicklas |
175 |
script.cmd("export PicardDir="+picard_path); |
5487 |
12 Jun 19 |
nicklas |
176 |
script.cmd("export PicardMemory="+demux_picardMemory); |
5487 |
12 Jun 19 |
nicklas |
177 |
script.newLine(); |
5487 |
12 Jun 19 |
nicklas |
178 |
|
5487 |
12 Jun 19 |
nicklas |
179 |
script.comment("Use 1 thread/core but not more than slots assigned by the queue system"); |
5487 |
12 Jun 19 |
nicklas |
180 |
script.cmd("NumThreads=`nproc`"); |
5487 |
12 Jun 19 |
nicklas |
181 |
script.cmd("NumThreads=$(( ${NSLOTS} < ${NumThreads} ? ${NSLOTS} : ${NumThreads} ))"); |
5487 |
12 Jun 19 |
nicklas |
182 |
script.newLine(); |
5487 |
12 Jun 19 |
nicklas |
183 |
script.comment("Move to the temporary working directory and copy the pipeline scripts"); |
5487 |
12 Jun 19 |
nicklas |
184 |
script.cmd("cd ${TMPDIR}"); |
5487 |
12 Jun 19 |
nicklas |
185 |
script.cmd("mkdir fastq"); |
5534 |
27 Jun 19 |
nicklas |
186 |
script.cmd("cp ${ScriptDir}/picard2 ."); |
5487 |
12 Jun 19 |
nicklas |
187 |
script.cmd("cp ${ScriptDir}/stdwrap.sh ."); |
5539 |
28 Jun 19 |
nicklas |
188 |
script.cmd("cp ${ScriptDir}/fqpaste.pl ."); |
5487 |
12 Jun 19 |
nicklas |
189 |
script.newLine(); |
5487 |
12 Jun 19 |
nicklas |
190 |
|
5487 |
12 Jun 19 |
nicklas |
// Maps Library->MergedSequences that are created by this job |
5487 |
12 Jun 19 |
nicklas |
192 |
Map<Extract, DerivedBioAssay> mergedSequences = new TreeMap<Extract, DerivedBioAssay>(new NameableComparator<Extract>(false)); |
5865 |
12 Mar 20 |
nicklas |
193 |
BarcodeFilesForDemuxExporter exporter = new BarcodeFilesForDemuxExporter(dc, allTags); |
5587 |
30 Aug 19 |
nicklas |
194 |
Map<String, String> DT = new HashMap<>(); // We need sequencing started dates (DT) in the "Read group" files |
5487 |
12 Jun 19 |
nicklas |
195 |
|
5487 |
12 Jun 19 |
nicklas |
196 |
for (DemuxDefinition def : demuxDefs) |
5487 |
12 Jun 19 |
nicklas |
197 |
{ |
5487 |
12 Jun 19 |
nicklas |
198 |
DerivedBioAssay seqRun = def.seqRun.getDerivedBioAssay(); |
5487 |
12 Jun 19 |
nicklas |
199 |
Annotationtype.AUTO_PROCESSING.setAnnotationValue(dc, seqRun, null); |
5487 |
12 Jun 19 |
nicklas |
200 |
|
5487 |
12 Jun 19 |
nicklas |
// Create DEMUXED derived bioassay item |
5492 |
13 Jun 19 |
nicklas |
202 |
DerivedBioAssay demux = createDemuxedSequences(dc, demuxJob, def); |
5492 |
13 Jun 19 |
nicklas |
203 |
def.existingMergedSequences = mergedSequences; |
5487 |
12 Jun 19 |
nicklas |
204 |
|
5492 |
13 Jun 19 |
nicklas |
205 |
createAllMergedSequences(dc, demuxJob, def); |
5492 |
13 Jun 19 |
nicklas |
206 |
|
5492 |
13 Jun 19 |
nicklas |
207 |
scriptSnippetFindRunArchive(dc, script, allRunArchives, def); |
5492 |
13 Jun 19 |
nicklas |
208 |
|
5487 |
12 Jun 19 |
nicklas |
// Load flow cell information for the sequencing run |
5487 |
12 Jun 19 |
nicklas |
210 |
FlowCell fc = FlowCell.getBySequencingRun(dc, def.seqRun); |
5487 |
12 Jun 19 |
nicklas |
211 |
PhysicalBioAssay flowCell = fc.getPhysicalBioAssay(); |
5487 |
12 Jun 19 |
nicklas |
212 |
String sequencerName = getSequencerName(seqRun.getHardware()); |
5487 |
12 Jun 19 |
nicklas |
213 |
Integer runNumber = (Integer)Annotationtype.SEQUENCING_RUN_NUMBER.getAnnotationValue(dc, seqRun); |
5487 |
12 Jun 19 |
nicklas |
214 |
String flowCellBarcode = ScriptUtil.checkValidScriptParameter((String)Annotationtype.FLOWCELL_ID.getAnnotationValue(dc, flowCell)); |
5487 |
12 Jun 19 |
nicklas |
215 |
String flowCellType = (String)Annotationtype.FLOWCELL_TYPE.getAnnotationValue(dc, flowCell); |
5487 |
12 Jun 19 |
nicklas |
216 |
String debug_tileLimit = "NextSeq".equals(flowCellType) ? debug_tileLimitNextSeq : debug_tileLimitHiSeq; |
5487 |
12 Jun 19 |
nicklas |
217 |
|
5592 |
06 Sep 19 |
nicklas |
// NOTE! We store barcode2 in NextSeq format and they need to be reversed for HiSeq |
5592 |
06 Sep 19 |
nicklas |
// https://support.illumina.com/content/dam/illumina-support/documents/documentation/system_documentation/miseq/indexed-sequencing-overview-guide-15057455-05.pdf |
5592 |
06 Sep 19 |
nicklas |
220 |
boolean useReverseComplementOnBarcode2 = "HiSeq".equals(flowCellType); |
5592 |
06 Sep 19 |
nicklas |
221 |
|
5587 |
30 Aug 19 |
nicklas |
222 |
Date started = (Date)Annotationtype.SEQUENCING_START.getAnnotationValue(dc, seqRun); |
5587 |
30 Aug 19 |
nicklas |
223 |
DT.put(flowCellBarcode, Reggie.CONVERTER_DATE_TO_STRING_WITH_SEPARATOR.convert(started)); |
5587 |
30 Aug 19 |
nicklas |
224 |
|
5492 |
13 Jun 19 |
nicklas |
225 |
int totalLanes = def.laneInfo.size(); |
5487 |
12 Jun 19 |
nicklas |
226 |
int currentLane = 0; |
5492 |
13 Jun 19 |
nicklas |
227 |
for (LaneInfo lane : def.laneInfo) |
5487 |
12 Jun 19 |
nicklas |
228 |
{ |
5492 |
13 Jun 19 |
nicklas |
229 |
int laneNo = lane.laneNo; |
5487 |
12 Jun 19 |
nicklas |
230 |
currentLane++; |
5492 |
13 Jun 19 |
nicklas |
231 |
|
5492 |
13 Jun 19 |
nicklas |
232 |
ScriptUtil.checkValidScriptParameter(lane.pool.getName()); |
5487 |
12 Jun 19 |
nicklas |
233 |
|
5492 |
13 Jun 19 |
nicklas |
// Export files required by Picard |
5592 |
06 Sep 19 |
nicklas |
235 |
exportMultiplexFiles(dc, exporter, jobDef, def, lane, useReverseComplementOnBarcode2); |
5487 |
12 Jun 19 |
nicklas |
236 |
|
5487 |
12 Jun 19 |
nicklas |
// Progress between 5-25% |
5492 |
13 Jun 19 |
nicklas |
238 |
String demuxName = demux.getName(); |
5487 |
12 Jun 19 |
nicklas |
239 |
int percent = 5 + ((currentLane * 20) / totalLanes); |
5492 |
13 Jun 19 |
nicklas |
240 |
script.comment(flowCellBarcode + "; lane " + laneNo); |
5487 |
12 Jun 19 |
nicklas |
241 |
script.progress(percent, "Extracting barcodes: " + flowCellBarcode + "; lane "+ laneNo + " (${NumThreads} threads)"); |
5586 |
29 Aug 19 |
nicklas |
242 |
script.cmd("mkdir -p " + lane.barcodesDir); |
5487 |
12 Jun 19 |
nicklas |
243 |
|
5487 |
12 Jun 19 |
nicklas |
// First step is to extract barcodes |
5534 |
27 Jun 19 |
nicklas |
245 |
String extractCmd = "./stdwrap.sh ./picard2 ExtractIlluminaBarcodes"; |
5534 |
27 Jun 19 |
nicklas |
246 |
extractCmd += " -BASECALLS_DIR ${RUN_ARCHIVE}/Data/Intensities/BaseCalls"; |
5534 |
27 Jun 19 |
nicklas |
247 |
extractCmd += " -BARCODE_FILE ${WD}/"+lane.barcodesFile; |
5534 |
27 Jun 19 |
nicklas |
248 |
extractCmd += " -LANE "+laneNo; |
5534 |
27 Jun 19 |
nicklas |
249 |
extractCmd += " -READ_STRUCTURE "+def.readString; |
5586 |
29 Aug 19 |
nicklas |
250 |
extractCmd += " -OUTPUT_DIR "+lane.barcodesDir; |
5534 |
27 Jun 19 |
nicklas |
251 |
extractCmd += " -METRICS_FILE "+lane.metricsFile; |
5534 |
27 Jun 19 |
nicklas |
252 |
extractCmd += " -NUM_PROCESSORS ${NumThreads}"; |
5534 |
27 Jun 19 |
nicklas |
253 |
extractCmd += " -TMP_DIR ${TMPDIR}"; |
5487 |
12 Jun 19 |
nicklas |
254 |
if (demux_extractOptions != null) |
5487 |
12 Jun 19 |
nicklas |
255 |
{ |
5487 |
12 Jun 19 |
nicklas |
256 |
extractCmd += " " + demux_extractOptions; |
5487 |
12 Jun 19 |
nicklas |
257 |
} |
5487 |
12 Jun 19 |
nicklas |
258 |
extractCmd += " >> " + demuxName + "_" + laneNo + ".out"; |
5584 |
26 Aug 19 |
nicklas |
259 |
script.time("STARTING: ExtractIlluminaBarcodes; lane="+laneNo); |
5487 |
12 Jun 19 |
nicklas |
260 |
script.cmd(extractCmd); |
5487 |
12 Jun 19 |
nicklas |
261 |
script.time("DONE: ExtractIlluminaBarcodes; lane="+laneNo); |
5487 |
12 Jun 19 |
nicklas |
262 |
|
5487 |
12 Jun 19 |
nicklas |
// Copy demultiplex metrics back to job folder |
5487 |
12 Jun 19 |
nicklas |
264 |
script.cmd("echo \"# [" + demuxName+"]\" >> " + totalMetricsFile); |
5487 |
12 Jun 19 |
nicklas |
265 |
script.cmd("echo \"# Lane=" + laneNo + "\" >> " + totalMetricsFile); |
5492 |
13 Jun 19 |
nicklas |
266 |
script.cmd("echo \"# Pool=" + lane.pool.getName() + "\" >> " + totalMetricsFile); |
5492 |
13 Jun 19 |
nicklas |
267 |
script.cmd("cat " + lane.metricsFile + " >> " + totalMetricsFile); |
5487 |
12 Jun 19 |
nicklas |
268 |
|
5487 |
12 Jun 19 |
nicklas |
// Check for skipped tiles (eg. *_barcode.txt file size is 0) |
5487 |
12 Jun 19 |
nicklas |
270 |
script.cmd("echo \"[" + demuxName + "]\" >> " + skippedTilesFile); |
5586 |
29 Aug 19 |
nicklas |
271 |
script.cmd("find " + lane.barcodesDir + " -name \"*_barcode.txt\" -size 0 >> " + skippedTilesFile); |
5487 |
12 Jun 19 |
nicklas |
272 |
|
5487 |
12 Jun 19 |
nicklas |
// Second step to generate FASTQ files |
5487 |
12 Jun 19 |
nicklas |
274 |
script.progress(percent+1, "Creating FASTQ: " + flowCellBarcode + "; lane "+ laneNo + " (${NumThreads} threads)"); |
5487 |
12 Jun 19 |
nicklas |
275 |
|
5534 |
27 Jun 19 |
nicklas |
276 |
String fastqCmd = "./stdwrap.sh ./picard2 IlluminaBasecallsToFastq"; |
5534 |
27 Jun 19 |
nicklas |
277 |
fastqCmd += " -BASECALLS_DIR ${RUN_ARCHIVE}/Data/Intensities/BaseCalls"; |
5586 |
29 Aug 19 |
nicklas |
278 |
fastqCmd += " -BARCODES_DIR "+lane.barcodesDir; |
5534 |
27 Jun 19 |
nicklas |
279 |
fastqCmd += " -MULTIPLEX_PARAMS ${WD}/"+lane.multiplexFile; |
5534 |
27 Jun 19 |
nicklas |
280 |
fastqCmd += " -LANE "+laneNo; |
5534 |
27 Jun 19 |
nicklas |
281 |
fastqCmd += " -READ_STRUCTURE "+def.readString; |
5534 |
27 Jun 19 |
nicklas |
282 |
fastqCmd += " -FLOWCELL_BARCODE "+flowCellBarcode; |
5534 |
27 Jun 19 |
nicklas |
283 |
fastqCmd += " -RUN_BARCODE " + runNumber; |
5534 |
27 Jun 19 |
nicklas |
284 |
fastqCmd += " -MACHINE_NAME "+sequencerName; |
5534 |
27 Jun 19 |
nicklas |
285 |
fastqCmd += " -NUM_PROCESSORS ${NumThreads}"; |
5534 |
27 Jun 19 |
nicklas |
286 |
fastqCmd += " -TMP_DIR ${TMPDIR}"; |
5487 |
12 Jun 19 |
nicklas |
287 |
if (demux_fastqOptions != null) |
5487 |
12 Jun 19 |
nicklas |
288 |
{ |
5487 |
12 Jun 19 |
nicklas |
289 |
fastqCmd += " " + demux_fastqOptions; |
5487 |
12 Jun 19 |
nicklas |
290 |
} |
5584 |
26 Aug 19 |
nicklas |
291 |
if (!fastqCmd.contains(("IGNORE_UNEXPECTED_BARCODES"))) |
5584 |
26 Aug 19 |
nicklas |
292 |
{ |
5584 |
26 Aug 19 |
nicklas |
293 |
fastqCmd += " -IGNORE_UNEXPECTED_BARCODES true"; |
5584 |
26 Aug 19 |
nicklas |
294 |
} |
5487 |
12 Jun 19 |
nicklas |
295 |
if (debug) |
5487 |
12 Jun 19 |
nicklas |
296 |
{ |
5534 |
27 Jun 19 |
nicklas |
297 |
fastqCmd += " -TILE_LIMIT "+debug_tileLimit; |
5487 |
12 Jun 19 |
nicklas |
298 |
} |
5487 |
12 Jun 19 |
nicklas |
299 |
fastqCmd += " >> " + demuxName + "_" + laneNo + ".out"; |
5584 |
26 Aug 19 |
nicklas |
300 |
script.time("STARTING: IlluminaBasecallsToFastq; lane="+laneNo); |
5487 |
12 Jun 19 |
nicklas |
301 |
script.cmd(fastqCmd); |
5487 |
12 Jun 19 |
nicklas |
302 |
script.time("DONE: IlluminaBasecallsToFastq; lane="+laneNo); |
5487 |
12 Jun 19 |
nicklas |
303 |
if (earlyCleanup) |
5487 |
12 Jun 19 |
nicklas |
304 |
{ |
5487 |
12 Jun 19 |
nicklas |
// Remove barcode files we no longer need |
5586 |
29 Aug 19 |
nicklas |
306 |
script.cmd("rm -rf " + lane.barcodesDir); |
5487 |
12 Jun 19 |
nicklas |
307 |
} |
5487 |
12 Jun 19 |
nicklas |
308 |
script.newLine(); |
5487 |
12 Jun 19 |
nicklas |
309 |
} |
5487 |
12 Jun 19 |
nicklas |
310 |
if (jsonMessages != null) |
5487 |
12 Jun 19 |
nicklas |
311 |
{ |
5492 |
13 Jun 19 |
nicklas |
312 |
jsonMessages.add("Created " + demux.getName() + " for demuxing " + def.libsOnFlowCell.size() + " libraries on " + totalLanes + " lanes."); |
5487 |
12 Jun 19 |
nicklas |
313 |
} |
5487 |
12 Jun 19 |
nicklas |
314 |
} |
5487 |
12 Jun 19 |
nicklas |
315 |
|
5487 |
12 Jun 19 |
nicklas |
316 |
script.time("DEMUX COMPLETE"); |
5487 |
12 Jun 19 |
nicklas |
317 |
if (jsonMessages != null) |
5487 |
12 Jun 19 |
nicklas |
318 |
{ |
5487 |
12 Jun 19 |
nicklas |
319 |
jsonMessages.add("Created MergedSequences items for " + mergedSequences.size() + " libraries "); |
5487 |
12 Jun 19 |
nicklas |
320 |
} |
5487 |
12 Jun 19 |
nicklas |
321 |
|
5487 |
12 Jun 19 |
nicklas |
// Generate script for moving FASTQ files to project-archive/external-archive |
5487 |
12 Jun 19 |
nicklas |
323 |
int totalToMove = mergedSequences.size(); |
5487 |
12 Jun 19 |
nicklas |
324 |
int currentToMove = 0; |
5487 |
12 Jun 19 |
nicklas |
325 |
for (DerivedBioAssay merged : mergedSequences.values()) |
5487 |
12 Jun 19 |
nicklas |
326 |
{ |
5487 |
12 Jun 19 |
nicklas |
327 |
Library lib = Library.get(merged.getExtract()); |
5487 |
12 Jun 19 |
nicklas |
328 |
|
5524 |
24 Jun 19 |
nicklas |
329 |
String mergeName = ScriptUtil.checkValidFilename(merged.getName()); |
5553 |
12 Aug 19 |
nicklas |
330 |
boolean isExternal = Reggie.isExternalItem(mergeName); |
5524 |
24 Jun 19 |
nicklas |
331 |
String archiveRoot = isExternal ? externalRoot : projectRoot; |
5487 |
12 Jun 19 |
nicklas |
332 |
String fastqFolder = archiveRoot + (String)Annotationtype.DATA_FILES_FOLDER.getAnnotationValue(dc, merged); |
5587 |
30 Aug 19 |
nicklas |
333 |
|
5595 |
11 Sep 19 |
nicklas |
// Set file permissions based on consent or external group! |
5595 |
11 Sep 19 |
nicklas |
335 |
String externalGroup = isExternal ? Reggie.getExternalGroup(mergeName) : null; |
5595 |
11 Sep 19 |
nicklas |
336 |
ScriptUtil.setUmaskForItem(dc, lib, externalGroup, script); |
5595 |
11 Sep 19 |
nicklas |
337 |
|
5587 |
30 Aug 19 |
nicklas |
338 |
String LB = isExternal ? Reggie.removePrefix(lib.getName()) : lib.getName(); |
5786 |
13 Dec 19 |
nicklas |
339 |
String SM = isExternal ? Reggie.removePrefix(lib.getTopExtractOrSample(dc).getName()) : LB.replaceFirst("\\.(?!\\d).*", ""); |
5587 |
30 Aug 19 |
nicklas |
340 |
|
5487 |
12 Jun 19 |
nicklas |
// Progress between 25-95% |
5487 |
12 Jun 19 |
nicklas |
342 |
int percent = 25 + ((currentToMove * 70) / totalToMove); |
5487 |
12 Jun 19 |
nicklas |
343 |
currentToMove++; |
5586 |
29 Aug 19 |
nicklas |
344 |
script.comment("Consolidating FASTQ: " + mergeName); |
5586 |
29 Aug 19 |
nicklas |
345 |
script.progress(percent, "Consolidating FASTQ: " + mergeName); |
5586 |
29 Aug 19 |
nicklas |
346 |
script.time("STARTING: " + mergeName); |
5586 |
29 Aug 19 |
nicklas |
347 |
script.cmd("echo [" + mergeName + "] >> " + filesOut); |
5586 |
29 Aug 19 |
nicklas |
348 |
script.cmd("mkdir -p " + fastqFolder); |
5586 |
29 Aug 19 |
nicklas |
349 |
script.cmd("rm -rf " + fastqFolder + "/*"); |
5487 |
12 Jun 19 |
nicklas |
350 |
|
5587 |
30 Aug 19 |
nicklas |
List<OutputFileData> filePrefixes = exporter.getOutputFilePrefixes(merged); |
5524 |
24 Jun 19 |
nicklas |
String baseFileName = isExternal ? Reggie.removePrefix(mergeName) : mergeName; |
5587 |
30 Aug 19 |
nicklas |
for (OutputFileData prefix : filePrefixes) |
5586 |
29 Aug 19 |
nicklas |
354 |
{ |
5587 |
30 Aug 19 |
nicklas |
String baseFileNameForLane = baseFileName+"_"+prefix.flowCellId+"_L"+prefix.lane; |
5587 |
30 Aug 19 |
nicklas |
String R1_name = baseFileNameForLane+"_R1.fastq.gz"; |
5587 |
30 Aug 19 |
nicklas |
String R2_name = baseFileNameForLane+"_R2.fastq.gz"; |
5587 |
30 Aug 19 |
nicklas |
String umi_name = baseFileNameForLane+"_UMI.fastq"; // No .gz since we need the name without it as well |
5587 |
30 Aug 19 |
nicklas |
String rg_name = baseFileNameForLane+".rg"; |
5586 |
29 Aug 19 |
nicklas |
360 |
|
5587 |
30 Aug 19 |
nicklas |
script.cmd("pigz " + pigzOptions + " -p ${NumThreads} -c fastq/" + prefix.prefix + ".1.fastq > " + fastqFolder + "/"+R1_name); |
5586 |
29 Aug 19 |
nicklas |
script.time("DONE: pigz " + R1_name); |
5587 |
30 Aug 19 |
nicklas |
script.cmd("pigz " + pigzOptions + " -p ${NumThreads} -c fastq/" + prefix.prefix + ".2.fastq > " + fastqFolder + "/"+R2_name); |
5586 |
29 Aug 19 |
nicklas |
script.time("DONE: pigz " + R2_name); |
5586 |
29 Aug 19 |
nicklas |
365 |
|
5586 |
29 Aug 19 |
nicklas |
script.cmd("./fqpaste.pl fastq/"+prefix.prefix+".index_1.fastq" + " fastq/"+prefix.prefix+".index_2.fastq > fastq/" + umi_name); |
5586 |
29 Aug 19 |
nicklas |
script.cmd("pigz " + pigzOptions + " -p ${NumThreads} -c fastq/" + umi_name + " > " + fastqFolder + "/"+umi_name + ".gz"); |
5586 |
29 Aug 19 |
nicklas |
script.time("DONE: pigz " + umi_name); |
5587 |
30 Aug 19 |
nicklas |
369 |
|
5587 |
30 Aug 19 |
nicklas |
// Create "Read group" file |
5587 |
30 Aug 19 |
nicklas |
String RG = prefix.flowCellId + "." + prefix.lane; |
5587 |
30 Aug 19 |
nicklas |
String PU = RG+"."+prefix.barcode1+"-"+prefix.barcode2; |
5587 |
30 Aug 19 |
nicklas |
script.cmd("echo \"RG="+RG+" SM="+SM+" LB="+LB+" PU="+PU+" DT="+DT.get(prefix.flowCellId)+" "+demux_readGroupOptions + "\" > " + fastqFolder + "/" + rg_name); |
5586 |
29 Aug 19 |
nicklas |
374 |
} |
5487 |
12 Jun 19 |
nicklas |
375 |
|
5595 |
11 Sep 19 |
nicklas |
if (externalGroup != null) |
5595 |
11 Sep 19 |
nicklas |
377 |
{ |
5930 |
06 May 20 |
nicklas |
ScriptUtil.addChgrp(externalGroup, fastqFolder, mergeName, archiveRoot+"/"+Reggie.getPrefix(mergeName), script); |
5595 |
11 Sep 19 |
nicklas |
379 |
} |
5595 |
11 Sep 19 |
nicklas |
380 |
|
5487 |
12 Jun 19 |
nicklas |
if (earlyCleanup) |
5487 |
12 Jun 19 |
nicklas |
382 |
{ |
5487 |
12 Jun 19 |
nicklas |
// Remove the unmerged FASTQ files |
5487 |
12 Jun 19 |
nicklas |
script.cmd("rm -f fastq/" + mergeName + "*"); |
5487 |
12 Jun 19 |
nicklas |
385 |
} |
5587 |
30 Aug 19 |
nicklas |
script.cmd("ls -1 "+fastqFolder+"/* >> " + filesOut); |
5586 |
29 Aug 19 |
nicklas |
script.time("DONE: " + mergeName); |
5487 |
12 Jun 19 |
nicklas |
script.newLine(); |
5487 |
12 Jun 19 |
nicklas |
389 |
} |
5487 |
12 Jun 19 |
nicklas |
390 |
|
5487 |
12 Jun 19 |
nicklas |
script.newLine(); |
5487 |
12 Jun 19 |
nicklas |
392 |
|
5487 |
12 Jun 19 |
nicklas |
script.progress(99, "Cleaning up temporary folders"); |
5487 |
12 Jun 19 |
nicklas |
394 |
|
5487 |
12 Jun 19 |
nicklas |
// submit to cluster |
5487 |
12 Jun 19 |
nicklas |
jobDef.setCmd(script.toString()); |
5487 |
12 Jun 19 |
nicklas |
397 |
|
5487 |
12 Jun 19 |
nicklas |
CmdResult<List<JobStatus>> qsub = ogSession.qsub(dc, Arrays.asList(jobDef)); |
5487 |
12 Jun 19 |
nicklas |
qsub.throwExceptionIfNonZeroExitStatus(); |
5487 |
12 Jun 19 |
nicklas |
400 |
|
5487 |
12 Jun 19 |
nicklas |
JobStatus jobStatus = qsub.getResult().get(0); |
5492 |
13 Jun 19 |
nicklas |
demuxJob.setParameterValue("jobName", new StringParameterType(), jobStatus.getName()); |
5487 |
12 Jun 19 |
nicklas |
String jobId = jobStatus.getJobIdentifier().getClusterJobId(); |
5487 |
12 Jun 19 |
nicklas |
if (jobId == null || jobId.equals("")) |
5487 |
12 Jun 19 |
nicklas |
405 |
{ |
5487 |
12 Jun 19 |
nicklas |
demuxJob.doneError("Cluster returned no job-id for this job"); |
5487 |
12 Jun 19 |
nicklas |
407 |
} |
5487 |
12 Jun 19 |
nicklas |
408 |
} |
5487 |
12 Jun 19 |
nicklas |
finally |
5487 |
12 Jun 19 |
nicklas |
410 |
{ |
5487 |
12 Jun 19 |
nicklas |
OpenGrid.close(ogSession); |
5487 |
12 Jun 19 |
nicklas |
412 |
} |
5487 |
12 Jun 19 |
nicklas |
413 |
|
5487 |
12 Jun 19 |
nicklas |
return demuxJob; |
5486 |
12 Jun 19 |
nicklas |
415 |
} |
5486 |
12 Jun 19 |
nicklas |
416 |
|
5490 |
13 Jun 19 |
nicklas |
417 |
/** |
5490 |
13 Jun 19 |
nicklas |
Job completion handler for MIPs demux/merge jobs. The handler downloads the |
5490 |
13 Jun 19 |
nicklas |
'demultiplex_metrics.txt' file from the job folder and parses out number of |
5490 |
13 Jun 19 |
nicklas |
reads and passed filter information for each sequenced library. The information |
5490 |
13 Jun 19 |
nicklas |
is stored on {@link MergedSequences} items in {@link Annotationtype#READS} |
5490 |
13 Jun 19 |
nicklas |
and {@link Annotationtype#PF_READS} annotations. |
5490 |
13 Jun 19 |
nicklas |
423 |
*/ |
5490 |
13 Jun 19 |
nicklas |
424 |
public static class DemuxJobCompletionHandler |
5490 |
13 Jun 19 |
nicklas |
425 |
implements JobCompletionHandler |
5490 |
13 Jun 19 |
nicklas |
426 |
{ |
7079 |
27 Mar 23 |
nicklas |
427 |
|
7079 |
27 Mar 23 |
nicklas |
428 |
/**
	Logger used by this handler. It is the slf4j logger for this class
	wrapped by the extensions logger that is registered under the id of
	the job completion handler factory.
*/
private static final ExtensionsLogger logger = ExtensionsLog
	.getLogger(JobCompletionHandlerFactory.ID, true)
	.wrap(LoggerFactory.getLogger(DemuxJobCompletionHandler.class));

/**
	Configuration for the host that executed the job. It is assigned in
	{@link #jobCompleted(SessionControl, OpenGridSession, Job, JobStatus)}
	before the job files are parsed.
*/
private XmlConfig cfg;

/**
	Create a new handler. No state is initialized here.
*/
public DemuxJobCompletionHandler()
{}
5490 |
13 Jun 19 |
nicklas |
435 |
|
5490 |
13 Jun 19 |
nicklas |
436 |
@Override |
5490 |
13 Jun 19 |
nicklas |
437 |
public String jobCompleted(SessionControl sc, OpenGridSession session, Job job, JobStatus status) |
5490 |
13 Jun 19 |
nicklas |
438 |
{ |
5490 |
13 Jun 19 |
nicklas |
439 |
String jobName = status.getName(); |
5490 |
13 Jun 19 |
nicklas |
440 |
String metrics = session.getJobFileAsString(jobName, "demultiplex_metrics.txt", "UTF-8"); |
5490 |
13 Jun 19 |
nicklas |
441 |
String skippedTiles = session.getJobFileAsString(jobName, "skipped_tiles.txt", "UTF-8"); |
5490 |
13 Jun 19 |
nicklas |
442 |
String files = session.getJobFileAsString(jobName, "files.out", "UTF-8"); |
5619 |
20 Sep 19 |
nicklas |
443 |
String chgrp = null; |
5619 |
20 Sep 19 |
nicklas |
444 |
try |
5619 |
20 Sep 19 |
nicklas |
445 |
{ |
5619 |
20 Sep 19 |
nicklas |
446 |
chgrp = session.getJobFileAsString(jobName, "chgrp.out", "UTF-8"); |
5619 |
20 Sep 19 |
nicklas |
447 |
} |
5619 |
20 Sep 19 |
nicklas |
448 |
catch (RuntimeException ex) |
5619 |
20 Sep 19 |
nicklas |
449 |
{} // The file only exists if there are external samples that has been mapped to groups |
5490 |
13 Jun 19 |
nicklas |
450 |
|
5490 |
13 Jun 19 |
nicklas |
451 |
cfg = Reggie.getConfig(session.getHost().getId()); |
5490 |
13 Jun 19 |
nicklas |
452 |
|
5619 |
20 Sep 19 |
nicklas |
453 |
Reads total = parseDemultiplexMetrics(sc, metrics, skippedTiles, files, chgrp); |
5490 |
13 Jun 19 |
nicklas |
454 |
String msg = Values.formatNumber(total.reads/1000000f, 1) + "M reads; "; |
5490 |
13 Jun 19 |
nicklas |
455 |
msg += Values.formatNumber(total.passedFilter/1000000f, 1) + "M passed filter; "; |
5490 |
13 Jun 19 |
nicklas |
456 |
if (total.warnings.size() > 0) |
5490 |
13 Jun 19 |
nicklas |
457 |
{ |
5490 |
13 Jun 19 |
nicklas |
458 |
msg += total.warnings.size() + " warnings!"; |
5490 |
13 Jun 19 |
nicklas |
459 |
} |
5490 |
13 Jun 19 |
nicklas |
460 |
return msg; |
5490 |
13 Jun 19 |
nicklas |
461 |
} |
5490 |
13 Jun 19 |
nicklas |
462 |
|
5619 |
20 Sep 19 |
nicklas |
463 |
private Reads parseDemultiplexMetrics(SessionControl sc, String metrics, String skippedTiles, String files, String chgrp) |
5490 |
13 Jun 19 |
nicklas |
464 |
{ |
5490 |
13 Jun 19 |
nicklas |
465 |
Map<String, Reads> sumReads = new HashMap<String, Reads>(); |
5490 |
13 Jun 19 |
nicklas |
466 |
|
5619 |
20 Sep 19 |
nicklas |
467 |
Map<String, String> chgrpWarnings = ScriptUtil.parseChgrpErrors(chgrp); |
5619 |
20 Sep 19 |
nicklas |
468 |
|
5490 |
13 Jun 19 |
nicklas |
// Parse the demultiplex_metrics file |
5490 |
13 Jun 19 |
nicklas |
470 |
FlatFileParser ffp = new FlatFileParser(); |
5490 |
13 Jun 19 |
nicklas |
471 |
ffp.setIgnoreRegexp(Pattern.compile("#.*")); |
5534 |
27 Jun 19 |
nicklas |
472 |
ffp.setDataHeaderRegexp(Pattern.compile("BARCODE\t.*")); |
5490 |
13 Jun 19 |
nicklas |
473 |
ffp.setDataSplitterRegexp(Pattern.compile("\t")); |
5490 |
13 Jun 19 |
nicklas |
474 |
ffp.setSectionRegexp(Pattern.compile("#\\s\\[(.*)\\]")); |
5490 |
13 Jun 19 |
nicklas |
475 |
ffp.setHeaderRegexp(Pattern.compile("#\\s(\\w+)=(.*)")); |
5490 |
13 Jun 19 |
nicklas |
476 |
ffp.setInputStream(new ByteArrayInputStream(metrics.getBytes(Charset.forName("UTF-8"))), "UTF-8"); |
5490 |
13 Jun 19 |
nicklas |
477 |
ffp.setMinDataColumns(10); |
5490 |
13 Jun 19 |
nicklas |
478 |
|
5490 |
13 Jun 19 |
nicklas |
479 |
Set<String> demuxNames = new HashSet<String>(); |
5490 |
13 Jun 19 |
nicklas |
480 |
try |
5490 |
13 Jun 19 |
nicklas |
481 |
{ |
5490 |
13 Jun 19 |
nicklas |
482 |
while (ffp.hasMoreSections()) |
5490 |
13 Jun 19 |
nicklas |
483 |
{ |
5490 |
13 Jun 19 |
nicklas |
484 |
FlatFileParser.Line section = ffp.nextSection(); |
5490 |
13 Jun 19 |
nicklas |
485 |
String demuxName = section.name(); |
5490 |
13 Jun 19 |
nicklas |
486 |
demuxNames.add(demuxName); |
5490 |
13 Jun 19 |
nicklas |
487 |
|
5490 |
13 Jun 19 |
nicklas |
488 |
Reads sumDemux = sumReads.get(demuxName); |
5490 |
13 Jun 19 |
nicklas |
489 |
if (sumDemux == null) |
5490 |
13 Jun 19 |
nicklas |
490 |
{ |
5490 |
13 Jun 19 |
nicklas |
491 |
sumDemux = new Reads(demuxName); |
5490 |
13 Jun 19 |
nicklas |
492 |
sumReads.put(demuxName, sumDemux); |
5490 |
13 Jun 19 |
nicklas |
493 |
} |
5490 |
13 Jun 19 |
nicklas |
494 |
|
5490 |
13 Jun 19 |
nicklas |
495 |
ffp.parseHeaders(); |
5490 |
13 Jun 19 |
nicklas |
496 |
int laneNo = Values.getInt(ffp.getHeader("Lane")); |
5490 |
13 Jun 19 |
nicklas |
497 |
String poolName = ffp.getHeader("Pool"); |
5490 |
13 Jun 19 |
nicklas |
498 |
int barcodeCol = ffp.getColumnHeaderIndex("BARCODE"); |
5490 |
13 Jun 19 |
nicklas |
499 |
int barcodeNameCol = ffp.getColumnHeaderIndex("BARCODE_NAME"); |
5490 |
13 Jun 19 |
nicklas |
500 |
int libCol = ffp.getColumnHeaderIndex("LIBRARY_NAME"); |
5490 |
13 Jun 19 |
nicklas |
501 |
int numReadsCol = ffp.getColumnHeaderIndex("READS"); |
5490 |
13 Jun 19 |
nicklas |
502 |
int passedFilterCol = ffp.getColumnHeaderIndex("PF_READS"); |
5490 |
13 Jun 19 |
nicklas |
503 |
int passedFilterPctCol = ffp.getColumnHeaderIndex("PF_PCT_MATCHES"); |
5490 |
13 Jun 19 |
nicklas |
504 |
ffp.setUseNullIfEmpty(true); |
5490 |
13 Jun 19 |
nicklas |
505 |
|
5490 |
13 Jun 19 |
nicklas |
506 |
if (logger.isDebugEnabled()) |
5490 |
13 Jun 19 |
nicklas |
507 |
{ |
5490 |
13 Jun 19 |
nicklas |
508 |
logger.debug("Section: " + demuxName + "; lane: " + laneNo + "; pool: " + poolName); |
5490 |
13 Jun 19 |
nicklas |
509 |
logger.debug("Columns indexes: LIBRARY_NAME=" + libCol + "; READS="+numReadsCol + "; PF_READS="+passedFilterCol+"; PF_PCT_MATCHES="+passedFilterPctCol); |
5490 |
13 Jun 19 |
nicklas |
510 |
} |
5490 |
13 Jun 19 |
nicklas |
511 |
|
5490 |
13 Jun 19 |
nicklas |
512 |
while (ffp.hasMoreData()) |
5490 |
13 Jun 19 |
nicklas |
513 |
{ |
5490 |
13 Jun 19 |
nicklas |
514 |
FlatFileParser.Data line = ffp.nextData(); |
5490 |
13 Jun 19 |
nicklas |
515 |
String libName = line.getString(libCol); |
5490 |
13 Jun 19 |
nicklas |
516 |
long numReads = Values.getLong(line.getString(numReadsCol), -1); |
5490 |
13 Jun 19 |
nicklas |
517 |
long numPassedFilter = Values.getLong(line.getString(passedFilterCol), 0); |
5490 |
13 Jun 19 |
nicklas |
518 |
float passedFilterPct = Values.getFloat(line.getString(passedFilterPctCol), 0) * 100; |
5490 |
13 Jun 19 |
nicklas |
519 |
|
5490 |
13 Jun 19 |
nicklas |
520 |
if (logger.isDebugEnabled()) |
5490 |
13 Jun 19 |
nicklas |
521 |
{ |
5490 |
13 Jun 19 |
nicklas |
522 |
logger.debug(libName + "; " + numReads + "; " + numPassedFilter +"; " + passedFilterPct); |
5490 |
13 Jun 19 |
nicklas |
523 |
} |
5490 |
13 Jun 19 |
nicklas |
524 |
|
5490 |
13 Jun 19 |
nicklas |
525 |
if (numReads >= 0) |
5490 |
13 Jun 19 |
nicklas |
526 |
{ |
5490 |
13 Jun 19 |
nicklas |
527 |
if (libName == null) |
5490 |
13 Jun 19 |
nicklas |
528 |
{ |
5490 |
13 Jun 19 |
nicklas |
529 |
libName = demuxName+".N"; |
5490 |
13 Jun 19 |
nicklas |
530 |
} |
5490 |
13 Jun 19 |
nicklas |
531 |
else if ("IGNORED".equals(libName)) |
5490 |
13 Jun 19 |
nicklas |
532 |
{ |
5490 |
13 Jun 19 |
nicklas |
533 |
libName = demuxName+".I"; |
5490 |
13 Jun 19 |
nicklas |
534 |
} |
5490 |
13 Jun 19 |
nicklas |
535 |
else if ("UNUSED".equals(libName)) |
5490 |
13 Jun 19 |
nicklas |
536 |
{ |
5490 |
13 Jun 19 |
nicklas |
537 |
libName = demuxName+".U"; |
5490 |
13 Jun 19 |
nicklas |
538 |
} |
5490 |
13 Jun 19 |
nicklas |
539 |
else |
5804 |
08 Jan 20 |
nicklas |
540 |
{} |
5490 |
13 Jun 19 |
nicklas |
541 |
|
5490 |
13 Jun 19 |
nicklas |
542 |
Reads sum = sumReads.get(libName); |
5490 |
13 Jun 19 |
nicklas |
543 |
if (sum == null) |
5490 |
13 Jun 19 |
nicklas |
544 |
{ |
5490 |
13 Jun 19 |
nicklas |
545 |
sum = new Reads(libName); |
5490 |
13 Jun 19 |
nicklas |
546 |
sumReads.put(libName, sum); |
5490 |
13 Jun 19 |
nicklas |
547 |
} |
5588 |
30 Aug 19 |
nicklas |
548 |
sum.add(laneNo, numReads, numPassedFilter); |
5588 |
30 Aug 19 |
nicklas |
549 |
sumDemux.add(laneNo, numReads, numPassedFilter); |
5490 |
13 Jun 19 |
nicklas |
550 |
} |
5619 |
20 Sep 19 |
nicklas |
551 |
|
5619 |
20 Sep 19 |
nicklas |
552 |
if (chgrpWarnings.containsKey(libName)) |
5619 |
20 Sep 19 |
nicklas |
553 |
{ |
5619 |
20 Sep 19 |
nicklas |
554 |
sumDemux.addWarning(libName+": "+chgrpWarnings.get(libName)); |
5619 |
20 Sep 19 |
nicklas |
555 |
} |
5490 |
13 Jun 19 |
nicklas |
556 |
} |
5490 |
13 Jun 19 |
nicklas |
557 |
} |
5490 |
13 Jun 19 |
nicklas |
558 |
} |
5490 |
13 Jun 19 |
nicklas |
559 |
catch (IOException ex) |
5490 |
13 Jun 19 |
nicklas |
560 |
{ |
5490 |
13 Jun 19 |
nicklas |
561 |
logger.error(ex.getMessage(), ex); |
5490 |
13 Jun 19 |
nicklas |
562 |
throw new RuntimeException(ex); |
5490 |
13 Jun 19 |
nicklas |
563 |
} |
5490 |
13 Jun 19 |
nicklas |
564 |
|
5490 |
13 Jun 19 |
nicklas |
// Parse the skipped_tiles.txt file |
5490 |
13 Jun 19 |
nicklas |
566 |
Pattern sectionPattern = Pattern.compile("\\[(.*)\\]"); |
5490 |
13 Jun 19 |
nicklas |
567 |
Pattern barcodeFilePattern = Pattern.compile(".*(\\d+)_(.+)_barcode.txt"); |
5490 |
13 Jun 19 |
nicklas |
568 |
int lineNo = 0; |
5490 |
13 Jun 19 |
nicklas |
569 |
Reads currentDemux = null; |
5490 |
13 Jun 19 |
nicklas |
570 |
for (String line : skippedTiles.split("\n")) |
5490 |
13 Jun 19 |
nicklas |
571 |
{ |
5490 |
13 Jun 19 |
nicklas |
572 |
lineNo++; |
5490 |
13 Jun 19 |
nicklas |
573 |
Matcher m = sectionPattern.matcher(line); |
5490 |
13 Jun 19 |
nicklas |
574 |
if (m.matches()) |
5490 |
13 Jun 19 |
nicklas |
575 |
{ |
5490 |
13 Jun 19 |
nicklas |
576 |
String demuxName = m.group(1); |
5490 |
13 Jun 19 |
nicklas |
577 |
currentDemux = sumReads.get(demuxName); |
5490 |
13 Jun 19 |
nicklas |
578 |
if (currentDemux == null) |
5490 |
13 Jun 19 |
nicklas |
579 |
{ |
5490 |
13 Jun 19 |
nicklas |
580 |
logger.error("At line " + lineNo + ": Found skipped tiles section for '" + demuxName + "' but not demultiplex metrics"); |
5490 |
13 Jun 19 |
nicklas |
581 |
} |
5490 |
13 Jun 19 |
nicklas |
582 |
continue; |
5490 |
13 Jun 19 |
nicklas |
583 |
} |
5490 |
13 Jun 19 |
nicklas |
584 |
m = barcodeFilePattern.matcher(line); |
5490 |
13 Jun 19 |
nicklas |
585 |
if (m.matches()) |
5490 |
13 Jun 19 |
nicklas |
586 |
{ |
5490 |
13 Jun 19 |
nicklas |
587 |
if (currentDemux == null) |
5490 |
13 Jun 19 |
nicklas |
588 |
{ |
5490 |
13 Jun 19 |
nicklas |
589 |
logger.error("At line " + lineNo + ": Found skipped tiles data but has not found a demux name"); |
5490 |
13 Jun 19 |
nicklas |
590 |
} |
5490 |
13 Jun 19 |
nicklas |
591 |
else |
5490 |
13 Jun 19 |
nicklas |
592 |
{ |
5490 |
13 Jun 19 |
nicklas |
593 |
int laneNo = Values.getInt(m.group(1)); |
5490 |
13 Jun 19 |
nicklas |
594 |
String tileNo = m.group(2); |
5490 |
13 Jun 19 |
nicklas |
595 |
currentDemux.addSkippedTile(laneNo, tileNo); |
5490 |
13 Jun 19 |
nicklas |
596 |
if (logger.isDebugEnabled()) |
5490 |
13 Jun 19 |
nicklas |
597 |
{ |
5490 |
13 Jun 19 |
nicklas |
598 |
logger.debug("Skipped tile: " + currentDemux.libName + "; lane=" + laneNo + "; tile=" + tileNo); |
5490 |
13 Jun 19 |
nicklas |
599 |
} |
5490 |
13 Jun 19 |
nicklas |
600 |
} |
5490 |
13 Jun 19 |
nicklas |
601 |
} |
5490 |
13 Jun 19 |
nicklas |
602 |
} |
5490 |
13 Jun 19 |
nicklas |
603 |
|
5490 |
13 Jun 19 |
nicklas |
// Parse the files.out file |
5490 |
13 Jun 19 |
nicklas |
605 |
Reads currentLib = null; |
5490 |
13 Jun 19 |
nicklas |
606 |
lineNo = 0; |
5490 |
13 Jun 19 |
nicklas |
607 |
for (String line : files.split("\n")) |
5490 |
13 Jun 19 |
nicklas |
608 |
{ |
5490 |
13 Jun 19 |
nicklas |
609 |
lineNo++; |
5490 |
13 Jun 19 |
nicklas |
610 |
Matcher m = sectionPattern.matcher(line); |
5490 |
13 Jun 19 |
nicklas |
611 |
if (m.matches()) |
5490 |
13 Jun 19 |
nicklas |
612 |
{ |
5490 |
13 Jun 19 |
nicklas |
613 |
String libName = m.group(1); |
5490 |
13 Jun 19 |
nicklas |
614 |
currentLib = sumReads.get(libName); |
5490 |
13 Jun 19 |
nicklas |
615 |
if (currentLib == null) |
5490 |
13 Jun 19 |
nicklas |
616 |
{ |
5490 |
13 Jun 19 |
nicklas |
617 |
logger.error("At line " + lineNo + ": Found files section for lib '" + libName + "' but not demultiplex metrics"); |
5490 |
13 Jun 19 |
nicklas |
618 |
} |
5490 |
13 Jun 19 |
nicklas |
619 |
continue; |
5490 |
13 Jun 19 |
nicklas |
620 |
} |
5490 |
13 Jun 19 |
nicklas |
621 |
else |
5490 |
13 Jun 19 |
nicklas |
622 |
{ |
5490 |
13 Jun 19 |
nicklas |
623 |
if (currentLib == null) |
5490 |
13 Jun 19 |
nicklas |
624 |
{ |
5490 |
13 Jun 19 |
nicklas |
625 |
logger.error("At line " + lineNo + ": Found file data but has not found a library name"); |
5490 |
13 Jun 19 |
nicklas |
626 |
} |
5490 |
13 Jun 19 |
nicklas |
627 |
else |
5490 |
13 Jun 19 |
nicklas |
628 |
{ |
5490 |
13 Jun 19 |
nicklas |
629 |
currentLib.addFile(line); |
5490 |
13 Jun 19 |
nicklas |
630 |
if (logger.isDebugEnabled()) |
5490 |
13 Jun 19 |
nicklas |
631 |
{ |
5490 |
13 Jun 19 |
nicklas |
632 |
logger.debug("File: " + currentLib.libName + "; " + line); |
5490 |
13 Jun 19 |
nicklas |
633 |
} |
5490 |
13 Jun 19 |
nicklas |
634 |
} |
5490 |
13 Jun 19 |
nicklas |
635 |
} |
5490 |
13 Jun 19 |
nicklas |
636 |
} |
5490 |
13 Jun 19 |
nicklas |
637 |
|
5490 |
13 Jun 19 |
nicklas |
638 |
DbControl dc = null; |
5490 |
13 Jun 19 |
nicklas |
639 |
Reads total = new Reads(null); |
5490 |
13 Jun 19 |
nicklas |
640 |
if (logger.isDebugEnabled()) |
5490 |
13 Jun 19 |
nicklas |
641 |
{ |
5490 |
13 Jun 19 |
nicklas |
642 |
logger.debug("Got read information for " + sumReads.size() + " libraries"); |
5490 |
13 Jun 19 |
nicklas |
643 |
} |
5490 |
13 Jun 19 |
nicklas |
644 |
try |
5490 |
13 Jun 19 |
nicklas |
645 |
{ |
6599 |
22 Feb 22 |
nicklas |
646 |
dc = sc.newDbControl("Reggie: MIPS demux completed handler"); |
5490 |
13 Jun 19 |
nicklas |
// Save metric file to BASE |
5490 |
13 Jun 19 |
nicklas |
648 |
Directory metricsDir = Directory.getNew(dc, new Path(DEMULTIPLEX_METRICS_DIR, Path.Type.DIRECTORY)); |
5490 |
13 Jun 19 |
nicklas |
649 |
|
5490 |
13 Jun 19 |
nicklas |
650 |
for (String demuxName : demuxNames) |
5490 |
13 Jun 19 |
nicklas |
651 |
{ |
5490 |
13 Jun 19 |
nicklas |
652 |
Reads demuxTotal = sumReads.remove(demuxName); |
5490 |
13 Jun 19 |
nicklas |
653 |
Reads demuxN = sumReads.remove(demuxName+".N"); |
5490 |
13 Jun 19 |
nicklas |
654 |
Reads demuxUnused = sumReads.remove(demuxName+".U"); |
5490 |
13 Jun 19 |
nicklas |
655 |
Reads demuxIgnore = sumReads.remove(demuxName+".I"); |
5490 |
13 Jun 19 |
nicklas |
656 |
|
5490 |
13 Jun 19 |
nicklas |
657 |
DemuxedSequences demux = DemuxedSequences.getByName(dc, demuxName); |
5490 |
13 Jun 19 |
nicklas |
658 |
DerivedBioAssay dx = demux.getItem(); |
5490 |
13 Jun 19 |
nicklas |
659 |
Annotationtype.READS.setAnnotationValue(dc, dx, demuxTotal.reads); |
5490 |
13 Jun 19 |
nicklas |
660 |
Annotationtype.PF_READS.setAnnotationValue(dc, dx, demuxTotal.passedFilter); |
5490 |
13 Jun 19 |
nicklas |
661 |
|
5490 |
13 Jun 19 |
nicklas |
662 |
Annotationtype.PF_NNNN_PCT.setAnnotationValue(dc, dx, demuxN == null ? 0 : 100f * demuxN.passedFilter / demuxTotal.passedFilter); |
5490 |
13 Jun 19 |
nicklas |
663 |
Annotationtype.PF_UNUSED_PCT.setAnnotationValue(dc, dx, demuxUnused == null ? 0 : 100f * demuxUnused.passedFilter / demuxTotal.passedFilter); |
5490 |
13 Jun 19 |
nicklas |
664 |
|
5490 |
13 Jun 19 |
nicklas |
665 |
if (demuxTotal.skippedTiles.size() > 0) |
5490 |
13 Jun 19 |
nicklas |
666 |
{ |
5490 |
13 Jun 19 |
nicklas |
667 |
Annotationtype.SKIPPED_TILES.setAnnotationValues(dc, dx, new ArrayList<String>(demuxTotal.skippedTiles)); |
5490 |
13 Jun 19 |
nicklas |
668 |
if (demuxTotal.skippedTiles.size() >= MAX_SKIPPED_TILES) |
5490 |
13 Jun 19 |
nicklas |
669 |
{ |
5490 |
13 Jun 19 |
nicklas |
670 |
demuxTotal.addWarning(demuxTotal.skippedTiles.size() + " tiles skipped due to 0-size barcode file"); |
5490 |
13 Jun 19 |
nicklas |
671 |
} |
5490 |
13 Jun 19 |
nicklas |
672 |
} |
5490 |
13 Jun 19 |
nicklas |
673 |
|
5490 |
13 Jun 19 |
nicklas |
674 |
if (demuxTotal.warnings.size() > 0) |
5490 |
13 Jun 19 |
nicklas |
675 |
{ |
5490 |
13 Jun 19 |
nicklas |
676 |
Annotationtype.DEMUX_WARNINGS.setAnnotationValues(dc, dx, demuxTotal.warnings); |
5490 |
13 Jun 19 |
nicklas |
677 |
total.warnings.addAll(demuxTotal.warnings); |
5490 |
13 Jun 19 |
nicklas |
678 |
} |
5490 |
13 Jun 19 |
nicklas |
679 |
|
5490 |
13 Jun 19 |
nicklas |
680 |
File metricsFile = File.getFile(dc, metricsDir, demuxName+".csv", true); |
5490 |
13 Jun 19 |
nicklas |
681 |
metricsFile.setMimeType("text/plain"); |
5490 |
13 Jun 19 |
nicklas |
682 |
metricsFile.setCharacterSet("UTF-8"); |
5490 |
13 Jun 19 |
nicklas |
683 |
if (!metricsFile.isInDatabase()) |
5490 |
13 Jun 19 |
nicklas |
684 |
{ |
5490 |
13 Jun 19 |
nicklas |
685 |
dc.saveItem(metricsFile); |
5490 |
13 Jun 19 |
nicklas |
686 |
} |
5800 |
19 Dec 19 |
nicklas |
687 |
String metricsForDemux = demuxNames.size() > 1 ? extractSections(metrics, demuxName) : metrics; |
5800 |
19 Dec 19 |
nicklas |
688 |
metricsFile.upload(new ByteArrayInputStream(metricsForDemux.getBytes(Charset.forName("UTF-8"))), false); |
5490 |
13 Jun 19 |
nicklas |
689 |
|
5490 |
13 Jun 19 |
nicklas |
690 |
AnyToAny dxMetrics = AnyToAny.getNew(dc, dx, metricsFile, "DemultiplexMetrics", true); |
5490 |
13 Jun 19 |
nicklas |
691 |
dc.saveItem(dxMetrics); |
5490 |
13 Jun 19 |
nicklas |
692 |
} |
5490 |
13 Jun 19 |
nicklas |
693 |
|
5490 |
13 Jun 19 |
nicklas |
694 |
DataFileType fastqData = Datafiletype.FASTQ.load(dc); |
5490 |
13 Jun 19 |
nicklas |
695 |
ItemSubtype fastqType = fastqData.getGenericType(); |
5490 |
13 Jun 19 |
nicklas |
696 |
FileServer projectArchive = Fileserver.PROJECT_ARCHIVE.load(dc); |
5490 |
13 Jun 19 |
nicklas |
697 |
FileServer externalArchive = Fileserver.EXTERNAL_ARCHIVE.load(dc); |
5490 |
13 Jun 19 |
nicklas |
698 |
|
5588 |
30 Aug 19 |
nicklas |
699 |
Pattern findLaneInFilename = Pattern.compile("_L(\\d)[_.]"); // Find '_L' followed by lane number and '.' or '_') |
5588 |
30 Aug 19 |
nicklas |
700 |
|
5490 |
13 Jun 19 |
nicklas |
701 |
for (Reads r : sumReads.values()) |
5490 |
13 Jun 19 |
nicklas |
702 |
{ |
5490 |
13 Jun 19 |
nicklas |
703 |
if (logger.isDebugEnabled()) |
5490 |
13 Jun 19 |
nicklas |
704 |
{ |
5490 |
13 Jun 19 |
nicklas |
705 |
logger.debug(r.libName + "; " + r.reads + "; " + r.passedFilter); |
5490 |
13 Jun 19 |
nicklas |
706 |
} |
5490 |
13 Jun 19 |
nicklas |
707 |
|
5490 |
13 Jun 19 |
nicklas |
708 |
MergedSequences merged = MergedSequences.getByName(dc, r.libName); |
5490 |
13 Jun 19 |
nicklas |
709 |
if (merged != null) |
5490 |
13 Jun 19 |
nicklas |
710 |
{ |
5490 |
13 Jun 19 |
nicklas |
711 |
DerivedBioAssay m = merged.getItem(); |
5490 |
13 Jun 19 |
nicklas |
712 |
Software mergeSoftware = m.getSoftware(); |
5490 |
13 Jun 19 |
nicklas |
713 |
String mergeParameterSet = (String)Annotationtype.PARAMETER_SET.getAnnotationValue(dc, mergeSoftware); |
5490 |
13 Jun 19 |
nicklas |
714 |
int bowtie_fragment_count_limit = Values.getInt(cfg.getConfig("demux/bowtie-fragment-count-limit", mergeParameterSet, "20000")); |
5490 |
13 Jun 19 |
nicklas |
715 |
|
5490 |
13 Jun 19 |
nicklas |
716 |
Annotationtype.READS.setAnnotationValue(dc, m, r.reads); |
5490 |
13 Jun 19 |
nicklas |
717 |
Annotationtype.PF_READS.setAnnotationValue(dc, m, r.passedFilter); |
5490 |
13 Jun 19 |
nicklas |
718 |
|
5490 |
13 Jun 19 |
nicklas |
// Create FASTQ file links |
5553 |
12 Aug 19 |
nicklas |
720 |
boolean useExternalProjectArchive = Reggie.isExternalItem(merged.getName()); |
5490 |
13 Jun 19 |
nicklas |
721 |
FileServer fileArchive = useExternalProjectArchive ? externalArchive : projectArchive; |
5490 |
13 Jun 19 |
nicklas |
722 |
String analysisDir = useExternalProjectArchive ? Reggie.EXTERNAL_ANALYSIS_DIR : Reggie.SECONDARY_ANALYSIS_DIR; |
5490 |
13 Jun 19 |
nicklas |
723 |
|
5490 |
13 Jun 19 |
nicklas |
724 |
String dataFilesFolder = (String)Annotationtype.DATA_FILES_FOLDER.getAnnotationValue(dc, m); |
5490 |
13 Jun 19 |
nicklas |
725 |
String baseFolder = Reggie.convertDataFilesFolderToBaseFolder(dataFilesFolder); |
5490 |
13 Jun 19 |
nicklas |
726 |
Directory localDataDir = Directory.getNew(dc, new Path(analysisDir+baseFolder, Path.Type.DIRECTORY)); |
5588 |
30 Aug 19 |
nicklas |
727 |
for (String path : r.files) |
5490 |
13 Jun 19 |
nicklas |
728 |
{ |
5588 |
30 Aug 19 |
nicklas |
729 |
String fileName = path.substring(path.lastIndexOf("/")+1); |
5588 |
30 Aug 19 |
nicklas |
730 |
File f = File.getFile(dc, localDataDir, fileName, true); |
5490 |
13 Jun 19 |
nicklas |
731 |
f.setFileServer(fileArchive); |
5588 |
30 Aug 19 |
nicklas |
732 |
String fileUrl = "sftp://" + fileArchive.getHost() + dataFilesFolder + "/" + fileName; |
5490 |
13 Jun 19 |
nicklas |
733 |
try |
5490 |
13 Jun 19 |
nicklas |
734 |
{ |
5490 |
13 Jun 19 |
nicklas |
735 |
f.setUrl(fileUrl, true); |
5490 |
13 Jun 19 |
nicklas |
736 |
} |
5490 |
13 Jun 19 |
nicklas |
737 |
catch (RuntimeException ex) |
5490 |
13 Jun 19 |
nicklas |
738 |
{ |
5490 |
13 Jun 19 |
nicklas |
739 |
f.setUrl(fileUrl, false); |
5490 |
13 Jun 19 |
nicklas |
740 |
} |
5490 |
13 Jun 19 |
nicklas |
741 |
if (!f.isInDatabase()) |
5490 |
13 Jun 19 |
nicklas |
742 |
{ |
5490 |
13 Jun 19 |
nicklas |
743 |
dc.saveItem(f); |
5490 |
13 Jun 19 |
nicklas |
744 |
} |
5588 |
30 Aug 19 |
nicklas |
745 |
|
5588 |
30 Aug 19 |
nicklas |
// Try to match the lane the file comes from and get the number of reads/passed filter |
5588 |
30 Aug 19 |
nicklas |
747 |
Matcher lane = findLaneInFilename.matcher(fileName); |
5588 |
30 Aug 19 |
nicklas |
748 |
if (lane.find()) |
5588 |
30 Aug 19 |
nicklas |
749 |
{ |
5588 |
30 Aug 19 |
nicklas |
750 |
int laneNo = Values.getInt(lane.group(1)); |
5588 |
30 Aug 19 |
nicklas |
751 |
f.setDescription(r.readsPerLane[laneNo] + " READS; " + r.passedFilterPerLane[laneNo] + " PF_READS"); |
5588 |
30 Aug 19 |
nicklas |
752 |
} |
5588 |
30 Aug 19 |
nicklas |
753 |
else |
5588 |
30 Aug 19 |
nicklas |
754 |
{ |
5588 |
30 Aug 19 |
nicklas |
755 |
f.setDescription(r.reads + " READS; " + r.passedFilter + " PF_READS"); |
5588 |
30 Aug 19 |
nicklas |
756 |
} |
5588 |
30 Aug 19 |
nicklas |
757 |
|
5588 |
30 Aug 19 |
nicklas |
758 |
if (fileName.endsWith(".fastq.gz")) |
5588 |
30 Aug 19 |
nicklas |
759 |
{ |
5588 |
30 Aug 19 |
nicklas |
760 |
f.setItemSubtype(fastqType); |
5588 |
30 Aug 19 |
nicklas |
761 |
FileSetMember member = m.getFileSet().addMember(f, fastqData); |
5588 |
30 Aug 19 |
nicklas |
762 |
} |
5588 |
30 Aug 19 |
nicklas |
763 |
else |
5588 |
30 Aug 19 |
nicklas |
764 |
{ |
5588 |
30 Aug 19 |
nicklas |
765 |
AnyToAny link = AnyToAny.getNewOrExisting(dc, m, fileName, f, true); |
5588 |
30 Aug 19 |
nicklas |
766 |
if (!link.isInDatabase()) dc.saveItem(link); |
5588 |
30 Aug 19 |
nicklas |
767 |
if (fileName.endsWith(".rg")) // "Read group" file |
5588 |
30 Aug 19 |
nicklas |
768 |
{ |
5588 |
30 Aug 19 |
nicklas |
769 |
f.setMimeType("text/plain"); |
5588 |
30 Aug 19 |
nicklas |
770 |
} |
5588 |
30 Aug 19 |
nicklas |
771 |
} |
5490 |
13 Jun 19 |
nicklas |
772 |
} |
5490 |
13 Jun 19 |
nicklas |
773 |
|
5490 |
13 Jun 19 |
nicklas |
774 |
total.reads += r.reads; |
5490 |
13 Jun 19 |
nicklas |
775 |
total.passedFilter += r.passedFilter; |
5490 |
13 Jun 19 |
nicklas |
776 |
} |
5490 |
13 Jun 19 |
nicklas |
777 |
} |
5490 |
13 Jun 19 |
nicklas |
778 |
dc.commit(); |
5490 |
13 Jun 19 |
nicklas |
779 |
} |
5490 |
13 Jun 19 |
nicklas |
780 |
finally |
5490 |
13 Jun 19 |
nicklas |
781 |
{ |
5490 |
13 Jun 19 |
nicklas |
782 |
if (dc != null) dc.close(); |
5490 |
13 Jun 19 |
nicklas |
783 |
} |
5490 |
13 Jun 19 |
nicklas |
784 |
|
5490 |
13 Jun 19 |
nicklas |
785 |
return total; |
5490 |
13 Jun 19 |
nicklas |
786 |
} |
5800 |
19 Dec 19 |
nicklas |
787 |
|
5800 |
19 Dec 19 |
nicklas |
788 |
/** |
5800 |
19 Dec 19 |
nicklas |
Split the metrics file so that only sections related to the given |
5800 |
19 Dec 19 |
nicklas |
demux/flow cell remains. |
5800 |
19 Dec 19 |
nicklas |
791 |
*/ |
5800 |
19 Dec 19 |
nicklas |
792 |
private String extractSections(String allMetrics, String demuxName) |
5800 |
19 Dec 19 |
nicklas |
793 |
{ |
5800 |
19 Dec 19 |
nicklas |
794 |
|
5800 |
19 Dec 19 |
nicklas |
795 |
String[] lines = allMetrics.split("\n"); |
5800 |
19 Dec 19 |
nicklas |
796 |
StringBuilder extracted = new StringBuilder(); |
5800 |
19 Dec 19 |
nicklas |
797 |
boolean clone = false; |
5800 |
19 Dec 19 |
nicklas |
798 |
for (String line : lines) |
5800 |
19 Dec 19 |
nicklas |
799 |
{ |
5800 |
19 Dec 19 |
nicklas |
800 |
if (line.startsWith("# [") && line.endsWith("]")) |
5800 |
19 Dec 19 |
nicklas |
801 |
{ |
5800 |
19 Dec 19 |
nicklas |
// This is a new section -- see if it matches our wanted demux |
5800 |
19 Dec 19 |
nicklas |
803 |
clone = line.contains(demuxName); |
5800 |
19 Dec 19 |
nicklas |
804 |
} |
5800 |
19 Dec 19 |
nicklas |
805 |
if (clone) extracted.append(line).append("\n"); |
5800 |
19 Dec 19 |
nicklas |
806 |
} |
5800 |
19 Dec 19 |
nicklas |
807 |
return extracted.toString(); |
5800 |
19 Dec 19 |
nicklas |
808 |
} |
5800 |
19 Dec 19 |
nicklas |
809 |
|
5490 |
13 Jun 19 |
nicklas |
810 |
} |
5490 |
13 Jun 19 |
nicklas |
811 |
|
5490 |
13 Jun 19 |
nicklas |
812 |
private static class Reads |
5490 |
13 Jun 19 |
nicklas |
813 |
{ |
5490 |
13 Jun 19 |
nicklas |
814 |
final String libName; |
5490 |
13 Jun 19 |
nicklas |
815 |
final List<String> warnings; |
5490 |
13 Jun 19 |
nicklas |
816 |
final List<String> files; |
5490 |
13 Jun 19 |
nicklas |
817 |
final Set<String> skippedTiles; |
5490 |
13 Jun 19 |
nicklas |
818 |
long reads = 0; |
5490 |
13 Jun 19 |
nicklas |
819 |
long passedFilter = 0; |
5588 |
30 Aug 19 |
nicklas |
820 |
final long[] readsPerLane; |
5588 |
30 Aug 19 |
nicklas |
821 |
final long[] passedFilterPerLane; |
5490 |
13 Jun 19 |
nicklas |
822 |
|
5490 |
13 Jun 19 |
nicklas |
823 |
Reads(String libName) |
5490 |
13 Jun 19 |
nicklas |
824 |
{ |
5490 |
13 Jun 19 |
nicklas |
825 |
this.libName = libName; |
5490 |
13 Jun 19 |
nicklas |
826 |
this.warnings = new ArrayList<String>(); |
5490 |
13 Jun 19 |
nicklas |
827 |
this.files = new ArrayList<String>(); |
5490 |
13 Jun 19 |
nicklas |
828 |
this.skippedTiles = new TreeSet<String>(); |
5588 |
30 Aug 19 |
nicklas |
829 |
this.readsPerLane = new long[10]; // TODO -- 10 is safe but "ugly" since we only have 2, 4 or 8 lanes |
5588 |
30 Aug 19 |
nicklas |
830 |
this.passedFilterPerLane = new long[10]; |
5490 |
13 Jun 19 |
nicklas |
831 |
} |
5490 |
13 Jun 19 |
nicklas |
832 |
|
5588 |
30 Aug 19 |
nicklas |
833 |
void add(int lane, long reads, long passedFilter) |
5490 |
13 Jun 19 |
nicklas |
834 |
{ |
5490 |
13 Jun 19 |
nicklas |
835 |
this.reads += reads; |
5490 |
13 Jun 19 |
nicklas |
836 |
this.passedFilter += passedFilter; |
5588 |
30 Aug 19 |
nicklas |
837 |
this.readsPerLane[lane] += reads; |
5588 |
30 Aug 19 |
nicklas |
838 |
this.passedFilterPerLane[lane] += passedFilter; |
5490 |
13 Jun 19 |
nicklas |
839 |
} |
5490 |
13 Jun 19 |
nicklas |
840 |
|
5490 |
13 Jun 19 |
nicklas |
841 |
void addWarning(String warning) |
5490 |
13 Jun 19 |
nicklas |
842 |
{ |
5490 |
13 Jun 19 |
nicklas |
843 |
this.warnings.add(warning); |
5490 |
13 Jun 19 |
nicklas |
844 |
} |
5490 |
13 Jun 19 |
nicklas |
845 |
|
5490 |
13 Jun 19 |
nicklas |
846 |
void addFile(String file) |
5490 |
13 Jun 19 |
nicklas |
847 |
{ |
5490 |
13 Jun 19 |
nicklas |
848 |
this.files.add(file); |
5490 |
13 Jun 19 |
nicklas |
849 |
} |
5490 |
13 Jun 19 |
nicklas |
850 |
|
5490 |
13 Jun 19 |
nicklas |
851 |
void addSkippedTile(int lane, String tileNo) |
5490 |
13 Jun 19 |
nicklas |
852 |
{ |
5490 |
13 Jun 19 |
nicklas |
853 |
skippedTiles.add(lane + ":" + tileNo); |
5490 |
13 Jun 19 |
nicklas |
854 |
} |
5490 |
13 Jun 19 |
nicklas |
855 |
|
5490 |
13 Jun 19 |
nicklas |
856 |
@Override |
5490 |
13 Jun 19 |
nicklas |
857 |
public String toString() |
5490 |
13 Jun 19 |
nicklas |
858 |
{ |
5490 |
13 Jun 19 |
nicklas |
859 |
return libName+";"+reads+";"+passedFilter; |
5490 |
13 Jun 19 |
nicklas |
860 |
} |
5490 |
13 Jun 19 |
nicklas |
861 |
} |
5486 |
12 Jun 19 |
nicklas |
862 |
} |