4288 |
09 Jan 17 |
nicklas |
1 |
package net.sf.basedb.reggie.grid; |
3714 |
20 Jan 16 |
nicklas |
2 |
|
5492 |
13 Jun 19 |
nicklas |
3 |
import java.io.StringWriter; |
3714 |
20 Jan 16 |
nicklas |
4 |
import java.util.ArrayList; |
6280 |
09 Jun 21 |
nicklas |
5 |
import java.util.Collections; |
5492 |
13 Jun 19 |
nicklas |
6 |
import java.util.HashSet; |
3714 |
20 Jan 16 |
nicklas |
7 |
import java.util.List; |
5492 |
13 Jun 19 |
nicklas |
8 |
import java.util.Map; |
5492 |
13 Jun 19 |
nicklas |
9 |
import java.util.Set; |
3714 |
20 Jan 16 |
nicklas |
10 |
|
3714 |
20 Jan 16 |
nicklas |
11 |
import org.json.simple.JSONArray; |
3714 |
20 Jan 16 |
nicklas |
12 |
|
5492 |
13 Jun 19 |
nicklas |
13 |
import net.sf.basedb.core.BioMaterialEventSource; |
5492 |
13 Jun 19 |
nicklas |
14 |
import net.sf.basedb.core.BooleanParameterType; |
3714 |
20 Jan 16 |
nicklas |
15 |
import net.sf.basedb.core.DbControl; |
3714 |
20 Jan 16 |
nicklas |
16 |
import net.sf.basedb.core.DerivedBioAssay; |
5492 |
13 Jun 19 |
nicklas |
17 |
import net.sf.basedb.core.Extract; |
3714 |
20 Jan 16 |
nicklas |
18 |
import net.sf.basedb.core.Hardware; |
5492 |
13 Jun 19 |
nicklas |
19 |
import net.sf.basedb.core.IntegerParameterType; |
5492 |
13 Jun 19 |
nicklas |
20 |
import net.sf.basedb.core.ItemQuery; |
3714 |
20 Jan 16 |
nicklas |
21 |
import net.sf.basedb.core.Job; |
5492 |
13 Jun 19 |
nicklas |
22 |
import net.sf.basedb.core.PhysicalBioAssay; |
3714 |
20 Jan 16 |
nicklas |
23 |
import net.sf.basedb.core.Protocol; |
5492 |
13 Jun 19 |
nicklas |
24 |
import net.sf.basedb.core.SessionControl; |
3714 |
20 Jan 16 |
nicklas |
25 |
import net.sf.basedb.core.Software; |
5492 |
13 Jun 19 |
nicklas |
26 |
import net.sf.basedb.core.SpecialQuery; |
5492 |
13 Jun 19 |
nicklas |
27 |
import net.sf.basedb.core.StringParameterType; |
5492 |
13 Jun 19 |
nicklas |
28 |
import net.sf.basedb.core.Type; |
5492 |
13 Jun 19 |
nicklas |
29 |
import net.sf.basedb.core.query.Expressions; |
5492 |
13 Jun 19 |
nicklas |
30 |
import net.sf.basedb.core.query.Hql; |
5492 |
13 Jun 19 |
nicklas |
31 |
import net.sf.basedb.core.query.Orders; |
5492 |
13 Jun 19 |
nicklas |
32 |
import net.sf.basedb.core.query.Restrictions; |
5492 |
13 Jun 19 |
nicklas |
33 |
import net.sf.basedb.opengrid.JobDefinition; |
4274 |
19 Dec 16 |
nicklas |
34 |
import net.sf.basedb.opengrid.OpenGridCluster; |
5492 |
13 Jun 19 |
nicklas |
35 |
import net.sf.basedb.opengrid.ScriptBuilder; |
5492 |
13 Jun 19 |
nicklas |
36 |
import net.sf.basedb.opengrid.filetransfer.StringUploadSource; |
5492 |
13 Jun 19 |
nicklas |
37 |
import net.sf.basedb.reggie.Reggie; |
3714 |
20 Jan 16 |
nicklas |
38 |
import net.sf.basedb.reggie.dao.Annotationtype; |
5791 |
16 Dec 19 |
nicklas |
39 |
import net.sf.basedb.reggie.dao.DoNotUse; |
5492 |
13 Jun 19 |
nicklas |
40 |
import net.sf.basedb.reggie.dao.FlowCell; |
5492 |
13 Jun 19 |
nicklas |
41 |
import net.sf.basedb.reggie.dao.Library; |
5492 |
13 Jun 19 |
nicklas |
42 |
import net.sf.basedb.reggie.dao.MergedSequences; |
5492 |
13 Jun 19 |
nicklas |
43 |
import net.sf.basedb.reggie.dao.Pipeline; |
3714 |
20 Jan 16 |
nicklas |
44 |
import net.sf.basedb.reggie.dao.SequencingRun; |
5492 |
13 Jun 19 |
nicklas |
45 |
import net.sf.basedb.reggie.dao.Subtype; |
5492 |
13 Jun 19 |
nicklas |
46 |
import net.sf.basedb.reggie.plugins.BarcodeFilesForDemuxExporter; |
5492 |
13 Jun 19 |
nicklas |
47 |
import net.sf.basedb.util.Values; |
3714 |
20 Jan 16 |
nicklas |
48 |
|
3714 |
20 Jan 16 |
nicklas |
49 |
/** |
3714 |
20 Jan 16 |
nicklas |
Helper class for creating items needed for demuxing and merging |
5486 |
12 Jun 19 |
nicklas |
sequencing data. Subclasses are responsible for generating the |
5486 |
12 Jun 19 |
nicklas |
demux script and sending it to the cluster for execution. |
3714 |
20 Jan 16 |
nicklas |
53 |
|
3714 |
20 Jan 16 |
nicklas |
@author nicklas |
5486 |
12 Jun 19 |
nicklas |
@since 4.1, 4.23 |
3714 |
20 Jan 16 |
nicklas |
56 |
*/ |
5486 |
12 Jun 19 |
nicklas |
57 |
public abstract class DemuxJobCreator |
6674 |
11 Apr 22 |
nicklas |
58 |
extends AbstractJobCreator |
3714 |
20 Jan 16 |
nicklas |
59 |
{ |
3714 |
20 Jan 16 |
nicklas |
60 |
|
3714 |
20 Jan 16 |
nicklas |
61 |
|
3714 |
20 Jan 16 |
nicklas |
62 |
/** |
3714 |
20 Jan 16 |
nicklas |
Max number of skipped tiles in a demux before a warning is issued. |
3714 |
20 Jan 16 |
nicklas |
64 |
*/ |
3714 |
20 Jan 16 |
nicklas |
65 |
public static final int MAX_SKIPPED_TILES = 10; |
3714 |
20 Jan 16 |
nicklas |
66 |
|
3714 |
20 Jan 16 |
nicklas |
67 |
/** |
3714 |
20 Jan 16 |
nicklas |
Path to the directory on the BASE file system where demultiplex metrics |
3714 |
20 Jan 16 |
nicklas |
files are saved. |
3714 |
20 Jan 16 |
nicklas |
70 |
*/ |
3714 |
20 Jan 16 |
nicklas |
71 |
public static final String DEMULTIPLEX_METRICS_DIR = "/home/SCANB/DemultiplexMetrics"; |
3714 |
20 Jan 16 |
nicklas |
72 |
|
5492 |
13 Jun 19 |
nicklas |
73 |
protected final Pipeline pipeline; |
5492 |
13 Jun 19 |
nicklas |
74 |
|
5486 |
12 Jun 19 |
nicklas |
75 |
protected Software demuxSoftware; |
5486 |
12 Jun 19 |
nicklas |
76 |
protected Protocol demuxProtocol; |
3714 |
20 Jan 16 |
nicklas |
77 |
|
5486 |
12 Jun 19 |
nicklas |
78 |
protected Software mergeSoftware; |
5486 |
12 Jun 19 |
nicklas |
79 |
protected Protocol mergeProtocol; |
3714 |
20 Jan 16 |
nicklas |
80 |
|
5492 |
13 Jun 19 |
nicklas |
81 |
/**
	Create a new job creator that is tied to the given pipeline.
	@param pipeline The pipeline; it is stored as-is in the pipeline field
		(no null check is made here)
*/
protected DemuxJobCreator(Pipeline pipeline)
{
	super();
	this.pipeline = pipeline;
}
3714 |
20 Jan 16 |
nicklas |
85 |
|
3714 |
20 Jan 16 |
nicklas |
86 |
|
3714 |
20 Jan 16 |
nicklas |
87 |
/** |
3714 |
20 Jan 16 |
nicklas |
Set the software item to set on created DemuxedSequences. |
3714 |
20 Jan 16 |
nicklas |
@see DerivedBioAssay#setSoftware(Software) |
3714 |
20 Jan 16 |
nicklas |
90 |
*/ |
3714 |
20 Jan 16 |
nicklas |
91 |
public void setDemuxSoftware(Software software) |
3714 |
20 Jan 16 |
nicklas |
92 |
{ |
3714 |
20 Jan 16 |
nicklas |
93 |
this.demuxSoftware = software; |
3714 |
20 Jan 16 |
nicklas |
94 |
} |
3714 |
20 Jan 16 |
nicklas |
95 |
|
3714 |
20 Jan 16 |
nicklas |
96 |
/** |
3714 |
20 Jan 16 |
nicklas |
Set the protocol item to set on created DemuxedSequences. |
3714 |
20 Jan 16 |
nicklas |
@see DerivedBioAssay#setProtocol(Protocol) |
3714 |
20 Jan 16 |
nicklas |
99 |
*/ |
3714 |
20 Jan 16 |
nicklas |
100 |
public void setDemuxProtocol(Protocol protocol) |
3714 |
20 Jan 16 |
nicklas |
101 |
{ |
3714 |
20 Jan 16 |
nicklas |
102 |
this.demuxProtocol = protocol; |
3714 |
20 Jan 16 |
nicklas |
103 |
} |
3714 |
20 Jan 16 |
nicklas |
104 |
|
3714 |
20 Jan 16 |
nicklas |
105 |
/** |
3714 |
20 Jan 16 |
nicklas |
Set the software item to set on created MergedSequences. |
3714 |
20 Jan 16 |
nicklas |
@see DerivedBioAssay#setSoftware(Software) |
3714 |
20 Jan 16 |
nicklas |
108 |
*/ |
3714 |
20 Jan 16 |
nicklas |
109 |
public void setMergeSoftware(Software software) |
3714 |
20 Jan 16 |
nicklas |
110 |
{ |
3714 |
20 Jan 16 |
nicklas |
111 |
this.mergeSoftware = software; |
3714 |
20 Jan 16 |
nicklas |
112 |
} |
3714 |
20 Jan 16 |
nicklas |
113 |
|
3714 |
20 Jan 16 |
nicklas |
114 |
/** |
3714 |
20 Jan 16 |
nicklas |
Set the protocol item to set on created MergedSequences. |
3714 |
20 Jan 16 |
nicklas |
@see DerivedBioAssay#setProtocol(Protocol) |
3714 |
20 Jan 16 |
nicklas |
117 |
*/ |
3714 |
20 Jan 16 |
nicklas |
118 |
public void setMergeProtocol(Protocol protocol) |
3714 |
20 Jan 16 |
nicklas |
119 |
{ |
3714 |
20 Jan 16 |
nicklas |
120 |
this.mergeProtocol = protocol; |
3714 |
20 Jan 16 |
nicklas |
121 |
} |
3714 |
20 Jan 16 |
nicklas |
122 |
|
5486 |
12 Jun 19 |
nicklas |
123 |
/**
	Create the job for demuxing the given sequencing runs. This method is
	implemented by subclasses which, according to the class documentation,
	are responsible for generating the demux script and sending it to the
	cluster for execution.
	@param dc The DbControl to use for database access
	@param cluster The cluster to use -- presumably the one the script is
		submitted to; confirm against the implementations
	@param demuxDefs Definitions of what to demux; each definition holds one sequencing run
	@param jsonMessages NOTE(review): looks like a list that implementations add
		messages to -- confirm against the implementations
	@return The job item
*/
public abstract Job createDemuxJob(DbControl dc, OpenGridCluster cluster, List<DemuxDefinition> demuxDefs, JSONArray jsonMessages); |
3714 |
20 Jan 16 |
nicklas |
124 |
|
3714 |
20 Jan 16 |
nicklas |
125 |
/** |
3714 |
20 Jan 16 |
nicklas |
Get sequencer name that is safe to use in picard. Use SERIAL_NUMBER if |
3714 |
20 Jan 16 |
nicklas |
present on the hardware item, otherwise the name. Remove characters |
3714 |
20 Jan 16 |
nicklas |
that are not letter, numbers or hyphen. |
3714 |
20 Jan 16 |
nicklas |
129 |
*/ |
5486 |
12 Jun 19 |
nicklas |
130 |
protected String getSequencerName(Hardware sequencer) |
3714 |
20 Jan 16 |
nicklas |
131 |
{ |
3714 |
20 Jan 16 |
nicklas |
132 |
String name = "Unknown"; |
3714 |
20 Jan 16 |
nicklas |
133 |
if (sequencer != null) |
3714 |
20 Jan 16 |
nicklas |
134 |
{ |
3714 |
20 Jan 16 |
nicklas |
135 |
name = (String)Annotationtype.SERIAL_NUMBER.getAnnotationValue(sequencer.getDbControl(), sequencer); |
3714 |
20 Jan 16 |
nicklas |
136 |
if (name == null) |
3714 |
20 Jan 16 |
nicklas |
137 |
{ |
3714 |
20 Jan 16 |
nicklas |
138 |
name = sequencer.getName(); |
3714 |
20 Jan 16 |
nicklas |
139 |
} |
3714 |
20 Jan 16 |
nicklas |
// Only keep letters and numbers and - |
3714 |
20 Jan 16 |
nicklas |
141 |
name = name.replaceAll("[^\\w\\d-]", ""); |
3714 |
20 Jan 16 |
nicklas |
142 |
} |
3714 |
20 Jan 16 |
nicklas |
143 |
return name; |
3714 |
20 Jan 16 |
nicklas |
144 |
} |
3714 |
20 Jan 16 |
nicklas |
145 |
|
5492 |
13 Jun 19 |
nicklas |
146 |
/** |
5492 |
13 Jun 19 |
nicklas |
Helper method for create a job item for this demux. It will set |
5492 |
13 Jun 19 |
nicklas |
the proper job type, pipeline, version and auto-generate a name from |
5492 |
13 Jun 19 |
nicklas |
the participating sequencing runs. |
5492 |
13 Jun 19 |
nicklas |
150 |
*/ |
5492 |
13 Jun 19 |
nicklas |
151 |
protected Job createJobItem(DbControl dc, String jobName, List<DemuxDefinition> demuxDefs) |
5492 |
13 Jun 19 |
nicklas |
152 |
{ |
5492 |
13 Jun 19 |
nicklas |
153 |
SessionControl sc = dc.getSessionControl(); |
5492 |
13 Jun 19 |
nicklas |
154 |
Job demuxJob = Job.getNew(dc, null, null, null); |
5492 |
13 Jun 19 |
nicklas |
155 |
demuxJob.setItemSubtype(Subtype.DEMUX_MERGE_JOB.get(dc)); |
5492 |
13 Jun 19 |
nicklas |
156 |
demuxJob.setPluginVersion("reggie-"+Reggie.VERSION); |
5492 |
13 Jun 19 |
nicklas |
157 |
demuxJob.setSendMessage(Values.getBoolean(sc.getUserClientSetting("plugins.sendmessage"), false)); |
5492 |
13 Jun 19 |
nicklas |
158 |
|
5492 |
13 Jun 19 |
nicklas |
// Add name of sequencing runs to the job name |
5492 |
13 Jun 19 |
nicklas |
160 |
demuxJob.setName(jobName + " " + demuxDefs.get(0).seqRun.getName()); |
5492 |
13 Jun 19 |
nicklas |
161 |
if (demuxDefs.size() == 2) demuxJob.setName(demuxJob.getName() + " and " + demuxDefs.get(1).seqRun.getName()); |
5492 |
13 Jun 19 |
nicklas |
162 |
if (demuxDefs.size() > 2) demuxJob.setName(demuxJob.getName() + " and " + (demuxDefs.size()-1) + " others"); |
5492 |
13 Jun 19 |
nicklas |
163 |
if (debug) demuxJob.setName(demuxJob.getName() + " (debug)"); |
6981 |
17 Jan 23 |
nicklas |
164 |
if (partition != null) demuxJob.setParameterValue("partition", new StringParameterType(), partition); |
5492 |
13 Jun 19 |
nicklas |
165 |
|
5492 |
13 Jun 19 |
nicklas |
166 |
StringParameterType sType = new StringParameterType(); |
5492 |
13 Jun 19 |
nicklas |
167 |
BooleanParameterType bType = new BooleanParameterType(); |
5492 |
13 Jun 19 |
nicklas |
168 |
IntegerParameterType iType = new IntegerParameterType(); |
5492 |
13 Jun 19 |
nicklas |
169 |
|
5492 |
13 Jun 19 |
nicklas |
170 |
demuxJob.setParameterValue("pipeline", sType, pipeline.getId()); |
5492 |
13 Jun 19 |
nicklas |
171 |
if (debug) demuxJob.setParameterValue("debug", bType, debug); |
5492 |
13 Jun 19 |
nicklas |
172 |
if (autoConfirm) demuxJob.setParameterValue("autoConfirm", bType, autoConfirm); |
5492 |
13 Jun 19 |
nicklas |
173 |
if (priority != null) demuxJob.setParameterValue("priority", iType, priority.intValue()); |
7372 |
06 Oct 23 |
nicklas |
174 |
if (submitOptionsOverride != null) demuxJob.setParameterValue("jobOptions", new StringParameterType(), submitOptionsOverride); |
5492 |
13 Jun 19 |
nicklas |
175 |
|
5492 |
13 Jun 19 |
nicklas |
176 |
dc.saveItem(demuxJob); |
5492 |
13 Jun 19 |
nicklas |
177 |
return demuxJob; |
5492 |
13 Jun 19 |
nicklas |
178 |
} |
5492 |
13 Jun 19 |
nicklas |
179 |
|
5492 |
13 Jun 19 |
nicklas |
180 |
/** |
5492 |
13 Jun 19 |
nicklas |
Helper method for creating a DemuxedSequences item that is part of this job. |
5492 |
13 Jun 19 |
nicklas |
The method will generate a name and set the correct subtype, pipeline, protocol, |
5492 |
13 Jun 19 |
nicklas |
software, etc. |
5492 |
13 Jun 19 |
nicklas |
184 |
*/ |
5492 |
13 Jun 19 |
nicklas |
185 |
protected DerivedBioAssay createDemuxedSequences(DbControl dc, Job demuxJob, DemuxDefinition def) |
5492 |
13 Jun 19 |
nicklas |
186 |
{ |
5492 |
13 Jun 19 |
nicklas |
187 |
String demuxName = ScriptUtil.checkValidFilename(def.seqRun.getNextDemuxedSequencesName(dc)); |
5492 |
13 Jun 19 |
nicklas |
188 |
|
5492 |
13 Jun 19 |
nicklas |
189 |
DerivedBioAssay demux = DerivedBioAssay.getNew(dc, false, demuxJob); |
5492 |
13 Jun 19 |
nicklas |
190 |
demux.setItemSubtype(Subtype.DEMUXED_SEQUENCES.get(dc)); |
5492 |
13 Jun 19 |
nicklas |
191 |
pipeline.setAnnotation(dc, demux); |
5492 |
13 Jun 19 |
nicklas |
192 |
demux.setName(demuxName); |
5492 |
13 Jun 19 |
nicklas |
193 |
demux.setSoftware(demuxSoftware); |
5492 |
13 Jun 19 |
nicklas |
194 |
demux.setProtocol(demuxProtocol); |
5492 |
13 Jun 19 |
nicklas |
195 |
demux.addParent(def.seqRun.getDerivedBioAssay()); |
5492 |
13 Jun 19 |
nicklas |
196 |
Annotationtype.OMIT_LANES.setAnnotationValues(dc, demux, def.omitLanes); |
5492 |
13 Jun 19 |
nicklas |
197 |
Annotationtype.READ_STRING.setAnnotationValue(dc, demux, def.readString); |
5492 |
13 Jun 19 |
nicklas |
198 |
if (autoConfirm) |
5492 |
13 Jun 19 |
nicklas |
199 |
{ |
5492 |
13 Jun 19 |
nicklas |
200 |
Annotationtype.AUTO_PROCESSING.setAnnotationValue(dc, demux, "AutoConfirm"); |
5492 |
13 Jun 19 |
nicklas |
201 |
} |
5492 |
13 Jun 19 |
nicklas |
202 |
dc.saveItem(demux); |
5492 |
13 Jun 19 |
nicklas |
203 |
def.demux = demux; |
5492 |
13 Jun 19 |
nicklas |
204 |
return demux; |
5492 |
13 Jun 19 |
nicklas |
205 |
} |
5492 |
13 Jun 19 |
nicklas |
206 |
|
5492 |
13 Jun 19 |
nicklas |
207 |
/** |
5492 |
13 Jun 19 |
nicklas |
Helper method for creating a MergedSequences item that is part of this job. |
5523 |
24 Jun 19 |
nicklas |
The method will generate a name and set the correct subtype, pipeline, |
5492 |
13 Jun 19 |
nicklas |
protocol, software, etc. |
5492 |
13 Jun 19 |
nicklas |
211 |
*/ |
5492 |
13 Jun 19 |
nicklas |
212 |
protected DerivedBioAssay createMergedSequences(DbControl dc, Job demuxJob, Library lib) |
5492 |
13 Jun 19 |
nicklas |
213 |
{ |
5492 |
13 Jun 19 |
nicklas |
214 |
String mergeName = ScriptUtil.checkValidFilename(lib.getNextMergedSequencesName(dc)); |
5492 |
13 Jun 19 |
nicklas |
215 |
|
5492 |
13 Jun 19 |
nicklas |
216 |
DerivedBioAssay merged = DerivedBioAssay.getNew(dc, false, demuxJob); |
5492 |
13 Jun 19 |
nicklas |
217 |
merged.setItemSubtype(Subtype.MERGED_SEQUENCES.get(dc)); |
5492 |
13 Jun 19 |
nicklas |
218 |
pipeline.setAnnotation(dc, merged); |
5492 |
13 Jun 19 |
nicklas |
219 |
merged.setName(mergeName); |
5492 |
13 Jun 19 |
nicklas |
220 |
merged.setSoftware(mergeSoftware); |
5492 |
13 Jun 19 |
nicklas |
221 |
merged.setProtocol(mergeProtocol); |
5492 |
13 Jun 19 |
nicklas |
222 |
merged.setExtract(lib.getExtract()); |
5791 |
16 Dec 19 |
nicklas |
223 |
DoNotUse.copyDoNotUseAnnotations(dc, lib.getExtract(), merged, false); |
5553 |
12 Aug 19 |
nicklas |
224 |
boolean isExternal = Reggie.isExternalItem(mergeName); |
5786 |
13 Dec 19 |
nicklas |
225 |
String rootName = isExternal ? lib.getTopExtractOrSample(dc).getName() : null; |
5523 |
24 Jun 19 |
nicklas |
226 |
|
5523 |
24 Jun 19 |
nicklas |
227 |
String dataFilesFolder = ScriptUtil.checkValidPath(MergedSequences.generateDataFilesFolderForProjectArchive(mergeName, rootName, debug), true, true); |
5492 |
13 Jun 19 |
nicklas |
228 |
Annotationtype.DATA_FILES_FOLDER.setAnnotationValue(dc, merged, dataFilesFolder); |
5492 |
13 Jun 19 |
nicklas |
229 |
|
5492 |
13 Jun 19 |
nicklas |
230 |
dc.saveItem(merged); |
5492 |
13 Jun 19 |
nicklas |
231 |
return merged; |
5492 |
13 Jun 19 |
nicklas |
232 |
} |
5492 |
13 Jun 19 |
nicklas |
233 |
|
5492 |
13 Jun 19 |
nicklas |
234 |
/** |
5492 |
13 Jun 19 |
nicklas |
Helper method for creating all MergedSequences items that are needed for |
5492 |
13 Jun 19 |
nicklas |
demuxing the given flow cell/sequencing run. This method do so many things |
5492 |
13 Jun 19 |
nicklas |
and will populate items in the DemuxDefinition with more data: |
5492 |
13 Jun 19 |
nicklas |
libsOnFlowCell, laneInfo |
5492 |
13 Jun 19 |
nicklas |
239 |
*/ |
5492 |
13 Jun 19 |
nicklas |
240 |
protected void createAllMergedSequences(DbControl dc, Job demuxJob, DemuxDefinition def) |
5492 |
13 Jun 19 |
nicklas |
241 |
{ |
5492 |
13 Jun 19 |
nicklas |
242 |
|
5492 |
13 Jun 19 |
nicklas |
243 |
def.libsOnFlowCell = new HashSet<Extract>(); |
5492 |
13 Jun 19 |
nicklas |
244 |
def.laneInfo = new ArrayList<>(); |
5492 |
13 Jun 19 |
nicklas |
245 |
|
5492 |
13 Jun 19 |
nicklas |
246 |
PhysicalBioAssay flowCell = FlowCell.getBySequencingRun(dc, def.seqRun).getPhysicalBioAssay(); |
5492 |
13 Jun 19 |
nicklas |
// Find out which libraries that are in the pools on the flow cell |
5492 |
13 Jun 19 |
nicklas |
248 |
SpecialQuery<BioMaterialEventSource> query = flowCell.getCreationEvent().getEventSources(); |
5492 |
13 Jun 19 |
nicklas |
// Filter out lanes that should be omitted |
5492 |
13 Jun 19 |
nicklas |
250 |
query.restrict( |
5492 |
13 Jun 19 |
nicklas |
251 |
Restrictions.not( |
5492 |
13 Jun 19 |
nicklas |
252 |
Restrictions.in( |
5492 |
13 Jun 19 |
nicklas |
253 |
Hql.property("position"), |
5492 |
13 Jun 19 |
nicklas |
254 |
Expressions.parameter("lane", def.omitLanes, Type.INT) |
5492 |
13 Jun 19 |
nicklas |
255 |
) |
5492 |
13 Jun 19 |
nicklas |
256 |
)); |
5492 |
13 Jun 19 |
nicklas |
// Order by lane |
5492 |
13 Jun 19 |
nicklas |
258 |
query.order(Orders.asc(Hql.property("position"))); |
5492 |
13 Jun 19 |
nicklas |
259 |
|
5492 |
13 Jun 19 |
nicklas |
260 |
for (BioMaterialEventSource src : query.list(dc)) |
5492 |
13 Jun 19 |
nicklas |
261 |
{ |
5492 |
13 Jun 19 |
nicklas |
262 |
LaneInfo laneInfo = new LaneInfo(); |
5492 |
13 Jun 19 |
nicklas |
263 |
laneInfo.laneNo = src.getPosition(); |
5492 |
13 Jun 19 |
nicklas |
264 |
def.laneInfo.add(laneInfo); |
5492 |
13 Jun 19 |
nicklas |
265 |
|
5492 |
13 Jun 19 |
nicklas |
// Aliquot from pool for current lane |
5492 |
13 Jun 19 |
nicklas |
267 |
Extract poolA = (Extract)src.getBioMaterial(); |
5492 |
13 Jun 19 |
nicklas |
// The actual pool is the parent item |
5492 |
13 Jun 19 |
nicklas |
269 |
laneInfo.pool = (Extract)poolA.getParent(); |
5492 |
13 Jun 19 |
nicklas |
270 |
|
5492 |
13 Jun 19 |
nicklas |
// Get each lib in the pool |
5492 |
13 Jun 19 |
nicklas |
272 |
ItemQuery<Extract> libQuery = laneInfo.pool.getCreationEvent().getSources(); |
5492 |
13 Jun 19 |
nicklas |
273 |
libQuery.setIncludes(Reggie.INCLUDE_IN_CURRENT_PROJECT); |
5492 |
13 Jun 19 |
nicklas |
274 |
libQuery.order(Orders.asc(Hql.property("tag.name"))); // Order by barcode |
5492 |
13 Jun 19 |
nicklas |
275 |
|
5492 |
13 Jun 19 |
nicklas |
276 |
laneInfo.libsInPool = libQuery.list(dc); |
5492 |
13 Jun 19 |
nicklas |
277 |
laneInfo.mergedInPool = new ArrayList<DerivedBioAssay>(); |
6280 |
09 Jun 21 |
nicklas |
278 |
laneInfo.ignoreInPool = new ArrayList<Extract>(); |
5492 |
13 Jun 19 |
nicklas |
279 |
|
5492 |
13 Jun 19 |
nicklas |
280 |
for (Extract lib : laneInfo.libsInPool) |
5492 |
13 Jun 19 |
nicklas |
281 |
{ |
5492 |
13 Jun 19 |
nicklas |
282 |
String doNotUse = (String)Annotationtype.DO_NOT_USE.getAnnotationValue(dc, lib); |
6280 |
09 Jun 21 |
nicklas |
283 |
if (def.excludeDoNotUse && doNotUse != null || def.libIgnore.contains(lib)) |
5492 |
13 Jun 19 |
nicklas |
284 |
{ |
6280 |
09 Jun 21 |
nicklas |
285 |
laneInfo.ignoreInPool.add(lib); |
5492 |
13 Jun 19 |
nicklas |
286 |
} |
5492 |
13 Jun 19 |
nicklas |
287 |
else |
5492 |
13 Jun 19 |
nicklas |
288 |
{ |
5492 |
13 Jun 19 |
nicklas |
// Create MergedSequences item if needed |
5492 |
13 Jun 19 |
nicklas |
290 |
DerivedBioAssay merged = def.existingMergedSequences.get(lib); |
5492 |
13 Jun 19 |
nicklas |
291 |
if (merged == null) |
5492 |
13 Jun 19 |
nicklas |
292 |
{ |
5492 |
13 Jun 19 |
nicklas |
293 |
merged = createMergedSequences(dc, demuxJob, Library.get(lib)); |
5492 |
13 Jun 19 |
nicklas |
294 |
def.existingMergedSequences.put(lib, merged); |
5492 |
13 Jun 19 |
nicklas |
295 |
} |
5492 |
13 Jun 19 |
nicklas |
296 |
laneInfo.mergedInPool.add(merged); |
5492 |
13 Jun 19 |
nicklas |
297 |
if (def.libsOnFlowCell.add(lib)) |
5492 |
13 Jun 19 |
nicklas |
298 |
{ |
5492 |
13 Jun 19 |
nicklas |
// First time for this lib on the flow cell |
5492 |
13 Jun 19 |
nicklas |
300 |
merged.addParent(def.demux); |
5492 |
13 Jun 19 |
nicklas |
301 |
} |
5492 |
13 Jun 19 |
nicklas |
302 |
} |
5492 |
13 Jun 19 |
nicklas |
303 |
} |
5492 |
13 Jun 19 |
nicklas |
304 |
} |
5492 |
13 Jun 19 |
nicklas |
305 |
} |
5492 |
13 Jun 19 |
nicklas |
306 |
|
5492 |
13 Jun 19 |
nicklas |
307 |
/** |
5492 |
13 Jun 19 |
nicklas |
Helper method for exporting files that are needed by Picard for demuxing. |
5492 |
13 Jun 19 |
nicklas |
File names are stored in LaneInfo. |
5492 |
13 Jun 19 |
nicklas |
310 |
*/ |
5592 |
06 Sep 19 |
nicklas |
311 |
protected void exportMultiplexFiles(DbControl dc, BarcodeFilesForDemuxExporter exporter, JobDefinition jobDef, DemuxDefinition def, LaneInfo lane, boolean useReverseComplementOnBarcode2) |
5492 |
13 Jun 19 |
nicklas |
312 |
{ |
5586 |
29 Aug 19 |
nicklas |
//String demuxName = def.demux.getName(); |
5586 |
29 Aug 19 |
nicklas |
314 |
String flowCellId = def.getFlowCellId(dc); |
5492 |
13 Jun 19 |
nicklas |
315 |
|
6669 |
06 Apr 22 |
nicklas |
316 |
lane.barcodesFile = flowCellId + "." + lane.laneNo + "_barcodes.csv"; |
6669 |
06 Apr 22 |
nicklas |
317 |
lane.multiplexFile = flowCellId + "." + lane.laneNo + "_multiplex.csv"; |
5586 |
29 Aug 19 |
nicklas |
318 |
|
6669 |
06 Apr 22 |
nicklas |
319 |
lane.metricsFile = flowCellId + "." + lane.laneNo + "_metrics.csv"; |
6669 |
06 Apr 22 |
nicklas |
320 |
lane.barcodesDir = flowCellId + "." + lane.laneNo; |
5586 |
29 Aug 19 |
nicklas |
321 |
|
5492 |
13 Jun 19 |
nicklas |
// Export all required barcode files, 2 files per lane per flow cell |
5492 |
13 Jun 19 |
nicklas |
323 |
StringWriter out = new StringWriter(); |
6280 |
09 Jun 21 |
nicklas |
324 |
exporter.exportBarcodesFile(dc, flowCellId, lane.laneNo, def.numBarcodeReads, useReverseComplementOnBarcode2, lane.mergedInPool, lane.ignoreInPool, out); |
5492 |
13 Jun 19 |
nicklas |
325 |
jobDef.addFile(new StringUploadSource(lane.barcodesFile, out.toString())); |
5492 |
13 Jun 19 |
nicklas |
326 |
|
5492 |
13 Jun 19 |
nicklas |
327 |
out = new StringWriter(); |
6280 |
09 Jun 21 |
nicklas |
328 |
exporter.exportMultiplexFile(dc, flowCellId, lane.laneNo, def.numBarcodeReads, useReverseComplementOnBarcode2, lane.mergedInPool, lane.ignoreInPool, out, "fastq"); |
5492 |
13 Jun 19 |
nicklas |
329 |
jobDef.addFile(new StringUploadSource(lane.multiplexFile, out.toString())); |
5492 |
13 Jun 19 |
nicklas |
330 |
} |
5492 |
13 Jun 19 |
nicklas |
331 |
|
5492 |
13 Jun 19 |
nicklas |
332 |
/** |
5492 |
13 Jun 19 |
nicklas |
Generates a script snippet that searches the given list of run-archive folders for |
5492 |
13 Jun 19 |
nicklas |
the folder with data for the sequencing run that is about to be demuxed. |
5492 |
13 Jun 19 |
nicklas |
335 |
*/ |
5492 |
13 Jun 19 |
nicklas |
336 |
protected void scriptSnippetFindRunArchive(DbControl dc, ScriptBuilder script, List<String> allRunArchives, DemuxDefinition def) |
5492 |
13 Jun 19 |
nicklas |
337 |
{ |
5492 |
13 Jun 19 |
nicklas |
338 |
String dataFolder = ScriptUtil.checkValidScriptParameter((String)Annotationtype.DATA_FILES_FOLDER.getAnnotationValue(dc, def.seqRun.getDerivedBioAssay())); |
5492 |
13 Jun 19 |
nicklas |
339 |
|
5492 |
13 Jun 19 |
nicklas |
340 |
script.cmd("RUN_ARCHIVE=`find " + Values.getString(allRunArchives, " ", true) + " -maxdepth 2 -name " + dataFolder + " -type d -print -quit 2> /dev/null || true`"); |
5492 |
13 Jun 19 |
nicklas |
341 |
script.cmd("if [ -z \"$RUN_ARCHIVE\" ]; then"); |
5492 |
13 Jun 19 |
nicklas |
342 |
script.cmd("echo \"Can't find data folder "+dataFolder+" in " + allRunArchives + "\" 1>&2"); |
5492 |
13 Jun 19 |
nicklas |
343 |
script.cmd("exit 1"); |
5492 |
13 Jun 19 |
nicklas |
344 |
script.cmd("fi"); |
5492 |
13 Jun 19 |
nicklas |
345 |
script.newLine(); |
5492 |
13 Jun 19 |
nicklas |
346 |
} |
5492 |
13 Jun 19 |
nicklas |
347 |
|
5492 |
13 Jun 19 |
nicklas |
348 |
|
3714 |
20 Jan 16 |
nicklas |
349 |
public static class DemuxDefinition |
3714 |
20 Jan 16 |
nicklas |
350 |
{ |
3714 |
20 Jan 16 |
nicklas |
351 |
|
3714 |
20 Jan 16 |
nicklas |
352 |
final SequencingRun seqRun; |
3714 |
20 Jan 16 |
nicklas |
353 |
final String readString; |
5865 |
12 Mar 20 |
nicklas |
354 |
final int numBarcodeReads; |
6280 |
09 Jun 21 |
nicklas |
355 |
final List<Extract> libIgnore; |
3714 |
20 Jan 16 |
nicklas |
356 |
final List<Integer> omitLanes; |
4985 |
28 Sep 18 |
nicklas |
357 |
final boolean excludeDoNotUse; |
3714 |
20 Jan 16 |
nicklas |
358 |
|
6280 |
09 Jun 21 |
nicklas |
359 |
public DemuxDefinition(SequencingRun seqRun, String readString, List<Extract> libIgnore, List<? extends Number> omitLanes, boolean excludeDoNotUse) |
3714 |
20 Jan 16 |
nicklas |
360 |
{ |
3714 |
20 Jan 16 |
nicklas |
361 |
this.seqRun = seqRun; |
3714 |
20 Jan 16 |
nicklas |
362 |
this.readString = readString; |
5865 |
12 Mar 20 |
nicklas |
363 |
this.numBarcodeReads = count(readString, 'B'); |
6280 |
09 Jun 21 |
nicklas |
364 |
this.libIgnore = libIgnore == null ? Collections.emptyList() : libIgnore; |
3714 |
20 Jan 16 |
nicklas |
365 |
this.omitLanes = new ArrayList<Integer>(); |
3714 |
20 Jan 16 |
nicklas |
366 |
if (omitLanes != null) |
3714 |
20 Jan 16 |
nicklas |
367 |
{ |
3714 |
20 Jan 16 |
nicklas |
368 |
for (Number n : omitLanes) |
3714 |
20 Jan 16 |
nicklas |
369 |
{ |
3714 |
20 Jan 16 |
nicklas |
370 |
this.omitLanes.add(n.intValue()); |
3714 |
20 Jan 16 |
nicklas |
371 |
} |
3714 |
20 Jan 16 |
nicklas |
372 |
} |
4985 |
28 Sep 18 |
nicklas |
373 |
this.excludeDoNotUse = excludeDoNotUse; |
3714 |
20 Jan 16 |
nicklas |
374 |
} |
3714 |
20 Jan 16 |
nicklas |
375 |
|
5492 |
13 Jun 19 |
nicklas |
// Extra variables that are populated as the demux script is being generated |
5492 |
13 Jun 19 |
nicklas |
377 |
// ----------------- |
5492 |
13 Jun 19 |
nicklas |
378 |
|
5492 |
13 Jun 19 |
nicklas |
// The DemuxedSequences item -- set by createDemuxedSequences() |
5492 |
13 Jun 19 |
nicklas |
380 |
DerivedBioAssay demux; |
5492 |
13 Jun 19 |
nicklas |
381 |
|
5492 |
13 Jun 19 |
nicklas |
// Existing child MergedSequences -- needed so that we don't create multiple child items for the same library |
5492 |
13 Jun 19 |
nicklas |
383 |
Map<Extract, DerivedBioAssay> existingMergedSequences; |
5492 |
13 Jun 19 |
nicklas |
384 |
|
5492 |
13 Jun 19 |
nicklas |
// All libraries on the flow cell |
5492 |
13 Jun 19 |
nicklas |
386 |
Set<Extract> libsOnFlowCell; |
5492 |
13 Jun 19 |
nicklas |
387 |
|
5492 |
13 Jun 19 |
nicklas |
// Information about each lane on the flow cell |
5492 |
13 Jun 19 |
nicklas |
389 |
List<LaneInfo> laneInfo; |
5586 |
29 Aug 19 |
nicklas |
390 |
|
5586 |
29 Aug 19 |
nicklas |
// Lazy loaded by getFlowCellId() |
5586 |
29 Aug 19 |
nicklas |
392 |
private PhysicalBioAssay flowCell; |
5586 |
29 Aug 19 |
nicklas |
393 |
|
5586 |
29 Aug 19 |
nicklas |
394 |
public String getFlowCellId(DbControl dc) |
5586 |
29 Aug 19 |
nicklas |
395 |
{ |
5586 |
29 Aug 19 |
nicklas |
396 |
if (flowCell == null) |
5586 |
29 Aug 19 |
nicklas |
397 |
{ |
5586 |
29 Aug 19 |
nicklas |
398 |
flowCell = FlowCell.getBySequencingRun(dc, seqRun).getItem(); |
5586 |
29 Aug 19 |
nicklas |
399 |
} |
5586 |
29 Aug 19 |
nicklas |
400 |
return (String)Annotationtype.FLOWCELL_ID.getAnnotationValue(dc, flowCell); |
5586 |
29 Aug 19 |
nicklas |
401 |
} |
5865 |
12 Mar 20 |
nicklas |
402 |
|
5865 |
12 Mar 20 |
nicklas |
403 |
private int count(String readString, char match) |
5865 |
12 Mar 20 |
nicklas |
404 |
{ |
5865 |
12 Mar 20 |
nicklas |
405 |
int count = 0; |
5865 |
12 Mar 20 |
nicklas |
406 |
for (int i = 0; i < readString.length(); i++) |
5865 |
12 Mar 20 |
nicklas |
407 |
{ |
5865 |
12 Mar 20 |
nicklas |
408 |
if (readString.charAt(i) == match) count++; |
5865 |
12 Mar 20 |
nicklas |
409 |
} |
5865 |
12 Mar 20 |
nicklas |
410 |
return count; |
5865 |
12 Mar 20 |
nicklas |
411 |
} |
3714 |
20 Jan 16 |
nicklas |
412 |
} |
3714 |
20 Jan 16 |
nicklas |
413 |
|
5492 |
13 Jun 19 |
nicklas |
414 |
|
5492 |
13 Jun 19 |
nicklas |
415 |
public static class LaneInfo |
5492 |
13 Jun 19 |
nicklas |
416 |
{ |
5492 |
13 Jun 19 |
nicklas |
// Lane number |
5492 |
13 Jun 19 |
nicklas |
418 |
int laneNo; |
5492 |
13 Jun 19 |
nicklas |
// Pool on this lane |
5492 |
13 Jun 19 |
nicklas |
420 |
Extract pool; |
5492 |
13 Jun 19 |
nicklas |
// All libraries in the pool |
5492 |
13 Jun 19 |
nicklas |
422 |
List<Extract> libsInPool; |
6280 |
09 Jun 21 |
nicklas |
// Libraries in the pool that should not be demuxed |
6280 |
09 Jun 21 |
nicklas |
424 |
List<Extract> ignoreInPool; |
5492 |
13 Jun 19 |
nicklas |
// Child merged items that are in this pool and that should be demuxed |
5492 |
13 Jun 19 |
nicklas |
426 |
List<DerivedBioAssay> mergedInPool; |
5492 |
13 Jun 19 |
nicklas |
427 |
|
5492 |
13 Jun 19 |
nicklas |
// File names for files needed by Picard |
5492 |
13 Jun 19 |
nicklas |
429 |
String barcodesFile; |
5492 |
13 Jun 19 |
nicklas |
430 |
String multiplexFile; |
5492 |
13 Jun 19 |
nicklas |
431 |
String metricsFile; |
5586 |
29 Aug 19 |
nicklas |
432 |
String barcodesDir; |
5492 |
13 Jun 19 |
nicklas |
433 |
} |
3714 |
20 Jan 16 |
nicklas |
434 |
|
3714 |
20 Jan 16 |
nicklas |
435 |
} |