5684 |
25 Oct 19 |
nicklas |
1 |
package net.sf.basedb.reggie.grid; |
5684 |
25 Oct 19 |
nicklas |
2 |
|
5794 |
16 Dec 19 |
nicklas |
3 |
import java.io.BufferedReader; |
5720 |
13 Nov 19 |
nicklas |
4 |
import java.io.IOException; |
5692 |
29 Oct 19 |
nicklas |
5 |
import java.io.InputStream; |
5794 |
16 Dec 19 |
nicklas |
6 |
import java.io.InputStreamReader; |
5684 |
25 Oct 19 |
nicklas |
7 |
import java.util.ArrayList; |
5872 |
23 Mar 20 |
nicklas |
8 |
import java.util.Arrays; |
5872 |
23 Mar 20 |
nicklas |
9 |
import java.util.HashMap; |
5872 |
23 Mar 20 |
nicklas |
10 |
import java.util.HashSet; |
5684 |
25 Oct 19 |
nicklas |
11 |
import java.util.List; |
5872 |
23 Mar 20 |
nicklas |
12 |
import java.util.Map; |
5720 |
13 Nov 19 |
nicklas |
13 |
import java.util.Set; |
5794 |
16 Dec 19 |
nicklas |
14 |
import java.util.regex.Matcher; |
5794 |
16 Dec 19 |
nicklas |
15 |
import java.util.regex.Pattern; |
5692 |
29 Oct 19 |
nicklas |
16 |
import java.util.zip.GZIPInputStream; |
5684 |
25 Oct 19 |
nicklas |
17 |
|
5684 |
25 Oct 19 |
nicklas |
18 |
import org.slf4j.LoggerFactory; |
5684 |
25 Oct 19 |
nicklas |
19 |
|
5720 |
13 Nov 19 |
nicklas |
20 |
import net.sf.basedb.core.Annotatable; |
5692 |
29 Oct 19 |
nicklas |
21 |
import net.sf.basedb.core.AnyToAny; |
5720 |
13 Nov 19 |
nicklas |
22 |
import net.sf.basedb.core.BasicItem; |
5715 |
12 Nov 19 |
nicklas |
23 |
import net.sf.basedb.core.BooleanParameterType; |
5720 |
13 Nov 19 |
nicklas |
24 |
import net.sf.basedb.core.DataFileType; |
5684 |
25 Oct 19 |
nicklas |
25 |
import net.sf.basedb.core.DbControl; |
5684 |
25 Oct 19 |
nicklas |
26 |
import net.sf.basedb.core.DerivedBioAssay; |
5692 |
29 Oct 19 |
nicklas |
27 |
import net.sf.basedb.core.Directory; |
5684 |
25 Oct 19 |
nicklas |
28 |
import net.sf.basedb.core.File; |
5692 |
29 Oct 19 |
nicklas |
29 |
import net.sf.basedb.core.FileServer; |
5720 |
13 Nov 19 |
nicklas |
30 |
import net.sf.basedb.core.FileSetMember; |
5684 |
25 Oct 19 |
nicklas |
31 |
import net.sf.basedb.core.ItemList; |
5684 |
25 Oct 19 |
nicklas |
32 |
import net.sf.basedb.core.ItemNotFoundException; |
5684 |
25 Oct 19 |
nicklas |
33 |
import net.sf.basedb.core.ItemParameterType; |
5692 |
29 Oct 19 |
nicklas |
34 |
import net.sf.basedb.core.ItemSubtype; |
5684 |
25 Oct 19 |
nicklas |
35 |
import net.sf.basedb.core.Job; |
5692 |
29 Oct 19 |
nicklas |
36 |
import net.sf.basedb.core.Path; |
5720 |
13 Nov 19 |
nicklas |
37 |
import net.sf.basedb.core.RawBioAssay; |
5691 |
28 Oct 19 |
nicklas |
38 |
import net.sf.basedb.core.Sample; |
5684 |
25 Oct 19 |
nicklas |
39 |
import net.sf.basedb.core.SessionControl; |
5684 |
25 Oct 19 |
nicklas |
40 |
import net.sf.basedb.core.Software; |
5725 |
13 Nov 19 |
nicklas |
41 |
import net.sf.basedb.core.StringParameterType; |
5684 |
25 Oct 19 |
nicklas |
42 |
import net.sf.basedb.opengrid.JobDefinition; |
5684 |
25 Oct 19 |
nicklas |
43 |
import net.sf.basedb.opengrid.JobStatus; |
5684 |
25 Oct 19 |
nicklas |
44 |
import net.sf.basedb.opengrid.OpenGridCluster; |
5684 |
25 Oct 19 |
nicklas |
45 |
import net.sf.basedb.opengrid.OpenGridSession; |
5684 |
25 Oct 19 |
nicklas |
46 |
import net.sf.basedb.opengrid.ScriptBuilder; |
5684 |
25 Oct 19 |
nicklas |
47 |
import net.sf.basedb.opengrid.config.ClusterConfig; |
5684 |
25 Oct 19 |
nicklas |
48 |
import net.sf.basedb.opengrid.config.JobConfig; |
6656 |
29 Mar 22 |
nicklas |
49 |
import net.sf.basedb.opengrid.filetransfer.FilePermission; |
5684 |
25 Oct 19 |
nicklas |
50 |
import net.sf.basedb.opengrid.service.JobCompletionHandler; |
5684 |
25 Oct 19 |
nicklas |
51 |
import net.sf.basedb.reggie.Reggie; |
5684 |
25 Oct 19 |
nicklas |
52 |
import net.sf.basedb.reggie.XmlConfig; |
5684 |
25 Oct 19 |
nicklas |
53 |
import net.sf.basedb.reggie.dao.AlignedSequences; |
5684 |
25 Oct 19 |
nicklas |
54 |
import net.sf.basedb.reggie.dao.Annotationtype; |
5684 |
25 Oct 19 |
nicklas |
55 |
import net.sf.basedb.reggie.dao.BiomaterialList; |
5684 |
25 Oct 19 |
nicklas |
56 |
import net.sf.basedb.reggie.dao.Datafiletype; |
5790 |
13 Dec 19 |
nicklas |
57 |
import net.sf.basedb.reggie.dao.DoNotUse; |
5692 |
29 Oct 19 |
nicklas |
58 |
import net.sf.basedb.reggie.dao.Fileserver; |
5684 |
25 Oct 19 |
nicklas |
59 |
import net.sf.basedb.reggie.dao.Library; |
5725 |
13 Nov 19 |
nicklas |
60 |
import net.sf.basedb.reggie.dao.Pipeline; |
5720 |
13 Nov 19 |
nicklas |
61 |
import net.sf.basedb.reggie.dao.Rawbioassay; |
5720 |
13 Nov 19 |
nicklas |
62 |
import net.sf.basedb.reggie.dao.Rawdatatype; |
5684 |
25 Oct 19 |
nicklas |
63 |
import net.sf.basedb.reggie.dao.Subtype; |
5872 |
23 Mar 20 |
nicklas |
64 |
import net.sf.basedb.reggie.vcf.GtData; |
5872 |
23 Mar 20 |
nicklas |
65 |
import net.sf.basedb.reggie.vcf.SnpData; |
5872 |
23 Mar 20 |
nicklas |
66 |
import net.sf.basedb.reggie.vcf.VariantCallingInfoFactory; |
5692 |
29 Oct 19 |
nicklas |
67 |
import net.sf.basedb.reggie.vcf.VcfData; |
5872 |
23 Mar 20 |
nicklas |
68 |
import net.sf.basedb.reggie.vcf.VcfDataFilter; |
5692 |
29 Oct 19 |
nicklas |
69 |
import net.sf.basedb.reggie.vcf.VcfParser; |
5692 |
29 Oct 19 |
nicklas |
70 |
import net.sf.basedb.util.FileUtil; |
5684 |
25 Oct 19 |
nicklas |
71 |
import net.sf.basedb.util.Values; |
7079 |
27 Mar 23 |
nicklas |
72 |
import net.sf.basedb.util.extensions.logging.ExtensionsLog; |
7079 |
27 Mar 23 |
nicklas |
73 |
import net.sf.basedb.util.extensions.logging.ExtensionsLogger; |
5872 |
23 Mar 20 |
nicklas |
74 |
import net.sf.basedb.util.formatter.Formatter; |
5684 |
25 Oct 19 |
nicklas |
75 |
|
5684 |
25 Oct 19 |
nicklas |
76 |
/** |
5684 |
25 Oct 19 |
nicklas |
Helper class for creating items needed for generating variant |
5684 |
25 Oct 19 |
nicklas |
calling script and send it to the cluster for execution. |
5684 |
25 Oct 19 |
nicklas |
79 |
|
5684 |
25 Oct 19 |
nicklas |
@author nicklas |
5684 |
25 Oct 19 |
nicklas |
@since 4.24 |
5684 |
25 Oct 19 |
nicklas |
82 |
*/ |
5684 |
25 Oct 19 |
nicklas |
83 |
public class VariantCallingJobCreator |
6674 |
11 Apr 22 |
nicklas |
84 |
extends AbstractJobCreator |
5684 |
25 Oct 19 |
nicklas |
85 |
{ |
5684 |
25 Oct 19 |
nicklas |
86 |
private Software software; |
5713 |
12 Nov 19 |
nicklas |
87 |
private boolean rawOnly; |
5713 |
12 Nov 19 |
nicklas |
88 |
private boolean skipRaw; |
5684 |
25 Oct 19 |
nicklas |
89 |
|
5714 |
12 Nov 19 |
nicklas |
90 |
private int numSkipped; |
5714 |
12 Nov 19 |
nicklas |
91 |
|
5684 |
25 Oct 19 |
nicklas |
92 |
public VariantCallingJobCreator() |
5713 |
12 Nov 19 |
nicklas |
93 |
{ |
5713 |
12 Nov 19 |
nicklas |
94 |
this.skipRaw = true; |
5713 |
12 Nov 19 |
nicklas |
95 |
} |
5684 |
25 Oct 19 |
nicklas |
96 |
|
5684 |
25 Oct 19 |
nicklas |
97 |
/** |
5713 |
12 Nov 19 |
nicklas |
If this flag is set, raw variant calling is skipped if |
5713 |
12 Nov 19 |
nicklas |
it already exists on the alignment. Default value is TRUE. |
5713 |
12 Nov 19 |
nicklas |
100 |
*/ |
5713 |
12 Nov 19 |
nicklas |
101 |
public void setSkipRaw(boolean skipRaw) |
5713 |
12 Nov 19 |
nicklas |
102 |
{ |
5713 |
12 Nov 19 |
nicklas |
103 |
this.skipRaw = skipRaw; |
5713 |
12 Nov 19 |
nicklas |
104 |
} |
5713 |
12 Nov 19 |
nicklas |
105 |
|
5713 |
12 Nov 19 |
nicklas |
106 |
/** |
5713 |
12 Nov 19 |
nicklas |
If this flag is set, only raw variant calling is done. |
5713 |
12 Nov 19 |
nicklas |
Default value is FALSE. |
5713 |
12 Nov 19 |
nicklas |
109 |
*/ |
5713 |
12 Nov 19 |
nicklas |
110 |
public void setRawOnly(boolean rawOnly) |
5713 |
12 Nov 19 |
nicklas |
111 |
{ |
5713 |
12 Nov 19 |
nicklas |
112 |
this.rawOnly = rawOnly; |
5713 |
12 Nov 19 |
nicklas |
113 |
} |
5713 |
12 Nov 19 |
nicklas |
114 |
|
5713 |
12 Nov 19 |
nicklas |
115 |
/** |
5684 |
25 Oct 19 |
nicklas |
Set the software item to set on created VCF files. |
5684 |
25 Oct 19 |
nicklas |
117 |
*/ |
5684 |
25 Oct 19 |
nicklas |
118 |
public void setSoftware(Software software) |
5684 |
25 Oct 19 |
nicklas |
119 |
{ |
5684 |
25 Oct 19 |
nicklas |
120 |
this.software = software; |
5684 |
25 Oct 19 |
nicklas |
121 |
} |
5684 |
25 Oct 19 |
nicklas |
122 |
|
5684 |
25 Oct 19 |
nicklas |
123 |
/** |
5714 |
12 Nov 19 |
nicklas |
Get the number of skipped variant calling jobs due to |
5714 |
12 Nov 19 |
nicklas |
raw variants that already exists. Skipping can happen |
5714 |
12 Nov 19 |
nicklas |
if rawOnly=TRUE and skipRaw=TRUE and some of the selected |
5714 |
12 Nov 19 |
nicklas |
alignment(s) already have raw variants. |
5714 |
12 Nov 19 |
nicklas |
128 |
*/ |
5714 |
12 Nov 19 |
nicklas |
129 |
public int getNumSkipped() |
5714 |
12 Nov 19 |
nicklas |
130 |
{ |
5714 |
12 Nov 19 |
nicklas |
131 |
return numSkipped; |
5714 |
12 Nov 19 |
nicklas |
132 |
} |
5714 |
12 Nov 19 |
nicklas |
133 |
|
5714 |
12 Nov 19 |
nicklas |
134 |
/** |
5684 |
25 Oct 19 |
nicklas |
Schedule jobs on the given cluster for running variant calling. |
5684 |
25 Oct 19 |
nicklas |
@return A list with the corresponding jobs in BASE |
5684 |
25 Oct 19 |
nicklas |
137 |
*/ |
5684 |
25 Oct 19 |
nicklas |
138 |
public List<JobDefinition> createVariantCallingJobs(DbControl dc, OpenGridCluster cluster, List<AlignedSequences> alignedSequences) |
5684 |
25 Oct 19 |
nicklas |
139 |
{ |
5684 |
25 Oct 19 |
nicklas |
140 |
SessionControl sc = dc.getSessionControl(); |
5684 |
25 Oct 19 |
nicklas |
141 |
|
5684 |
25 Oct 19 |
nicklas |
142 |
ClusterConfig clusterCfg = cluster.getConfig(); |
5684 |
25 Oct 19 |
nicklas |
143 |
XmlConfig cfg = Reggie.getConfig(cluster.getId()); |
5684 |
25 Oct 19 |
nicklas |
144 |
if (cfg == null) |
5684 |
25 Oct 19 |
nicklas |
145 |
{ |
5684 |
25 Oct 19 |
nicklas |
146 |
throw new ItemNotFoundException("No configuration in reggie-config.xml for cluster: " + cluster.getId()); |
5684 |
25 Oct 19 |
nicklas |
147 |
} |
5684 |
25 Oct 19 |
nicklas |
148 |
String parameterSet = (String)Annotationtype.PARAMETER_SET.getAnnotationValue(dc, software); |
5684 |
25 Oct 19 |
nicklas |
149 |
|
5684 |
25 Oct 19 |
nicklas |
// Get global options |
6693 |
22 Apr 22 |
nicklas |
151 |
String global_env = ScriptUtil.multilineIndent(cfg.getConfig("global-env")); |
6656 |
29 Mar 22 |
nicklas |
152 |
String projectArchive = cfg.getRequiredConfig("project-archive", null); |
6656 |
29 Mar 22 |
nicklas |
153 |
String externalArchive = cfg.getConfig("external-archive", null, projectArchive); |
5684 |
25 Oct 19 |
nicklas |
154 |
|
5684 |
25 Oct 19 |
nicklas |
// Options for the programs |
7372 |
06 Oct 23 |
nicklas |
156 |
String vcall_submit = cfg.getConfig("variant-call/submit", parameterSet, null); |
7372 |
06 Oct 23 |
nicklas |
157 |
String vcall_submit_debug = cfg.getConfig("variant-call/submit-debug", parameterSet, null); |
6656 |
29 Mar 22 |
nicklas |
158 |
String vcall_env = ScriptUtil.multilineIndent(cfg.getRequiredConfig("variant-call/env", parameterSet)); |
6656 |
29 Mar 22 |
nicklas |
159 |
String vcall_envdebug = ScriptUtil.multilineIndent(cfg.getConfig("variant-call/env-debug", parameterSet, null)); |
6656 |
29 Mar 22 |
nicklas |
160 |
String vcall_execute = ScriptUtil.multilineIndent(cfg.getConfig("variant-call/execute", parameterSet, "./variantcall.sh")); |
6656 |
29 Mar 22 |
nicklas |
161 |
|
5684 |
25 Oct 19 |
nicklas |
// Selected items must be removed from this list |
5684 |
25 Oct 19 |
nicklas |
163 |
ItemList vcallPipeline = BiomaterialList.VARIANT_CALLING_PIPELINE.load(dc); |
5684 |
25 Oct 19 |
nicklas |
164 |
|
5720 |
13 Nov 19 |
nicklas |
// Create VariantCall raw bioassays |
5720 |
13 Nov 19 |
nicklas |
166 |
Rawdatatype variantCallType = Rawdatatype.VARIANT_CALL; |
5720 |
13 Nov 19 |
nicklas |
167 |
|
5684 |
25 Oct 19 |
nicklas |
// Options common for all jobs |
5684 |
25 Oct 19 |
nicklas |
169 |
JobConfig jobConfig = new JobConfig(); |
5684 |
25 Oct 19 |
nicklas |
170 |
if (priority != null) jobConfig.setPriority(priority); |
7372 |
06 Oct 23 |
nicklas |
171 |
if (partition != null) jobConfig.setSbatchOption("partition", ScriptUtil.checkValidScriptParameter(partition)); |
7372 |
06 Oct 23 |
nicklas |
172 |
jobConfig.convertOptionsTo(clusterCfg.getType()); |
7372 |
06 Oct 23 |
nicklas |
173 |
if (submitOptionsOverride != null) |
7372 |
06 Oct 23 |
nicklas |
174 |
{ |
7372 |
06 Oct 23 |
nicklas |
175 |
ScriptUtil.addSubmitOptions(jobConfig, submitOptionsOverride, clusterCfg.getType()); |
7372 |
06 Oct 23 |
nicklas |
176 |
} |
7372 |
06 Oct 23 |
nicklas |
177 |
else |
7372 |
06 Oct 23 |
nicklas |
178 |
{ |
7372 |
06 Oct 23 |
nicklas |
179 |
ScriptUtil.addSubmitOptions(jobConfig, vcall_submit, clusterCfg.getType()); |
7372 |
06 Oct 23 |
nicklas |
180 |
if (debug) ScriptUtil.addSubmitOptions(jobConfig, vcall_submit_debug, clusterCfg.getType()); |
7372 |
06 Oct 23 |
nicklas |
181 |
} |
5684 |
25 Oct 19 |
nicklas |
182 |
|
5684 |
25 Oct 19 |
nicklas |
// We submit one job for each raw bioassay to the cluster |
5684 |
25 Oct 19 |
nicklas |
184 |
List<JobDefinition> jobDefs = new ArrayList<JobDefinition>(alignedSequences.size()); |
5714 |
12 Nov 19 |
nicklas |
185 |
numSkipped = 0; |
5715 |
12 Nov 19 |
nicklas |
186 |
BooleanParameterType booleanType = new BooleanParameterType(); |
5684 |
25 Oct 19 |
nicklas |
187 |
|
5684 |
25 Oct 19 |
nicklas |
188 |
for (AlignedSequences as : alignedSequences) |
5684 |
25 Oct 19 |
nicklas |
189 |
{ |
5684 |
25 Oct 19 |
nicklas |
190 |
as = AlignedSequences.getById(dc, as.getId()); // Ensure item is loaded in this transaction |
5684 |
25 Oct 19 |
nicklas |
191 |
|
5684 |
25 Oct 19 |
nicklas |
// Get some information about the aligned data that we need |
5684 |
25 Oct 19 |
nicklas |
193 |
DerivedBioAssay aligned = as.getDerivedBioAssay(); |
5684 |
25 Oct 19 |
nicklas |
194 |
vcallPipeline.removeItem(aligned); |
5684 |
25 Oct 19 |
nicklas |
195 |
|
5721 |
13 Nov 19 |
nicklas |
196 |
File rawVariants = as.getLinkedFile(dc, "variants-raw.vcf.gz"); |
5715 |
12 Nov 19 |
nicklas |
197 |
if (rawVariants != null && skipRaw && rawOnly) |
5714 |
12 Nov 19 |
nicklas |
198 |
{ |
5714 |
12 Nov 19 |
nicklas |
// We do not have to do anything with this raw bioassay |
5714 |
12 Nov 19 |
nicklas |
200 |
numSkipped++; |
5714 |
12 Nov 19 |
nicklas |
201 |
continue; |
5714 |
12 Nov 19 |
nicklas |
202 |
} |
5715 |
12 Nov 19 |
nicklas |
203 |
boolean rawCallingSkipped = rawVariants != null && skipRaw; |
5714 |
12 Nov 19 |
nicklas |
204 |
|
5684 |
25 Oct 19 |
nicklas |
205 |
Library lib = Library.get(aligned.getExtract()); |
5691 |
28 Oct 19 |
nicklas |
206 |
Sample specimen = (Sample)lib.findSingleParent(dc, Subtype.SPECIMEN); |
5684 |
25 Oct 19 |
nicklas |
207 |
boolean isExternal = Reggie.isExternalItem(aligned.getName()); |
6656 |
29 Mar 22 |
nicklas |
208 |
String archiveFolder = isExternal ? externalArchive : projectArchive; |
5684 |
25 Oct 19 |
nicklas |
209 |
String bamFolder = (String)Annotationtype.DATA_FILES_FOLDER.getAnnotationValue(dc, aligned); |
5684 |
25 Oct 19 |
nicklas |
210 |
File bamFile = Datafiletype.BAM.getFile(dc, aligned); |
5684 |
25 Oct 19 |
nicklas |
211 |
|
5998 |
09 Sep 20 |
nicklas |
212 |
String alignedName = ScriptUtil.checkValidScriptParameter(aligned.getName()); |
5691 |
28 Oct 19 |
nicklas |
213 |
if (specimen != null && specimen.getExternalId() != null) |
5691 |
28 Oct 19 |
nicklas |
214 |
{ |
5998 |
09 Sep 20 |
nicklas |
// Replace SCANB-ID in alignedName with Sample.externalId |
5998 |
09 Sep 20 |
nicklas |
216 |
alignedName = alignedName.replace(specimen.getName(), specimen.getExternalId()); |
5691 |
28 Oct 19 |
nicklas |
217 |
} |
5691 |
28 Oct 19 |
nicklas |
218 |
|
5684 |
25 Oct 19 |
nicklas |
// Create job |
5684 |
25 Oct 19 |
nicklas |
220 |
Job vCallJob = Job.getNew(dc, null, null, null); |
5684 |
25 Oct 19 |
nicklas |
221 |
vCallJob.setItemSubtype(Subtype.VARIANT_CALLING_JOB.get(dc)); |
5684 |
25 Oct 19 |
nicklas |
222 |
vCallJob.setPluginVersion("reggie-"+Reggie.VERSION); |
5684 |
25 Oct 19 |
nicklas |
223 |
vCallJob.setSendMessage(Values.getBoolean(sc.getUserClientSetting("plugins.sendmessage"), false)); |
5684 |
25 Oct 19 |
nicklas |
224 |
vCallJob.setName("Run variant calling " + aligned.getName()); |
5725 |
13 Nov 19 |
nicklas |
225 |
vCallJob.setParameterValue("pipeline", new StringParameterType(), Pipeline.RNASEQ_HISAT_VARIANTCALL.getId()); |
5684 |
25 Oct 19 |
nicklas |
226 |
vCallJob.setParameterValue("alignment", new ItemParameterType<DerivedBioAssay>(DerivedBioAssay.class, null), aligned); |
5684 |
25 Oct 19 |
nicklas |
227 |
if (software != null) |
5684 |
25 Oct 19 |
nicklas |
228 |
{ |
5684 |
25 Oct 19 |
nicklas |
229 |
vCallJob.setParameterValue("software", new ItemParameterType<Software>(Software.class, null), software); |
5684 |
25 Oct 19 |
nicklas |
230 |
} |
5715 |
12 Nov 19 |
nicklas |
231 |
if (rawCallingSkipped) vCallJob.setParameterValue("rawCallingSkipped", booleanType, true); |
5716 |
12 Nov 19 |
nicklas |
232 |
if (rawOnly) vCallJob.setParameterValue("filterSkipped", booleanType, true); |
5684 |
25 Oct 19 |
nicklas |
233 |
|
5684 |
25 Oct 19 |
nicklas |
234 |
if (debug) vCallJob.setName(vCallJob.getName() + " (debug)"); |
6981 |
17 Jan 23 |
nicklas |
235 |
if (partition != null) vCallJob.setParameterValue("partition", new StringParameterType(), partition); |
7372 |
06 Oct 23 |
nicklas |
236 |
if (submitOptionsOverride != null) vCallJob.setParameterValue("jobOptions", new StringParameterType(), submitOptionsOverride); |
5684 |
25 Oct 19 |
nicklas |
237 |
dc.saveItem(vCallJob); |
5684 |
25 Oct 19 |
nicklas |
238 |
|
5720 |
13 Nov 19 |
nicklas |
// Create raw bioassay if filtering is included |
5720 |
13 Nov 19 |
nicklas |
240 |
String vCallFolder = null; |
5720 |
13 Nov 19 |
nicklas |
241 |
if (!rawOnly) |
5720 |
13 Nov 19 |
nicklas |
242 |
{ |
5720 |
13 Nov 19 |
nicklas |
243 |
String rawName = as.getNextRawBioAssayName(dc, variantCallType); |
5720 |
13 Nov 19 |
nicklas |
244 |
RawBioAssay raw = variantCallType.createRawBioAssay(dc); |
5725 |
13 Nov 19 |
nicklas |
245 |
Pipeline.RNASEQ_HISAT_VARIANTCALL.setAnnotation(dc, raw); |
5720 |
13 Nov 19 |
nicklas |
246 |
raw.setJob(vCallJob); |
5720 |
13 Nov 19 |
nicklas |
247 |
raw.setName(rawName); |
5720 |
13 Nov 19 |
nicklas |
248 |
raw.setParentExtract(lib.getExtract()); |
5720 |
13 Nov 19 |
nicklas |
249 |
raw.setSoftware(software); |
5720 |
13 Nov 19 |
nicklas |
250 |
raw.setProtocol(null); // To prevent a default value |
5720 |
13 Nov 19 |
nicklas |
251 |
raw.setArrayDesign(null); // To prevent the default hg38 |
5720 |
13 Nov 19 |
nicklas |
252 |
raw.setParentBioAssay(aligned); |
5790 |
13 Dec 19 |
nicklas |
253 |
DoNotUse.copyDoNotUseAnnotations(dc, aligned, raw, false); |
5720 |
13 Nov 19 |
nicklas |
254 |
dc.saveItem(raw); |
5720 |
13 Nov 19 |
nicklas |
255 |
|
5720 |
13 Nov 19 |
nicklas |
256 |
vCallFolder = bamFolder + "/"+raw.getName().substring(aligned.getName().length()+1); |
5720 |
13 Nov 19 |
nicklas |
257 |
if (debug && !vCallFolder.startsWith("/debug")) |
5720 |
13 Nov 19 |
nicklas |
258 |
{ |
5720 |
13 Nov 19 |
nicklas |
259 |
vCallFolder = "/debug" + vCallFolder; |
5720 |
13 Nov 19 |
nicklas |
260 |
} |
5720 |
13 Nov 19 |
nicklas |
261 |
Annotationtype.DATA_FILES_FOLDER.setAnnotationValue(dc, raw, vCallFolder); |
5720 |
13 Nov 19 |
nicklas |
262 |
if (autoConfirm) |
5720 |
13 Nov 19 |
nicklas |
263 |
{ |
5720 |
13 Nov 19 |
nicklas |
264 |
Annotationtype.AUTO_PROCESSING.setAnnotationValue(dc, raw, "AutoConfirm"); |
5720 |
13 Nov 19 |
nicklas |
265 |
} |
5720 |
13 Nov 19 |
nicklas |
266 |
|
5720 |
13 Nov 19 |
nicklas |
// Checks to make sure no bad things are included in script file |
5720 |
13 Nov 19 |
nicklas |
268 |
ScriptUtil.checkValidPath(vCallFolder, true, true); |
5720 |
13 Nov 19 |
nicklas |
269 |
ScriptUtil.checkValidScriptParameter(raw.getName()); |
5720 |
13 Nov 19 |
nicklas |
270 |
} |
5723 |
13 Nov 19 |
nicklas |
271 |
else |
5684 |
25 Oct 19 |
nicklas |
272 |
{ |
6022 |
26 Oct 20 |
nicklas |
// Register a handler for auto-confirmation (RawOnlyVariantCallingAutoConfirmer) |
5723 |
13 Nov 19 |
nicklas |
274 |
if (autoConfirm) |
5723 |
13 Nov 19 |
nicklas |
275 |
{ |
6022 |
26 Oct 20 |
nicklas |
276 |
vCallJob.setParameterValue("AutoConfirmHandler", new StringParameterType(), "RawOnlyVariantCallingAutoConfirmer"); |
5723 |
13 Nov 19 |
nicklas |
277 |
} |
5684 |
25 Oct 19 |
nicklas |
278 |
} |
5684 |
25 Oct 19 |
nicklas |
279 |
|
5684 |
25 Oct 19 |
nicklas |
// Checks to make sure no bad things are included in script file |
5684 |
25 Oct 19 |
nicklas |
281 |
String bamName = ScriptUtil.checkValidScriptParameter(bamFile.getName()); |
5684 |
25 Oct 19 |
nicklas |
282 |
|
5684 |
25 Oct 19 |
nicklas |
283 |
ScriptBuilder script = new ScriptBuilder(); |
6665 |
05 Apr 22 |
nicklas |
284 |
script.cmd(debug ? "set -eox pipefail" : "set -eo pipefail"); // 'set -e' is not enough to catch failures if a command is piped |
5684 |
25 Oct 19 |
nicklas |
// Set file permissions based on consent or external group! |
5684 |
25 Oct 19 |
nicklas |
286 |
String externalGroup = isExternal ? Reggie.getExternalGroup(aligned.getName()) : null; |
6656 |
29 Mar 22 |
nicklas |
287 |
ScriptUtil.setUmaskForItem(dc, lib, externalGroup, script); |
6693 |
22 Apr 22 |
nicklas |
288 |
script.newLine(); |
5701 |
05 Nov 19 |
nicklas |
289 |
|
6693 |
22 Apr 22 |
nicklas |
290 |
script.cmd(global_env); |
6656 |
29 Mar 22 |
nicklas |
291 |
script.export("ArchiveFolder", archiveFolder); |
6656 |
29 Mar 22 |
nicklas |
292 |
script.export("BamFolder", "${ArchiveFolder}"+bamFolder); |
6656 |
29 Mar 22 |
nicklas |
293 |
script.export("BamName", bamName.replace(".bam", "")); |
6656 |
29 Mar 22 |
nicklas |
294 |
script.export("AlignedName", alignedName); |
6656 |
29 Mar 22 |
nicklas |
295 |
|
5715 |
12 Nov 19 |
nicklas |
296 |
if (rawCallingSkipped) |
5715 |
12 Nov 19 |
nicklas |
297 |
{ |
5715 |
12 Nov 19 |
nicklas |
298 |
String vcfName = ScriptUtil.checkValidScriptParameter(rawVariants.getName()); |
6656 |
29 Mar 22 |
nicklas |
299 |
script.export("RawCallingVcf", "${BamFolder}/"+vcfName); |
5715 |
12 Nov 19 |
nicklas |
300 |
} |
5716 |
12 Nov 19 |
nicklas |
301 |
if (!rawOnly) |
5716 |
12 Nov 19 |
nicklas |
302 |
{ |
6656 |
29 Mar 22 |
nicklas |
303 |
script.export("FilteredFolder", "${ArchiveFolder}"+vCallFolder); |
5716 |
12 Nov 19 |
nicklas |
304 |
} |
6656 |
29 Mar 22 |
nicklas |
305 |
|
6656 |
29 Mar 22 |
nicklas |
306 |
script.cmd(vcall_env); |
6656 |
29 Mar 22 |
nicklas |
307 |
if (debug) script.cmd(vcall_envdebug); |
6656 |
29 Mar 22 |
nicklas |
308 |
script.cmd(vcall_execute); |
6656 |
29 Mar 22 |
nicklas |
309 |
if (externalGroup != null) |
5715 |
12 Nov 19 |
nicklas |
310 |
{ |
6656 |
29 Mar 22 |
nicklas |
311 |
if (!rawCallingSkipped) |
5720 |
13 Nov 19 |
nicklas |
312 |
{ |
5930 |
06 May 20 |
nicklas |
313 |
ScriptUtil.addChgrp(externalGroup, "${BamFolder}/variants-*", aligned.getName(), null, script); |
5720 |
13 Nov 19 |
nicklas |
314 |
} |
6656 |
29 Mar 22 |
nicklas |
315 |
if (!rawOnly) |
5720 |
13 Nov 19 |
nicklas |
316 |
{ |
5930 |
06 May 20 |
nicklas |
317 |
ScriptUtil.addChgrp(externalGroup, "${FilteredFolder}", aligned.getName(), null, script); |
5720 |
13 Nov 19 |
nicklas |
318 |
} |
5715 |
12 Nov 19 |
nicklas |
319 |
} |
5684 |
25 Oct 19 |
nicklas |
320 |
|
6674 |
11 Apr 22 |
nicklas |
321 |
JobDefinition jobDef = new JobDefinition("VariantCalling", jobConfig, batchConfig, vCallJob); |
6656 |
29 Mar 22 |
nicklas |
322 |
jobDef.addFile(ScriptUtil.upload("variantcall.sh")); |
6656 |
29 Mar 22 |
nicklas |
323 |
jobDef.addFile(ScriptUtil.upload("variantcall-utils.sh")); |
6656 |
29 Mar 22 |
nicklas |
324 |
jobDef.addFile(ScriptUtil.upload("reggie-utils.sh")); |
6656 |
29 Mar 22 |
nicklas |
325 |
jobDef.addFile(ScriptUtil.upload("stdwrap.sh")); |
6656 |
29 Mar 22 |
nicklas |
326 |
jobDef.addFile(ScriptUtil.upload("stderrwrap.sh")); |
6656 |
29 Mar 22 |
nicklas |
327 |
jobDef.addFile(ScriptUtil.upload("gc_stat.py"), FilePermission.USER_RWX); |
6656 |
29 Mar 22 |
nicklas |
328 |
jobDef.addFile(ScriptUtil.upload("mutation_signature.R"), FilePermission.USER_RWX); |
5684 |
25 Oct 19 |
nicklas |
329 |
jobDef.setDebug(debug); |
5684 |
25 Oct 19 |
nicklas |
330 |
jobDef.setCmd(script.toString()); |
5684 |
25 Oct 19 |
nicklas |
331 |
jobDefs.add(jobDef); |
5684 |
25 Oct 19 |
nicklas |
332 |
} |
5684 |
25 Oct 19 |
nicklas |
333 |
|
5684 |
25 Oct 19 |
nicklas |
334 |
return jobDefs; |
5684 |
25 Oct 19 |
nicklas |
335 |
} |
5684 |
25 Oct 19 |
nicklas |
336 |
|
5684 |
25 Oct 19 |
nicklas |
337 |
|
5684 |
25 Oct 19 |
nicklas |
338 |
/** |
5684 |
25 Oct 19 |
nicklas |
Job completion handler for Variant call jobs. The handler downloads the |
5736 |
19 Nov 19 |
nicklas |
'files.out' and 'stats.out' files. Links are created to the listed files |
5736 |
19 Nov 19 |
nicklas |
and some information is added as annotations to either the alignment or |
5736 |
19 Nov 19 |
nicklas |
raw bioassay. |
5684 |
25 Oct 19 |
nicklas |
343 |
*/ |
5684 |
25 Oct 19 |
nicklas |
344 |
public static class VariantCallJobCompletionHandler |
5684 |
25 Oct 19 |
nicklas |
345 |
implements JobCompletionHandler |
5684 |
25 Oct 19 |
nicklas |
346 |
{ |
7079 |
27 Mar 23 |
nicklas |
347 |
static final ExtensionsLogger logger = |
7079 |
27 Mar 23 |
nicklas |
348 |
ExtensionsLog.getLogger(JobCompletionHandlerFactory.ID, true).wrap(LoggerFactory.getLogger(VariantCallJobCompletionHandler.class)); |
5684 |
25 Oct 19 |
nicklas |
349 |
|
5684 |
25 Oct 19 |
nicklas |
350 |
public VariantCallJobCompletionHandler() |
5684 |
25 Oct 19 |
nicklas |
351 |
{} |
5684 |
25 Oct 19 |
nicklas |
352 |
|
5684 |
25 Oct 19 |
nicklas |
353 |
@Override |
5684 |
25 Oct 19 |
nicklas |
354 |
public String jobCompleted(SessionControl sc, OpenGridSession session, Job job, JobStatus status) |
5684 |
25 Oct 19 |
nicklas |
355 |
{ |
5684 |
25 Oct 19 |
nicklas |
356 |
String jobName = status.getName(); |
5684 |
25 Oct 19 |
nicklas |
357 |
String files = session.getJobFileAsString(jobName, "files.out", "UTF-8"); |
5701 |
05 Nov 19 |
nicklas |
358 |
String stats = session.getJobFileAsString(jobName, "stats.out", "UTF-8"); |
5701 |
05 Nov 19 |
nicklas |
359 |
String msg = parseFiles(sc, job, files, stats); |
5684 |
25 Oct 19 |
nicklas |
360 |
return "Variant calling completed. " + msg; |
5684 |
25 Oct 19 |
nicklas |
361 |
} |
5684 |
25 Oct 19 |
nicklas |
362 |
|
5701 |
05 Nov 19 |
nicklas |
363 |
private String parseFiles(SessionControl sc, Job job, String filesOut, String statsOut) |
5684 |
25 Oct 19 |
nicklas |
364 |
{ |
5692 |
29 Oct 19 |
nicklas |
365 |
DbControl dc = null; |
5692 |
29 Oct 19 |
nicklas |
366 |
String msg = null; |
5692 |
29 Oct 19 |
nicklas |
367 |
try |
5692 |
29 Oct 19 |
nicklas |
368 |
{ |
6599 |
22 Feb 22 |
nicklas |
369 |
dc = sc.newDbControl("Reggie: Variant calling completed handler"); |
5692 |
29 Oct 19 |
nicklas |
370 |
|
5692 |
29 Oct 19 |
nicklas |
371 |
DerivedBioAssay aligned = (DerivedBioAssay)job.getParameterValue("alignment"); |
5692 |
29 Oct 19 |
nicklas |
372 |
aligned = DerivedBioAssay.getById(dc, aligned.getId()); |
5692 |
29 Oct 19 |
nicklas |
373 |
Software software = (Software)job.getParameterValue("software"); |
5715 |
12 Nov 19 |
nicklas |
374 |
boolean rawCallingSkipped = Boolean.TRUE.equals(job.getParameterValue("rawCallingSkipped")); |
5716 |
12 Nov 19 |
nicklas |
375 |
boolean filterSkipped = Boolean.TRUE.equals(job.getParameterValue("filterSkipped")); |
5720 |
13 Nov 19 |
nicklas |
376 |
RawBioAssay raw = null; |
5720 |
13 Nov 19 |
nicklas |
377 |
if (!filterSkipped) |
5720 |
13 Nov 19 |
nicklas |
378 |
{ |
5720 |
13 Nov 19 |
nicklas |
379 |
Rawbioassay rawVCall = Rawbioassay.getByJob(dc, job); |
5720 |
13 Nov 19 |
nicklas |
380 |
raw = rawVCall.getItem(); |
5720 |
13 Nov 19 |
nicklas |
381 |
} |
5692 |
29 Oct 19 |
nicklas |
382 |
|
5692 |
29 Oct 19 |
nicklas |
// Create file links |
5692 |
29 Oct 19 |
nicklas |
384 |
boolean useExternalProjectArchive = Reggie.isExternalItem(aligned.getName()); |
5692 |
29 Oct 19 |
nicklas |
385 |
FileServer fileArchive = useExternalProjectArchive ? Fileserver.EXTERNAL_ARCHIVE.load(dc) : Fileserver.PROJECT_ARCHIVE.load(dc); |
5692 |
29 Oct 19 |
nicklas |
386 |
String analysisDir = useExternalProjectArchive ? Reggie.EXTERNAL_ANALYSIS_DIR : Reggie.SECONDARY_ANALYSIS_DIR; |
5692 |
29 Oct 19 |
nicklas |
387 |
|
5720 |
13 Nov 19 |
nicklas |
388 |
ParseFiles pf = new ParseFiles(); |
5720 |
13 Nov 19 |
nicklas |
389 |
pf.stat = Stats.parse(statsOut); |
5720 |
13 Nov 19 |
nicklas |
390 |
pf.software = software; |
5720 |
13 Nov 19 |
nicklas |
391 |
pf.fileArchive = fileArchive; |
5720 |
13 Nov 19 |
nicklas |
392 |
pf.vcfType = Subtype.VARIANT_CALL_FORMAT.load(dc); |
5720 |
13 Nov 19 |
nicklas |
393 |
pf.vcfData = Datafiletype.VCF.load(dc); |
5701 |
05 Nov 19 |
nicklas |
394 |
|
5720 |
13 Nov 19 |
nicklas |
395 |
if (!rawCallingSkipped) |
5701 |
05 Nov 19 |
nicklas |
396 |
{ |
5720 |
13 Nov 19 |
nicklas |
397 |
FileOwner alignedOwner = FileOwner.create(dc, aligned, analysisDir); |
5794 |
16 Dec 19 |
nicklas |
398 |
msg = pf.parseFiles(dc, alignedOwner, filesOut, Set.of("variants-callable.bed", "variants-raw.vcf.gz")); |
5721 |
13 Nov 19 |
nicklas |
399 |
Annotationtype.CALLABLE_BASES.setAnnotationValue(dc, aligned, pf.stat.numCallableBases); |
5721 |
13 Nov 19 |
nicklas |
400 |
Annotationtype.VARIANTS_RAW.setAnnotationValue(dc, aligned, pf.stat.numRawVariants); |
5720 |
13 Nov 19 |
nicklas |
401 |
} |
5872 |
23 Mar 20 |
nicklas |
402 |
int maxMutationSignatureIndex = -1; |
5720 |
13 Nov 19 |
nicklas |
403 |
if (!filterSkipped) |
5720 |
13 Nov 19 |
nicklas |
404 |
{ |
5720 |
13 Nov 19 |
nicklas |
405 |
FileOwner vCallOwner = FileOwner.create(dc, raw, analysisDir); |
5794 |
16 Dec 19 |
nicklas |
406 |
msg = pf.parseFiles(dc, vCallOwner, filesOut, Set.of("variants-annotated.vcf.gz", "variants-filtered.vcf", "mutation_signature.txt", "mutation_signature.pdf")); |
5721 |
13 Nov 19 |
nicklas |
407 |
Annotationtype.VARIANTS_PASSED_FILTER.setAnnotationValue(dc, raw, pf.stat.numFiltered); |
5872 |
23 Mar 20 |
nicklas |
408 |
maxMutationSignatureIndex = pf.updateMutationSignatures(dc, raw); |
5872 |
23 Mar 20 |
nicklas |
409 |
pf.variants.updateVariantAnnotations(dc, raw); |
5720 |
13 Nov 19 |
nicklas |
410 |
} |
5720 |
13 Nov 19 |
nicklas |
411 |
|
5720 |
13 Nov 19 |
nicklas |
412 |
if (msg == null) |
5720 |
13 Nov 19 |
nicklas |
413 |
{ |
5720 |
13 Nov 19 |
nicklas |
414 |
if (rawCallingSkipped) |
5701 |
05 Nov 19 |
nicklas |
415 |
{ |
5794 |
16 Dec 19 |
nicklas |
416 |
msg = "Used existing raw variants; annotated " + pf.stat.numAnnotated + " variants; " + pf.stat.numFiltered + " passed filter"; |
5701 |
05 Nov 19 |
nicklas |
417 |
} |
5720 |
13 Nov 19 |
nicklas |
418 |
else if (filterSkipped) |
5701 |
05 Nov 19 |
nicklas |
419 |
{ |
5721 |
13 Nov 19 |
nicklas |
420 |
msg = "Found " + pf.stat.numRawVariants + " variants; skipped filtering; "; |
5794 |
16 Dec 19 |
nicklas |
421 |
msg += Values.formatNumber(pf.stat.numCallableBases/1000000f, 1) + "M callable bases"; |
5701 |
05 Nov 19 |
nicklas |
422 |
} |
5720 |
13 Nov 19 |
nicklas |
423 |
else |
5716 |
12 Nov 19 |
nicklas |
424 |
{ |
5794 |
16 Dec 19 |
nicklas |
425 |
msg = "Found " + pf.stat.numRawVariants + " variants; " + pf.stat.numFiltered + " passed filter "; |
5794 |
16 Dec 19 |
nicklas |
426 |
msg += Values.formatNumber(pf.stat.numCallableBases/1000000f, 1) + "M callable bases"; |
5716 |
12 Nov 19 |
nicklas |
427 |
} |
5872 |
23 Mar 20 |
nicklas |
428 |
if (maxMutationSignatureIndex != -1) |
5794 |
16 Dec 19 |
nicklas |
429 |
{ |
5872 |
23 Mar 20 |
nicklas |
430 |
Float score = pf.mutationSignatureScore[maxMutationSignatureIndex]; |
5872 |
23 Mar 20 |
nicklas |
431 |
Annotationtype maxMutSig = Annotationtype.mutationSignature(maxMutationSignatureIndex+1); |
5872 |
23 Mar 20 |
nicklas |
432 |
msg += "; " + maxMutSig.getName() + "=" + Values.formatNumber(score, 2); |
5794 |
16 Dec 19 |
nicklas |
433 |
} |
5794 |
16 Dec 19 |
nicklas |
434 |
msg += "."; |
5701 |
05 Nov 19 |
nicklas |
435 |
} |
5720 |
13 Nov 19 |
nicklas |
436 |
dc.commit(); |
5720 |
13 Nov 19 |
nicklas |
437 |
} |
5720 |
13 Nov 19 |
nicklas |
438 |
finally |
5720 |
13 Nov 19 |
nicklas |
439 |
{ |
5720 |
13 Nov 19 |
nicklas |
440 |
if (dc != null) dc.close(); |
5720 |
13 Nov 19 |
nicklas |
441 |
} |
5720 |
13 Nov 19 |
nicklas |
442 |
|
5720 |
13 Nov 19 |
nicklas |
443 |
return msg == null ? "" : msg; |
5720 |
13 Nov 19 |
nicklas |
444 |
} |
5720 |
13 Nov 19 |
nicklas |
445 |
|
5720 |
13 Nov 19 |
nicklas |
446 |
|
5720 |
13 Nov 19 |
nicklas |
447 |
|
5720 |
13 Nov 19 |
nicklas |
448 |
} |
5720 |
13 Nov 19 |
nicklas |
449 |
|
5720 |
13 Nov 19 |
nicklas |
450 |
/** |
5720 |
13 Nov 19 |
nicklas |
Statistics from the variant calling. Parsed from |
5720 |
13 Nov 19 |
nicklas |
stats.out. |
5720 |
13 Nov 19 |
nicklas |
453 |
*/ |
5720 |
13 Nov 19 |
nicklas |
454 |
static class Stats |
5720 |
13 Nov 19 |
nicklas |
455 |
{ |
5720 |
13 Nov 19 |
nicklas |
456 |
int numCallableBases = 0; |
5721 |
13 Nov 19 |
nicklas |
457 |
int numRawVariants = 0; |
5720 |
13 Nov 19 |
nicklas |
458 |
int numAnnotated = 0; |
5720 |
13 Nov 19 |
nicklas |
459 |
int numFiltered = 0; |
5720 |
13 Nov 19 |
nicklas |
460 |
|
5720 |
13 Nov 19 |
nicklas |
461 |
static Stats parse(String statsOut) |
5720 |
13 Nov 19 |
nicklas |
462 |
{ |
5720 |
13 Nov 19 |
nicklas |
463 |
Stats s = new Stats(); |
5720 |
13 Nov 19 |
nicklas |
464 |
for (String line : statsOut.split("\n")) |
5720 |
13 Nov 19 |
nicklas |
465 |
{ |
5720 |
13 Nov 19 |
nicklas |
466 |
String[] stat = line.split(":", 2); |
5720 |
13 Nov 19 |
nicklas |
467 |
String key = stat[0].strip(); |
5720 |
13 Nov 19 |
nicklas |
468 |
int val = Values.getInt(stat[1].strip()); |
5720 |
13 Nov 19 |
nicklas |
469 |
if ("Callable bases".equals(key)) |
5720 |
13 Nov 19 |
nicklas |
470 |
{ |
5720 |
13 Nov 19 |
nicklas |
471 |
s.numCallableBases = val; |
5720 |
13 Nov 19 |
nicklas |
472 |
} |
5721 |
13 Nov 19 |
nicklas |
473 |
else if ("Raw variants".equals(key)) |
5720 |
13 Nov 19 |
nicklas |
474 |
{ |
5721 |
13 Nov 19 |
nicklas |
475 |
s.numRawVariants = val; |
5720 |
13 Nov 19 |
nicklas |
476 |
} |
5720 |
13 Nov 19 |
nicklas |
477 |
else if ("Annotated variants".equals(key)) |
5720 |
13 Nov 19 |
nicklas |
478 |
{ |
5720 |
13 Nov 19 |
nicklas |
479 |
s.numAnnotated = val; |
5720 |
13 Nov 19 |
nicklas |
480 |
} |
5720 |
13 Nov 19 |
nicklas |
481 |
else if ("Filtered variants".equals(key)) |
5720 |
13 Nov 19 |
nicklas |
482 |
{ |
5720 |
13 Nov 19 |
nicklas |
483 |
s.numFiltered = val; |
5720 |
13 Nov 19 |
nicklas |
484 |
} |
5720 |
13 Nov 19 |
nicklas |
485 |
} |
5720 |
13 Nov 19 |
nicklas |
486 |
return s; |
5720 |
13 Nov 19 |
nicklas |
487 |
} |
5720 |
13 Nov 19 |
nicklas |
488 |
|
5720 |
13 Nov 19 |
nicklas |
489 |
} |
5720 |
13 Nov 19 |
nicklas |
490 |
|
5720 |
13 Nov 19 |
nicklas |
491 |
/** |
5720 |
13 Nov 19 |
nicklas |
Helper class for wrapping information about were files to an |
5720 |
13 Nov 19 |
nicklas |
item should go. From the owner 'item' we get the DataFilesFolder |
5720 |
13 Nov 19 |
nicklas |
annotation and the corresponding local directory in the BASE file |
5720 |
13 Nov 19 |
nicklas |
system. |
5720 |
13 Nov 19 |
nicklas |
496 |
*/ |
5720 |
13 Nov 19 |
nicklas |
497 |
static class FileOwner |
5720 |
13 Nov 19 |
nicklas |
498 |
{ |
5720 |
13 Nov 19 |
nicklas |
499 |
|
5720 |
13 Nov 19 |
nicklas |
500 |
static FileOwner create(DbControl dc, BasicItem item, String analysisDir) |
5720 |
13 Nov 19 |
nicklas |
501 |
{ |
5720 |
13 Nov 19 |
nicklas |
502 |
FileOwner owner = new FileOwner(); |
5720 |
13 Nov 19 |
nicklas |
503 |
owner.item = item; |
5720 |
13 Nov 19 |
nicklas |
504 |
owner.dataFilesFolder = (String)Annotationtype.DATA_FILES_FOLDER.getAnnotationValue(dc, (Annotatable)item); |
5720 |
13 Nov 19 |
nicklas |
505 |
String baseFolder = Reggie.convertDataFilesFolderToBaseFolder(owner.dataFilesFolder); |
5720 |
13 Nov 19 |
nicklas |
506 |
owner.localDataDir = Directory.getNew(dc, new Path(analysisDir+baseFolder, Path.Type.DIRECTORY)); |
5720 |
13 Nov 19 |
nicklas |
507 |
return owner; |
5720 |
13 Nov 19 |
nicklas |
508 |
} |
5720 |
13 Nov 19 |
nicklas |
509 |
|
5720 |
13 Nov 19 |
nicklas |
510 |
BasicItem item; |
5720 |
13 Nov 19 |
nicklas |
511 |
String dataFilesFolder; |
5720 |
13 Nov 19 |
nicklas |
512 |
Directory localDataDir; |
5720 |
13 Nov 19 |
nicklas |
513 |
} |
5720 |
13 Nov 19 |
nicklas |
514 |
|
5720 |
13 Nov 19 |
nicklas |
515 |
/** |
5720 |
13 Nov 19 |
nicklas |
Helper class for parsing the 'files.out' file and attaching files |
5720 |
13 Nov 19 |
nicklas |
to the specified owner. A filter with filenames need to be specified. |
5720 |
13 Nov 19 |
nicklas |
518 |
*/ |
5720 |
13 Nov 19 |
nicklas |
519 |
static class ParseFiles |
5720 |
13 Nov 19 |
nicklas |
520 |
{ |
5720 |
13 Nov 19 |
nicklas |
521 |
Software software; |
5720 |
13 Nov 19 |
nicklas |
522 |
FileServer fileArchive; |
5720 |
13 Nov 19 |
nicklas |
523 |
Stats stat; |
5720 |
13 Nov 19 |
nicklas |
524 |
ItemSubtype vcfType; |
5794 |
16 Dec 19 |
nicklas |
525 |
DataFileType vcfData; |
5794 |
16 Dec 19 |
nicklas |
526 |
Float[] mutationSignatureScore = new Float[Annotationtype.NUM_MUTATION_SIGNATURES]; |
5872 |
23 Mar 20 |
nicklas |
// key=GENE, value=list of variants found in that gene |
5872 |
23 Mar 20 |
nicklas |
528 |
VariantCollector variants; |
5720 |
13 Nov 19 |
nicklas |
529 |
|
5720 |
13 Nov 19 |
nicklas |
530 |
String parseFiles(DbControl dc, FileOwner owner, String filesOut, Set<String> filenames) |
5720 |
13 Nov 19 |
nicklas |
531 |
{ |
5720 |
13 Nov 19 |
nicklas |
532 |
String msg = null; |
5720 |
13 Nov 19 |
nicklas |
533 |
|
5720 |
13 Nov 19 |
nicklas |
534 |
for (String line : filesOut.split("\n")) |
5720 |
13 Nov 19 |
nicklas |
535 |
{ |
5720 |
13 Nov 19 |
nicklas |
536 |
String filename = line.substring(line.lastIndexOf("/")+1); |
5720 |
13 Nov 19 |
nicklas |
537 |
if (!filenames.contains(filename)) continue; // Skip files not owned by this owner |
5701 |
05 Nov 19 |
nicklas |
538 |
|
5720 |
13 Nov 19 |
nicklas |
539 |
File f = File.getFile(dc, owner.localDataDir, filename, true); |
5720 |
13 Nov 19 |
nicklas |
540 |
f.setFileServer(fileArchive); |
5720 |
13 Nov 19 |
nicklas |
541 |
String fileUrl = "sftp://" + fileArchive.getHost() + owner.dataFilesFolder + "/" + f.getName(); |
5720 |
13 Nov 19 |
nicklas |
542 |
try |
5692 |
29 Oct 19 |
nicklas |
543 |
{ |
5720 |
13 Nov 19 |
nicklas |
544 |
f.setUrl(fileUrl, true); |
5720 |
13 Nov 19 |
nicklas |
545 |
} |
5720 |
13 Nov 19 |
nicklas |
546 |
catch (RuntimeException ex) |
5720 |
13 Nov 19 |
nicklas |
547 |
{ |
5720 |
13 Nov 19 |
nicklas |
548 |
f.setUrl(fileUrl, false); |
5720 |
13 Nov 19 |
nicklas |
549 |
} |
5720 |
13 Nov 19 |
nicklas |
550 |
|
5720 |
13 Nov 19 |
nicklas |
551 |
if (!f.isInDatabase()) |
5720 |
13 Nov 19 |
nicklas |
552 |
{ |
5720 |
13 Nov 19 |
nicklas |
553 |
dc.saveItem(f); |
5720 |
13 Nov 19 |
nicklas |
554 |
} |
5720 |
13 Nov 19 |
nicklas |
555 |
|
5720 |
13 Nov 19 |
nicklas |
556 |
if (filename.equals("variants-filtered.vcf")) |
5720 |
13 Nov 19 |
nicklas |
557 |
{ |
5720 |
13 Nov 19 |
nicklas |
558 |
RawBioAssay raw = (RawBioAssay)owner.item; |
5720 |
13 Nov 19 |
nicklas |
559 |
FileSetMember member = raw.getFileSet().addMember(f, vcfData); |
5692 |
29 Oct 19 |
nicklas |
560 |
try |
5692 |
29 Oct 19 |
nicklas |
561 |
{ |
5720 |
13 Nov 19 |
nicklas |
562 |
VcfData x = parseVcf(f); |
5720 |
13 Nov 19 |
nicklas |
563 |
member.setValid(true, null); |
5720 |
13 Nov 19 |
nicklas |
564 |
raw.setNumFileSpots(x.getGtCount()); |
5692 |
29 Oct 19 |
nicklas |
565 |
} |
5720 |
13 Nov 19 |
nicklas |
566 |
catch (IOException | RuntimeException ex) |
5692 |
29 Oct 19 |
nicklas |
567 |
{ |
5720 |
13 Nov 19 |
nicklas |
568 |
msg = "Could not parse " + filename + " (" + ex.getMessage() + ")"; |
5720 |
13 Nov 19 |
nicklas |
569 |
member.setValid(false, ex.getMessage()); |
7079 |
27 Mar 23 |
nicklas |
570 |
VariantCallJobCompletionHandler.logger.warn("Could not parse file: " + f, ex); |
5692 |
29 Oct 19 |
nicklas |
571 |
} |
5720 |
13 Nov 19 |
nicklas |
572 |
} |
5720 |
13 Nov 19 |
nicklas |
573 |
else |
5720 |
13 Nov 19 |
nicklas |
574 |
{ |
5720 |
13 Nov 19 |
nicklas |
575 |
AnyToAny link = AnyToAny.getNewOrExisting(dc, owner.item, f.getName(), f, false); |
5692 |
29 Oct 19 |
nicklas |
576 |
if (!link.isInDatabase()) dc.saveItem(link); |
5693 |
30 Oct 19 |
nicklas |
577 |
if (software != null) |
5693 |
30 Oct 19 |
nicklas |
578 |
{ |
5693 |
30 Oct 19 |
nicklas |
579 |
link.setDescription("Created with " + software.getName()); |
5693 |
30 Oct 19 |
nicklas |
580 |
} |
5692 |
29 Oct 19 |
nicklas |
581 |
} |
5794 |
16 Dec 19 |
nicklas |
582 |
if (filename.equals("mutation_signature.txt")) |
5794 |
16 Dec 19 |
nicklas |
583 |
{ |
5794 |
16 Dec 19 |
nicklas |
584 |
try |
5794 |
16 Dec 19 |
nicklas |
585 |
{ |
5794 |
16 Dec 19 |
nicklas |
586 |
parseMutationSignatureScores(f); |
5794 |
16 Dec 19 |
nicklas |
587 |
} |
5794 |
16 Dec 19 |
nicklas |
588 |
catch (IOException | RuntimeException ex) |
5794 |
16 Dec 19 |
nicklas |
589 |
{ |
5794 |
16 Dec 19 |
nicklas |
590 |
msg = "Could not parse " + filename + " (" + ex.getMessage() + ")"; |
7079 |
27 Mar 23 |
nicklas |
591 |
VariantCallJobCompletionHandler.logger.warn("Could not parse file: " + f, ex); |
5794 |
16 Dec 19 |
nicklas |
592 |
} |
5794 |
16 Dec 19 |
nicklas |
593 |
} |
5720 |
13 Nov 19 |
nicklas |
594 |
|
5720 |
13 Nov 19 |
nicklas |
595 |
if (filename.endsWith(".vcf.gz")) f.setMimeTypeAuto("application/x-gzip", vcfType); |
5720 |
13 Nov 19 |
nicklas |
596 |
if (filename.endsWith(".vcf")) f.setMimeTypeAuto("text/plain", vcfType); |
5720 |
13 Nov 19 |
nicklas |
597 |
|
5721 |
13 Nov 19 |
nicklas |
598 |
if (filename.equals("variants-raw.vcf.gz")) |
5715 |
12 Nov 19 |
nicklas |
599 |
{ |
5721 |
13 Nov 19 |
nicklas |
600 |
f.setDescription(stat.numRawVariants + " variants."); |
5715 |
12 Nov 19 |
nicklas |
601 |
} |
5720 |
13 Nov 19 |
nicklas |
602 |
else if (filename.equals("variants-annotated.vcf.gz")) |
5716 |
12 Nov 19 |
nicklas |
603 |
{ |
5720 |
13 Nov 19 |
nicklas |
604 |
f.setDescription(stat.numAnnotated + " variants."); |
5716 |
12 Nov 19 |
nicklas |
605 |
} |
5720 |
13 Nov 19 |
nicklas |
606 |
else if (filename.equals("variants-filtered.vcf")) |
5715 |
12 Nov 19 |
nicklas |
607 |
{ |
5720 |
13 Nov 19 |
nicklas |
608 |
f.setDescription(stat.numFiltered + " variants."); |
5715 |
12 Nov 19 |
nicklas |
609 |
} |
5720 |
13 Nov 19 |
nicklas |
610 |
else if (filename.equals("variants-callable.bed")) |
5720 |
13 Nov 19 |
nicklas |
611 |
{ |
5720 |
13 Nov 19 |
nicklas |
612 |
f.setDescription(Values.formatNumber(stat.numCallableBases/1000000f, 1) + "M callable bases"); |
5720 |
13 Nov 19 |
nicklas |
613 |
} |
5692 |
29 Oct 19 |
nicklas |
614 |
} |
5720 |
13 Nov 19 |
nicklas |
615 |
|
5720 |
13 Nov 19 |
nicklas |
616 |
return msg; |
5684 |
25 Oct 19 |
nicklas |
617 |
} |
5684 |
25 Oct 19 |
nicklas |
618 |
|
5692 |
29 Oct 19 |
nicklas |
619 |
/** |
5720 |
13 Nov 19 |
nicklas |
Parses the VCF file for validation. |
5692 |
29 Oct 19 |
nicklas |
621 |
*/ |
5720 |
13 Nov 19 |
nicklas |
622 |
private VcfData parseVcf(File vcfFile) |
5720 |
13 Nov 19 |
nicklas |
623 |
throws IOException |
5692 |
29 Oct 19 |
nicklas |
624 |
{ |
5872 |
23 Mar 20 |
nicklas |
625 |
variants = new VariantCollector(); |
5720 |
13 Nov 19 |
nicklas |
626 |
VcfParser parser = new VcfParser(); |
5872 |
23 Mar 20 |
nicklas |
627 |
parser.setInfoFactory(new VariantCallingInfoFactory(Arrays.asList("ncbiRefSeq"), null)); |
5872 |
23 Mar 20 |
nicklas |
628 |
|
5692 |
29 Oct 19 |
nicklas |
629 |
InputStream fromFileServer = null; |
5692 |
29 Oct 19 |
nicklas |
630 |
VcfData vcfData = null; |
5692 |
29 Oct 19 |
nicklas |
631 |
try |
5692 |
29 Oct 19 |
nicklas |
632 |
{ |
5720 |
13 Nov 19 |
nicklas |
633 |
fromFileServer = vcfFile.getDownloadStream(0); |
5720 |
13 Nov 19 |
nicklas |
634 |
if (vcfFile.getName().endsWith(".gz")) |
5720 |
13 Nov 19 |
nicklas |
635 |
{ |
5720 |
13 Nov 19 |
nicklas |
636 |
fromFileServer = new GZIPInputStream(fromFileServer); |
5720 |
13 Nov 19 |
nicklas |
637 |
} |
5692 |
29 Oct 19 |
nicklas |
638 |
parser.setUseLineNoAsId(true); |
5872 |
23 Mar 20 |
nicklas |
639 |
vcfData = parser.parse(fromFileServer, vcfFile.getName(), variants); |
5692 |
29 Oct 19 |
nicklas |
640 |
} |
5692 |
29 Oct 19 |
nicklas |
641 |
finally |
5692 |
29 Oct 19 |
nicklas |
642 |
{ |
5692 |
29 Oct 19 |
nicklas |
643 |
FileUtil.close(fromFileServer); |
5692 |
29 Oct 19 |
nicklas |
644 |
} |
5692 |
29 Oct 19 |
nicklas |
645 |
return vcfData; |
5692 |
29 Oct 19 |
nicklas |
646 |
} |
5794 |
16 Dec 19 |
nicklas |
647 |
|
5794 |
16 Dec 19 |
nicklas |
648 |
/** |
5794 |
16 Dec 19 |
nicklas |
Parse the mutation signature file. Lines should have pattern: |
5794 |
16 Dec 19 |
nicklas |
650 |
|
5794 |
16 Dec 19 |
nicklas |
Signature.<NN><tab><score> |
5794 |
16 Dec 19 |
nicklas |
652 |
|
5794 |
16 Dec 19 |
nicklas |
where <NN> is the signature index and <score> is a positive |
5794 |
16 Dec 19 |
nicklas |
floating point number. Lines with '0' score are parsed as null. |
5794 |
16 Dec 19 |
nicklas |
655 |
*/ |
5794 |
16 Dec 19 |
nicklas |
656 |
private void parseMutationSignatureScores(File f) |
5794 |
16 Dec 19 |
nicklas |
657 |
throws IOException |
5794 |
16 Dec 19 |
nicklas |
658 |
{ |
5794 |
16 Dec 19 |
nicklas |
659 |
InputStream in = null; |
5794 |
16 Dec 19 |
nicklas |
660 |
try |
5794 |
16 Dec 19 |
nicklas |
661 |
{ |
5794 |
16 Dec 19 |
nicklas |
662 |
Pattern p = Pattern.compile("Signature\\.(\\d+)\\t([0-9.]+)"); |
5794 |
16 Dec 19 |
nicklas |
663 |
in = f.getDownloadStream(0); |
5794 |
16 Dec 19 |
nicklas |
664 |
BufferedReader r = new BufferedReader(new InputStreamReader(in, "UTF-8")); |
5794 |
16 Dec 19 |
nicklas |
665 |
|
5794 |
16 Dec 19 |
nicklas |
666 |
String line = r.readLine(); |
5794 |
16 Dec 19 |
nicklas |
667 |
while (line != null) |
5794 |
16 Dec 19 |
nicklas |
668 |
{ |
5794 |
16 Dec 19 |
nicklas |
669 |
Matcher m = p.matcher(line); |
5794 |
16 Dec 19 |
nicklas |
670 |
if (m.matches()) |
5794 |
16 Dec 19 |
nicklas |
671 |
{ |
6015 |
29 Sep 20 |
nicklas |
672 |
int index = Values.getInt(m.group(1))-1; |
5794 |
16 Dec 19 |
nicklas |
673 |
float score = Values.getFloat(m.group(2)); |
6015 |
29 Sep 20 |
nicklas |
674 |
if (score > 0 && index >= 0 && index < mutationSignatureScore.length) |
5794 |
16 Dec 19 |
nicklas |
675 |
{ |
5794 |
16 Dec 19 |
nicklas |
676 |
mutationSignatureScore[index] = score; |
5794 |
16 Dec 19 |
nicklas |
677 |
} |
5794 |
16 Dec 19 |
nicklas |
678 |
} |
5794 |
16 Dec 19 |
nicklas |
679 |
line = r.readLine(); |
5794 |
16 Dec 19 |
nicklas |
680 |
} |
5794 |
16 Dec 19 |
nicklas |
681 |
} |
5794 |
16 Dec 19 |
nicklas |
682 |
finally |
5794 |
16 Dec 19 |
nicklas |
683 |
{ |
5794 |
16 Dec 19 |
nicklas |
684 |
FileUtil.close(in); |
5794 |
16 Dec 19 |
nicklas |
685 |
} |
5872 |
23 Mar 20 |
nicklas |
686 |
} |
5872 |
23 Mar 20 |
nicklas |
687 |
|
5872 |
23 Mar 20 |
nicklas |
688 |
int updateMutationSignatures(DbControl dc, RawBioAssay raw) |
5872 |
23 Mar 20 |
nicklas |
689 |
{ |
5872 |
23 Mar 20 |
nicklas |
690 |
Float maxScore = null; |
5872 |
23 Mar 20 |
nicklas |
691 |
int maxIndex = -1; |
5794 |
16 Dec 19 |
nicklas |
692 |
|
5872 |
23 Mar 20 |
nicklas |
693 |
for (int index = 0; index < mutationSignatureScore.length; index++) |
5872 |
23 Mar 20 |
nicklas |
694 |
{ |
5872 |
23 Mar 20 |
nicklas |
695 |
Float score = mutationSignatureScore[index]; |
5872 |
23 Mar 20 |
nicklas |
696 |
if (score != null) |
5872 |
23 Mar 20 |
nicklas |
697 |
{ |
5872 |
23 Mar 20 |
nicklas |
698 |
Annotationtype.mutationSignature(index+1).setAnnotationValue(dc, raw, score); |
5872 |
23 Mar 20 |
nicklas |
699 |
if (maxScore == null || maxScore < score) |
5872 |
23 Mar 20 |
nicklas |
700 |
{ |
5872 |
23 Mar 20 |
nicklas |
701 |
maxScore = score; |
5872 |
23 Mar 20 |
nicklas |
702 |
maxIndex = index; |
5872 |
23 Mar 20 |
nicklas |
703 |
} |
5872 |
23 Mar 20 |
nicklas |
704 |
} |
5872 |
23 Mar 20 |
nicklas |
705 |
} |
5872 |
23 Mar 20 |
nicklas |
706 |
return maxIndex; |
5794 |
16 Dec 19 |
nicklas |
707 |
} |
5872 |
23 Mar 20 |
nicklas |
708 |
|
5684 |
25 Oct 19 |
nicklas |
709 |
} |
5872 |
23 Mar 20 |
nicklas |
710 |
|
5872 |
23 Mar 20 |
nicklas |
711 |
public static class VariantCollector |
5872 |
23 Mar 20 |
nicklas |
712 |
implements VcfDataFilter |
5872 |
23 Mar 20 |
nicklas |
713 |
{ |
5720 |
13 Nov 19 |
nicklas |
714 |
|
5872 |
23 Mar 20 |
nicklas |
715 |
private final Set<String> genes; |
5872 |
23 Mar 20 |
nicklas |
716 |
private final Set<String> esr1; |
5872 |
23 Mar 20 |
nicklas |
717 |
|
5872 |
23 Mar 20 |
nicklas |
718 |
private final Map<String, List<SnpData>> found; |
5872 |
23 Mar 20 |
nicklas |
719 |
|
5872 |
23 Mar 20 |
nicklas |
720 |
public VariantCollector() |
5872 |
23 Mar 20 |
nicklas |
721 |
{ |
5872 |
23 Mar 20 |
nicklas |
722 |
this.genes = new HashSet<>(Arrays.asList( |
5872 |
23 Mar 20 |
nicklas |
723 |
"AKT1", "ARHGAP35", "ARID1A", "CBFB", "CDH1", |
5872 |
23 Mar 20 |
nicklas |
724 |
"ERBB2", "FOXA1", "GATA3", "KMT2C", "MAP3K1", |
5872 |
23 Mar 20 |
nicklas |
725 |
"NFIC", "PIK3CA", "PLEC", "PTEN", "RNF213", |
5872 |
23 Mar 20 |
nicklas |
726 |
"RUNX1", "SF3B1", "TBX3", "TP53", "TRPS1")); |
5872 |
23 Mar 20 |
nicklas |
727 |
this.esr1 = new HashSet<>(Arrays.asList( |
5872 |
23 Mar 20 |
nicklas |
728 |
"chr6:152011697:G>C", // (E380Q) |
5872 |
23 Mar 20 |
nicklas |
729 |
"chr6:152011733:G>A", // (V392I) |
5872 |
23 Mar 20 |
nicklas |
730 |
"chr6:152061020:ATGG>A", // (V422del) |
5872 |
23 Mar 20 |
nicklas |
731 |
"chr6:152094402:T>C", // (S463P) |
5872 |
23 Mar 20 |
nicklas |
732 |
"chr6:152098775:G>A", // (V533M) |
5872 |
23 Mar 20 |
nicklas |
733 |
"chr6:152098779:T>A", // (V534E) |
5872 |
23 Mar 20 |
nicklas |
734 |
"chr6:152098785:TC>AG", // (L536Q) |
5872 |
23 Mar 20 |
nicklas |
735 |
"chr6:152098785:T>C", // (L536P) |
5872 |
23 Mar 20 |
nicklas |
736 |
"chr6:152098785:T>G", // (L536R) |
5872 |
23 Mar 20 |
nicklas |
737 |
"chr6:152098787:T>G", // (Y537D) |
5872 |
23 Mar 20 |
nicklas |
738 |
"chr6:152098787:T>A", // (Y537N) |
5872 |
23 Mar 20 |
nicklas |
739 |
"chr6:152098788:A>C", // (Y537S) |
5872 |
23 Mar 20 |
nicklas |
740 |
"chr6:152098791:A>G" // (D538G) |
5872 |
23 Mar 20 |
nicklas |
741 |
)); |
5872 |
23 Mar 20 |
nicklas |
742 |
|
5872 |
23 Mar 20 |
nicklas |
743 |
this.found = new HashMap<>(); |
5872 |
23 Mar 20 |
nicklas |
744 |
} |
5872 |
23 Mar 20 |
nicklas |
745 |
|
5872 |
23 Mar 20 |
nicklas |
746 |
@Override |
5872 |
23 Mar 20 |
nicklas |
747 |
public boolean accept(GtData gt, SnpData snp) |
5872 |
23 Mar 20 |
nicklas |
748 |
{ |
5872 |
23 Mar 20 |
nicklas |
749 |
String gene = (String)snp.getInfo().getInfo("ncbiRefSeq"); |
5872 |
23 Mar 20 |
nicklas |
750 |
String key = snp.getChromosome()+":"+snp.getPosition()+":"+snp.getRef()+">"+snp.getAlt(); |
5872 |
23 Mar 20 |
nicklas |
751 |
boolean add = genes.contains(gene) || esr1.contains(key); |
5872 |
23 Mar 20 |
nicklas |
752 |
if (add) |
5872 |
23 Mar 20 |
nicklas |
753 |
{ |
5872 |
23 Mar 20 |
nicklas |
754 |
List<SnpData> list = found.get(gene); |
5872 |
23 Mar 20 |
nicklas |
755 |
if (list == null) |
5872 |
23 Mar 20 |
nicklas |
756 |
{ |
5872 |
23 Mar 20 |
nicklas |
757 |
list = new ArrayList<>(); |
5872 |
23 Mar 20 |
nicklas |
758 |
found.put(gene, list); |
5872 |
23 Mar 20 |
nicklas |
759 |
} |
5872 |
23 Mar 20 |
nicklas |
760 |
list.add(snp); |
5872 |
23 Mar 20 |
nicklas |
761 |
} |
5872 |
23 Mar 20 |
nicklas |
762 |
return true; |
5872 |
23 Mar 20 |
nicklas |
763 |
} |
5872 |
23 Mar 20 |
nicklas |
764 |
|
5872 |
23 Mar 20 |
nicklas |
765 |
public Set<String> getGenes() |
5872 |
23 Mar 20 |
nicklas |
766 |
{ |
5872 |
23 Mar 20 |
nicklas |
767 |
return genes; |
5872 |
23 Mar 20 |
nicklas |
768 |
} |
5872 |
23 Mar 20 |
nicklas |
769 |
|
5877 |
24 Mar 20 |
nicklas |
770 |
public int updateVariantAnnotations(DbControl dc, RawBioAssay raw) |
5872 |
23 Mar 20 |
nicklas |
771 |
{ |
5872 |
23 Mar 20 |
nicklas |
772 |
Formatter<SnpData> ff = new VariantFormatter(); |
5877 |
24 Mar 20 |
nicklas |
773 |
int numVariants = 0; |
5872 |
23 Mar 20 |
nicklas |
774 |
for (Map.Entry<String, List<SnpData>> entry : found.entrySet()) |
5872 |
23 Mar 20 |
nicklas |
775 |
{ |
5872 |
23 Mar 20 |
nicklas |
776 |
Annotationtype at = Annotationtype.geneVariantAnnotation(entry.getKey()); |
5877 |
24 Mar 20 |
nicklas |
777 |
numVariants += entry.getValue().size(); |
5872 |
23 Mar 20 |
nicklas |
778 |
String value = Values.getString(entry.getValue(), ", ", false, ff); |
5872 |
23 Mar 20 |
nicklas |
779 |
at.setAnnotationValue(dc, raw, value); |
5872 |
23 Mar 20 |
nicklas |
780 |
} |
5877 |
24 Mar 20 |
nicklas |
781 |
return numVariants; |
5872 |
23 Mar 20 |
nicklas |
782 |
} |
5872 |
23 Mar 20 |
nicklas |
783 |
|
5872 |
23 Mar 20 |
nicklas |
784 |
} |
5872 |
23 Mar 20 |
nicklas |
785 |
|
5872 |
23 Mar 20 |
nicklas |
786 |
static class VariantFormatter |
5872 |
23 Mar 20 |
nicklas |
787 |
implements Formatter<SnpData> |
5872 |
23 Mar 20 |
nicklas |
788 |
{ |
5872 |
23 Mar 20 |
nicklas |
789 |
@Override |
5872 |
23 Mar 20 |
nicklas |
790 |
public String format(SnpData snp) |
5872 |
23 Mar 20 |
nicklas |
791 |
{ |
5872 |
23 Mar 20 |
nicklas |
792 |
String ref = snp.getRef(); |
5872 |
23 Mar 20 |
nicklas |
793 |
String alt = snp.getAlt(); |
5872 |
23 Mar 20 |
nicklas |
794 |
if (ref.length() > 8) |
5872 |
23 Mar 20 |
nicklas |
795 |
{ |
5872 |
23 Mar 20 |
nicklas |
// X>del8 |
5872 |
23 Mar 20 |
nicklas |
797 |
alt = "del" + (ref.length()-1); |
5872 |
23 Mar 20 |
nicklas |
798 |
ref = ref.substring(0, 1); |
5872 |
23 Mar 20 |
nicklas |
799 |
} |
5872 |
23 Mar 20 |
nicklas |
800 |
else if (alt.length() > 8) |
5872 |
23 Mar 20 |
nicklas |
801 |
{ |
5872 |
23 Mar 20 |
nicklas |
// X>ins8 |
5872 |
23 Mar 20 |
nicklas |
803 |
alt = "ins" + (alt.length()-1); |
5872 |
23 Mar 20 |
nicklas |
804 |
} |
5872 |
23 Mar 20 |
nicklas |
805 |
return snp.getChromosome()+":"+snp.getPosition()+":"+ref+">"+alt; |
5872 |
23 Mar 20 |
nicklas |
806 |
} |
5872 |
23 Mar 20 |
nicklas |
807 |
|
5872 |
23 Mar 20 |
nicklas |
808 |
@Override |
5872 |
23 Mar 20 |
nicklas |
809 |
public SnpData parseString(String s) |
5872 |
23 Mar 20 |
nicklas |
810 |
{ |
5872 |
23 Mar 20 |
nicklas |
811 |
throw new UnsupportedOperationException("parseString: " + s); |
5872 |
23 Mar 20 |
nicklas |
812 |
} |
5872 |
23 Mar 20 |
nicklas |
813 |
|
5872 |
23 Mar 20 |
nicklas |
814 |
} |
5684 |
25 Oct 19 |
nicklas |
815 |
} |