5873 |
23 Mar 20 |
nicklas |
1 |
package net.sf.basedb.reggie.plugins.release; |
5873 |
23 Mar 20 |
nicklas |
2 |
|
5880 |
25 Mar 20 |
nicklas |
3 |
import java.util.ArrayList; |
5873 |
23 Mar 20 |
nicklas |
4 |
import java.util.Arrays; |
5873 |
23 Mar 20 |
nicklas |
5 |
import java.util.List; |
6387 |
15 Sep 21 |
nicklas |
6 |
import java.util.regex.Pattern; |
5873 |
23 Mar 20 |
nicklas |
7 |
|
5873 |
23 Mar 20 |
nicklas |
8 |
import org.json.simple.JSONArray; |
5873 |
23 Mar 20 |
nicklas |
9 |
import org.json.simple.JSONObject; |
5873 |
23 Mar 20 |
nicklas |
10 |
|
5880 |
25 Mar 20 |
nicklas |
11 |
import net.sf.basedb.core.AnnotationType; |
5873 |
23 Mar 20 |
nicklas |
12 |
import net.sf.basedb.core.DbControl; |
5873 |
23 Mar 20 |
nicklas |
13 |
import net.sf.basedb.core.DerivedBioAssay; |
5873 |
23 Mar 20 |
nicklas |
14 |
import net.sf.basedb.core.Extract; |
5873 |
23 Mar 20 |
nicklas |
15 |
import net.sf.basedb.core.File; |
5873 |
23 Mar 20 |
nicklas |
16 |
import net.sf.basedb.core.Item; |
5880 |
25 Mar 20 |
nicklas |
17 |
import net.sf.basedb.core.ItemQuery; |
5873 |
23 Mar 20 |
nicklas |
18 |
import net.sf.basedb.core.Location; |
5873 |
23 Mar 20 |
nicklas |
19 |
import net.sf.basedb.core.RawBioAssay; |
5873 |
23 Mar 20 |
nicklas |
20 |
import net.sf.basedb.core.RawDataType; |
5873 |
23 Mar 20 |
nicklas |
21 |
import net.sf.basedb.core.Type; |
5880 |
25 Mar 20 |
nicklas |
22 |
import net.sf.basedb.core.query.Expressions; |
5880 |
25 Mar 20 |
nicklas |
23 |
import net.sf.basedb.core.query.Hql; |
5880 |
25 Mar 20 |
nicklas |
24 |
import net.sf.basedb.core.query.Orders; |
5880 |
25 Mar 20 |
nicklas |
25 |
import net.sf.basedb.core.query.Restrictions; |
5880 |
25 Mar 20 |
nicklas |
26 |
import net.sf.basedb.reggie.Reggie; |
5873 |
23 Mar 20 |
nicklas |
27 |
import net.sf.basedb.reggie.dao.Annotationtype; |
5873 |
23 Mar 20 |
nicklas |
28 |
import net.sf.basedb.reggie.dao.Datafiletype; |
5873 |
23 Mar 20 |
nicklas |
29 |
import net.sf.basedb.reggie.dao.Rawbioassay; |
5873 |
23 Mar 20 |
nicklas |
30 |
import net.sf.basedb.reggie.dao.Rawdatatype; |
5873 |
23 Mar 20 |
nicklas |
31 |
import net.sf.basedb.reggie.json.FilteredJSONArray; |
6387 |
15 Sep 21 |
nicklas |
32 |
import net.sf.basedb.util.filter.AllOfFilter; |
5873 |
23 Mar 20 |
nicklas |
33 |
import net.sf.basedb.util.filter.Filter; |
5873 |
23 Mar 20 |
nicklas |
34 |
import net.sf.basedb.util.filter.NotNullFilter; |
6387 |
15 Sep 21 |
nicklas |
35 |
import net.sf.basedb.util.filter.OneOfFilter; |
5873 |
23 Mar 20 |
nicklas |
36 |
|
5873 |
23 Mar 20 |
nicklas |
37 |
/** |
5873 |
23 Mar 20 |
nicklas |
Cohort writer implementation for VariantCalling data. |
5873 |
23 Mar 20 |
nicklas |
@since 4.26 |
5873 |
23 Mar 20 |
nicklas |
40 |
*/ |
5873 |
23 Mar 20 |
nicklas |
41 |
public class VariantCallingWriter |
5873 |
23 Mar 20 |
nicklas |
42 |
extends RawBioAssayWriter |
5873 |
23 Mar 20 |
nicklas |
43 |
{ |
5873 |
23 Mar 20 |
nicklas |
44 |
|
5873 |
23 Mar 20 |
nicklas |
45 |
private final ScriptWriter scriptWriter; |
5873 |
23 Mar 20 |
nicklas |
46 |
private final Filter<File> fileFilterVariantCall; |
5880 |
25 Mar 20 |
nicklas |
47 |
private final List<Annotationtype> vcallTypes; |
5880 |
25 Mar 20 |
nicklas |
48 |
|
5873 |
23 Mar 20 |
nicklas |
49 |
public VariantCallingWriter(DbControl dc, ReleaseWriterOptions options, ScriptWriter scriptWriter) |
5873 |
23 Mar 20 |
nicklas |
50 |
{ |
5873 |
23 Mar 20 |
nicklas |
51 |
super(dc, options); |
5873 |
23 Mar 20 |
nicklas |
52 |
this.scriptWriter = scriptWriter; |
6387 |
15 Sep 21 |
nicklas |
53 |
this.fileFilterVariantCall = |
6387 |
15 Sep 21 |
nicklas |
54 |
new AllOfFilter<>(Arrays.asList( |
6387 |
15 Sep 21 |
nicklas |
55 |
new FileLocationFilter(Location.EXTERNAL, true), |
6387 |
15 Sep 21 |
nicklas |
56 |
new OneOfFilter<>(Arrays.asList( |
6387 |
15 Sep 21 |
nicklas |
57 |
new FileListFilter("variants-filtered.vcf", "variants-annotated.vcf.gz"), |
6387 |
15 Sep 21 |
nicklas |
58 |
new RegexpFileFilter(Pattern.compile("genotype_.*\\.vcf")) |
5873 |
23 Mar 20 |
nicklas |
59 |
)) |
6387 |
15 Sep 21 |
nicklas |
60 |
)); |
5880 |
25 Mar 20 |
nicklas |
61 |
|
5880 |
25 Mar 20 |
nicklas |
// Load all "VCALL" annotation types |
5880 |
25 Mar 20 |
nicklas |
63 |
ItemQuery<AnnotationType> query = AnnotationType.getQuery(Item.RAWBIOASSAY); |
5880 |
25 Mar 20 |
nicklas |
64 |
query.setIncludes(Reggie.INCLUDE_IN_CURRENT_PROJECT); |
5880 |
25 Mar 20 |
nicklas |
65 |
query.join(Hql.innerJoin("categories", "cat")); |
5880 |
25 Mar 20 |
nicklas |
66 |
query.restrict(Restrictions.eq(Hql.property("cat", "name"), Expressions.string("VCALL_Release"))); |
5880 |
25 Mar 20 |
nicklas |
67 |
query.order(Orders.asc(Hql.property("name"))); |
5880 |
25 Mar 20 |
nicklas |
68 |
List<AnnotationType> tmp = query.list(dc); |
5880 |
25 Mar 20 |
nicklas |
69 |
|
5880 |
25 Mar 20 |
nicklas |
70 |
vcallTypes = new ArrayList<>(tmp.size()); |
5880 |
25 Mar 20 |
nicklas |
71 |
AnnotationType passedFilter = Annotationtype.VARIANTS_PASSED_FILTER.get(dc); |
5880 |
25 Mar 20 |
nicklas |
72 |
for (AnnotationType at : tmp) |
5880 |
25 Mar 20 |
nicklas |
73 |
{ |
5880 |
25 Mar 20 |
nicklas |
74 |
if (!at.equals(passedFilter)) |
5880 |
25 Mar 20 |
nicklas |
75 |
{ |
5880 |
25 Mar 20 |
nicklas |
76 |
vcallTypes.add(Annotationtype.get(at)); |
5880 |
25 Mar 20 |
nicklas |
77 |
} |
5880 |
25 Mar 20 |
nicklas |
78 |
} |
5873 |
23 Mar 20 |
nicklas |
79 |
} |
5873 |
23 Mar 20 |
nicklas |
80 |
|
5873 |
23 Mar 20 |
nicklas |
81 |
@Override |
5873 |
23 Mar 20 |
nicklas |
82 |
public JSONArray toJSONObjects(CohortItem item) |
5873 |
23 Mar 20 |
nicklas |
83 |
{ |
5873 |
23 Mar 20 |
nicklas |
84 |
List<Rawbioassay> variantCalls = item.getVariantCallBioAssays(); |
5873 |
23 Mar 20 |
nicklas |
85 |
if (variantCalls.size() == 0) return null; |
5873 |
23 Mar 20 |
nicklas |
86 |
DbControl dc = item.getDbControl(); |
5873 |
23 Mar 20 |
nicklas |
87 |
|
5873 |
23 Mar 20 |
nicklas |
88 |
JSONArray json = new JSONArray(); |
5873 |
23 Mar 20 |
nicklas |
89 |
for (Rawbioassay raw : variantCalls) |
5873 |
23 Mar 20 |
nicklas |
90 |
{ |
5873 |
23 Mar 20 |
nicklas |
91 |
RawBioAssay rba = raw.getItem(); |
5873 |
23 Mar 20 |
nicklas |
92 |
RawDataType rawType = rba.getRawDataType(); |
5873 |
23 Mar 20 |
nicklas |
93 |
Extract lib = rba.getParentExtract(); |
5873 |
23 Mar 20 |
nicklas |
94 |
|
5873 |
23 Mar 20 |
nicklas |
95 |
DerivedBioAssay aligned = item.getParentBioAssay(rba.getName()); |
5873 |
23 Mar 20 |
nicklas |
96 |
DerivedBioAssay masked = item.getParentBioAssay(aligned.getName()); |
5873 |
23 Mar 20 |
nicklas |
97 |
DerivedBioAssay merged = item.getParentBioAssay(masked.getName()); |
5873 |
23 Mar 20 |
nicklas |
98 |
|
5873 |
23 Mar 20 |
nicklas |
99 |
JSONObject jsonRaw = new JSONObject(); |
5873 |
23 Mar 20 |
nicklas |
100 |
jsonRaw.put("name", item.toReleaseId(raw.getName())); |
5873 |
23 Mar 20 |
nicklas |
101 |
jsonRaw.put("type", rba.getType().name()); |
5873 |
23 Mar 20 |
nicklas |
102 |
jsonRaw.put("rawdatatype", rawType.getId()); |
5873 |
23 Mar 20 |
nicklas |
103 |
jsonRaw.put("platform", Rawdatatype.VARIANT_CALL.getPlatformId()); |
5873 |
23 Mar 20 |
nicklas |
104 |
jsonRaw.put("platformVariant", Rawdatatype.VARIANT_CALL.getVariantId()); |
5873 |
23 Mar 20 |
nicklas |
105 |
jsonRaw.put("parent", item.toReleaseId(merged.getName())); |
5873 |
23 Mar 20 |
nicklas |
106 |
jsonRaw.put("extract", item.toReleaseId(lib.getName())); |
5873 |
23 Mar 20 |
nicklas |
107 |
|
5873 |
23 Mar 20 |
nicklas |
108 |
JSONArray jsonAnnotations = new FilteredJSONArray(new NotNullFilter<>(false)); |
5873 |
23 Mar 20 |
nicklas |
109 |
jsonRaw.put("annotations", jsonAnnotations); |
5873 |
23 Mar 20 |
nicklas |
110 |
|
5873 |
23 Mar 20 |
nicklas |
// From the rawbioassay item |
5873 |
23 Mar 20 |
nicklas |
112 |
jsonAnnotations.add(item.createAnnotationJSON("FeatureSoftware", getName(rba.getSoftware()))); |
5873 |
23 Mar 20 |
nicklas |
113 |
jsonAnnotations.add(item.getAnnotationJSON(Annotationtype.PIPELINE, rba, null)); |
5873 |
23 Mar 20 |
nicklas |
114 |
jsonAnnotations.add(item.getAnnotationJSON(Annotationtype.VARIANTS_PASSED_FILTER, rba, null)); |
5873 |
23 Mar 20 |
nicklas |
115 |
|
5880 |
25 Mar 20 |
nicklas |
116 |
for (Annotationtype at : vcallTypes) |
5873 |
23 Mar 20 |
nicklas |
117 |
{ |
5880 |
25 Mar 20 |
nicklas |
118 |
jsonAnnotations.add(item.getAnnotationJSON(at, rba, null)); |
5873 |
23 Mar 20 |
nicklas |
119 |
} |
5873 |
23 Mar 20 |
nicklas |
120 |
|
5873 |
23 Mar 20 |
nicklas |
// From the AlignedSequences item |
5875 |
24 Mar 20 |
nicklas |
122 |
jsonAnnotations.add(item.createAnnotationJSON("AlignmentName", item.toReleaseId(aligned.getName()))); |
5873 |
23 Mar 20 |
nicklas |
123 |
jsonAnnotations.add(item.getAnnotationJSON(Annotationtype.VARIANTS_RAW, aligned, null)); |
5873 |
23 Mar 20 |
nicklas |
124 |
jsonAnnotations.add(item.getAnnotationJSON(Annotationtype.CALLABLE_BASES, aligned, null)); |
5873 |
23 Mar 20 |
nicklas |
125 |
|
5873 |
23 Mar 20 |
nicklas |
// Folders and files |
5873 |
23 Mar 20 |
nicklas |
127 |
String dataFilesFolder = (String)item.getAnnotationValue(Annotationtype.DATA_FILES_FOLDER, rba); |
5873 |
23 Mar 20 |
nicklas |
128 |
String releaseDataFilesFolder = item.toReleaseFolder(dataFilesFolder); |
5873 |
23 Mar 20 |
nicklas |
129 |
JSONObject jsonDataFilesFolder = item.createAnnotationJSON(Annotationtype.DATA_FILES_FOLDER.getName(), releaseDataFilesFolder); |
5873 |
23 Mar 20 |
nicklas |
130 |
jsonAnnotations.add(jsonDataFilesFolder); |
5873 |
23 Mar 20 |
nicklas |
131 |
|
5873 |
23 Mar 20 |
nicklas |
132 |
JSONArray jsonFiles = new FilteredJSONArray(new NotNullFilter<>(false)); |
5873 |
23 Mar 20 |
nicklas |
133 |
jsonFiles.addAll(item.getDataFilesJSON(Datafiletype.VCF, rba, fileFilterVariantCall, false)); |
5873 |
23 Mar 20 |
nicklas |
134 |
jsonFiles.addAll(item.getLinkedFilesJSON(rba, fileFilterVariantCall, false)); |
5873 |
23 Mar 20 |
nicklas |
135 |
jsonRaw.put("files", jsonFiles); |
5873 |
23 Mar 20 |
nicklas |
136 |
|
5873 |
23 Mar 20 |
nicklas |
137 |
if (scriptWriter != null) |
5873 |
23 Mar 20 |
nicklas |
138 |
{ |
5873 |
23 Mar 20 |
nicklas |
139 |
scriptWriter.addFiles(releaseDataFilesFolder, dataFilesFolder, "variantcall.lst", jsonFiles); |
5873 |
23 Mar 20 |
nicklas |
140 |
} |
5873 |
23 Mar 20 |
nicklas |
141 |
|
5873 |
23 Mar 20 |
nicklas |
142 |
json.add(jsonRaw); |
5873 |
23 Mar 20 |
nicklas |
143 |
} |
5873 |
23 Mar 20 |
nicklas |
144 |
return json; |
5873 |
23 Mar 20 |
nicklas |
145 |
} |
5873 |
23 Mar 20 |
nicklas |
146 |
|
5873 |
23 Mar 20 |
nicklas |
147 |
@Override |
5873 |
23 Mar 20 |
nicklas |
148 |
public List<CohortTypeDef> getTypeDefsInJSON() |
5873 |
23 Mar 20 |
nicklas |
149 |
{ |
5873 |
23 Mar 20 |
nicklas |
150 |
DbControl dc = getDbControl(); |
5873 |
23 Mar 20 |
nicklas |
151 |
|
5873 |
23 Mar 20 |
nicklas |
152 |
CohortTypeDefFactory rawFactory = new CohortTypeDefFactory(dc, Item.RAWBIOASSAY, "VariantCall"); |
5873 |
23 Mar 20 |
nicklas |
153 |
|
5873 |
23 Mar 20 |
nicklas |
// From the AlignedSequences item |
5875 |
24 Mar 20 |
nicklas |
155 |
rawFactory.createAnnotationType("AlignmentName", Type.STRING); |
5873 |
23 Mar 20 |
nicklas |
156 |
rawFactory.createAnnotationType(Annotationtype.VARIANTS_RAW); |
5873 |
23 Mar 20 |
nicklas |
157 |
rawFactory.createAnnotationType(Annotationtype.CALLABLE_BASES); |
5873 |
23 Mar 20 |
nicklas |
158 |
|
5873 |
23 Mar 20 |
nicklas |
// From the rawbioassay item |
5873 |
23 Mar 20 |
nicklas |
160 |
rawFactory.createAnnotationType("FeatureSoftware", Type.STRING); |
5873 |
23 Mar 20 |
nicklas |
161 |
rawFactory.createAnnotationType(Annotationtype.PIPELINE); |
5873 |
23 Mar 20 |
nicklas |
162 |
rawFactory.createAnnotationType(Annotationtype.VARIANTS_PASSED_FILTER); |
5880 |
25 Mar 20 |
nicklas |
163 |
rawFactory.createAnnotationType(Annotationtype.DATA_FILES_FOLDER).setProjectSpecificValues(true); |
7014 |
26 Jan 23 |
nicklas |
164 |
rawFactory.createFileType(Datafiletype.VCF, Rawdatatype.VARIANT_CALL); |
5880 |
25 Mar 20 |
nicklas |
165 |
|
5880 |
25 Mar 20 |
nicklas |
166 |
for (Annotationtype at : vcallTypes) |
5873 |
23 Mar 20 |
nicklas |
167 |
{ |
5880 |
25 Mar 20 |
nicklas |
168 |
CohortAnnotationType cat = rawFactory.createAnnotationType(at); |
5873 |
23 Mar 20 |
nicklas |
169 |
} |
5873 |
23 Mar 20 |
nicklas |
170 |
|
5873 |
23 Mar 20 |
nicklas |
171 |
return rawFactory.allCreated(); |
5873 |
23 Mar 20 |
nicklas |
172 |
} |
5873 |
23 Mar 20 |
nicklas |
173 |
|
5873 |
23 Mar 20 |
nicklas |
174 |
} |