7406 |
08 Nov 23 |
nicklas |
#!/bin/bash |
7406 |
08 Nov 23 |
nicklas |
2 |
## |
7406 |
08 Nov 23 |
nicklas |
## Pipeline script for WGS VariantCall analysis of panel-of-normals. |
7406 |
08 Nov 23 |
nicklas |
4 |
## |
7406 |
08 Nov 23 |
nicklas |
5 |
## |
7406 |
08 Nov 23 |
nicklas |
## Environment variables that should be defined (in job.sh) before calling this script |
7406 |
08 Nov 23 |
nicklas |
## -BamFolder: Path to folder where the alignment.bam is found |
7406 |
08 Nov 23 |
nicklas |
## -BamName: Base part of alignment BAM file (eg. alignment) |
7406 |
08 Nov 23 |
nicklas |
## -PonFolder: Folder for output of VCF files. Will be created or cleared from existing files. |
7406 |
08 Nov 23 |
nicklas |
## -GATK: Path to the gatk program |
7406 |
08 Nov 23 |
nicklas |
## -REF_FASTA: Path to the reference FASTA file |
7439 |
16 Nov 23 |
nicklas |
## -PON: Path to the panel-of-normal VCF file |
7406 |
08 Nov 23 |
nicklas |
## -TMPDIR: Path to a local temporary working directory |
7406 |
08 Nov 23 |
nicklas |
## -BaseRecalibratorOptions: Options for the BaseRecalibratorSpark step |
7439 |
16 Nov 23 |
nicklas |
## -ApplyBQSROptions: Options for the ApplyBQSRSpark step (optional) |
7439 |
16 Nov 23 |
nicklas |
## -Mutect2Options: Options for the Mutect2 step (optional) |
7439 |
16 Nov 23 |
nicklas |
## -GetPileupSummariesOptions: Options for the GetPileupSummaries step (optional) |
7439 |
16 Nov 23 |
nicklas |
## -CalculateContaminationOptions: Options for the CalculateContamination step (optional) |
7439 |
16 Nov 23 |
nicklas |
## -LearnReadOrientationModelOptions: Options for the LearnReadOrientationModel step (optional) |
7439 |
16 Nov 23 |
nicklas |
## -FilterMutectCallsOptions: Options for the FilterMutectCalls step (optional) |
7439 |
16 Nov 23 |
nicklas |
## -MergeVcfsOptions: Options for the MergeVcfs step (optional) |
7439 |
16 Nov 23 |
nicklas |
## -VcfannoOptions: Options for the Vcfanno step (optional) |
7439 |
16 Nov 23 |
nicklas |
## -SnpEffOptions: Options for the SnpEff step (optional) |
7439 |
16 Nov 23 |
nicklas |
## -FuncotatorOptions: Options for the Funcotator step (optional; if not set the Funcotator step is skipped) |
7439 |
16 Nov 23 |
nicklas |
## -CommonGATKOptions: Other options that are added to all GATK steps (optional) |
7406 |
08 Nov 23 |
nicklas |
26 |
## |
7406 |
08 Nov 23 |
nicklas |
27 |
|
7406 |
08 Nov 23 |
nicklas |
28 |
set -eo pipefail |
7406 |
08 Nov 23 |
nicklas |
29 |
|
7406 |
08 Nov 23 |
nicklas |
## Import utility functions |
7406 |
08 Nov 23 |
nicklas |
31 |
source ./reggie-utils.sh |
7406 |
08 Nov 23 |
nicklas |
32 |
|
7406 |
08 Nov 23 |
nicklas |
## Verify settings in options |
7406 |
08 Nov 23 |
nicklas |
34 |
rg_var_isdir "TumorFolder" "NormalFolder" "TMPDIR" "WD" |
7406 |
08 Nov 23 |
nicklas |
35 |
rg_var_isset "TumorBam" "NormalBam" "VcallFolder" "TumorSampleName" "NormalSampleName" |
7439 |
16 Nov 23 |
nicklas |
36 |
rg_var_isfile "GATK" "REF_FASTA" "PON" |
7406 |
08 Nov 23 |
nicklas |
37 |
rg_file_exists "${TumorFolder}/${TumorBam}.bam" "${TumorFolder}/${TumorBam}.bai" |
7406 |
08 Nov 23 |
nicklas |
38 |
rg_file_exists "${NormalFolder}/${NormalBam}.bam" "${NormalFolder}/${NormalBam}.bai" |
7406 |
08 Nov 23 |
nicklas |
39 |
|
7406 |
08 Nov 23 |
nicklas |
40 |
base_recalibrate() |
7406 |
08 Nov 23 |
nicklas |
41 |
{ |
7406 |
08 Nov 23 |
nicklas |
## 'tumor' or 'normal' |
7406 |
08 Nov 23 |
nicklas |
43 |
local tn=$1 |
7406 |
08 Nov 23 |
nicklas |
44 |
|
7406 |
08 Nov 23 |
nicklas |
45 |
${WD}/stdwrap.sh ${GATK} BaseRecalibratorSpark \ |
7406 |
08 Nov 23 |
nicklas |
46 |
-R ${REF_FASTA} \ |
7406 |
08 Nov 23 |
nicklas |
47 |
-I bam/${tn}.bam \ |
7406 |
08 Nov 23 |
nicklas |
48 |
-O recal/${tn}.table \ |
7406 |
08 Nov 23 |
nicklas |
49 |
${BaseRecalibratorOptions} \ |
7409 |
08 Nov 23 |
nicklas |
50 |
${CommonGATKOptions} \ |
7406 |
08 Nov 23 |
nicklas |
51 |
> recalibrate_${tn}.out |
7406 |
08 Nov 23 |
nicklas |
52 |
} |
7406 |
08 Nov 23 |
nicklas |
53 |
|
7406 |
08 Nov 23 |
nicklas |
54 |
apply_bqsr() |
7406 |
08 Nov 23 |
nicklas |
55 |
{ |
7406 |
08 Nov 23 |
nicklas |
## 'tumor' or 'normal' |
7406 |
08 Nov 23 |
nicklas |
57 |
local tn=$1 |
7406 |
08 Nov 23 |
nicklas |
58 |
|
7406 |
08 Nov 23 |
nicklas |
59 |
${WD}/stdwrap.sh ${GATK} --java-options "-Dsamjdk.compression_level=6" ApplyBQSRSpark \ |
7406 |
08 Nov 23 |
nicklas |
60 |
-R ${REF_FASTA} \ |
7406 |
08 Nov 23 |
nicklas |
61 |
-I bam/${tn}.bam \ |
7406 |
08 Nov 23 |
nicklas |
62 |
--bqsr-recal-file recal/${tn}.table \ |
7406 |
08 Nov 23 |
nicklas |
63 |
-O recal/${tn}.bam \ |
7406 |
08 Nov 23 |
nicklas |
64 |
${ApplyBQSROptions} \ |
7409 |
08 Nov 23 |
nicklas |
65 |
${CommonGATKOptions} \ |
7406 |
08 Nov 23 |
nicklas |
66 |
> apply_bqsr_${tn}.out |
7406 |
08 Nov 23 |
nicklas |
67 |
} |
7406 |
08 Nov 23 |
nicklas |
68 |
|
7407 |
08 Nov 23 |
nicklas |
69 |
calculate_contamination() |
7407 |
08 Nov 23 |
nicklas |
70 |
{ |
7407 |
08 Nov 23 |
nicklas |
71 |
if [ ! -f "done/pileup_tumor.done" ]; then |
7407 |
08 Nov 23 |
nicklas |
72 |
${WD}/stdwrap.sh ${GATK} GetPileupSummaries \ |
7407 |
08 Nov 23 |
nicklas |
73 |
-I recal/tumor.bam \ |
7409 |
08 Nov 23 |
nicklas |
74 |
-O pileup/tumor.table \ |
7407 |
08 Nov 23 |
nicklas |
75 |
${GetPileupSummariesOptions} \ |
7409 |
08 Nov 23 |
nicklas |
76 |
${CommonGATKOptions} \ |
7407 |
08 Nov 23 |
nicklas |
77 |
> pileup_tumor.out |
7407 |
08 Nov 23 |
nicklas |
78 |
touch "done/pileup_tumor.done" |
7407 |
08 Nov 23 |
nicklas |
79 |
fi |
7407 |
08 Nov 23 |
nicklas |
80 |
|
7407 |
08 Nov 23 |
nicklas |
81 |
if [ ! -f "done/pileup_normal.done" ]; then |
7407 |
08 Nov 23 |
nicklas |
82 |
${WD}/stdwrap.sh ${GATK} GetPileupSummaries \ |
7407 |
08 Nov 23 |
nicklas |
83 |
-I recal/normal.bam \ |
7409 |
08 Nov 23 |
nicklas |
84 |
-O pileup/normal.table \ |
7407 |
08 Nov 23 |
nicklas |
85 |
${GetPileupSummariesOptions} \ |
7409 |
08 Nov 23 |
nicklas |
86 |
${CommonGATKOptions} \ |
7407 |
08 Nov 23 |
nicklas |
87 |
> pileup_normal.out |
7407 |
08 Nov 23 |
nicklas |
88 |
touch "done/pileup_normal.done" |
7407 |
08 Nov 23 |
nicklas |
89 |
fi |
7407 |
08 Nov 23 |
nicklas |
90 |
|
7407 |
08 Nov 23 |
nicklas |
91 |
if [ ! -f "done/contamination.done" ]; then |
7407 |
08 Nov 23 |
nicklas |
92 |
${WD}/stdwrap.sh ${GATK} CalculateContamination \ |
7407 |
08 Nov 23 |
nicklas |
93 |
-I pileup/tumor.table \ |
7407 |
08 Nov 23 |
nicklas |
94 |
-matched pileup/normal.table \ |
7408 |
08 Nov 23 |
nicklas |
95 |
-O results/contamination.table \ |
7408 |
08 Nov 23 |
nicklas |
96 |
--tumor-segmentation results/segments.table \ |
7407 |
08 Nov 23 |
nicklas |
97 |
${CalculateContaminationOptions} \ |
7409 |
08 Nov 23 |
nicklas |
98 |
${CommonGATKOptions} \ |
7407 |
08 Nov 23 |
nicklas |
99 |
> contamination.out |
7407 |
08 Nov 23 |
nicklas |
100 |
|
7407 |
08 Nov 23 |
nicklas |
101 |
touch "done/contamination.done" |
7407 |
08 Nov 23 |
nicklas |
102 |
fi |
7407 |
08 Nov 23 |
nicklas |
103 |
} |
7407 |
08 Nov 23 |
nicklas |
104 |
|
7406 |
08 Nov 23 |
nicklas |
105 |
mutect2() |
7406 |
08 Nov 23 |
nicklas |
106 |
{ |
7406 |
08 Nov 23 |
nicklas |
107 |
local CHR=$1 |
7406 |
08 Nov 23 |
nicklas |
108 |
local VCF_OUT=vcf/${CHR}.vcf.gz |
7406 |
08 Nov 23 |
nicklas |
109 |
local F1R2_OUT=f1r2/${CHR}.tar.gz |
7406 |
08 Nov 23 |
nicklas |
110 |
|
7406 |
08 Nov 23 |
nicklas |
111 |
if [ ! -f "done/mutect2_${CHR}.done" ]; then |
7406 |
08 Nov 23 |
nicklas |
112 |
${WD}/stdwrap.sh ${GATK} Mutect2 \ |
7406 |
08 Nov 23 |
nicklas |
113 |
-R ${REF_FASTA} \ |
7406 |
08 Nov 23 |
nicklas |
114 |
-I recal/tumor.bam \ |
7406 |
08 Nov 23 |
nicklas |
115 |
-I recal/normal.bam \ |
7406 |
08 Nov 23 |
nicklas |
116 |
-normal ${NormalSampleName} \ |
7406 |
08 Nov 23 |
nicklas |
117 |
--f1r2-tar-gz ${F1R2_OUT} \ |
7406 |
08 Nov 23 |
nicklas |
118 |
-L ${CHR} \ |
7406 |
08 Nov 23 |
nicklas |
119 |
${Mutect2Options} \ |
7409 |
08 Nov 23 |
nicklas |
120 |
${CommonGATKOptions} \ |
7406 |
08 Nov 23 |
nicklas |
121 |
-O ${VCF_OUT} \ |
7406 |
08 Nov 23 |
nicklas |
122 |
> mutect2_${CHR}.out |
7406 |
08 Nov 23 |
nicklas |
123 |
echo "${CHR}.variants: `zcat ${VCF_OUT} | grep -c -v '^#'`" >> ${WD}/stats.out |
7406 |
08 Nov 23 |
nicklas |
124 |
echo "${CHR}.callable: `grep 'callable' ${VCF_OUT}.stats | cut -f 2`" >> ${WD}/stats.out |
7406 |
08 Nov 23 |
nicklas |
125 |
touch "done/mutect2_${CHR}.done" |
7406 |
08 Nov 23 |
nicklas |
126 |
fi |
7406 |
08 Nov 23 |
nicklas |
127 |
} |
7406 |
08 Nov 23 |
nicklas |
128 |
|
7406 |
08 Nov 23 |
nicklas |
## Move to the temporary working directory |
7406 |
08 Nov 23 |
nicklas |
130 |
cd ${TMPDIR} |
7406 |
08 Nov 23 |
nicklas |
131 |
mkdir -p bam |
7406 |
08 Nov 23 |
nicklas |
132 |
mkdir -p recal |
7407 |
08 Nov 23 |
nicklas |
133 |
mkdir -p pileup |
7406 |
08 Nov 23 |
nicklas |
134 |
mkdir -p f1r2 |
7406 |
08 Nov 23 |
nicklas |
135 |
mkdir -p vcf |
7406 |
08 Nov 23 |
nicklas |
136 |
mkdir -p done |
7408 |
08 Nov 23 |
nicklas |
137 |
mkdir -p results |
7406 |
08 Nov 23 |
nicklas |
138 |
|
7406 |
08 Nov 23 |
nicklas |
## Copy BAM file to local working directory |
7406 |
08 Nov 23 |
nicklas |
140 |
if [ ! -f "done/copy.done" ]; then |
7406 |
08 Nov 23 |
nicklas |
141 |
rg_progress 2 "Copying tumor BAM file" |
7406 |
08 Nov 23 |
nicklas |
142 |
cp ${TumorFolder}/${TumorBam}.bam bam/tumor.bam |
7406 |
08 Nov 23 |
nicklas |
143 |
cp ${TumorFolder}/${TumorBam}.bai bam/tumor.bai |
7406 |
08 Nov 23 |
nicklas |
144 |
rg_progress 4 "Copying normal BAM file" |
7406 |
08 Nov 23 |
nicklas |
145 |
cp ${NormalFolder}/${NormalBam}.bam bam/normal.bam |
7406 |
08 Nov 23 |
nicklas |
146 |
cp ${NormalFolder}/${NormalBam}.bai bam/normal.bai |
7406 |
08 Nov 23 |
nicklas |
147 |
touch "done/copy.done" |
7406 |
08 Nov 23 |
nicklas |
148 |
fi |
7406 |
08 Nov 23 |
nicklas |
149 |
|
7406 |
08 Nov 23 |
nicklas |
150 |
if [ ! -f "done/base_recalibrate_tumor.done" ]; then |
7406 |
08 Nov 23 |
nicklas |
151 |
rg_progress 5 "Base Recalibration (tumor): step 1 of 2 (${NumThreads} threads)" |
7406 |
08 Nov 23 |
nicklas |
152 |
base_recalibrate "tumor" |
7406 |
08 Nov 23 |
nicklas |
153 |
touch "done/base_recalibrate_tumor.done" |
7406 |
08 Nov 23 |
nicklas |
154 |
fi |
7406 |
08 Nov 23 |
nicklas |
155 |
|
7406 |
08 Nov 23 |
nicklas |
156 |
if [ ! -f "done/apply_bqsr_tumor.done" ]; then |
7406 |
08 Nov 23 |
nicklas |
157 |
rg_progress 6 "Base Recalibration (tumor): step 2 of 2 (${NumThreads} threads)" |
7406 |
08 Nov 23 |
nicklas |
158 |
apply_bqsr "tumor" |
7406 |
08 Nov 23 |
nicklas |
159 |
touch "done/apply_bqsr_tumor.done" |
7406 |
08 Nov 23 |
nicklas |
160 |
fi |
7406 |
08 Nov 23 |
nicklas |
161 |
|
7406 |
08 Nov 23 |
nicklas |
162 |
if [ ! -f "done/base_recalibrate_normal.done" ]; then |
7406 |
08 Nov 23 |
nicklas |
163 |
rg_progress 8 "Base Recalibration (normal): step 1 of 2 (${NumThreads} threads)" |
7406 |
08 Nov 23 |
nicklas |
164 |
base_recalibrate "normal" |
7406 |
08 Nov 23 |
nicklas |
165 |
touch "done/base_recalibrate_normal.done" |
7406 |
08 Nov 23 |
nicklas |
166 |
fi |
7406 |
08 Nov 23 |
nicklas |
167 |
|
7406 |
08 Nov 23 |
nicklas |
168 |
if [ ! -f "done/apply_bqsr_normal.done" ]; then |
7406 |
08 Nov 23 |
nicklas |
169 |
rg_progress 9 "Base Recalibration (normal): step 2 of 2 (${NumThreads} threads)" |
7406 |
08 Nov 23 |
nicklas |
170 |
apply_bqsr "normal" |
7406 |
08 Nov 23 |
nicklas |
171 |
touch "done/apply_bqsr_normal.done" |
7406 |
08 Nov 23 |
nicklas |
172 |
fi |
7406 |
08 Nov 23 |
nicklas |
173 |
|
7407 |
08 Nov 23 |
nicklas |
174 |
if [ ! -f "done/contamination.done" ]; then |
7407 |
08 Nov 23 |
nicklas |
## We start this in the background since it single-threaded and can run |
7407 |
08 Nov 23 |
nicklas |
## in parallel with Mutect2 |
7407 |
08 Nov 23 |
nicklas |
177 |
calculate_contamination & |
7407 |
08 Nov 23 |
nicklas |
178 |
CC_PID=$! |
7407 |
08 Nov 23 |
nicklas |
179 |
fi |
7407 |
08 Nov 23 |
nicklas |
180 |
|
7407 |
08 Nov 23 |
nicklas |
## Chromosomes for variant calling; start with X so we don't have to wait for it at the end |
7407 |
08 Nov 23 |
nicklas |
182 |
CHRS=(X 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 Y) |
7407 |
08 Nov 23 |
nicklas |
183 |
|
7406 |
08 Nov 23 |
nicklas |
184 |
if [ ! -f "done/mutect2.done" ]; then |
7406 |
08 Nov 23 |
nicklas |
185 |
CURRENT_STEP=0 |
7406 |
08 Nov 23 |
nicklas |
186 |
COMPLETED_STEPS=0 |
7406 |
08 Nov 23 |
nicklas |
187 |
NUM_STEPS=${#CHRS[@]} |
7406 |
08 Nov 23 |
nicklas |
## Each Mutect2 job can use up to 4 threads efficiently |
7406 |
08 Nov 23 |
nicklas |
## We add '3' to make sure JOBS_LIMIT>=1 |
7406 |
08 Nov 23 |
nicklas |
190 |
JOBS_LIMIT=$(( (${NumThreads}+3)/4 )) |
7406 |
08 Nov 23 |
nicklas |
191 |
JOBS_PIDS=() |
7406 |
08 Nov 23 |
nicklas |
192 |
for chr in "${CHRS[@]}" |
7406 |
08 Nov 23 |
nicklas |
193 |
do |
7406 |
08 Nov 23 |
nicklas |
194 |
if (( ${#JOBS_PIDS[@]} >= ${JOBS_LIMIT} )) |
7406 |
08 Nov 23 |
nicklas |
195 |
then |
7406 |
08 Nov 23 |
nicklas |
## Wait for at least one job to complete; errors will be trapped |
7406 |
08 Nov 23 |
nicklas |
197 |
wait -n -p PID ${!JOBS_PIDS[@]} |
7406 |
08 Nov 23 |
nicklas |
198 |
unset JOBS_PIDS[$PID] |
7406 |
08 Nov 23 |
nicklas |
199 |
COMPLETED_STEPS=$(( ${COMPLETED_STEPS}+1 )) |
7406 |
08 Nov 23 |
nicklas |
200 |
fi |
7406 |
08 Nov 23 |
nicklas |
201 |
|
7406 |
08 Nov 23 |
nicklas |
202 |
mutect2 "chr${chr}" & |
7406 |
08 Nov 23 |
nicklas |
203 |
JOBS_PIDS[$!]="chr${chr}" |
7406 |
08 Nov 23 |
nicklas |
204 |
|
7406 |
08 Nov 23 |
nicklas |
205 |
CURRENT_STEP=$(( ${CURRENT_STEP}+1 )) |
7406 |
08 Nov 23 |
nicklas |
206 |
PROGRESS=$(( 10+(${CURRENT_STEP}+${COMPLETED_STEPS})*35/${NUM_STEPS} )) |
7406 |
08 Nov 23 |
nicklas |
## We want to report the active jobs as comma-separated list |
7406 |
08 Nov 23 |
nicklas |
208 |
ACTIVE_JOBS="${JOBS_PIDS[*]}" |
7406 |
08 Nov 23 |
nicklas |
209 |
rg_progress ${PROGRESS} "Running Mutect2: ${COMPLETED_STEPS} of ${NUM_STEPS} completed, working on ${ACTIVE_JOBS// /, }" |
7406 |
08 Nov 23 |
nicklas |
210 |
|
7406 |
08 Nov 23 |
nicklas |
211 |
done |
7406 |
08 Nov 23 |
nicklas |
212 |
|
7406 |
08 Nov 23 |
nicklas |
## Wait for the remaining jobs to complete |
7406 |
08 Nov 23 |
nicklas |
214 |
while (( ${#JOBS_PIDS[@]} > 0 )) |
7406 |
08 Nov 23 |
nicklas |
215 |
do |
7406 |
08 Nov 23 |
nicklas |
216 |
wait -n -p PID ${!JOBS_PIDS[@]} |
7406 |
08 Nov 23 |
nicklas |
217 |
unset JOBS_PIDS[$PID] |
7406 |
08 Nov 23 |
nicklas |
218 |
|
7406 |
08 Nov 23 |
nicklas |
219 |
COMPLETED_STEPS=$(( ${COMPLETED_STEPS}+1 )) |
7407 |
08 Nov 23 |
nicklas |
220 |
PROGRESS=$(( 10+(${CURRENT_STEP}+${COMPLETED_STEPS})*35/${NUM_STEPS} )) |
7406 |
08 Nov 23 |
nicklas |
221 |
ACTIVE_JOBS="${JOBS_PIDS[*]}" |
7406 |
08 Nov 23 |
nicklas |
222 |
rg_progress ${PROGRESS} "Running Mutect2: ${COMPLETED_STEPS} of ${NUM_STEPS} completed, working on ${ACTIVE_JOBS// /, }" |
7406 |
08 Nov 23 |
nicklas |
223 |
done |
7406 |
08 Nov 23 |
nicklas |
224 |
|
7406 |
08 Nov 23 |
nicklas |
225 |
touch "done/mutect2.done" |
7406 |
08 Nov 23 |
nicklas |
226 |
fi |
7406 |
08 Nov 23 |
nicklas |
227 |
|
7407 |
08 Nov 23 |
nicklas |
228 |
if [ ! -f "done/learnreadorientation.done" ]; then |
7407 |
08 Nov 23 |
nicklas |
229 |
rg_progress 85 "Testing for orientation bias" |
7407 |
08 Nov 23 |
nicklas |
230 |
|
7407 |
08 Nov 23 |
nicklas |
231 |
ALL_F1R2="" |
7407 |
08 Nov 23 |
nicklas |
232 |
for chr in "${CHRS[@]}" |
7407 |
08 Nov 23 |
nicklas |
233 |
do |
7407 |
08 Nov 23 |
nicklas |
234 |
ALL_F1R2="${ALL_F1R2} -I f1r2/chr${chr}.tar.gz" |
7407 |
08 Nov 23 |
nicklas |
235 |
done |
7407 |
08 Nov 23 |
nicklas |
236 |
|
7407 |
08 Nov 23 |
nicklas |
237 |
${WD}/stdwrap.sh ${GATK} LearnReadOrientationModel \ |
7407 |
08 Nov 23 |
nicklas |
238 |
${ALL_F1R2} \ |
7407 |
08 Nov 23 |
nicklas |
239 |
-O f1r2/artifact.priors.tar.gz \ |
7407 |
08 Nov 23 |
nicklas |
240 |
${LearnReadOrientationModelOptions} \ |
7409 |
08 Nov 23 |
nicklas |
241 |
${CommonGATKOptions} \ |
7407 |
08 Nov 23 |
nicklas |
242 |
> learnreadorientationmodel.out |
7407 |
08 Nov 23 |
nicklas |
243 |
|
7407 |
08 Nov 23 |
nicklas |
244 |
touch "done/learnreadorientation.done" |
7407 |
08 Nov 23 |
nicklas |
245 |
fi |
7407 |
08 Nov 23 |
nicklas |
246 |
|
7407 |
08 Nov 23 |
nicklas |
247 |
if [ ! -f "done/contamination.done" ]; then |
7407 |
08 Nov 23 |
nicklas |
## We need to make sure that this step has ended |
7407 |
08 Nov 23 |
nicklas |
249 |
rg_progress 85 "Calculating contamination" |
7407 |
08 Nov 23 |
nicklas |
250 |
wait -n ${CC_PID} |
7407 |
08 Nov 23 |
nicklas |
251 |
touch "done/contamination.done" |
7407 |
08 Nov 23 |
nicklas |
252 |
fi |
7407 |
08 Nov 23 |
nicklas |
253 |
|
7408 |
08 Nov 23 |
nicklas |
254 |
if [ ! -f "done/filter.done" ]; then |
7408 |
08 Nov 23 |
nicklas |
255 |
rg_progress 90 "Filtering the variants" |
7407 |
08 Nov 23 |
nicklas |
256 |
|
7408 |
08 Nov 23 |
nicklas |
## Merge *.vcf and *.stats files |
7408 |
08 Nov 23 |
nicklas |
258 |
ALL_VCF="" |
7408 |
08 Nov 23 |
nicklas |
259 |
ALL_STATS="" |
7408 |
08 Nov 23 |
nicklas |
260 |
for chr in "${CHRS[@]}" |
7408 |
08 Nov 23 |
nicklas |
261 |
do |
7408 |
08 Nov 23 |
nicklas |
262 |
ALL_VCF="${ALL_VCF} -I vcf/chr${chr}.vcf.gz" |
7408 |
08 Nov 23 |
nicklas |
263 |
ALL_STATS="${ALL_STATS} -stats vcf/chr${chr}.vcf.gz.stats" |
7408 |
08 Nov 23 |
nicklas |
264 |
done |
7428 |
14 Nov 23 |
nicklas |
265 |
${WD}/stdwrap.sh ${GATK} MergeVcfs \ |
7428 |
14 Nov 23 |
nicklas |
266 |
${ALL_VCF} \ |
7428 |
14 Nov 23 |
nicklas |
267 |
-O vcf/variants-merged.vcf.gz \ |
7428 |
14 Nov 23 |
nicklas |
268 |
${MergeVcfsOptions} |
7428 |
14 Nov 23 |
nicklas |
269 |
> merge_vcfs.out |
7428 |
14 Nov 23 |
nicklas |
270 |
|
7428 |
14 Nov 23 |
nicklas |
## Get rid of '##contig' entries that we no longer use |
7428 |
14 Nov 23 |
nicklas |
## We assume that the first 24 entries are the same that |
7428 |
14 Nov 23 |
nicklas |
## are listed in the CHRS variable |
7428 |
14 Nov 23 |
nicklas |
274 |
echo "##fileformat=VCFv4.2" > contig.vcf |
7428 |
14 Nov 23 |
nicklas |
275 |
(${BCFTOOLS} view vcf/variants-merged.vcf.gz | grep '##contig' || true) | head -n ${#CHRS[@]} >> contig.vcf |
7428 |
14 Nov 23 |
nicklas |
276 |
echo "#CHROM POS ID REF ALT QUAL FILTER INFO" >> contig.vcf |
7428 |
14 Nov 23 |
nicklas |
277 |
${WD}/stdwrap.sh ${GATK} UpdateVCFSequenceDictionary \ |
7428 |
14 Nov 23 |
nicklas |
278 |
-V vcf/variants-merged.vcf.gz \ |
7428 |
14 Nov 23 |
nicklas |
279 |
-O vcf/variants-raw.vcf.gz \ |
7428 |
14 Nov 23 |
nicklas |
280 |
--source-dictionary contig.vcf \ |
7428 |
14 Nov 23 |
nicklas |
281 |
--replace \ |
7428 |
14 Nov 23 |
nicklas |
282 |
${CommonGATKOptions} \ |
7428 |
14 Nov 23 |
nicklas |
283 |
> remove_contig.out |
7428 |
14 Nov 23 |
nicklas |
284 |
|
7428 |
14 Nov 23 |
nicklas |
285 |
${WD}/stdwrap.sh ${GATK} MergeMutectStats \ |
7428 |
14 Nov 23 |
nicklas |
286 |
${ALL_STATS} \ |
7428 |
14 Nov 23 |
nicklas |
287 |
-O vcf/variants-raw.vcf.gz.stats \ |
7428 |
14 Nov 23 |
nicklas |
288 |
${CommonGATKOptions} \ |
7428 |
14 Nov 23 |
nicklas |
289 |
> merge_stats.out |
7408 |
08 Nov 23 |
nicklas |
290 |
|
7408 |
08 Nov 23 |
nicklas |
291 |
${WD}/stdwrap.sh ${GATK} FilterMutectCalls \ |
7408 |
08 Nov 23 |
nicklas |
292 |
-R ${REF_FASTA} \ |
7416 |
13 Nov 23 |
nicklas |
293 |
-V vcf/variants-raw.vcf.gz \ |
7408 |
08 Nov 23 |
nicklas |
294 |
--contamination-table results/contamination.table \ |
7408 |
08 Nov 23 |
nicklas |
295 |
--tumor-segmentation results/segments.table \ |
7416 |
13 Nov 23 |
nicklas |
296 |
--stats vcf/variants-raw.vcf.gz.stats \ |
7408 |
08 Nov 23 |
nicklas |
297 |
--orientation-bias-artifact-priors f1r2/artifact.priors.tar.gz \ |
7408 |
08 Nov 23 |
nicklas |
298 |
--filtering-stats results/filtering-stats.txt \ |
7416 |
13 Nov 23 |
nicklas |
299 |
-O results/variants-raw.vcf.gz \ |
7409 |
08 Nov 23 |
nicklas |
300 |
${FilterMutectCallsOptions} \ |
7409 |
08 Nov 23 |
nicklas |
301 |
${CommonGATKOptions} \ |
7408 |
08 Nov 23 |
nicklas |
302 |
> filter.out |
7408 |
08 Nov 23 |
nicklas |
303 |
|
7416 |
13 Nov 23 |
nicklas |
## FILTER=PASS |
7416 |
13 Nov 23 |
nicklas |
305 |
${BCFTOOLS} filter -i 'FILTER="PASS"' -Oz results/variants-raw.vcf.gz > vcf/variants-filtered.vcf.gz |
7416 |
13 Nov 23 |
nicklas |
306 |
${BCFTOOLS} index -t vcf/variants-filtered.vcf.gz |
7415 |
13 Nov 23 |
nicklas |
307 |
|
7408 |
08 Nov 23 |
nicklas |
308 |
touch "done/filter.done" |
7408 |
08 Nov 23 |
nicklas |
309 |
fi |
7408 |
08 Nov 23 |
nicklas |
310 |
|
7424 |
14 Nov 23 |
nicklas |
311 |
if [ "${FuncotatorOptions}" ]; then |
7424 |
14 Nov 23 |
nicklas |
312 |
NUM_ANNOTATION_STEPS=3 |
7424 |
14 Nov 23 |
nicklas |
313 |
else |
7424 |
14 Nov 23 |
nicklas |
314 |
NUM_ANNOTATION_STEPS=2 |
7424 |
14 Nov 23 |
nicklas |
315 |
fi |
7424 |
14 Nov 23 |
nicklas |
316 |
|
7424 |
14 Nov 23 |
nicklas |
317 |
if [ ! -f "done/vcfanno.done" ]; then |
7424 |
14 Nov 23 |
nicklas |
318 |
rg_progress 92 "Annotating the variants (vcfanno; step 1 of ${NUM_ANNOTATION_STEPS})" |
7424 |
14 Nov 23 |
nicklas |
319 |
${WD}/stderrwrap.sh ${VCFANNO} \ |
7424 |
14 Nov 23 |
nicklas |
320 |
${VcfannoOptions} \ |
7424 |
14 Nov 23 |
nicklas |
321 |
vcf/variants-filtered.vcf.gz \ |
7424 |
14 Nov 23 |
nicklas |
322 |
3> vcfanno.out \ |
7424 |
14 Nov 23 |
nicklas |
323 |
| bgzip -c > vcf/variants-annotated.vcf.gz |
7424 |
14 Nov 23 |
nicklas |
324 |
touch "done/vcfanno.done" |
7424 |
14 Nov 23 |
nicklas |
325 |
fi |
7424 |
14 Nov 23 |
nicklas |
326 |
|
7424 |
14 Nov 23 |
nicklas |
327 |
if [ ! -f "done/snpeff.done" ]; then |
7424 |
14 Nov 23 |
nicklas |
328 |
rg_progress 95 "Annotating the variants (snpEff; step 2 of ${NUM_ANNOTATION_STEPS})" |
7416 |
13 Nov 23 |
nicklas |
329 |
|
7424 |
14 Nov 23 |
nicklas |
330 |
${WD}/stderrwrap.sh ${JAVA} \ |
7424 |
14 Nov 23 |
nicklas |
331 |
-jar ${SNPEFF} \ |
7424 |
14 Nov 23 |
nicklas |
332 |
${SnpEffOptions} \ |
7424 |
14 Nov 23 |
nicklas |
333 |
vcf/variants-annotated.vcf.gz \ |
7424 |
14 Nov 23 |
nicklas |
334 |
3> snpeff.out \ |
7424 |
14 Nov 23 |
nicklas |
335 |
| bgzip -c > results/variants-somatic.vcf.gz |
7424 |
14 Nov 23 |
nicklas |
336 |
|
7416 |
13 Nov 23 |
nicklas |
337 |
${BCFTOOLS} index -t results/variants-somatic.vcf.gz |
7424 |
14 Nov 23 |
nicklas |
338 |
touch "done/snpeff.done" |
7424 |
14 Nov 23 |
nicklas |
339 |
fi |
7424 |
14 Nov 23 |
nicklas |
340 |
|
7424 |
14 Nov 23 |
nicklas |
## This step is optional and is only performed if the |
7424 |
14 Nov 23 |
nicklas |
## FuncotatorOptions variable is set |
7424 |
14 Nov 23 |
nicklas |
343 |
if [ "${FuncotatorOptions}" ]; then |
7424 |
14 Nov 23 |
nicklas |
344 |
if [ ! -f "done/funcotator.done" ]; then |
7424 |
14 Nov 23 |
nicklas |
345 |
rg_progress 96 "Annotating the variants (Funcotator; step 3 of ${NUM_ANNOTATION_STEPS})" |
7416 |
13 Nov 23 |
nicklas |
346 |
|
7424 |
14 Nov 23 |
nicklas |
347 |
${WD}/stdwrap.sh ${GATK} Funcotator \ |
7424 |
14 Nov 23 |
nicklas |
348 |
-R ${REF_FASTA} \ |
7424 |
14 Nov 23 |
nicklas |
349 |
-V results/variants-somatic.vcf.gz \ |
7424 |
14 Nov 23 |
nicklas |
350 |
-O vcf/variants-funcotator.vcf.gz \ |
7424 |
14 Nov 23 |
nicklas |
351 |
--output-file-format VCF \ |
7424 |
14 Nov 23 |
nicklas |
352 |
${FuncotatorOptions} \ |
7424 |
14 Nov 23 |
nicklas |
353 |
${CommonGATKOptions} \ |
7424 |
14 Nov 23 |
nicklas |
354 |
> funcotator.out |
7424 |
14 Nov 23 |
nicklas |
355 |
|
7424 |
14 Nov 23 |
nicklas |
356 |
\cp vcf/variants-funcotator.vcf.gz results/variants-somatic.vcf.gz |
7424 |
14 Nov 23 |
nicklas |
357 |
${BCFTOOLS} index -t results/variants-somatic.vcf.gz |
7424 |
14 Nov 23 |
nicklas |
358 |
|
7424 |
14 Nov 23 |
nicklas |
359 |
touch "done/funcotator.done" |
7424 |
14 Nov 23 |
nicklas |
360 |
fi |
7416 |
13 Nov 23 |
nicklas |
361 |
fi |
7415 |
13 Nov 23 |
nicklas |
362 |
|
7406 |
08 Nov 23 |
nicklas |
363 |
|
7424 |
14 Nov 23 |
nicklas |
364 |
rg_progress 98 "Copying result files to project archive" |
7424 |
14 Nov 23 |
nicklas |
365 |
|
7408 |
08 Nov 23 |
nicklas |
## Save files to the final location |
7410 |
10 Nov 23 |
nicklas |
367 |
mkdir -p ${VcallFolder} |
7410 |
10 Nov 23 |
nicklas |
368 |
rm -rf ${VcallFolder}/* |
7410 |
10 Nov 23 |
nicklas |
369 |
cp results/* ${VcallFolder} |
7410 |
10 Nov 23 |
nicklas |
370 |
ls -1 ${VcallFolder}/* > ${WD}/files.out |
7416 |
13 Nov 23 |
nicklas |
371 |
echo "Filtered variants: `zcat results/variants-somatic.vcf.gz | bcftools view -H -i 'FILTER=\"PASS\"' | wc -l`" >> ${WD}/stats.out |
7406 |
08 Nov 23 |
nicklas |
372 |
|
7406 |
08 Nov 23 |
nicklas |
373 |
rg_progress 99 "Analysis completed, cleaning up..." |