7388 |
31 Oct 23 |
nicklas |
#!/bin/bash |
7388 |
31 Oct 23 |
nicklas |
2 |
## |
7388 |
31 Oct 23 |
nicklas |
## Pipeline script for WGS VariantCall analysis of panel-of-normals. |
7388 |
31 Oct 23 |
nicklas |
4 |
## |
7388 |
31 Oct 23 |
nicklas |
5 |
## |
7390 |
02 Nov 23 |
nicklas |
## Environment variables that should be defined (in job.sh) before calling this script |
7390 |
02 Nov 23 |
nicklas |
## -BamFolder: Path to folder where the alignment.bam is found |
7390 |
02 Nov 23 |
nicklas |
## -BamName: Base part of alignment BAM file (eg. alignment) |
7390 |
02 Nov 23 |
nicklas |
## -PonFolder: Folder for output of VCF files. Will be created or cleared from existing files. |
7390 |
02 Nov 23 |
nicklas |
## -GATK: Path to the gatk program |
7390 |
02 Nov 23 |
nicklas |
## -REF_FASTA: Path to the reference FASTA file |
7388 |
31 Oct 23 |
nicklas |
## -TMPDIR: Path to a local temporary working directory |
7390 |
02 Nov 23 |
nicklas |
## -BaseRecalibratorOptions: Options for the BaseRecalibratorSpark step |
7390 |
02 Nov 23 |
nicklas |
## -ApplyBQSROptions: Options for the ApplyBQSRSpark step |
7390 |
02 Nov 23 |
nicklas |
## -Mutect2Options: Options for the Mutect2 step |
7388 |
31 Oct 23 |
nicklas |
16 |
## |
7388 |
31 Oct 23 |
nicklas |
17 |
|
7388 |
31 Oct 23 |
nicklas |
18 |
set -eo pipefail |
7388 |
31 Oct 23 |
nicklas |
19 |
|
7388 |
31 Oct 23 |
nicklas |
## Import utility functions |
7388 |
31 Oct 23 |
nicklas |
21 |
source ./reggie-utils.sh |
7388 |
31 Oct 23 |
nicklas |
22 |
|
7388 |
31 Oct 23 |
nicklas |
## Verify settings in options |
7388 |
31 Oct 23 |
nicklas |
24 |
rg_var_isdir "BamFolder" "TMPDIR" "WD" |
7388 |
31 Oct 23 |
nicklas |
25 |
rg_var_isfile "GATK" "REF_FASTA" |
7389 |
01 Nov 23 |
nicklas |
26 |
rg_var_isset "BamName" "PonFolder" |
7388 |
31 Oct 23 |
nicklas |
27 |
rg_file_exists "${BamFolder}/${BamName}.bam" "${BamFolder}/${BamName}.bai" |
7388 |
31 Oct 23 |
nicklas |
28 |
|
7389 |
01 Nov 23 |
nicklas |
29 |
mutect2() |
7389 |
01 Nov 23 |
nicklas |
30 |
{ |
7389 |
01 Nov 23 |
nicklas |
31 |
local CHR=$1 |
7390 |
02 Nov 23 |
nicklas |
32 |
local VCF_OUT=vcf/${CHR}.vcf.gz |
7390 |
02 Nov 23 |
nicklas |
33 |
|
7389 |
01 Nov 23 |
nicklas |
34 |
if [ ! -f "done/mutect2_${CHR}.done" ]; then |
7389 |
01 Nov 23 |
nicklas |
35 |
${WD}/stdwrap.sh ${GATK} Mutect2 \ |
7389 |
01 Nov 23 |
nicklas |
36 |
-R ${REF_FASTA} \ |
7409 |
08 Nov 23 |
nicklas |
37 |
-I recal/normal.bam \ |
7389 |
01 Nov 23 |
nicklas |
38 |
--max-mnp-distance 0 \ |
7389 |
01 Nov 23 |
nicklas |
39 |
-L ${CHR} \ |
7389 |
01 Nov 23 |
nicklas |
40 |
${Mutect2Options} \ |
7409 |
08 Nov 23 |
nicklas |
41 |
${CommonGATKOptions} \ |
7390 |
02 Nov 23 |
nicklas |
42 |
-O ${VCF_OUT} \ |
7389 |
01 Nov 23 |
nicklas |
43 |
> mutect2_${CHR}.out |
7390 |
02 Nov 23 |
nicklas |
44 |
echo "${CHR}.variants: `zcat ${VCF_OUT} | grep -c -v '^#'`" >> ${WD}/stats.out |
7390 |
02 Nov 23 |
nicklas |
45 |
echo "${CHR}.callable: `grep 'callable' ${VCF_OUT}.stats | cut -f 2`" >> ${WD}/stats.out |
7389 |
01 Nov 23 |
nicklas |
46 |
touch "done/mutect2_${CHR}.done" |
7389 |
01 Nov 23 |
nicklas |
47 |
fi |
7389 |
01 Nov 23 |
nicklas |
48 |
} |
7389 |
01 Nov 23 |
nicklas |
49 |
|
7388 |
31 Oct 23 |
nicklas |
## Move to the temporary working directory |
7388 |
31 Oct 23 |
nicklas |
51 |
cd ${TMPDIR} |
7390 |
02 Nov 23 |
nicklas |
52 |
mkdir -p bam |
7390 |
02 Nov 23 |
nicklas |
53 |
mkdir -p recal |
7389 |
01 Nov 23 |
nicklas |
54 |
mkdir -p vcf |
7388 |
31 Oct 23 |
nicklas |
55 |
mkdir -p done |
7388 |
31 Oct 23 |
nicklas |
56 |
|
7389 |
01 Nov 23 |
nicklas |
57 |
|
7390 |
02 Nov 23 |
nicklas |
## Copy BAM file to local working directory |
7390 |
02 Nov 23 |
nicklas |
59 |
if [ ! -f "done/copy.done" ]; then |
7390 |
02 Nov 23 |
nicklas |
60 |
rg_progress 2 "Copying BAM file" |
7390 |
02 Nov 23 |
nicklas |
61 |
cp ${BamFolder}/${BamName}.bam bam/normal.bam |
7390 |
02 Nov 23 |
nicklas |
62 |
cp ${BamFolder}/${BamName}.bai bam/normal.bai |
7390 |
02 Nov 23 |
nicklas |
63 |
touch "done/copy.done" |
7390 |
02 Nov 23 |
nicklas |
64 |
fi |
7388 |
31 Oct 23 |
nicklas |
65 |
|
7388 |
31 Oct 23 |
nicklas |
66 |
if [ ! -f "done/base_recalibrate.done" ]; then |
7390 |
02 Nov 23 |
nicklas |
67 |
rg_progress 6 "Base Recalibration: step 1 of 2 (${NumThreads} threads)" |
7388 |
31 Oct 23 |
nicklas |
68 |
|
7388 |
31 Oct 23 |
nicklas |
69 |
${WD}/stdwrap.sh ${GATK} BaseRecalibratorSpark \ |
7388 |
31 Oct 23 |
nicklas |
70 |
-R ${REF_FASTA} \ |
7390 |
02 Nov 23 |
nicklas |
71 |
-I bam/normal.bam \ |
7409 |
08 Nov 23 |
nicklas |
72 |
-O recal/normal.table \ |
7388 |
31 Oct 23 |
nicklas |
73 |
${BaseRecalibratorOptions} \ |
7409 |
08 Nov 23 |
nicklas |
74 |
${CommonGATKOptions} \ |
7388 |
31 Oct 23 |
nicklas |
75 |
> recalibrate.out |
7388 |
31 Oct 23 |
nicklas |
76 |
|
7388 |
31 Oct 23 |
nicklas |
77 |
touch "done/base_recalibrate.done" |
7388 |
31 Oct 23 |
nicklas |
78 |
fi |
7388 |
31 Oct 23 |
nicklas |
79 |
|
7388 |
31 Oct 23 |
nicklas |
80 |
if [ ! -f "done/apply_bqsr.done" ]; then |
7389 |
01 Nov 23 |
nicklas |
81 |
rg_progress 10 "Base Recalibration: step 2 of 2 (${NumThreads} threads)" |
7388 |
31 Oct 23 |
nicklas |
82 |
|
7388 |
31 Oct 23 |
nicklas |
83 |
${WD}/stdwrap.sh ${GATK} --java-options "-Dsamjdk.compression_level=6" ApplyBQSRSpark \ |
7388 |
31 Oct 23 |
nicklas |
84 |
-R ${REF_FASTA} \ |
7390 |
02 Nov 23 |
nicklas |
85 |
-I bam/normal.bam \ |
7409 |
08 Nov 23 |
nicklas |
86 |
--bqsr-recal-file recal/normal.table \ |
7409 |
08 Nov 23 |
nicklas |
87 |
-O recal/normal.bam \ |
7388 |
31 Oct 23 |
nicklas |
88 |
${ApplyBQSROptions} \ |
7409 |
08 Nov 23 |
nicklas |
89 |
${CommonGATKOptions} \ |
7388 |
31 Oct 23 |
nicklas |
90 |
> apply_bqsr.out |
7388 |
31 Oct 23 |
nicklas |
91 |
|
7388 |
31 Oct 23 |
nicklas |
92 |
touch "done/apply_bqsr.done" |
7388 |
31 Oct 23 |
nicklas |
93 |
fi |
7388 |
31 Oct 23 |
nicklas |
94 |
|
7389 |
01 Nov 23 |
nicklas |
95 |
if [ ! -f "done/mutect2.done" ]; then |
7390 |
02 Nov 23 |
nicklas |
## Chromosomes for variant calling; start with X so we don't have to wait for it at the end |
7391 |
03 Nov 23 |
nicklas |
97 |
CHRS=(X 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 Y) |
7389 |
01 Nov 23 |
nicklas |
98 |
CURRENT_STEP=0 |
7390 |
02 Nov 23 |
nicklas |
99 |
COMPLETED_STEPS=0 |
7389 |
01 Nov 23 |
nicklas |
100 |
NUM_STEPS=${#CHRS[@]} |
7390 |
02 Nov 23 |
nicklas |
## Each Mutect2 job can use up to 4 threads efficiently |
7390 |
02 Nov 23 |
nicklas |
## We add '3' to make sure JOBS_LIMIT>=1 |
7390 |
02 Nov 23 |
nicklas |
103 |
JOBS_LIMIT=$(( (${NumThreads}+3)/4 )) |
7390 |
02 Nov 23 |
nicklas |
104 |
JOBS_PIDS=() |
7389 |
01 Nov 23 |
nicklas |
105 |
for chr in "${CHRS[@]}" |
7389 |
01 Nov 23 |
nicklas |
106 |
do |
7390 |
02 Nov 23 |
nicklas |
107 |
if (( ${#JOBS_PIDS[@]} >= ${JOBS_LIMIT} )) |
7390 |
02 Nov 23 |
nicklas |
108 |
then |
7390 |
02 Nov 23 |
nicklas |
## Wait for at least one job to complete; errors will be trapped |
7390 |
02 Nov 23 |
nicklas |
110 |
wait -n -p PID ${!JOBS_PIDS[@]} |
7390 |
02 Nov 23 |
nicklas |
111 |
unset JOBS_PIDS[$PID] |
7390 |
02 Nov 23 |
nicklas |
112 |
COMPLETED_STEPS=$(( ${COMPLETED_STEPS}+1 )) |
7390 |
02 Nov 23 |
nicklas |
113 |
fi |
7390 |
02 Nov 23 |
nicklas |
114 |
|
7390 |
02 Nov 23 |
nicklas |
115 |
mutect2 "chr${chr}" & |
7390 |
02 Nov 23 |
nicklas |
116 |
JOBS_PIDS[$!]="chr${chr}" |
7390 |
02 Nov 23 |
nicklas |
117 |
|
7389 |
01 Nov 23 |
nicklas |
118 |
CURRENT_STEP=$(( ${CURRENT_STEP}+1 )) |
7391 |
03 Nov 23 |
nicklas |
119 |
PROGRESS=$(( 20+(${CURRENT_STEP}+${COMPLETED_STEPS})*35/${NUM_STEPS} )) |
7390 |
02 Nov 23 |
nicklas |
## We want to report the active jobs as comma-separated list |
7390 |
02 Nov 23 |
nicklas |
121 |
ACTIVE_JOBS="${JOBS_PIDS[*]}" |
7391 |
03 Nov 23 |
nicklas |
122 |
rg_progress ${PROGRESS} "Running Mutect2: ${COMPLETED_STEPS} of ${NUM_STEPS} completed, working on ${ACTIVE_JOBS// /, }" |
7390 |
02 Nov 23 |
nicklas |
123 |
|
7389 |
01 Nov 23 |
nicklas |
124 |
done |
7389 |
01 Nov 23 |
nicklas |
125 |
|
7390 |
02 Nov 23 |
nicklas |
## Wait for the remaining jobs to complete |
7390 |
02 Nov 23 |
nicklas |
127 |
while (( ${#JOBS_PIDS[@]} > 0 )) |
7390 |
02 Nov 23 |
nicklas |
128 |
do |
7390 |
02 Nov 23 |
nicklas |
129 |
wait -n -p PID ${!JOBS_PIDS[@]} |
7390 |
02 Nov 23 |
nicklas |
130 |
unset JOBS_PIDS[$PID] |
7390 |
02 Nov 23 |
nicklas |
131 |
|
7390 |
02 Nov 23 |
nicklas |
132 |
COMPLETED_STEPS=$(( ${COMPLETED_STEPS}+1 )) |
7390 |
02 Nov 23 |
nicklas |
133 |
PROGRESS=$(( 15+(${CURRENT_STEP}+${COMPLETED_STEPS})*35/${NUM_STEPS} )) |
7390 |
02 Nov 23 |
nicklas |
134 |
ACTIVE_JOBS="${JOBS_PIDS[*]}" |
7391 |
03 Nov 23 |
nicklas |
135 |
rg_progress ${PROGRESS} "Running Mutect2: ${COMPLETED_STEPS} of ${NUM_STEPS} completed, working on ${ACTIVE_JOBS// /, }" |
7390 |
02 Nov 23 |
nicklas |
136 |
done |
7390 |
02 Nov 23 |
nicklas |
137 |
|
7389 |
01 Nov 23 |
nicklas |
138 |
touch "done/mutect2.done" |
7389 |
01 Nov 23 |
nicklas |
139 |
fi |
7389 |
01 Nov 23 |
nicklas |
140 |
|
7391 |
03 Nov 23 |
nicklas |
141 |
rg_progress 95 "Copying result files to project archive" |
7391 |
03 Nov 23 |
nicklas |
142 |
|
7391 |
03 Nov 23 |
nicklas |
## Merge the VCF files and save to the final location |
7389 |
01 Nov 23 |
nicklas |
144 |
mkdir -p ${PonFolder} |
7390 |
02 Nov 23 |
nicklas |
145 |
rm -f ${PonFolder}/* |
7391 |
03 Nov 23 |
nicklas |
146 |
ls -1 vcf/*.vcf.gz > all.list |
7415 |
13 Nov 23 |
nicklas |
147 |
${WD}/stdwrap.sh ${GATK} MergeVcfs -I all.list -O ${PonFolder}/all.vcf.gz --QUIET --VERBOSITY WARNING |
7388 |
31 Oct 23 |
nicklas |
148 |
|
7388 |
31 Oct 23 |
nicklas |
149 |
rg_progress 99 "Analysis completed, cleaning up..." |