6656 |
29 Mar 22 |
nicklas |
#!/bin/bash |
6656 |
29 Mar 22 |
nicklas |
2 |
## |
6656 |
29 Mar 22 |
nicklas |
## Pipeline script for the VariantCall analysis. It consists of two parts: |
6656 |
29 Mar 22 |
nicklas |
## a raw variant calling part, and an annotation and filtering part. Two |
6656 |
29 Mar 22 |
nicklas |
## environment variables control which steps that should be done: |
6656 |
29 Mar 22 |
nicklas |
6 |
## |
6656 |
29 Mar 22 |
nicklas |
## -RawCallingVcf: Path to an existing raw variant calling VCF that should be re-used |
6656 |
29 Mar 22 |
nicklas |
## for annotating and filtering. If not set, raw variant calling is done. |
6656 |
29 Mar 22 |
nicklas |
## -FilteredFolder: Folder to store the results of annotating and filtering the raw calling. |
6656 |
29 Mar 22 |
nicklas |
## If not set this step is skipped. |
6656 |
29 Mar 22 |
nicklas |
11 |
## |
6656 |
29 Mar 22 |
nicklas |
## Parameters that are used for raw calling: |
6656 |
29 Mar 22 |
nicklas |
## -BamFolder: Path to folder where alignment.bam is found (the raw VCF is saved here) |
6656 |
29 Mar 22 |
nicklas |
## -BamName: Base part of alignment BAM file (eg. alignment) |
6656 |
29 Mar 22 |
nicklas |
## -MOSDEPTH: Path to the mosdepth program |
6656 |
29 Mar 22 |
nicklas |
## -BEDTOOLS: Path to the bedtools program |
6656 |
29 Mar 22 |
nicklas |
## -VARDICT: Path to the VarDict java program |
6656 |
29 Mar 22 |
nicklas |
## -VCFANNO: Path to the vcfanno program |
6656 |
29 Mar 22 |
nicklas |
## -Genome: Path to the reference genome |
6656 |
29 Mar 22 |
nicklas |
## -MosdepthOptions: Options for the mosdepth progarm |
6656 |
29 Mar 22 |
nicklas |
## -MinCallableDepth: Minimum coverage at a location for calling |
6656 |
29 Mar 22 |
nicklas |
## -VarDictOptions: Options to the VarDict program |
6656 |
29 Mar 22 |
nicklas |
## -Var2VcfOptions: Options to the VarDict step that convert results to VCF file |
6656 |
29 Mar 22 |
nicklas |
24 |
## |
6656 |
29 Mar 22 |
nicklas |
## Parameters that are used for annotating and filtering: |
6656 |
29 Mar 22 |
nicklas |
## -JAVA: Path to java |
6656 |
29 Mar 22 |
nicklas |
## -VCFANNO: Path to the vcfanno program |
6656 |
29 Mar 22 |
nicklas |
## -SNPEFF: Path to the SnpEff JAR file |
6656 |
29 Mar 22 |
nicklas |
## -SNPSIFT: Path to the SnpSift JAR file |
6656 |
29 Mar 22 |
nicklas |
## -MutationSignature: Path to the RData file for mutation signature |
6656 |
29 Mar 22 |
nicklas |
## -VcfannoOptions: Options to the vcfanno program |
6656 |
29 Mar 22 |
nicklas |
## -SnpEffOptions: Options to the SnpSift program |
6656 |
29 Mar 22 |
nicklas |
## -SnpSiftOptions: Options to the SnpSift program |
6656 |
29 Mar 22 |
nicklas |
## -AlignedName: External name of the alignment |
6656 |
29 Mar 22 |
nicklas |
35 |
## |
6656 |
29 Mar 22 |
nicklas |
## Other parameters: |
6656 |
29 Mar 22 |
nicklas |
## -TMPDIR: Path to a local temporary working directory |
6656 |
29 Mar 22 |
nicklas |
38 |
## |
6656 |
29 Mar 22 |
nicklas |
39 |
|
6656 |
29 Mar 22 |
nicklas |
40 |
set -eo pipefail |
6656 |
29 Mar 22 |
nicklas |
41 |
|
6656 |
29 Mar 22 |
nicklas |
## Import utility functions |
6656 |
29 Mar 22 |
nicklas |
43 |
source ./reggie-utils.sh |
6656 |
29 Mar 22 |
nicklas |
44 |
source ./variantcall-utils.sh |
6656 |
29 Mar 22 |
nicklas |
45 |
|
6656 |
29 Mar 22 |
nicklas |
## Verify settings in options |
6656 |
29 Mar 22 |
nicklas |
47 |
rg_var_isdir "TMPDIR" "WD" |
6656 |
29 Mar 22 |
nicklas |
48 |
if [ "${RawCallingVcf}" ]; then |
6656 |
29 Mar 22 |
nicklas |
49 |
rg_file_exists "${RawCallingVcf}" |
6656 |
29 Mar 22 |
nicklas |
50 |
else |
6656 |
29 Mar 22 |
nicklas |
51 |
rg_var_isdir "BamFolder" |
6656 |
29 Mar 22 |
nicklas |
52 |
rg_var_isfile "MOSDEPTH" "BEDTOOLS" "VARDICT" "Genome" "VCFANNO" |
6656 |
29 Mar 22 |
nicklas |
53 |
rg_var_isset "BamName" "MinCallableDepth" "VarDictOptions" "Var2VcfOptions" |
6656 |
29 Mar 22 |
nicklas |
54 |
rg_file_exists "${BamFolder}/${BamName}.bam" "${BamFolder}/${BamName}.bai" |
6656 |
29 Mar 22 |
nicklas |
55 |
fi |
6656 |
29 Mar 22 |
nicklas |
56 |
if [ "${FilteredFolder}" ]; then |
6656 |
29 Mar 22 |
nicklas |
57 |
rg_var_isfile "VCFANNO" "SNPEFF" "SNPSIFT" "MutationSignature" |
6656 |
29 Mar 22 |
nicklas |
58 |
rg_var_isset "VcfannoOptions" "SnpEffOptions" "SnpSiftOptions" "AlignedName" |
6656 |
29 Mar 22 |
nicklas |
59 |
fi |
6656 |
29 Mar 22 |
nicklas |
60 |
|
6656 |
29 Mar 22 |
nicklas |
## Move to the temporary working directory |
6656 |
29 Mar 22 |
nicklas |
62 |
cd ${TMPDIR} |
6669 |
06 Apr 22 |
nicklas |
63 |
mkdir -p bam |
6669 |
06 Apr 22 |
nicklas |
64 |
mkdir -p mosdepth |
6669 |
06 Apr 22 |
nicklas |
65 |
mkdir -p tmp |
6669 |
06 Apr 22 |
nicklas |
66 |
mkdir -p resultsraw |
6669 |
06 Apr 22 |
nicklas |
67 |
mkdir -p resultsfilter |
6669 |
06 Apr 22 |
nicklas |
68 |
mkdir -p done |
6656 |
29 Mar 22 |
nicklas |
69 |
|
6656 |
29 Mar 22 |
nicklas |
70 |
if [ "${RawCallingVcf}" ]; then |
6656 |
29 Mar 22 |
nicklas |
# We use the existing raw calling file |
6656 |
29 Mar 22 |
nicklas |
72 |
zcat ${RawCallingVcf} > tmp/variants-raw-2.vcf |
6656 |
29 Mar 22 |
nicklas |
73 |
else |
6656 |
29 Mar 22 |
nicklas |
## Copy BAM file to local working directory |
6669 |
06 Apr 22 |
nicklas |
75 |
if [ ! -f "done/copy.done" ]; then |
6669 |
06 Apr 22 |
nicklas |
76 |
rg_progress 10 "Copying BAM file" |
6669 |
06 Apr 22 |
nicklas |
77 |
cp ${BamFolder}/${BamName}.bam bam/alignment.bam |
6669 |
06 Apr 22 |
nicklas |
78 |
cp ${BamFolder}/${BamName}.bai bam/alignment.bai |
6669 |
06 Apr 22 |
nicklas |
79 |
touch "done/copy.done" |
6669 |
06 Apr 22 |
nicklas |
80 |
fi |
6656 |
29 Mar 22 |
nicklas |
81 |
|
6656 |
29 Mar 22 |
nicklas |
# Find regions with depth >= 5 using mosdepth (and some help from awk) |
6669 |
06 Apr 22 |
nicklas |
83 |
if [ ! -f "done/mosdepth.done" ]; then |
6669 |
06 Apr 22 |
nicklas |
84 |
rg_progress 20 "Finding callable regions" |
6669 |
06 Apr 22 |
nicklas |
85 |
find_callable_regions ${MinCallableDepth} bam/alignment.bam resultsraw/variants-callable.bed |
6669 |
06 Apr 22 |
nicklas |
86 |
touch "done/mosdepth.done" |
6669 |
06 Apr 22 |
nicklas |
87 |
fi |
6656 |
29 Mar 22 |
nicklas |
88 |
|
6669 |
06 Apr 22 |
nicklas |
89 |
if [ ! -f "done/vardict.done" ]; then |
6669 |
06 Apr 22 |
nicklas |
90 |
rg_progress 30 "Running VarDict (${NumThreads} threads)" |
6669 |
06 Apr 22 |
nicklas |
91 |
find_variants bam/alignment.bam resultsraw/variants-callable.bed tmp/variants-raw-1.vcf |
6669 |
06 Apr 22 |
nicklas |
92 |
touch "done/vardict.done" |
6669 |
06 Apr 22 |
nicklas |
93 |
fi |
6656 |
29 Mar 22 |
nicklas |
94 |
|
6669 |
06 Apr 22 |
nicklas |
95 |
if [ ! -f "done/gccontent.done" ]; then |
6669 |
06 Apr 22 |
nicklas |
96 |
NUMVARIANTS=`find_num_variants_in_vcf tmp/variants-raw-1.vcf` |
6669 |
06 Apr 22 |
nicklas |
97 |
if [ ${NUMVARIANTS} -gt 0 ]; then |
6669 |
06 Apr 22 |
nicklas |
98 |
gc_content tmp/variants-raw-1.vcf |
6669 |
06 Apr 22 |
nicklas |
99 |
vcf_annotate tmp/variants-raw-1.vcf tmp/gc_stat.toml tmp/variants-raw-2.vcf |
6669 |
06 Apr 22 |
nicklas |
100 |
else |
6669 |
06 Apr 22 |
nicklas |
# No variants found, simply copy the raw file |
6669 |
06 Apr 22 |
nicklas |
102 |
cp tmp/variants-raw-1.vcf tmp/variants-raw-2.vcf |
6669 |
06 Apr 22 |
nicklas |
103 |
fi |
6669 |
06 Apr 22 |
nicklas |
104 |
touch "done/gccontent.done" |
6656 |
29 Mar 22 |
nicklas |
105 |
fi |
6669 |
06 Apr 22 |
nicklas |
106 |
|
6669 |
06 Apr 22 |
nicklas |
107 |
if [ ! -f "done/rawcalling.done" ]; then |
6669 |
06 Apr 22 |
nicklas |
# We now have the raw variant calling file |
6669 |
06 Apr 22 |
nicklas |
109 |
cat tmp/variants-raw-2.vcf | bgzip -c > resultsraw/variants-raw.vcf.gz |
6669 |
06 Apr 22 |
nicklas |
110 |
touch "done/rawcalling.done" |
6669 |
06 Apr 22 |
nicklas |
111 |
fi |
6656 |
29 Mar 22 |
nicklas |
112 |
fi |
6656 |
29 Mar 22 |
nicklas |
113 |
|
6656 |
29 Mar 22 |
nicklas |
114 |
if [ "${FilteredFolder}" ]; then |
6656 |
29 Mar 22 |
nicklas |
# We should annotate and filter the raw calling |
6656 |
29 Mar 22 |
nicklas |
116 |
NUMVARIANTS=`find_num_variants_in_vcf tmp/variants-raw-2.vcf` |
6656 |
29 Mar 22 |
nicklas |
117 |
if [ $NUMVARIANTS -gt 0 ]; then |
6656 |
29 Mar 22 |
nicklas |
# Annotate variants in two steps, first with vcfanno and several databases |
6669 |
06 Apr 22 |
nicklas |
119 |
if [ ! -f "done/vcfanno.done" ]; then |
6669 |
06 Apr 22 |
nicklas |
120 |
rg_progress 60 "Annotating variants (vcfanno; step 1 of 2)" |
6669 |
06 Apr 22 |
nicklas |
121 |
vcf_annotate tmp/variants-raw-2.vcf "${VcfannoOptions}" tmp/variants-annotated-2.vcf |
6669 |
06 Apr 22 |
nicklas |
122 |
touch "done/vcfanno.done" |
6669 |
06 Apr 22 |
nicklas |
123 |
fi |
6656 |
29 Mar 22 |
nicklas |
124 |
|
6656 |
29 Mar 22 |
nicklas |
# Second annotation step with snpEff |
6669 |
06 Apr 22 |
nicklas |
126 |
if [ ! -f "done/snpeff.done" ]; then |
6669 |
06 Apr 22 |
nicklas |
127 |
rg_progress 70 "Annotating variants (snpEff; step 2 of 2)" |
6669 |
06 Apr 22 |
nicklas |
128 |
snpeff_annotate tmp/variants-annotated-2.vcf "${SnpEffOptions}" resultsfilter/variants-annotated.vcf.gz |
6669 |
06 Apr 22 |
nicklas |
129 |
touch "done/snpeff.done" |
6669 |
06 Apr 22 |
nicklas |
130 |
fi |
6656 |
29 Mar 22 |
nicklas |
131 |
|
6669 |
06 Apr 22 |
nicklas |
132 |
if [ ! -f "done/snpsift.done" ]; then |
6669 |
06 Apr 22 |
nicklas |
133 |
rg_progress 80 "Filtering variants (SnpSift)" |
6669 |
06 Apr 22 |
nicklas |
134 |
snpsift_filter resultsfilter/variants-annotated.vcf.gz "${SnpSiftOptions}" resultsfilter/variants-filtered.vcf |
6669 |
06 Apr 22 |
nicklas |
135 |
touch "done/snpsift.done" |
6656 |
29 Mar 22 |
nicklas |
136 |
fi |
6669 |
06 Apr 22 |
nicklas |
137 |
|
6669 |
06 Apr 22 |
nicklas |
138 |
if [ ! -f "done/mutsign.done" ]; then |
6669 |
06 Apr 22 |
nicklas |
139 |
NUMFILTERED=`find_num_variants_in_vcf resultsfilter/variants-filtered.vcf` |
6669 |
06 Apr 22 |
nicklas |
140 |
if [ $NUMFILTERED -gt 0 ]; then |
6669 |
06 Apr 22 |
nicklas |
141 |
${RSCRIPT} ${WD}/mutation_signature.R ${AlignedName} \ |
6669 |
06 Apr 22 |
nicklas |
142 |
resultsfilter/variants-filtered.vcf \ |
6669 |
06 Apr 22 |
nicklas |
143 |
${MutationSignature} \ |
6669 |
06 Apr 22 |
nicklas |
144 |
resultsfilter/mutation_signature.pdf \ |
6669 |
06 Apr 22 |
nicklas |
145 |
> resultsfilter/mutation_signature.txt |
6669 |
06 Apr 22 |
nicklas |
146 |
fi |
6669 |
06 Apr 22 |
nicklas |
147 |
touch "done/mutsign.done" |
6669 |
06 Apr 22 |
nicklas |
148 |
fi |
6656 |
29 Mar 22 |
nicklas |
149 |
else |
6656 |
29 Mar 22 |
nicklas |
# No variants in the raw VCF file -- we make a copies of it 'variants-annotated.vcf.gz' and 'variants-filtered.vcf' |
6656 |
29 Mar 22 |
nicklas |
151 |
cat tmp/variants-raw-2.vcf | bgzip -c > resultsfilter/variants-annotated.vcf.gz |
6656 |
29 Mar 22 |
nicklas |
152 |
cp tmp/variants-raw-2.vcf resultsfilter/variants-filtered.vcf |
6656 |
29 Mar 22 |
nicklas |
153 |
fi |
6656 |
29 Mar 22 |
nicklas |
154 |
fi |
6656 |
29 Mar 22 |
nicklas |
155 |
|
6656 |
29 Mar 22 |
nicklas |
156 |
rg_progress 90 "Copying result files to project archive" |
6656 |
29 Mar 22 |
nicklas |
157 |
if [ ! "${RawCallingVcf}" ]; then |
6656 |
29 Mar 22 |
nicklas |
158 |
\cp resultsraw/variants-* ${BamFolder} |
6656 |
29 Mar 22 |
nicklas |
159 |
ls -1 resultsraw/variants-* >> ${WD}/files.out |
6656 |
29 Mar 22 |
nicklas |
160 |
echo "Callable bases: `awk -F'\\t' 'BEGIN{SUM=0}{ SUM+=$3-$2 } END{print SUM}' resultsraw/variants-callable.bed`" >> ${WD}/stats.out |
6656 |
29 Mar 22 |
nicklas |
161 |
echo "Raw variants: `zcat resultsraw/variants-raw.vcf.gz | grep -c -v '^#'`" >> ${WD}/stats.out |
6656 |
29 Mar 22 |
nicklas |
162 |
fi |
6656 |
29 Mar 22 |
nicklas |
163 |
if [ "${FilteredFolder}" ]; then |
6656 |
29 Mar 22 |
nicklas |
164 |
mkdir -p ${FilteredFolder} |
6656 |
29 Mar 22 |
nicklas |
165 |
rm -rf ${FilteredFolder}/* |
6656 |
29 Mar 22 |
nicklas |
166 |
cp resultsfilter/* ${FilteredFolder} |
6656 |
29 Mar 22 |
nicklas |
167 |
ls -1 ${FilteredFolder}/* >> ${WD}/files.out |
6656 |
29 Mar 22 |
nicklas |
168 |
echo "Annotated variants: `zcat resultsfilter/variants-annotated.vcf.gz | grep -c -v '^#'`" >> ${WD}/stats.out |
6656 |
29 Mar 22 |
nicklas |
169 |
echo "Filtered variants: `cat resultsfilter/variants-filtered.vcf | grep -c -v '^#'`" >> ${WD}/stats.out |
6656 |
29 Mar 22 |
nicklas |
170 |
fi |
6656 |
29 Mar 22 |
nicklas |
171 |
|
6656 |
29 Mar 22 |
nicklas |
172 |
rg_progress 99 "Analysis completed, cleaning up..." |