6656 |
29 Mar 22 |
nicklas |
# $Id $ |
6656 |
29 Mar 22 |
nicklas |
2 |
# |
6656 |
29 Mar 22 |
nicklas |
# Utility functions for variant calling. |
6656 |
29 Mar 22 |
nicklas |
4 |
# |
6656 |
29 Mar 22 |
nicklas |
# source ./variantcall-utils.sh |
6656 |
29 Mar 22 |
nicklas |
6 |
|
6656 |
29 Mar 22 |
nicklas |
# Find number of variants in a VCF file. |
6656 |
29 Mar 22 |
nicklas |
# Parameters: |
6656 |
29 Mar 22 |
nicklas |
# $1: The name of the VCF file |
6656 |
29 Mar 22 |
nicklas |
10 |
find_num_variants_in_vcf() |
6656 |
29 Mar 22 |
nicklas |
11 |
{ |
6656 |
29 Mar 22 |
nicklas |
12 |
local VCF_FILE=$1 |
6656 |
29 Mar 22 |
nicklas |
# The ' || true' is critical since 'grep' will exit with code 1 if there are no matches |
6656 |
29 Mar 22 |
nicklas |
14 |
local NUM_VARIANTS=`grep -c -v '^#' ${VCF_FILE} || true` |
6656 |
29 Mar 22 |
nicklas |
15 |
echo "${NUM_VARIANTS}" |
6656 |
29 Mar 22 |
nicklas |
16 |
} |
6656 |
29 Mar 22 |
nicklas |
17 |
|
6656 |
29 Mar 22 |
nicklas |
# Find regions that are "callable" in the BAM file |
6656 |
29 Mar 22 |
nicklas |
# Parameters: |
6656 |
29 Mar 22 |
nicklas |
# $1: Required depth at callable locations |
6656 |
29 Mar 22 |
nicklas |
# $2: Aligned BAM file |
6656 |
29 Mar 22 |
nicklas |
# $3: Output BED file with all callable regions |
6656 |
29 Mar 22 |
nicklas |
23 |
find_callable_regions() |
6656 |
29 Mar 22 |
nicklas |
24 |
{ |
6656 |
29 Mar 22 |
nicklas |
25 |
local MIN_DEPTH=$1 |
6656 |
29 Mar 22 |
nicklas |
26 |
local BAM_FILE=$2 |
6656 |
29 Mar 22 |
nicklas |
27 |
local BED_FILE=$3 |
6656 |
29 Mar 22 |
nicklas |
28 |
${MOSDEPTH} ${MosdepthOptions} \ |
6656 |
29 Mar 22 |
nicklas |
29 |
mosdepth/alignment \ |
6656 |
29 Mar 22 |
nicklas |
30 |
${BAM_FILE} \ |
6656 |
29 Mar 22 |
nicklas |
31 |
> mosdepth/mosdepth.out |
6656 |
29 Mar 22 |
nicklas |
32 |
zcat mosdepth/alignment.per-base.bed.gz \ |
6656 |
29 Mar 22 |
nicklas |
33 |
| awk "\$4 >= ${MIN_DEPTH}" \ |
6656 |
29 Mar 22 |
nicklas |
34 |
| ${BEDTOOLS} merge \ |
6656 |
29 Mar 22 |
nicklas |
35 |
> ${BED_FILE} |
6656 |
29 Mar 22 |
nicklas |
36 |
} |
6656 |
29 Mar 22 |
nicklas |
37 |
|
6656 |
29 Mar 22 |
nicklas |
# Find variants. Parameters: |
6656 |
29 Mar 22 |
nicklas |
# $1: Aligned BAM file |
6656 |
29 Mar 22 |
nicklas |
# $2: BED file that define regions where variants should be called |
6656 |
29 Mar 22 |
nicklas |
# $3: Path to output VCF file |
6656 |
29 Mar 22 |
nicklas |
42 |
find_variants() |
6656 |
29 Mar 22 |
nicklas |
43 |
{ |
6656 |
29 Mar 22 |
nicklas |
44 |
local BAM_FILE=$1 |
6656 |
29 Mar 22 |
nicklas |
45 |
local BED_FILE=$2 |
6656 |
29 Mar 22 |
nicklas |
46 |
local OUT_FILE=$3 |
6656 |
29 Mar 22 |
nicklas |
47 |
|
6656 |
29 Mar 22 |
nicklas |
# If we don't have explicit paths to teststrandbias.R |
6656 |
29 Mar 22 |
nicklas |
# and var2vcf_valid.pl we assume that they are in the same |
6656 |
29 Mar 22 |
nicklas |
# directory as vardict-java |
6656 |
29 Mar 22 |
nicklas |
51 |
if [ ! "${VARDICT_TESTSTRANDBIAS}" ]; then |
6656 |
29 Mar 22 |
nicklas |
52 |
VARDICT_TESTSTRANDBIAS="${VARDICT%/*}/teststrandbias.R" |
6656 |
29 Mar 22 |
nicklas |
53 |
fi |
6656 |
29 Mar 22 |
nicklas |
54 |
if [ ! "${VARDICT_VAR2VCF}" ]; then |
6656 |
29 Mar 22 |
nicklas |
55 |
VARDICT_VAR2VCF="${VARDICT%/*}/var2vcf_valid.pl" |
6656 |
29 Mar 22 |
nicklas |
56 |
fi |
6656 |
29 Mar 22 |
nicklas |
57 |
|
6656 |
29 Mar 22 |
nicklas |
58 |
${VARDICT} -th ${NumThreads} \ |
6656 |
29 Mar 22 |
nicklas |
59 |
-G ${Genome} \ |
6656 |
29 Mar 22 |
nicklas |
60 |
${VarDictOptions} \ |
6656 |
29 Mar 22 |
nicklas |
61 |
-b ${BAM_FILE} \ |
6656 |
29 Mar 22 |
nicklas |
62 |
${BED_FILE} \ |
6656 |
29 Mar 22 |
nicklas |
63 |
| ${VARDICT_TESTSTRANDBIAS} \ |
6656 |
29 Mar 22 |
nicklas |
64 |
| ${VARDICT_VAR2VCF} \ |
6656 |
29 Mar 22 |
nicklas |
65 |
-N ${AlignedName} \ |
6656 |
29 Mar 22 |
nicklas |
66 |
${Var2VcfOptions} \ |
6656 |
29 Mar 22 |
nicklas |
67 |
> ${OUT_FILE} |
6656 |
29 Mar 22 |
nicklas |
68 |
} |
6656 |
29 Mar 22 |
nicklas |
69 |
|
6656 |
29 Mar 22 |
nicklas |
# Calculate GC content and DNA strength using custom script |
6656 |
29 Mar 22 |
nicklas |
# We need to get the reference sequence within 50 base-pairs of each entry in the VCF |
6656 |
29 Mar 22 |
nicklas |
# and run that through gc_stat.py which produces a VCF file that vcfanno can use |
6656 |
29 Mar 22 |
nicklas |
# to add back annotations to the VCF file from VarDict |
6656 |
29 Mar 22 |
nicklas |
# Parameters: |
6656 |
29 Mar 22 |
nicklas |
# $1: Input VCF file |
6656 |
29 Mar 22 |
nicklas |
76 |
gc_content() |
6656 |
29 Mar 22 |
nicklas |
77 |
{ |
6656 |
29 Mar 22 |
nicklas |
78 |
local VCF_IN=$1 |
6656 |
29 Mar 22 |
nicklas |
79 |
awk '{OFS="\t"; if (!/^#/){print $1,($2<50)?0:$2-50,$2+50,$2,$4,$5}}' ${VCF_IN} > tmp/gc-50.bed |
6656 |
29 Mar 22 |
nicklas |
80 |
${BEDTOOLS} getfasta -fi ${Genome} -bed tmp/gc-50.bed -bedOut > tmp/gc-50-fasta.bed |
6656 |
29 Mar 22 |
nicklas |
81 |
cat tmp/gc-50-fasta.bed | ${WD}/gc_stat.py | bgzip -c > tmp/gc-50.vcf.gz |
6656 |
29 Mar 22 |
nicklas |
82 |
tabix tmp/gc-50.vcf.gz |
6656 |
29 Mar 22 |
nicklas |
# Create gc_stat.toml file so that we can annotate the VCF with GC content |
6656 |
29 Mar 22 |
nicklas |
84 |
echo '[[annotation]]' > tmp/gc_stat.toml |
6656 |
29 Mar 22 |
nicklas |
85 |
echo 'file="tmp/gc-50.vcf.gz"' >> tmp/gc_stat.toml |
6656 |
29 Mar 22 |
nicklas |
86 |
echo 'fields=["GC_CONT","GC_CONT_LOCAL","STRENGTH","STRENGTH_LOCAL"]' >> tmp/gc_stat.toml |
6656 |
29 Mar 22 |
nicklas |
87 |
echo 'names=["GC_CONT","GC_CONT_LOCAL","STRENGTH","STRENGTH_LOCAL"]' >> tmp/gc_stat.toml |
6656 |
29 Mar 22 |
nicklas |
88 |
echo 'ops=["self","self","self","self"]' >> tmp/gc_stat.toml |
6656 |
29 Mar 22 |
nicklas |
89 |
} |
6656 |
29 Mar 22 |
nicklas |
90 |
|
6656 |
29 Mar 22 |
nicklas |
# Annotate a VCF file with 'vcfanno' |
6656 |
29 Mar 22 |
nicklas |
# Parameters: |
6656 |
29 Mar 22 |
nicklas |
# $1: Input VCF file |
6656 |
29 Mar 22 |
nicklas |
# $2: Options to vcfanno |
6656 |
29 Mar 22 |
nicklas |
# $3: Output VCF file |
6656 |
29 Mar 22 |
nicklas |
96 |
vcf_annotate() |
6656 |
29 Mar 22 |
nicklas |
97 |
{ |
6656 |
29 Mar 22 |
nicklas |
98 |
local VCF_IN=$1 |
6656 |
29 Mar 22 |
nicklas |
99 |
local OPTIONS=$2 |
6656 |
29 Mar 22 |
nicklas |
100 |
local VCF_OUT=$3 |
6656 |
29 Mar 22 |
nicklas |
101 |
${WD}/stderrwrap.sh ${VCFANNO} ${OPTIONS} ${VCF_IN} > ${VCF_OUT} 3>> vcfanno.out |
6656 |
29 Mar 22 |
nicklas |
102 |
} |
6656 |
29 Mar 22 |
nicklas |
103 |
|
6656 |
29 Mar 22 |
nicklas |
# Annotate a VCF file with 'snpEff' |
6656 |
29 Mar 22 |
nicklas |
# Parameters: |
6656 |
29 Mar 22 |
nicklas |
# $1: Input VCF file |
6656 |
29 Mar 22 |
nicklas |
# $2: Options to snpEff |
6656 |
29 Mar 22 |
nicklas |
# $3: Output VCF file is compressed with bgzip |
6656 |
29 Mar 22 |
nicklas |
109 |
snpeff_annotate() |
6656 |
29 Mar 22 |
nicklas |
110 |
{ |
6656 |
29 Mar 22 |
nicklas |
111 |
local VCF_IN=$1 |
6656 |
29 Mar 22 |
nicklas |
112 |
local OPTIONS=$2 |
6656 |
29 Mar 22 |
nicklas |
113 |
local VCF_OUT=$3 |
6657 |
30 Mar 22 |
nicklas |
114 |
if [ "${VCF_OUT: -3}" == ".gz" ]; then |
6665 |
05 Apr 22 |
nicklas |
115 |
local COMPRESS="bgzip -c" |
6657 |
30 Mar 22 |
nicklas |
116 |
fi |
6656 |
29 Mar 22 |
nicklas |
117 |
${WD}/stderrwrap.sh ${JAVA} ${JavaOptions} \ |
6656 |
29 Mar 22 |
nicklas |
118 |
-jar ${SNPEFF} \ |
6656 |
29 Mar 22 |
nicklas |
119 |
${OPTIONS} ${VCF_IN} \ |
6657 |
30 Mar 22 |
nicklas |
120 |
3>> snpeff.out \ |
6665 |
05 Apr 22 |
nicklas |
121 |
| ${COMPRESS:-cat} > ${VCF_OUT} |
6656 |
29 Mar 22 |
nicklas |
122 |
} |
6656 |
29 Mar 22 |
nicklas |
123 |
|
6656 |
29 Mar 22 |
nicklas |
# Filter a VCF file with 'SnpSift' |
6656 |
29 Mar 22 |
nicklas |
# Parameters: |
6656 |
29 Mar 22 |
nicklas |
# $1: Input VCF file |
6656 |
29 Mar 22 |
nicklas |
# $2: Options to SnpSift |
6656 |
29 Mar 22 |
nicklas |
# $3: Output VCF file (not compressed) |
6656 |
29 Mar 22 |
nicklas |
129 |
snpsift_filter() |
6656 |
29 Mar 22 |
nicklas |
130 |
{ |
6656 |
29 Mar 22 |
nicklas |
131 |
local VCF_IN=$1 |
6656 |
29 Mar 22 |
nicklas |
132 |
local OPTIONS=$2 |
6656 |
29 Mar 22 |
nicklas |
133 |
local VCF_OUT=$3 |
6656 |
29 Mar 22 |
nicklas |
134 |
|
6656 |
29 Mar 22 |
nicklas |
135 |
${WD}/stderrwrap.sh ${JAVA} ${JavaOptions} \ |
6656 |
29 Mar 22 |
nicklas |
136 |
-jar ${SNPSIFT} filter \ |
6656 |
29 Mar 22 |
nicklas |
137 |
${OPTIONS} ${VCF_IN} \ |
6656 |
29 Mar 22 |
nicklas |
138 |
3> snpsift.out \ |
6656 |
29 Mar 22 |
nicklas |
139 |
> ${VCF_OUT} |
6656 |
29 Mar 22 |
nicklas |
140 |
} |