6630 |
07 Mar 22 |
nicklas |
#!/bin/bash |
6628 |
07 Mar 22 |
nicklas |
2 |
## |
6628 |
07 Mar 22 |
nicklas |
## Pipeline script for the mBaf analysis. It requires an 'alignment.bam' file and |
6628 |
07 Mar 22 |
nicklas |
## outputs genotypes in 'mbaf_genotype.vcf' to the same directory. |
6628 |
07 Mar 22 |
nicklas |
5 |
## |
6631 |
08 Mar 22 |
nicklas |
## Environment variables that should be defined (in job.sh) before calling this script |
6628 |
07 Mar 22 |
nicklas |
## -BamFolder: Path to folder where alignment.bam is found (results will also be saved here) |
6628 |
07 Mar 22 |
nicklas |
## -BamName: Base part of alignment BAM file (eg. alignment) |
6628 |
07 Mar 22 |
nicklas |
## -JAVA: Path to JAVA program |
6628 |
07 Mar 22 |
nicklas |
## -GATK: Path to GATK JAR file |
6628 |
07 Mar 22 |
nicklas |
## -DBSNP: Path to VCF file with SNP variants that should be genotyped |
6640 |
11 Mar 22 |
nicklas |
## -HCRef: Path to reference genome FASTA file |
6628 |
07 Mar 22 |
nicklas |
## -HaplotypeCallerOptions: Other options for the HaplotypeCaller program |
6628 |
07 Mar 22 |
nicklas |
## -TMPDIR: Path to a local temporary working directory |
6628 |
07 Mar 22 |
nicklas |
15 |
## |
6628 |
07 Mar 22 |
nicklas |
16 |
|
6628 |
07 Mar 22 |
nicklas |
17 |
set -e |
6628 |
07 Mar 22 |
nicklas |
18 |
|
6628 |
07 Mar 22 |
nicklas |
## Import utility functions |
6628 |
07 Mar 22 |
nicklas |
20 |
source ./reggie-utils.sh |
6628 |
07 Mar 22 |
nicklas |
21 |
|
6628 |
07 Mar 22 |
nicklas |
## Verify settings in options |
6653 |
23 Mar 22 |
nicklas |
23 |
rg_var_isdir "BamFolder" "TMPDIR" "WD" |
6653 |
23 Mar 22 |
nicklas |
24 |
rg_var_isfile "JAVA" "GATK" "DBSNP" "HCRef" |
6653 |
23 Mar 22 |
nicklas |
25 |
rg_var_isset "BamName" |
6628 |
07 Mar 22 |
nicklas |
26 |
rg_file_exists "${BamFolder}/${BamName}.bam" "${BamFolder}/${BamName}.bai" |
6628 |
07 Mar 22 |
nicklas |
27 |
|
6628 |
07 Mar 22 |
nicklas |
## Move to the temporary working directory |
6628 |
07 Mar 22 |
nicklas |
29 |
cd ${TMPDIR} |
6669 |
06 Apr 22 |
nicklas |
30 |
mkdir -p bam |
6669 |
06 Apr 22 |
nicklas |
31 |
mkdir -p mbaf |
6669 |
06 Apr 22 |
nicklas |
32 |
mkdir -p done |
6628 |
07 Mar 22 |
nicklas |
33 |
|
6628 |
07 Mar 22 |
nicklas |
## Copy BAM file to local working directory |
6669 |
06 Apr 22 |
nicklas |
35 |
if [ ! -f "done/copy.done" ]; then |
6669 |
06 Apr 22 |
nicklas |
36 |
rg_progress 10 "Copying BAM file" |
6669 |
06 Apr 22 |
nicklas |
37 |
cp ${BamFolder}/${BamName}.bam bam/alignment.bam |
6669 |
06 Apr 22 |
nicklas |
38 |
cp ${BamFolder}/${BamName}.bai bam/alignment.bai |
6669 |
06 Apr 22 |
nicklas |
39 |
touch "done/copy.done" |
6669 |
06 Apr 22 |
nicklas |
40 |
fi |
6628 |
07 Mar 22 |
nicklas |
41 |
|
6628 |
07 Mar 22 |
nicklas |
## Run GATK HaplotypeCaller |
6669 |
06 Apr 22 |
nicklas |
43 |
if [ ! -f "done/haplotypecaller.done" ]; then |
6669 |
06 Apr 22 |
nicklas |
44 |
rg_progress 20 "Running HaplotypeCaller" |
6669 |
06 Apr 22 |
nicklas |
45 |
${WD}/stdwrap.sh ${JAVA} ${JavaOptions} \ |
6669 |
06 Apr 22 |
nicklas |
46 |
-jar ${GATK} -T HaplotypeCaller \ |
6669 |
06 Apr 22 |
nicklas |
47 |
-R ${HCRef} \ |
6669 |
06 Apr 22 |
nicklas |
48 |
--genotyping_mode GENOTYPE_GIVEN_ALLELES \ |
6669 |
06 Apr 22 |
nicklas |
49 |
-L ${DBSNP} --dbsnp ${DBSNP} --alleles ${DBSNP} \ |
6669 |
06 Apr 22 |
nicklas |
50 |
${HaplotypeCallerOptions} \ |
6669 |
06 Apr 22 |
nicklas |
51 |
-I bam/alignment.bam \ |
6669 |
06 Apr 22 |
nicklas |
52 |
-o mbaf/mbaf_genotype.vcf \ |
6669 |
06 Apr 22 |
nicklas |
53 |
> mbaf/mbaf_HaplotypeCaller.out |
6669 |
06 Apr 22 |
nicklas |
54 |
touch "done/haplotypecaller.done" |
6669 |
06 Apr 22 |
nicklas |
55 |
fi |
6628 |
07 Mar 22 |
nicklas |
56 |
|
6628 |
07 Mar 22 |
nicklas |
57 |
rg_progress 90 "Copying result files to project archive" |
6628 |
07 Mar 22 |
nicklas |
58 |
\cp mbaf/mbaf* ${BamFolder} |
6628 |
07 Mar 22 |
nicklas |
59 |
ls -1 mbaf/mbaf* > ${WD}/files.out |
6628 |
07 Mar 22 |
nicklas |
60 |
|
6640 |
11 Mar 22 |
nicklas |
61 |
rg_progress 99 "Analysis completed, cleaning up..." |