6814 |
26 Aug 22 |
nicklas |
#!/bin/bash |
6814 |
26 Aug 22 |
nicklas |
2 |
## |
6814 |
26 Aug 22 |
nicklas |
## Pipeline script for the Hisat/2023 alignment step. |
6814 |
26 Aug 22 |
nicklas |
4 |
## |
6814 |
26 Aug 22 |
nicklas |
5 |
## |
6814 |
26 Aug 22 |
nicklas |
## Environment variables that should be defined (in job.sh) before calling this script |
6814 |
26 Aug 22 |
nicklas |
## -FastqFolder: Path to folder with FASTQ files |
6814 |
26 Aug 22 |
nicklas |
## -HisatFolder: Folder for output. Will be created or cleared from existing files |
6814 |
26 Aug 22 |
nicklas |
## -BOWTIE: Path to BowTie program |
6814 |
26 Aug 22 |
nicklas |
## -RMidx: Path to reference genome used for masking |
6814 |
26 Aug 22 |
nicklas |
## -BowTieOptions: Other options for the BowTie program |
6814 |
26 Aug 22 |
nicklas |
## -HISAT: Path to Hisat program |
6814 |
26 Aug 22 |
nicklas |
## -Tidx: Path to the reference genome used for alignment |
6814 |
26 Aug 22 |
nicklas |
## -HisatOptions: Other options for the Hisat program |
6814 |
26 Aug 22 |
nicklas |
## -SAMTOOLS: Path to samtools program |
6814 |
26 Aug 22 |
nicklas |
## -JAVA: Path to java |
6814 |
26 Aug 22 |
nicklas |
## -PICARD: Path to Picard JAR file |
6814 |
26 Aug 22 |
nicklas |
## -MarkDuplicatesOptions: Options to the Picard MarkDuplicates step |
6814 |
26 Aug 22 |
nicklas |
## -GATK: Path to GATK JAR file |
6814 |
26 Aug 22 |
nicklas |
## -DBSNP: Path to VCF file with SNP variants that should be genotyped |
6814 |
26 Aug 22 |
nicklas |
## -HCRef: Path to reference genome FASTA file |
6814 |
26 Aug 22 |
nicklas |
## -HaplotypeCallerOptions: Other options for the the HaplotypeCaller program |
6814 |
26 Aug 22 |
nicklas |
## -TMPDIR: Path to a local temporary working directory |
6814 |
26 Aug 22 |
nicklas |
24 |
## |
6814 |
26 Aug 22 |
nicklas |
25 |
|
6814 |
26 Aug 22 |
nicklas |
26 |
set -e |
6814 |
26 Aug 22 |
nicklas |
27 |
|
6814 |
26 Aug 22 |
nicklas |
## Import utility functions |
6814 |
26 Aug 22 |
nicklas |
29 |
source ./reggie-utils.sh |
6814 |
26 Aug 22 |
nicklas |
30 |
|
6814 |
26 Aug 22 |
nicklas |
## Verify settings in options |
6814 |
26 Aug 22 |
nicklas |
32 |
rg_var_isdir "FastqFolder" "TMPDIR" "WD" |
6814 |
26 Aug 22 |
nicklas |
33 |
rg_var_isfile "BOWTIE" "HISAT" "SAMTOOLS" "JAVA" "PICARD" "GATK" "HCRef" "DBSNP" |
6814 |
26 Aug 22 |
nicklas |
34 |
rg_var_isset "RMidx" "HisatFolder" |
6814 |
26 Aug 22 |
nicklas |
35 |
|
6814 |
26 Aug 22 |
nicklas |
## Move to the temporary working directory and create subdirectories |
6814 |
26 Aug 22 |
nicklas |
37 |
cd ${TMPDIR} |
6814 |
26 Aug 22 |
nicklas |
38 |
mkdir -p fastq |
6814 |
26 Aug 22 |
nicklas |
39 |
mkdir -p masked |
6814 |
26 Aug 22 |
nicklas |
40 |
mkdir -p aligned |
6814 |
26 Aug 22 |
nicklas |
41 |
mkdir -p gatk_tmp |
6814 |
26 Aug 22 |
nicklas |
42 |
mkdir -p done |
6814 |
26 Aug 22 |
nicklas |
43 |
|
6814 |
26 Aug 22 |
nicklas |
## Copy FASTQ files to local working directory |
6814 |
26 Aug 22 |
nicklas |
45 |
if [ ! -f "done/copy.done" ]; then |
6814 |
26 Aug 22 |
nicklas |
46 |
rg_progress 10 "Copying FASTQ files" |
6814 |
26 Aug 22 |
nicklas |
47 |
cp ${FastqFolder}/*.fastq.gz fastq |
6814 |
26 Aug 22 |
nicklas |
48 |
touch "done/copy.done" |
6814 |
26 Aug 22 |
nicklas |
49 |
fi |
6814 |
26 Aug 22 |
nicklas |
50 |
|
6814 |
26 Aug 22 |
nicklas |
## Run Bowtie2 if masked/*.fastq.gz doesn't exists |
6814 |
26 Aug 22 |
nicklas |
52 |
if [ ! -f "done/mask.done" ]; then |
6814 |
26 Aug 22 |
nicklas |
53 |
rg_progress 20 "Running Bowtie2 (${NumThreads} threads)" |
6814 |
26 Aug 22 |
nicklas |
## Find R1 and R2 FASTQ files |
6814 |
26 Aug 22 |
nicklas |
55 |
FASTQ1=`find fastq -name "*_R1.fastq.gz" -print -quit 2> /dev/null` |
6814 |
26 Aug 22 |
nicklas |
56 |
FASTQ2=`find fastq -name "*_R2.fastq.gz" -print -quit 2> /dev/null` |
6814 |
26 Aug 22 |
nicklas |
57 |
rg_var_isfile "FASTQ1" "FASTQ2" |
6814 |
26 Aug 22 |
nicklas |
58 |
${WD}/stdwrap.sh ${BOWTIE} \ |
6814 |
26 Aug 22 |
nicklas |
59 |
-p ${NumThreads} \ |
6814 |
26 Aug 22 |
nicklas |
60 |
${BowTieOptions} \ |
6814 |
26 Aug 22 |
nicklas |
61 |
--un-conc-gz masked/R%.fastq.gz \ |
6814 |
26 Aug 22 |
nicklas |
62 |
-x ${RMidx} \ |
6814 |
26 Aug 22 |
nicklas |
63 |
-1 ${FASTQ1} \ |
6814 |
26 Aug 22 |
nicklas |
64 |
-2 ${FASTQ2} \ |
6814 |
26 Aug 22 |
nicklas |
65 |
-S /dev/null \ |
6814 |
26 Aug 22 |
nicklas |
66 |
> masked/masked.out |
6814 |
26 Aug 22 |
nicklas |
67 |
touch "done/mask.done" |
6814 |
26 Aug 22 |
nicklas |
68 |
fi |
6814 |
26 Aug 22 |
nicklas |
69 |
|
6814 |
26 Aug 22 |
nicklas |
## Run Hisat |
6814 |
26 Aug 22 |
nicklas |
71 |
if [ ! -f "done/align.done" ]; then |
6814 |
26 Aug 22 |
nicklas |
72 |
rg_progress 30 "Running Hisat (${NumThreads} threads)" |
6814 |
26 Aug 22 |
nicklas |
73 |
${WD}/stdwrap.sh ${HISAT} \ |
6814 |
26 Aug 22 |
nicklas |
74 |
-p ${NumThreads} \ |
6814 |
26 Aug 22 |
nicklas |
75 |
${HisatOptions} \ |
6814 |
26 Aug 22 |
nicklas |
76 |
-x ${Tidx} \ |
6814 |
26 Aug 22 |
nicklas |
77 |
-1 masked/R1.fastq.gz \ |
6814 |
26 Aug 22 |
nicklas |
78 |
-2 masked/R2.fastq.gz \ |
6814 |
26 Aug 22 |
nicklas |
79 |
-S aligned/alignment.sam \ |
6814 |
26 Aug 22 |
nicklas |
80 |
> aligned/hisat.out |
6814 |
26 Aug 22 |
nicklas |
81 |
touch "done/align.done" |
6814 |
26 Aug 22 |
nicklas |
82 |
fi |
6814 |
26 Aug 22 |
nicklas |
83 |
|
6814 |
26 Aug 22 |
nicklas |
## Sort alignment.sam and save to alignment.bam |
6814 |
26 Aug 22 |
nicklas |
85 |
if [ ! -f "done/sort.done" ]; then |
6814 |
26 Aug 22 |
nicklas |
86 |
rg_progress 50 "Sorting alignment file" |
6814 |
26 Aug 22 |
nicklas |
87 |
${SAMTOOLS} sort -@ ${NumThreads} \ |
6814 |
26 Aug 22 |
nicklas |
88 |
-o aligned/alignment.bam -O bam \ |
6814 |
26 Aug 22 |
nicklas |
89 |
-T aligned/tmp \ |
6814 |
26 Aug 22 |
nicklas |
90 |
aligned/alignment.sam |
6814 |
26 Aug 22 |
nicklas |
91 |
|
6814 |
26 Aug 22 |
nicklas |
92 |
rm -f aligned/alignment.sam |
6814 |
26 Aug 22 |
nicklas |
93 |
touch "done/sort.done" |
6814 |
26 Aug 22 |
nicklas |
94 |
fi |
6814 |
26 Aug 22 |
nicklas |
95 |
|
6814 |
26 Aug 22 |
nicklas |
96 |
|
6814 |
26 Aug 22 |
nicklas |
97 |
if [ ! -f "done/markduplicates.done" ]; then |
6814 |
26 Aug 22 |
nicklas |
98 |
rg_progress 60 "Running picard MarkDuplicates" |
6814 |
26 Aug 22 |
nicklas |
99 |
${WD}/stdwrap.sh ${JAVA} \ |
6814 |
26 Aug 22 |
nicklas |
100 |
-Dpicard.useLegacyParser=false ${JavaOptions} \ |
6814 |
26 Aug 22 |
nicklas |
101 |
-jar ${PICARD} MarkDuplicates \ |
6814 |
26 Aug 22 |
nicklas |
102 |
-INPUT aligned/alignment.bam \ |
6814 |
26 Aug 22 |
nicklas |
103 |
-OUTPUT aligned/alignment.bam.tmp_picard \ |
6814 |
26 Aug 22 |
nicklas |
104 |
-METRICS_FILE aligned/alignment_picardmetrics.csv \ |
6814 |
26 Aug 22 |
nicklas |
105 |
${MarkDuplicatesOptions} \ |
6814 |
26 Aug 22 |
nicklas |
106 |
> aligned/picard_MarkDuplicates.out |
6814 |
26 Aug 22 |
nicklas |
107 |
mv -f aligned/alignment.bam.tmp_picard aligned/alignment.bam |
6814 |
26 Aug 22 |
nicklas |
108 |
touch "done/markduplicates.done" |
6814 |
26 Aug 22 |
nicklas |
109 |
fi |
6814 |
26 Aug 22 |
nicklas |
110 |
|
6814 |
26 Aug 22 |
nicklas |
111 |
if [ ! -f "done/index.done" ]; then |
6814 |
26 Aug 22 |
nicklas |
112 |
rg_progress 80 "Creating index" |
6814 |
26 Aug 22 |
nicklas |
113 |
${SAMTOOLS} index -b aligned/alignment.bam aligned/alignment.bai |
6814 |
26 Aug 22 |
nicklas |
114 |
touch "done/index.done" |
6814 |
26 Aug 22 |
nicklas |
115 |
fi |
6814 |
26 Aug 22 |
nicklas |
116 |
|
6814 |
26 Aug 22 |
nicklas |
117 |
if [ ! -f "done/statistics.done" ]; then |
6814 |
26 Aug 22 |
nicklas |
118 |
rg_progress 85 "Calculating alignment statistics" |
6814 |
26 Aug 22 |
nicklas |
119 |
${WD}/alignment_statistics.sh aligned/alignment.bam > aligned/alignment_statistics.out |
6814 |
26 Aug 22 |
nicklas |
120 |
${SAMTOOLS} view aligned/alignment.bam | ${WD}/singlecolumnaverager.awk > aligned/fragments.out |
6814 |
26 Aug 22 |
nicklas |
121 |
touch "done/statistics.done" |
6814 |
26 Aug 22 |
nicklas |
122 |
fi |
6814 |
26 Aug 22 |
nicklas |
123 |
|
6814 |
26 Aug 22 |
nicklas |
## Genotype QC |
6814 |
26 Aug 22 |
nicklas |
125 |
if [ ! -f "done/genotypeqc.done" ]; then |
6814 |
26 Aug 22 |
nicklas |
126 |
rg_progress 90 "Running HaplotypeCaller" |
6814 |
26 Aug 22 |
nicklas |
127 |
${WD}/stdwrap.sh ${JAVA} ${JavaOptions} \ |
6814 |
26 Aug 22 |
nicklas |
128 |
-jar ${GATK} HaplotypeCaller \ |
6814 |
26 Aug 22 |
nicklas |
129 |
-R ${HCRef} \ |
6814 |
26 Aug 22 |
nicklas |
130 |
-L ${DBSNP} --dbsnp ${DBSNP} --alleles ${DBSNP} \ |
6814 |
26 Aug 22 |
nicklas |
131 |
--tmp-dir gatk_tmp \ |
6814 |
26 Aug 22 |
nicklas |
132 |
${HaplotypeCallerOptions} \ |
6814 |
26 Aug 22 |
nicklas |
133 |
-I aligned/alignment.bam \ |
6814 |
26 Aug 22 |
nicklas |
134 |
-O aligned/qc_genotype.vcf \ |
6814 |
26 Aug 22 |
nicklas |
135 |
> aligned/gatk_HaplotypeCaller.out |
6814 |
26 Aug 22 |
nicklas |
136 |
touch "done/genotypeqc.done" |
6814 |
26 Aug 22 |
nicklas |
137 |
fi |
6814 |
26 Aug 22 |
nicklas |
138 |
|
6814 |
26 Aug 22 |
nicklas |
139 |
rg_progress 95 "Copying result files to project archive" |
6814 |
26 Aug 22 |
nicklas |
140 |
cp masked/*.out ${WD} |
6814 |
26 Aug 22 |
nicklas |
141 |
cp aligned/*.out ${WD} |
6814 |
26 Aug 22 |
nicklas |
142 |
cp aligned/alignment_picardmetrics.csv ${WD} |
6814 |
26 Aug 22 |
nicklas |
143 |
mkdir -p ${HisatFolder} |
6814 |
26 Aug 22 |
nicklas |
144 |
rm -rf ${HisatFolder}/* |
6814 |
26 Aug 22 |
nicklas |
145 |
cp aligned/* ${HisatFolder} |
6814 |
26 Aug 22 |
nicklas |
146 |
ls -1 ${HisatFolder}/* >> ${WD}/files.out |
6814 |
26 Aug 22 |
nicklas |
147 |
|
6814 |
26 Aug 22 |
nicklas |
148 |
rg_progress 99 "Analysis completed, cleaning up..." |