6658 |
31 Mar 22 |
nicklas |
#!/bin/bash |
6658 |
31 Mar 22 |
nicklas |
2 |
## |
6658 |
31 Mar 22 |
nicklas |
## Pipeline script for the FASTQ import step. |
6658 |
31 Mar 22 |
nicklas |
4 |
## |
6658 |
31 Mar 22 |
nicklas |
5 |
## |
6658 |
31 Mar 22 |
nicklas |
## Environment variables that should be defined (in job.sh) before calling this script |
6658 |
31 Mar 22 |
nicklas |
## -AllRunArchives: White-space separated list of locations to search for sequencing data |
6658 |
31 Mar 22 |
nicklas |
## -JAVA: Path to Java |
6658 |
31 Mar 22 |
nicklas |
## -PICARD: Path to Picard JAR file |
6658 |
31 Mar 22 |
nicklas |
## -PICARD_MEMORY: Amount of memory to assign to Java when running Picard |
6658 |
31 Mar 22 |
nicklas |
## -BOWTIE: Path to BowTie program |
6658 |
31 Mar 22 |
nicklas |
## -BowTieOptions: Other options for the BowTie program |
6658 |
31 Mar 22 |
nicklas |
## -Gidx: Path to reference genome used for alignment with BowTie |
6658 |
31 Mar 22 |
nicklas |
## -TRIMMOMATIC: Path to Trimmomatic JAR file |
6658 |
31 Mar 22 |
nicklas |
## -TrimmomaticImportOptions: Options for the first Trimmomatic step (adapter filter) |
6658 |
31 Mar 22 |
nicklas |
## -TrimmomaticQualityOptions: Options for the second Trimmomatic step (quality filter) |
6658 |
31 Mar 22 |
nicklas |
17 |
|
6658 |
31 Mar 22 |
nicklas |
18 |
## Stop the script as soon as any command returns a non-zero exit status
set -o errexit

## Load the helper scripts; both paths are relative to the directory the
## script is started from
. ./reggie-utils.sh
. ./demux-utils.sh
6658 |
31 Mar 22 |
nicklas |
23 |
|
6658 |
31 Mar 22 |
nicklas |
## Verify settings in options |
6658 |
31 Mar 22 |
nicklas |
25 |
## Check the settings this script relies on before any work is started.
## The rg_var_* helpers are not defined in this file (presumably they come
## from reggie-utils.sh -- confirm); each call passes variable NAMES, not values.
rg_var_isdir TMPDIR WD ImportGateway
rg_var_isfile BOWTIE TRIMMOMATIC
rg_var_isset BowTieOptions Gidx TrimmomaticImportOptions TrimmomaticQualityOptions
rg_var_isset ImportArchive MergedName BaseFileName FASTQ1 FASTQ2 FastqFolder
6658 |
31 Mar 22 |
nicklas |
29 |
|
6658 |
31 Mar 22 |
nicklas |
## Move to the temporary working directory and create subdirectories |
6658 |
31 Mar 22 |
nicklas |
31 |
## Move to the temporary working directory and create the subdirectories
## used by the steps below ('done' holds the per-step marker files).
## 'mkdir -p' keeps this safe to repeat: a plain 'mkdir' fails when the
## directory already exists, which would abort a re-run in the same TMPDIR
## before any of the 'done/*.done' markers could be consulted.
cd "${TMPDIR}"
mkdir -p fastq fastq.aligned fastq.trim.1 fastq.trim.2 done
6658 |
31 Mar 22 |
nicklas |
37 |
|
6658 |
31 Mar 22 |
nicklas |
# Copy FASTQ files to tmp folder |
6669 |
06 Apr 22 |
nicklas |
39 |
# Copy the R1 FASTQ file into the 'fastq' work folder. The step is skipped
# when its marker file done/copy_R1.done already exists.
# copy_from_import_archive is defined outside this file; the last two
# arguments are passed through as-is and may be empty.
if [[ ! -f "done/copy_R1.done" ]]; then
  rg_progress 10 "Copying FASTQ files: ${FASTQ1}"
  # Destination is quoted so a BaseFileName with spaces or glob characters
  # stays a single argument
  copy_from_import_archive "${FASTQ1}" "fastq/${BaseFileName}_R1.fastq.gz" "${RemoveFASTQFromGateway}" "${MaxFASTQLines}"
  touch "done/copy_R1.done"
fi
6669 |
06 Apr 22 |
nicklas |
44 |
# Copy the R2 FASTQ file into the 'fastq' work folder. The step is skipped
# when its marker file done/copy_R2.done already exists.
# copy_from_import_archive is defined outside this file; the last two
# arguments are passed through as-is and may be empty.
if [[ ! -f "done/copy_R2.done" ]]; then
  rg_progress 15 "Copying FASTQ files: ${FASTQ2}"
  # Destination is quoted so a BaseFileName with spaces or glob characters
  # stays a single argument
  copy_from_import_archive "${FASTQ2}" "fastq/${BaseFileName}_R2.fastq.gz" "${RemoveFASTQFromGateway}" "${MaxFASTQLines}"
  touch "done/copy_R2.done"
fi
6658 |
31 Mar 22 |
nicklas |
49 |
|
6658 |
31 Mar 22 |
nicklas |
# Run Bowtie |
6669 |
06 Apr 22 |
nicklas |
51 |
# Run Bowtie on the copied FASTQ files; results go to 'fastq.aligned'.
# The step is skipped when its marker file done/align.done already exists.
# bowtie_align is defined outside this file; it receives the input and
# output file-name prefixes.
if [[ ! -f "done/align.done" ]]; then
  rg_progress 20 "Bowtie2: ${MergedName} (${NumThreads} threads)"
  # Prefixes are quoted so each one is passed as exactly one argument
  bowtie_align "fastq/${BaseFileName}" "fastq.aligned/${BaseFileName}"
  touch "done/align.done"
fi
6658 |
31 Mar 22 |
nicklas |
56 |
|
6658 |
31 Mar 22 |
nicklas |
## First Trimmomatic step is used to filter on adapter sequences |
6669 |
06 Apr 22 |
nicklas |
58 |
## First Trimmomatic step (adapter filter): reads 'fastq.gz' input from
## 'fastq' and writes 'fastq' output to 'fastq.trim.1'.
## The step is skipped when its marker file done/trim1.done already exists.
## The trimmomatic function is defined outside this file.
if [[ ! -f "done/trim1.done" ]]; then
  rg_progress 50 "Trimmomatic (adapter): ${MergedName} (${NumThreads} threads)"
  # Prefixes are quoted so each one is passed as exactly one argument
  trimmomatic "fastq/${BaseFileName}" "fastq.gz" "fastq.trim.1/${BaseFileName}" "fastq" "${TrimmomaticImportOptions}"
  touch "done/trim1.done"
fi
6658 |
31 Mar 22 |
nicklas |
63 |
|
6658 |
31 Mar 22 |
nicklas |
## Second Trimmomatic step is used to filter on quality |
6669 |
06 Apr 22 |
nicklas |
65 |
## Second Trimmomatic step (quality filter): reads the 'fastq' output of the
## first step from 'fastq.trim.1' and writes to 'fastq.trim.2'.
## The step is skipped when its marker file done/trim2.done already exists.
## The trimmomatic function is defined outside this file.
if [[ ! -f "done/trim2.done" ]]; then
  rg_progress 60 "Trimmomatic (quality): ${MergedName} (${NumThreads} threads)"
  # Prefixes are quoted so each one is passed as exactly one argument
  trimmomatic "fastq.trim.1/${BaseFileName}" "fastq" "fastq.trim.2/${BaseFileName}" "fastq" "${TrimmomaticQualityOptions}"
  touch "done/trim2.done"
fi
6658 |
31 Mar 22 |
nicklas |
70 |
|
6658 |
31 Mar 22 |
nicklas |
## Get statistics about average remaining read lengths |
6669 |
06 Apr 22 |
nicklas |
72 |
## Run readlength_averager.awk on the R1 and R2 files from the second
## Trimmomatic step and collect both results in <BaseFileName>_readlength.txt.
## The step is skipped when its marker file done/readlength.done already exists.
if [[ ! -f "done/readlength.done" ]]; then
  rg_progress 70 "Calculating average read length: ${MergedName}"
  # NOTE(review): both calls append (>>). If the step is interrupted before
  # the marker is written, a re-run adds the R1 result a second time --
  # confirm whether the file should be truncated first.
  "${WD}/readlength_averager.awk" < "fastq.trim.2/${BaseFileName}_R1.fastq" >> "fastq.trim.2/${BaseFileName}_readlength.txt"
  "${WD}/readlength_averager.awk" < "fastq.trim.2/${BaseFileName}_R2.fastq" >> "fastq.trim.2/${BaseFileName}_readlength.txt"
  touch "done/readlength.done"
fi
6658 |
31 Mar 22 |
nicklas |
78 |
|
6658 |
31 Mar 22 |
nicklas |
79 |
## Archive the results in FastqFolder: the fragment size and read length
## statistics plus the compressed output of the second Trimmomatic step.
rg_progress 80 "Archiving FASTQ: ${MergedName}"
mkdir -p "${FastqFolder}"
# Empty the target folder first. ':?' makes the shell abort if FastqFolder
# is empty or unset, instead of letting the pattern expand to 'rm -rf /*'.
rm -rf "${FastqFolder:?}"/*

# Each statistics file is appended to a collecting file in WD and also
# copied into the archive folder
cat "fastq.aligned/${BaseFileName}_fragmentsize.txt" >> "${WD}/fragments.out"
cp "fastq.aligned/${BaseFileName}_fragmentsize.txt" "${FastqFolder}/${BaseFileName}_fragmentsize.txt"
cat "fastq.trim.2/${BaseFileName}_readlength.txt" >> "${WD}/readlength.out"
cp "fastq.trim.2/${BaseFileName}_readlength.txt" "${FastqFolder}/${BaseFileName}_readlength.txt"

# Compress the trimmed FASTQ files straight into the archive folder
pigz -5 -p "${NumThreads}" -c "fastq.trim.2/${BaseFileName}_R1.fastq" > "${FastqFolder}/${BaseFileName}_R1.fastq.gz"
pigz -5 -p "${NumThreads}" -c "fastq.trim.2/${BaseFileName}_R2.fastq" > "${FastqFolder}/${BaseFileName}_R2.fastq.gz"

# Record the archived FASTQ files, one path per line; the shell expands the
# glob itself, so no 'ls' is needed
printf '%s\n' "${FastqFolder}"/*.fastq.gz >> "${WD}/files.out"

rg_progress 99 "Analysis completed, cleaning up..."