6649 |
21 Mar 22 |
nicklas |
#!/bin/bash |
6649 |
21 Mar 22 |
nicklas |
2 |
## |
6649 |
21 Mar 22 |
nicklas |
## Pipeline script for the RNAseq demux step. |
6649 |
21 Mar 22 |
nicklas |
4 |
## |
6649 |
21 Mar 22 |
nicklas |
5 |
## |
6649 |
21 Mar 22 |
nicklas |
## Environment variables that should be defined (in job.sh) before calling this script |
6649 |
21 Mar 22 |
nicklas |
## -AllRunArchives: White-space separated list of locations to search for sequencing data |
6649 |
21 Mar 22 |
nicklas |
## -JAVA: Path to Java |
6649 |
21 Mar 22 |
nicklas |
## -PICARD: Path to Picard JAR file |
6649 |
21 Mar 22 |
nicklas |
## -PICARD_MEMORY: Amount of memory to assign to Java when running Picard |
6649 |
21 Mar 22 |
nicklas |
## -BOWTIE: Path to BowTie program |
6649 |
21 Mar 22 |
nicklas |
## -BowTieOptions: Other options for the BowTie program |
6649 |
21 Mar 22 |
nicklas |
## -Gidx: Path to reference genome used for alignment with BowTie |
6649 |
21 Mar 22 |
nicklas |
## -TRIMMOMATIC: Path to Trimmomatich JAR file |
6649 |
21 Mar 22 |
nicklas |
## -TrimmomaticOptions1: Options for the first Trimmomatic step (adapter filter) |
6649 |
21 Mar 22 |
nicklas |
## -TrimmomaticOptions2: Options for the second Trimmomatic step (quality filter) |
6649 |
21 Mar 22 |
nicklas |
17 |
|
6649 |
21 Mar 22 |
nicklas |
18 |
set -e |
6649 |
21 Mar 22 |
nicklas |
19 |
|
6649 |
21 Mar 22 |
nicklas |
## Import utility functions |
6649 |
21 Mar 22 |
nicklas |
21 |
source ./reggie-utils.sh |
6649 |
21 Mar 22 |
nicklas |
22 |
source ./demux-utils.sh |
6649 |
21 Mar 22 |
nicklas |
23 |
|
6649 |
21 Mar 22 |
nicklas |
## Verify settings in options |
6655 |
24 Mar 22 |
nicklas |
25 |
rg_var_isdir "TMPDIR" "WD" |
6655 |
24 Mar 22 |
nicklas |
26 |
rg_var_isfile "JAVA" "PICARD" "BOWTIE" "TRIMMOMATIC" |
6655 |
24 Mar 22 |
nicklas |
27 |
rg_var_isset "AllRunArchives" "BowTieOptions" "Gidx" "TrimmomaticAdapterOptions" "TrimmomaticQualityOptions" |
6649 |
21 Mar 22 |
nicklas |
28 |
|
6649 |
21 Mar 22 |
nicklas |
29 |
|
6649 |
21 Mar 22 |
nicklas |
## Move to the temporary working directory and create subdirectories |
6649 |
21 Mar 22 |
nicklas |
31 |
cd ${TMPDIR} |
6669 |
06 Apr 22 |
nicklas |
32 |
mkdir -p done |
6669 |
06 Apr 22 |
nicklas |
33 |
mkdir -p fastq |
6669 |
06 Apr 22 |
nicklas |
34 |
mkdir -p fastq.merged |
6669 |
06 Apr 22 |
nicklas |
35 |
mkdir -p fastq.aligned |
6669 |
06 Apr 22 |
nicklas |
36 |
mkdir -p fastq.trim.1 |
6669 |
06 Apr 22 |
nicklas |
37 |
mkdir -p fastq.trim.2 |
6649 |
21 Mar 22 |
nicklas |
38 |
|
6649 |
21 Mar 22 |
nicklas |
## Read information about each flowcell and lane from 'lane_info.txt' and run picard to create FASTQ files |
6649 |
21 Mar 22 |
nicklas |
## Columns in 'lane_info.txt': |
6649 |
21 Mar 22 |
nicklas |
## -FLOWCELL_BARCODE |
6649 |
21 Mar 22 |
nicklas |
## -LANE |
6649 |
21 Mar 22 |
nicklas |
## -DEMUX_NAME |
6649 |
21 Mar 22 |
nicklas |
## -POOL_NAME |
6649 |
21 Mar 22 |
nicklas |
## -DATA_FILES_FOLDER |
6649 |
21 Mar 22 |
nicklas |
## -READSTRING |
6649 |
21 Mar 22 |
nicklas |
## -EXTRACT_OPTIONS |
6649 |
21 Mar 22 |
nicklas |
## -FASTQ_OPTIONS |
6649 |
21 Mar 22 |
nicklas |
49 |
NUM_LINES=`wc -l < ${WD}/lane_info.txt` |
6649 |
21 Mar 22 |
nicklas |
50 |
CURRENT_LINE=0 |
6655 |
24 Mar 22 |
nicklas |
51 |
while IFS=$',' read FLOWCELL_BARCODE LANE DEMUX_NAME POOL_NAME DATA_FILES_FOLDER READSTRING EXTRACT_OPTIONS FASTQ_OPTIONS; do |
6649 |
21 Mar 22 |
nicklas |
52 |
CURRENT_LINE=$(( ${CURRENT_LINE}+1 )) |
6649 |
21 Mar 22 |
nicklas |
53 |
RUN_ARCHIVE=`find_run_archive "${DATA_FILES_FOLDER}" "${AllRunArchives}"` |
6669 |
06 Apr 22 |
nicklas |
54 |
PREFIX=${FLOWCELL_BARCODE}.${LANE} |
6649 |
21 Mar 22 |
nicklas |
55 |
|
6669 |
06 Apr 22 |
nicklas |
56 |
if [ ! -f "done/${PREFIX}.barcodes.done" ]; then |
6669 |
06 Apr 22 |
nicklas |
57 |
mkdir -p ${PREFIX} |
6669 |
06 Apr 22 |
nicklas |
58 |
PROGRESS=$(( ${CURRENT_LINE}*20/${NUM_LINES} )) |
6669 |
06 Apr 22 |
nicklas |
59 |
rg_progress ${PROGRESS} "Extracting barcodes: ${FLOWCELL_BARCODE}; lane ${LANE} (${NumThreads} threads)" |
6669 |
06 Apr 22 |
nicklas |
60 |
extract_illumina_barcodes ${RUN_ARCHIVE} ${FLOWCELL_BARCODE} ${LANE} ${READSTRING} "${EXTRACT_OPTIONS}" |
6669 |
06 Apr 22 |
nicklas |
61 |
touch "done/${PREFIX}.barcodes.done"; |
6669 |
06 Apr 22 |
nicklas |
62 |
fi |
6669 |
06 Apr 22 |
nicklas |
63 |
|
6669 |
06 Apr 22 |
nicklas |
64 |
if [ ! -f "done/${PREFIX}.fastq.done" ]; then |
6669 |
06 Apr 22 |
nicklas |
65 |
PROGRESS=$(( (${CURRENT_LINE}*20+10)/${NUM_LINES} )) |
6669 |
06 Apr 22 |
nicklas |
66 |
rg_progress ${PROGRESS} "Creating FASTQ: ${FLOWCELL_BARCODE}; lane ${LANE} (${NumThreads} threads)" |
6669 |
06 Apr 22 |
nicklas |
67 |
basecalls_to_fastq ${RUN_ARCHIVE} ${FLOWCELL_BARCODE} ${LANE} ${READSTRING} "${FASTQ_OPTIONS}" |
6669 |
06 Apr 22 |
nicklas |
68 |
|
6669 |
06 Apr 22 |
nicklas |
69 |
echo "# [${DEMUX_NAME}]" >> ${WD}/demultiplex_metrics.txt |
6669 |
06 Apr 22 |
nicklas |
70 |
echo "# Lane=${LANE}" >> ${WD}/demultiplex_metrics.txt |
6669 |
06 Apr 22 |
nicklas |
71 |
echo "# Pool=${POOL_NAME}" >> ${WD}/demultiplex_metrics.txt |
6669 |
06 Apr 22 |
nicklas |
72 |
cat ${PREFIX}_metrics.csv >> ${WD}/demultiplex_metrics.txt |
6669 |
06 Apr 22 |
nicklas |
73 |
echo "[${DEMUX_NAME}]" >> ${WD}/skipped_tiles.txt |
6669 |
06 Apr 22 |
nicklas |
74 |
find ${PREFIX} -name "*_barcode.txt" -size 0 >> ${WD}/skipped_tiles.txt |
6669 |
06 Apr 22 |
nicklas |
75 |
touch "done/${PREFIX}.fastq.done" |
6669 |
06 Apr 22 |
nicklas |
76 |
rm -rf ${PREFIX} |
6669 |
06 Apr 22 |
nicklas |
77 |
fi |
6649 |
21 Mar 22 |
nicklas |
78 |
|
6649 |
21 Mar 22 |
nicklas |
79 |
done < ${WD}/lane_info.txt |
6649 |
21 Mar 22 |
nicklas |
80 |
|
6649 |
21 Mar 22 |
nicklas |
## Read information about each merged item from 'merge_info.txt'. For each, merge FASTQ files, run |
6649 |
21 Mar 22 |
nicklas |
## bowtie and trimmomatic and move results to project-archive |
6649 |
21 Mar 22 |
nicklas |
83 |
NUM_LINES=`wc -l < ${WD}/merge_info.txt` |
6649 |
21 Mar 22 |
nicklas |
84 |
CURRENT_LINE=0 |
6655 |
24 Mar 22 |
nicklas |
85 |
while IFS=$',' read MERGE_NAME FASTQ_FOLDER BASE_FASTQ_NAME UMASK; do |
6649 |
21 Mar 22 |
nicklas |
86 |
CURRENT_LINE=$(( ${CURRENT_LINE}+1 )) |
6649 |
21 Mar 22 |
nicklas |
87 |
PROGRESS=$(( 25+${CURRENT_LINE}*70/${NUM_LINES} )) |
6649 |
21 Mar 22 |
nicklas |
88 |
|
6669 |
06 Apr 22 |
nicklas |
89 |
if [ ! -f "done/${MERGE_NAME}.done" ]; then |
6649 |
21 Mar 22 |
nicklas |
90 |
|
6669 |
06 Apr 22 |
nicklas |
91 |
if [ "${UMASK}" ]; then |
6669 |
06 Apr 22 |
nicklas |
92 |
umask -S ${UMASK} |
6669 |
06 Apr 22 |
nicklas |
93 |
fi |
6669 |
06 Apr 22 |
nicklas |
94 |
|
6669 |
06 Apr 22 |
nicklas |
## Start new sections in result files |
6669 |
06 Apr 22 |
nicklas |
96 |
echo "[${MERGE_NAME}]" >> ${WD}/trimmomatic.out |
6669 |
06 Apr 22 |
nicklas |
97 |
echo "[${MERGE_NAME}]" >> ${WD}/files.out |
6669 |
06 Apr 22 |
nicklas |
98 |
echo "[${MERGE_NAME}]" >> ${WD}/fragments.out |
6669 |
06 Apr 22 |
nicklas |
99 |
echo "[${MERGE_NAME}]" >> ${WD}/readlength.out |
6669 |
06 Apr 22 |
nicklas |
100 |
|
6669 |
06 Apr 22 |
nicklas |
## Merge FASTQ files from all lanes |
6669 |
06 Apr 22 |
nicklas |
102 |
if [ ! -f "done/${MERGE_NAME}.merge.done" ]; then |
6669 |
06 Apr 22 |
nicklas |
103 |
rg_progress ${PROGRESS} "Consolidating FASTQ: ${MERGE_NAME}" |
6669 |
06 Apr 22 |
nicklas |
104 |
merge_fastq_files fastq/${MERGE_NAME} fastq.merged/${MERGE_NAME} |
6669 |
06 Apr 22 |
nicklas |
105 |
touch "done/${MERGE_NAME}.merge.done" |
6669 |
06 Apr 22 |
nicklas |
106 |
fi |
6669 |
06 Apr 22 |
nicklas |
107 |
|
6669 |
06 Apr 22 |
nicklas |
## Align a subset with Bowtie to get an estimate of fragment sizes |
6669 |
06 Apr 22 |
nicklas |
109 |
if [ ! -f "done/${MERGE_NAME}.align.done" ]; then |
6669 |
06 Apr 22 |
nicklas |
110 |
rg_progress ${PROGRESS} "Bowtie2: ${MERGE_NAME} (${CURRENT_LINE} of ${NUM_LINES}; ${NumThreads} threads)" |
6669 |
06 Apr 22 |
nicklas |
111 |
bowtie_align fastq.merged/${MERGE_NAME} fastq.aligned/${MERGE_NAME} |
6669 |
06 Apr 22 |
nicklas |
112 |
touch "done/${MERGE_NAME}.align.done" |
6669 |
06 Apr 22 |
nicklas |
113 |
fi |
6669 |
06 Apr 22 |
nicklas |
114 |
|
6669 |
06 Apr 22 |
nicklas |
## First Trimmomatic step is used to filter on adapter sequences |
6669 |
06 Apr 22 |
nicklas |
116 |
if [ ! -f "done/${MERGE_NAME}.trim1.done" ]; then |
6669 |
06 Apr 22 |
nicklas |
117 |
rg_progress ${PROGRESS} "Trimmomatic (adapter): ${MERGE_NAME} (${CURRENT_LINE} of ${NUM_LINES}; ${NumThreads} threads)" |
6669 |
06 Apr 22 |
nicklas |
118 |
trimmomatic fastq.merged/${MERGE_NAME} "fastq.gz" fastq.trim.1/${MERGE_NAME} "fastq" "${TrimmomaticAdapterOptions}" |
6669 |
06 Apr 22 |
nicklas |
119 |
touch "done/${MERGE_NAME}.trim1.done" |
6669 |
06 Apr 22 |
nicklas |
120 |
fi |
6669 |
06 Apr 22 |
nicklas |
121 |
|
6669 |
06 Apr 22 |
nicklas |
## Second Trimmomatic step is used to filter on quality |
6669 |
06 Apr 22 |
nicklas |
123 |
if [ ! -f "done/${MERGE_NAME}.trim2.done" ]; then |
6669 |
06 Apr 22 |
nicklas |
124 |
rg_progress ${PROGRESS} "Trimmomatic (quality): ${MERGE_NAME} (${CURRENT_LINE} of ${NUM_LINES}; ${NumThreads} threads)" |
6669 |
06 Apr 22 |
nicklas |
125 |
trimmomatic fastq.trim.1/${MERGE_NAME} "fastq" fastq.trim.2/${MERGE_NAME} "fastq" "${TrimmomaticQualityOptions}" |
6669 |
06 Apr 22 |
nicklas |
126 |
touch "done/${MERGE_NAME}.trim2.done" |
6669 |
06 Apr 22 |
nicklas |
127 |
fi |
6649 |
21 Mar 22 |
nicklas |
128 |
|
6669 |
06 Apr 22 |
nicklas |
## Get statitics about average remaining read lengths |
6669 |
06 Apr 22 |
nicklas |
130 |
if [ ! -f "done/${MERGE_NAME}.readlength.done" ]; then |
6669 |
06 Apr 22 |
nicklas |
131 |
${WD}/readlength_averager.awk < fastq.trim.2/${MERGE_NAME}_R1.fastq > fastq.trim.2/${MERGE_NAME}_readlength.txt |
6669 |
06 Apr 22 |
nicklas |
132 |
${WD}/readlength_averager.awk < fastq.trim.2/${MERGE_NAME}_R2.fastq >> fastq.trim.2/${MERGE_NAME}_readlength.txt |
6669 |
06 Apr 22 |
nicklas |
133 |
touch "done/${MERGE_NAME}.readlength.done" |
6669 |
06 Apr 22 |
nicklas |
134 |
fi |
6669 |
06 Apr 22 |
nicklas |
135 |
|
6669 |
06 Apr 22 |
nicklas |
## Move results to project-archive |
6669 |
06 Apr 22 |
nicklas |
137 |
rg_progress ${PROGRESS} "Archiving FASTQ: ${MERGE_NAME} (${CURRENT_LINE} of ${NUM_LINES})" |
6669 |
06 Apr 22 |
nicklas |
138 |
mkdir -p ${FASTQ_FOLDER} |
6669 |
06 Apr 22 |
nicklas |
139 |
rm -rf ${FASTQ_FOLDER}/* |
6669 |
06 Apr 22 |
nicklas |
140 |
cat fastq.aligned/${MERGE_NAME}_fragmentsize.txt >> ${WD}/fragments.out |
6669 |
06 Apr 22 |
nicklas |
141 |
cp fastq.aligned/${MERGE_NAME}_fragmentsize.txt ${FASTQ_FOLDER}/${BASE_FASTQ_NAME}_fragmentsize.txt |
6669 |
06 Apr 22 |
nicklas |
142 |
cat fastq.trim.2/${MERGE_NAME}_readlength.txt >> ${WD}/readlength.out |
6669 |
06 Apr 22 |
nicklas |
143 |
cp fastq.trim.2/${MERGE_NAME}_readlength.txt ${FASTQ_FOLDER}/${BASE_FASTQ_NAME}_readlength.txt |
6669 |
06 Apr 22 |
nicklas |
144 |
pigz -5 -p ${NumThreads} -c fastq.trim.2/${MERGE_NAME}_R1.fastq > ${FASTQ_FOLDER}/${BASE_FASTQ_NAME}_R1.fastq.gz |
6669 |
06 Apr 22 |
nicklas |
145 |
pigz -5 -p ${NumThreads} -c fastq.trim.2/${MERGE_NAME}_R2.fastq > ${FASTQ_FOLDER}/${BASE_FASTQ_NAME}_R2.fastq.gz |
6669 |
06 Apr 22 |
nicklas |
146 |
ls -1 ${FASTQ_FOLDER}/*.fastq.gz >> ${WD}/files.out |
6669 |
06 Apr 22 |
nicklas |
147 |
|
6669 |
06 Apr 22 |
nicklas |
## Cleanup files that are no longer needed |
6669 |
06 Apr 22 |
nicklas |
149 |
rm -f fastq.merged/* |
6669 |
06 Apr 22 |
nicklas |
150 |
rm -f fastq.aligned/* |
6669 |
06 Apr 22 |
nicklas |
151 |
rm -f fastq.trim.1/* |
6669 |
06 Apr 22 |
nicklas |
152 |
rm -f fastq.trim.2/* |
6669 |
06 Apr 22 |
nicklas |
153 |
|
6669 |
06 Apr 22 |
nicklas |
154 |
touch "done/${MERGE_NAME}.done" # A flag file to prevent the demux from being repeated when debugging |
6669 |
06 Apr 22 |
nicklas |
155 |
fi |
6649 |
21 Mar 22 |
nicklas |
156 |
done < ${WD}/merge_info.txt |
6649 |
21 Mar 22 |
nicklas |
157 |
|
6649 |
21 Mar 22 |
nicklas |
158 |
rg_progress 99 "Analysis completed, cleaning up..." |