6649 |
21 Mar 22 |
nicklas |
# $Id $ |
6649 |
21 Mar 22 |
nicklas |
2 |
# |
6649 |
21 Mar 22 |
nicklas |
# Useful utility functions for demuxing. |
6649 |
21 Mar 22 |
nicklas |
4 |
# |
6649 |
21 Mar 22 |
nicklas |
# source ./demux-utils.sh |
6649 |
21 Mar 22 |
nicklas |
6 |
|
6649 |
21 Mar 22 |
nicklas |
# Find the path to a run-archive folder. The result is returned to stdout.
# If the specified folder can't be found an error message is written to
# stderr and the current (sub)shell is exited with exit code 1. When called
# as $(find_run_archive ...) only the command substitution exits; the caller
# must check the exit status (or run with 'set -e').
# Parameters:
# $1: The name of folder to look for. It is handed to 'find -name', which
#     searches each root folder down to a depth of 2 and stops at the first
#     matching directory.
# $2: List of root folders to look in (multiple folders should be separated by space)
find_run_archive()
{
  local DATA_FILES_FOLDER=$1
  local ALL_RUN_ARCHIVES=$2
  # Declared separately from the assignment so that 'local' doesn't mask
  # the status of the command substitution.
  local RUN_ARCHIVE

  # ALL_RUN_ARCHIVES is intentionally NOT quoted: it is a space-separated
  # list that must be split into one argument per root folder. The folder
  # name is quoted so that it reaches 'find' as a single, unexpanded word.
  # Errors from 'find' (for example a root folder that doesn't exist) are
  # deliberately ignored; an empty result is handled below.
  # shellcheck disable=SC2086
  RUN_ARCHIVE=$(find ${ALL_RUN_ARCHIVES} -maxdepth 2 -name "${DATA_FILES_FOLDER}" -type d -print -quit 2> /dev/null) || true

  if [ -z "${RUN_ARCHIVE}" ]; then
    echo "Can't find data folder ${DATA_FILES_FOLDER} in [${ALL_RUN_ARCHIVES}]" 1>&2
    exit 1
  fi
  printf '%s\n' "${RUN_ARCHIVE}"
}
6649 |
21 Mar 22 |
nicklas |
23 |
|
6649 |
21 Mar 22 |
nicklas |
# Extract barcodes from Illumina runfolder by running the Picard
# ExtractIlluminaBarcodes command through ${WD}/stdwrap.sh.
# Standard output of the wrapper is appended to <flowcell>.<lane>.out in
# the current working directory. The exit status is that of the wrapper.
# Parameters:
# $1: Path to Illumina folder with sequencing data
# $2: Barcode of the flowcell
# $3: The lane number to extract barcodes from
# $4: Read string template
# $5: Other options to the ExtractIlluminaBarcodes command
# Global variables that are read:
#   WD, JAVA, JavaOptions, PICARD, NumThreads, TMPDIR, ExtractBarcodesOptions
extract_illumina_barcodes()
{
  local RUN_ARCHIVE=$1
  local FLOWCELL_BARCODE=$2
  local LANE=$3
  local READSTRING=$4
  local MORE_OPTIONS=$5
  # Common prefix for the barcode file, output folder, metrics and log file
  local PREFIX=${FLOWCELL_BARCODE}.${LANE}

  # JAVA, JavaOptions, ExtractBarcodesOptions and MORE_OPTIONS may hold
  # several space-separated words and are intentionally left unquoted so
  # that each word becomes a separate argument. Everything that is a
  # single value (paths in particular) is quoted so that whitespace in a
  # folder name can't split it.
  # shellcheck disable=SC2086
  "${WD}/stdwrap.sh" ${JAVA} \
    -Dpicard.useLegacyParser=false ${JavaOptions} \
    -jar "${PICARD}" ExtractIlluminaBarcodes \
    -BASECALLS_DIR "${RUN_ARCHIVE}/Data/Intensities/BaseCalls" \
    -BARCODE_FILE "${WD}/${PREFIX}_barcodes.csv" \
    -LANE "${LANE}" \
    -READ_STRUCTURE "${READSTRING}" \
    -OUTPUT_DIR "${PREFIX}" \
    -METRICS_FILE "${PREFIX}_metrics.csv" \
    -NUM_PROCESSORS "${NumThreads}" \
    -TMP_DIR "${TMPDIR}" \
    ${ExtractBarcodesOptions} ${MORE_OPTIONS} \
    >> "${PREFIX}.out"
}
6649 |
21 Mar 22 |
nicklas |
54 |
|
6649 |
21 Mar 22 |
nicklas |
# Extract base calls to FASTQ files by running the Picard
# IlluminaBasecallsToFastq command through ${WD}/stdwrap.sh.
# Standard output of the wrapper is appended to <flowcell>.<lane>.out in
# the current working directory. The exit status is that of the wrapper.
# Parameters:
# $1: Path to Illumina folder with sequencing data
# $2: Barcode of the flowcell
# $3: The lane number to extract base calls from
# $4: Read string template
# $5: Other options to the IlluminaBasecallsToFastq command
# Global variables that are read:
#   WD, JAVA, JavaOptions, PICARD, NumThreads, TMPDIR, BasecallsToFastqOptions
basecalls_to_fastq()
{
  local RUN_ARCHIVE=$1
  local FLOWCELL_BARCODE=$2
  local LANE=$3
  local READSTRING=$4
  local MORE_OPTIONS=$5
  # Common prefix for the barcodes folder, multiplex file and log file
  local PREFIX=${FLOWCELL_BARCODE}.${LANE}

  # JAVA, JavaOptions, BasecallsToFastqOptions and MORE_OPTIONS may hold
  # several space-separated words and are intentionally left unquoted so
  # that each word becomes a separate argument. Everything that is a
  # single value (paths in particular) is quoted so that whitespace in a
  # folder name can't split it.
  # shellcheck disable=SC2086
  "${WD}/stdwrap.sh" ${JAVA} \
    -Dpicard.useLegacyParser=false ${JavaOptions} \
    -jar "${PICARD}" IlluminaBasecallsToFastq \
    -BASECALLS_DIR "${RUN_ARCHIVE}/Data/Intensities/BaseCalls" \
    -BARCODES_DIR "${PREFIX}" \
    -MULTIPLEX_PARAMS "${WD}/${PREFIX}_multiplex.csv" \
    -LANE "${LANE}" \
    -FLOWCELL_BARCODE "${FLOWCELL_BARCODE}" \
    -READ_STRUCTURE "${READSTRING}" \
    -NUM_PROCESSORS "${NumThreads}" \
    -TMP_DIR "${TMPDIR}" \
    ${BasecallsToFastqOptions} ${MORE_OPTIONS} \
    >> "${PREFIX}.out"
}
6649 |
21 Mar 22 |
nicklas |
85 |
|
6649 |
21 Mar 22 |
nicklas |
# Merge FASTQ files to a pair for R1 and R2.
# All files matching <src>_*.1.fastq.gz are concatenated (in glob order)
# to <dest>_R1.fastq.gz and all files matching <src>_*.2.fastq.gz to
# <dest>_R2.fastq.gz. When both merges have succeeded every file whose
# name starts with <src> is removed.
# Returns non-zero, leaving the source files in place, if <src> is empty
# or if one of the merges fails.
# NOTE! <dest> must not start with <src>, otherwise the merged files are
# removed together with the source files.
# Parameters:
# $1: Source file name pattern
# $2: Destination base file name.
merge_fastq_files()
{
  local BASENAME_SRC=$1
  local BASENAME_DEST=$2

  # An empty prefix would turn the cleanup below into 'rm -f *'
  if [ -z "${BASENAME_SRC}" ]; then
    echo "merge_fastq_files: the source file name pattern must not be empty" 1>&2
    return 1
  fi

  # The prefix is quoted (whitespace-safe); only the trailing pattern is
  # left for the shell to expand. Stop before the cleanup if a merge
  # fails so that the source files are never lost.
  cat -- "${BASENAME_SRC}"_*.1.fastq.gz > "${BASENAME_DEST}_R1.fastq.gz" || return
  cat -- "${BASENAME_SRC}"_*.2.fastq.gz > "${BASENAME_DEST}_R2.fastq.gz" || return
  rm -f -- "${BASENAME_SRC}"*
}
6649 |
21 Mar 22 |
nicklas |
99 |
|
6649 |
21 Mar 22 |
nicklas |
# Align FASTQ files with BowTie
# Runs ${BOWTIE} through ${WD}/stdwrap.sh on <src>_R1.fastq.gz and
# <src>_R2.fastq.gz, asking it to write the alignment to <dest>.sam and
# the '--un-conc' reads to <dest>_R%.fastq. Standard output of the wrapper
# is written to <dest>.sam.out. Afterwards <dest>.sam is fed to
# ${WD}/singlecolumnaverager.awk and its output is stored in
# <dest>_fragmentsize.txt. The exit status is that of the awk script.
# Parameters:
# $1: Base file name of R1 and R2 FASTQ files
# $2: Base file name for output files
# Global variables that are read:
#   WD, BOWTIE, NumThreads, BowTieOptions, Gidx
bowtie_align()
{
  local BASENAME_SRC=$1
  local BASENAME_DEST=$2

  # BOWTIE and BowTieOptions may hold several space-separated words and
  # are intentionally left unquoted. File names and the index path are
  # quoted so that whitespace can't split them into several arguments.
  # shellcheck disable=SC2086
  "${WD}/stdwrap.sh" ${BOWTIE} -p "${NumThreads}" \
    ${BowTieOptions} \
    --un-conc "${BASENAME_DEST}_R%.fastq" \
    -x "${Gidx}" \
    -1 "${BASENAME_SRC}_R1.fastq.gz" \
    -2 "${BASENAME_SRC}_R2.fastq.gz" \
    -S "${BASENAME_DEST}.sam" \
    > "${BASENAME_DEST}.sam.out"

  "${WD}/singlecolumnaverager.awk" < "${BASENAME_DEST}.sam" > "${BASENAME_DEST}_fragmentsize.txt"
}
6649 |
21 Mar 22 |
nicklas |
119 |
|
6649 |
21 Mar 22 |
nicklas |
# Run Trimmomatic.
# Trimmomatic is started in paired-end (PE) mode through ${WD}/stdwrap.sh.
# It reads <src>_R1.<suffix> and <src>_R2.<suffix> and is told to write
# <dest>_R1.<suffix>, <dest>_R1_un.<suffix>, <dest>_R2.<suffix> and
# <dest>_R2_un.<suffix> (in that order). Standard output of the wrapper is
# appended to ${WD}/trimmomatic.out. The exit status is that of the wrapper.
# Parameters:
# $1: Base file name for source files
# $2: Suffix for source files
# $3: Base file name for destination files
# $4: Suffix for destination files
# $5: Other options for Trimmomatic
# Global variables that are read:
#   WD, JAVA, JavaOptions, TRIMMOMATIC, NumThreads
trimmomatic()
{
  local BASENAME_SRC=$1
  local SUFFIX_SRC=$2
  local BASENAME_DEST=$3
  local SUFFIX_DEST=$4
  local OPTIONS=$5

  # JAVA, JavaOptions and OPTIONS may hold several space-separated words
  # and are intentionally left unquoted so that each word becomes a
  # separate argument. File names are quoted so that whitespace in a path
  # can't split them.
  # shellcheck disable=SC2086
  "${WD}/stdwrap.sh" ${JAVA} ${JavaOptions} \
    -jar "${TRIMMOMATIC}" PE \
    -threads "${NumThreads}" \
    "${BASENAME_SRC}_R1.${SUFFIX_SRC}" \
    "${BASENAME_SRC}_R2.${SUFFIX_SRC}" \
    "${BASENAME_DEST}_R1.${SUFFIX_DEST}" \
    "${BASENAME_DEST}_R1_un.${SUFFIX_DEST}" \
    "${BASENAME_DEST}_R2.${SUFFIX_DEST}" \
    "${BASENAME_DEST}_R2_un.${SUFFIX_DEST}" \
    ${OPTIONS} \
    >> "${WD}/trimmomatic.out"
}
6649 |
21 Mar 22 |
nicklas |
147 |
|
6658 |
31 Mar 22 |
nicklas |
# Copy a FASTQ file from the ImportArchive to given destination
# If the FASTQ file can't be found in the ImportArchive we also
# check the ImportGateway. If it is found there it is copied to
# the ImportArchive (and deleted from the gateway unless
# KeepFASTQOnGateway is set).
# If the FASTQ can't be found, or if it can't be copied to the
# ImportArchive, an error message is written to stderr and the current
# (sub)shell is exited with exit code 1. The file on the gateway is
# never deleted unless the copy to the archive succeeded.
# Parameters:
# $1: Name of the FASTQ file, relative to ImportArchive/ImportGateway
#     (may include sub-folders)
# $2: Path to the destination file
# Global variables that are read:
#   ImportArchive, ImportGateway
#   KeepFASTQOnGateway: if non-empty, the file is kept on the gateway
#   MaxFASTQLines: if non-empty, only this many lines are copied to $2
copy_from_import_archive()
{
  local SRC=$1
  local DEST=$2
  local ARCHIVE_FILE="${ImportArchive}/${SRC}"
  local GATEWAY_FILE="${ImportGateway}/${SRC}"

  if [ ! -f "${ARCHIVE_FILE}" ]; then
    if [ ! -f "${GATEWAY_FILE}" ]; then
      echo "Can't find FASTQ file '${SRC}' in '${ImportGateway}' or '${ImportArchive}'" 1>&2
      exit 1
    fi
    # Copy from gateway to archive. The parent folder of the target is
    # created (not just the archive root) so that a name with sub-folders
    # works. This is done outside the umask subshell below, otherwise new
    # folders would be created without write/search permission.
    if ! mkdir -p "$(dirname "${ARCHIVE_FILE}")"; then
      echo "Can't create folder for '${SRC}' in '${ImportArchive}'" 1>&2
      exit 1
    fi
    # Temporary change umask to read-only for the current user/group
    if ! (umask u=r,g=r,o= && cp "${GATEWAY_FILE}" "${ARCHIVE_FILE}"); then
      # Don't leave a partial file behind; it would be mistaken for a
      # complete archive copy the next time.
      rm -f "${ARCHIVE_FILE}"
      echo "Can't copy FASTQ file '${SRC}' from '${ImportGateway}' to '${ImportArchive}'" 1>&2
      exit 1
    fi
    # Only reached when the archive copy succeeded
    if [ ! "${KeepFASTQOnGateway:-}" ]; then
      rm -f "${GATEWAY_FILE}"
    fi
  fi

  if [ "${MaxFASTQLines:-}" ]; then
    # Debug mode to only copy part of the FASTQ file
    # The limit is passed with -v instead of being pasted into the awk
    # program. awk keeps reading to the end of the input so that the
    # decompressor is never hit by a broken pipe.
    gzip -dc "${ARCHIVE_FILE}" | awk -v max="${MaxFASTQLines}" 'NR <= max + 0' | gzip -1 -c > "${DEST}"
  else
    cp "${ARCHIVE_FILE}" "${DEST}"
  fi
}