#!/bin/bash
##
## Pipeline script for checking status of a HiSeq sequencing run.
##
## Environment variables that should be defined before calling this script
##  -AllRunArchives: White-space separated list of locations to search for sequencing data
##  -BARCODE: Barcode of the flow cell that we want to find information for
##
## The output is a number of key-value pairs. Not all values may be present.
##  -RunArchive: The path to the data folder for the flow cell
##  -Config: Date and time the 'Config' folder was last modified
##  -RunParameters: Date and time the 'runParameters.xml' file was last modified
##  -BclCount: Number of files ending with '.bcl'
##  -LaneCount: Value from <FlowcellLayout LaneCount> tag in RunInfo.xml
##  -SurfaceCount: Value from <FlowcellLayout SurfaceCount> tag in RunInfo.xml
##  -SwathCount: Value from <FlowcellLayout SwathCount> tag in RunInfo.xml
##  -TileCount: Value from <FlowcellLayout TileCount> tag in RunInfo.xml
##  -Read1: Value from <Read1> tag in runParameters.xml
##  -Read2: Value from <Read2> tag in runParameters.xml
##  -IndexRead1: Value from <IndexRead1> tag in runParameters.xml
##  -IndexRead2: Value from <IndexRead2> tag in runParameters.xml
##  -RTAComplete: Date and time the 'RTAComplete.txt' was last modified
##  -HiSeqSerial: Value from <ScannerID> tag in runParameters.xml

# Format string (date(1) syntax) for file dates/times
# Example output: "20220401 123456" for 1 April 2022 at 12:34:56
DATE_FORMAT="%Y%m%d %H%M%S"

# Try to find a folder inside run-archive that has the barcode in the name
#
# Prints every directory located at most two levels below the given search
# locations whose name contains the barcode (case-insensitive), one per line
# Arguments: $1     - barcode of the flow cell
#            $2...  - locations to search
# The folder may not yet exist so a missing folder is not an error; this is
# why all diagnostics and the exit status from 'find' are deliberately discarded
find_data_folders() {
  local barcode=$1
  shift
  find "$@" -maxdepth 2 -iname "*${barcode}*" -type d -print 2> /dev/null || true
}

# AllRunArchives is a white-space separated list and is intentionally left
# unquoted so that each location is passed as a separate argument
# shellcheck disable=SC2086
DATA_FOLDER=$(find_data_folders "${BARCODE}" ${AllRunArchives})

# Fail if more than one folder is found
#
# Succeeds when exactly one argument is given; otherwise writes the number
# of candidates followed by the candidates (one per line) to stderr and
# returns 1
# Arguments: $@ - the candidate data folders
# Globals:   BARCODE (read, only used in the error message)
require_single_data_folder() {
  if (( $# != 1 )); then
    echo "Found $# data folders for flow cell ${BARCODE}" 1>&2
    # One candidate per line so that paths with white-space remain readable
    printf '%s\n' "$@" 1>&2
    return 1
  fi
}

# The here-string always supplies at least one line, so an empty DATA_FOLDER
# results in a single empty element and passes the check; this is how a
# data folder that does not yet exist is accepted
readarray -t lines <<< "${DATA_FOLDER}"
require_single_data_folder "${lines[@]}" || exit 1

# Print the 'RunArchive' key with the path to the data folder
# Arguments: $1 - the data folder (may be empty)
# The path is printed verbatim; it is not subject to word splitting or
# pathname expansion. When the path is empty only 'RunArchive:' is printed,
# without a trailing space
print_run_archive() {
  printf 'RunArchive:%s\n' "${1:+ $1}"
}
print_run_archive "${DATA_FOLDER}"
# Config folder is created immediately when starting the HiSeq
# We use the date of this folder to set the start date of the job
#
# Prints 'Config: <date>' with the last-modified time of the 'Config'
# folder inside the data folder; prints nothing if it does not exist
# Arguments: $1 - the data folder (may be empty)
# Globals:   DATE_FORMAT (read)
report_config_date() {
  local config_folder="$1/Config"
  # An empty data folder means that no run archive has been found; without
  # this check '/Config' in the root of the file system would be inspected
  if [[ -n "$1" && -d "${config_folder}" ]]; then
    echo "Config: $(date +"${DATE_FORMAT}" -r "${config_folder}")"
  fi
}
report_config_date "${DATA_FOLDER}"

# runParameters.xml is created after clustering
# We extract the values of the read tags and the scanner id
# The values can be compared to the number of '.bcl' files that have been
# created so far, which gives an estimate of the current sequencing cycle
# that can be used for progress reporting (the comparison itself is not
# made by this script)

# Prints the text between the first '>' and the following '<' on each line
# of the file that contains the opening tag
# Arguments: $1 - tag name without angle brackets
#            $2 - path to the XML file
run_parameter_value() {
  grep "<$1>" "$2" | cut -d '>' -f 2 | cut -d '<' -f 1
}

# Prints the 'RunParameters', 'Read1', 'Read2', 'IndexRead1', 'IndexRead2'
# and 'HiSeqSerial' key-value pairs; prints nothing if the file is missing
# A tag that is not found results in a key with an empty value
# Arguments: $1 - path to runParameters.xml
# Globals:   DATE_FORMAT (read)
report_run_parameters() {
  local file=$1
  [[ -f "${file}" ]] || return 0
  echo "RunParameters: $(date +"${DATE_FORMAT}" -r "${file}")"
  echo "Read1: $(run_parameter_value Read1 "${file}")"
  echo "Read2: $(run_parameter_value Read2 "${file}")"
  echo "IndexRead1: $(run_parameter_value IndexRead1 "${file}")"
  echo "IndexRead2: $(run_parameter_value IndexRead2 "${file}")"
  echo "HiSeqSerial: $(run_parameter_value ScannerID "${file}")"
}

RUN_PARAMETERS=${DATA_FOLDER}/runParameters.xml
# An empty data folder means that no run archive has been found; do not
# look for '/runParameters.xml' in the root of the file system
if [[ -n "${DATA_FOLDER}" ]]; then
  report_run_parameters "${RUN_PARAMETERS}"
fi

# Count number of BCL files which gives us information about
# the progress of the sequencing
#
# Prints 'BclCount: <n>' where <n> is the number of regular files ending
# with '.bcl' anywhere below the folder; prints nothing if the folder
# does not exist
# Arguments: $1 - path to the BaseCalls folder
report_bcl_count() {
  local folder=$1
  [[ -d "${folder}" ]] || return 0
  # The pattern must be quoted so that it is matched by 'find'; unquoted it
  # is expanded by the shell whenever the current working directory
  # happens to contain '.bcl' files, which results in a wrong count
  echo "BclCount: $(find "${folder}" -type f -name '*.bcl' | wc -l)"
}

BCL_FOLDER=${DATA_FOLDER}/Data/Intensities/BaseCalls
# An empty data folder means that no run archive has been found; do not
# look for '/Data/Intensities/BaseCalls' in the root of the file system
if [[ -n "${DATA_FOLDER}" ]]; then
  report_bcl_count "${BCL_FOLDER}"
fi

# RunInfo.xml contains information about the layout of the flowcell
# which we need to be able to compare the number of BCL files

# Prints the value of every occurrence of the attribute in the file
# Arguments: $1 - attribute name
#            $2 - path to the XML file
run_info_attribute() {
  grep -o "$1=\"[^\"]*\"" "$2" | cut -d '"' -f 2
}

# Prints the 'LaneCount', 'SurfaceCount', 'SwathCount' and 'TileCount'
# key-value pairs; prints nothing if the file is missing
# An attribute that is not found results in a key with an empty value
# Arguments: $1 - path to RunInfo.xml
report_flowcell_layout() {
  local file=$1
  # It is the RunInfo.xml file itself that must exist; the existence of
  # runParameters.xml says nothing about the file that is read here
  [[ -f "${file}" ]] || return 0
  echo "LaneCount: $(run_info_attribute LaneCount "${file}")"
  echo "SurfaceCount: $(run_info_attribute SurfaceCount "${file}")"
  echo "SwathCount: $(run_info_attribute SwathCount "${file}")"
  echo "TileCount: $(run_info_attribute TileCount "${file}")"
}

RUN_INFO=${DATA_FOLDER}/RunInfo.xml
# An empty data folder means that no run archive has been found; do not
# look for '/RunInfo.xml' in the root of the file system
if [[ -n "${DATA_FOLDER}" ]]; then
  report_flowcell_layout "${RUN_INFO}"
fi

# RTAComplete.txt is created when everything is complete
# This becomes the end date of the job and should trigger
# Reggie to start file checks and secondary analysis
#
# Prints 'RTAComplete: <date>' with the last-modified time of the
# 'RTAComplete.txt' file inside the data folder; prints nothing if the
# file does not exist
# Arguments: $1 - the data folder (may be empty)
# Globals:   DATE_FORMAT (read)
report_rta_complete() {
  local marker="$1/RTAComplete.txt"
  # An empty data folder means that no run archive has been found; without
  # this check '/RTAComplete.txt' in the root of the file system would be
  # inspected
  if [[ -n "$1" && -f "${marker}" ]]; then
    echo "RTAComplete: $(date +"${DATE_FORMAT}" -r "${marker}")"
  fi
}
report_rta_complete "${DATA_FOLDER}"