#!/bin/sh
# $Id $

# Nicklas Nordborg, 2020
#
# Finds information about a sequencing run given the barcode of a flow cell.
# It is expected that the sequencing is done with a NovaSeq sequencer.
#
# Usage: ./novaseq_status.sh <barcode> <run-archive-root-1> [<run-archive-root-2> ...]
#
# The output is a number of key-value pairs. Not all values may be present.
#
# RunArchive: The path to the data folder for the flow cell
# Config: Date and time the 'Config' folder was last modified
# RunParameters: Date and time the 'RunParameters.xml' file was last modified
# Read1: Value from <Read1NumberOfCycles> tag in RunParameters.xml
# Read2: Value from <Read2NumberOfCycles> tag in RunParameters.xml
# Index1Read: Value from <IndexRead1NumberOfCycles> tag in RunParameters.xml
# Index2Read: Value from <IndexRead2NumberOfCycles> tag in RunParameters.xml
# NovaSeqSerial: Value from <InstrumentName> tag in RunParameters.xml
# FlowCellMode: Value from <FlowCellMode> tag in RunParameters.xml
# CbclCount: Number of files ending with '.cbcl' below Data/Intensities/BaseCalls
# LaneCount: Value from the LaneCount attribute in FlowcellLayout tag in RunInfo.xml
# SurfaceCount: Value from the SurfaceCount attribute in FlowcellLayout tag in RunInfo.xml
# SwathCount: Value from the SwathCount attribute in FlowcellLayout tag in RunInfo.xml
# TileCount: Value from the TileCount attribute in FlowcellLayout tag in RunInfo.xml
# TileTags: Number of lines containing a <Tile> tag in RunInfo.xml
# RunInfoDate: Value from <Date> tag in RunInfo.xml, reformatted as a date and time
# RTAComplete: Date and time the 'RTAComplete.txt' was last modified

BARCODE=$1
# A bare 'shift' is an error when the script is started without arguments
if [ "$#" -gt 0 ]; then
  shift
fi
# Space-joined copy of the run-archive roots. The search below uses "$@"
# directly so that a root containing spaces stays a single argument.
RUN_ARCHIVE=$*

# Format string for file dates/times
DATE_FORMAT="%Y%m%d %H%M%S"

# Prints the folders (one per line) whose name contains the barcode,
# ignoring case, looking at most two levels below each run-archive root.
#   $1     = flow cell barcode
#   $2 ... = run-archive roots
# Prints nothing when no root is given or when nothing matches.
# Errors from 'find' (for example a root that does not exist) are
# deliberately ignored: the folder may not yet exist so a missing
# folder is not an error.
find_data_folders() {
  fdf_barcode=${1-}
  if [ "$#" -gt 0 ]; then
    shift
  fi
  # Some 'find' implementations (e.g. GNU) search the current directory
  # when no start point is given, which is never what we want here
  if [ "$#" -eq 0 ]; then
    return 0
  fi
  find "$@" -maxdepth 2 -iname "*${fdf_barcode}*" -type d -print 2> /dev/null || true
}

# Prints the number of entries in a newline-separated list of folders.
#   $1 = the list; an empty string counts as 0 entries
count_data_folders() {
  if [ -z "${1-}" ]; then
    echo 0
  else
    printf '%s\n' "$1" | grep -c ''
  fi
}

# Try to find a folder inside run-archive that has the barcode in the name
DATA_FOLDER=$(find_data_folders "${BARCODE}" "$@")

# Fail if more than one folder is found
DATA_FOLDER_COUNT=$(count_data_folders "${DATA_FOLDER}")
if [ "${DATA_FOLDER_COUNT}" -gt 1 ]; then
  echo "Found ${DATA_FOLDER_COUNT} data folders for flow cell ${BARCODE}" 1>&2
  # One folder per line; quoted so that spaces and glob characters survive
  printf '%s\n' "${DATA_FOLDER}" 1>&2
  exit 1
fi

# Prints 'RunArchive:' without a trailing space when no folder was found
echo "RunArchive:${DATA_FOLDER:+ ${DATA_FOLDER}}"

# Config folder is created immediately when starting the NovaSeq
# We use the date of this folder to set the start date of the job

# Prints 'Config: <date>' with the last-modified time of the 'Config'
# folder inside the given data folder.
#   $1 = data folder of the flow cell
#   $2 = format string passed to 'date'
# Prints nothing when the data folder is empty/unknown (otherwise we
# would look at '/Config' on the file system root) or when the 'Config'
# folder does not exist.
report_config_date() {
  if [ -n "${1-}" ] && [ -d "$1/Config" ]; then
    echo "Config: $(date +"${2-}" -r "$1/Config")"
  fi
}

report_config_date "${DATA_FOLDER}" "${DATE_FORMAT}"

# RunParameters.xml is created after clustering
# We extract information about number of reads and lanes
# and compare that to the number of *.cbcl files we can find
# This gives an estimate of the current sequencing cycle and we can
# use this for progress reporting
RUN_PARAMETERS=${DATA_FOLDER}/RunParameters.xml

# Prints the text that follows the opening <tag> on each line of the
# file that contains it, up to the next '<'.
#   $1 = tag name without angle brackets
#   $2 = XML file
xml_tag_value() {
  grep "<$1>" "$2" | cut -d '>' -f 2 | cut -d '<' -f 1
}

# Prints the RunParameters key-value pairs for a data folder.
#   $1 = data folder of the flow cell
#   $2 = format string passed to 'date'
# Prints nothing when the data folder is empty/unknown (otherwise we
# would read '/RunParameters.xml' on the file system root) or when the
# file does not exist.
report_run_parameters() {
  if [ -z "${1-}" ] || [ ! -f "$1/RunParameters.xml" ]; then
    return 0
  fi
  rp_file=$1/RunParameters.xml
  echo "RunParameters: $(date +"${2-}" -r "${rp_file}")"
  echo "Read1: $(xml_tag_value Read1NumberOfCycles "${rp_file}")"
  echo "Read2: $(xml_tag_value Read2NumberOfCycles "${rp_file}")"
  echo "Index1Read: $(xml_tag_value IndexRead1NumberOfCycles "${rp_file}")"
  echo "Index2Read: $(xml_tag_value IndexRead2NumberOfCycles "${rp_file}")"
  echo "NovaSeqSerial: $(xml_tag_value InstrumentName "${rp_file}")"
  echo "FlowCellMode: $(xml_tag_value FlowCellMode "${rp_file}")"
}

report_run_parameters "${DATA_FOLDER}" "${DATE_FORMAT}"

# Count number of BCL files which gives us information about
# the progress of the sequencing
CBCL_FOLDER=${DATA_FOLDER}/Data/Intensities/BaseCalls

# Prints the number of regular files ending with '.cbcl' anywhere
# below the given folder.
#   $1 = folder to search
count_cbcl_files() {
  # The pattern must be quoted; otherwise the shell expands it against
  # the current directory before 'find' gets to see it
  find "$1" -type f -name '*.cbcl' | wc -l
}

# Skip when no data folder is known; otherwise we would look at
# '/Data/Intensities/BaseCalls' on the file system root
if [ -n "${DATA_FOLDER}" ] && [ -d "${CBCL_FOLDER}" ]; then
  echo "CbclCount: $(count_cbcl_files "${CBCL_FOLDER}")"
fi

# RunInfo.xml contains information about the layout of the flowcell
# which we need to be able to compare the number of *.cbcl files
RUN_INFO=${DATA_FOLDER}/RunInfo.xml

# Prints the value of every occurrence of the attribute in the file.
#   $1 = attribute name
#   $2 = XML file
xml_attribute_value() {
  grep -o "$1=\"[^\"]*\"" "$2" | cut -d '"' -f 2
}

# Prints the RunInfo key-value pairs for a data folder.
#   $1 = data folder of the flow cell
#   $2 = format string passed to 'date'
# Prints nothing when the data folder is empty/unknown (otherwise we
# would read '/RunInfo.xml' on the file system root) or when the file
# does not exist.
report_run_info() {
  if [ -z "${1-}" ] || [ ! -f "$1/RunInfo.xml" ]; then
    return 0
  fi
  ri_file=$1/RunInfo.xml
  echo "LaneCount: $(xml_attribute_value LaneCount "${ri_file}")"
  echo "SurfaceCount: $(xml_attribute_value SurfaceCount "${ri_file}")"
  echo "SwathCount: $(xml_attribute_value SwathCount "${ri_file}")"
  echo "TileCount: $(xml_attribute_value TileCount "${ri_file}")"
  # Number of lines that contain a <Tile> tag
  echo "TileTags: $(grep -c '<Tile>' "${ri_file}")"
  RUN_INFO_DATE=$(grep '<Date>' "${ri_file}" | cut -d '>' -f 2 | cut -d '<' -f 1)
  # 'date -d ""' does not fail but reports midnight of the current day,
  # so the key is left out when there is no <Date> value
  if [ -n "${RUN_INFO_DATE}" ]; then
    echo "RunInfoDate: $(date -d "${RUN_INFO_DATE}" +"${2-}")"
  fi
}

report_run_info "${DATA_FOLDER}" "${DATE_FORMAT}"

# RTAComplete.txt is created when everything is complete
# This becomes the end date of the job and should trigger
# Reggie to start file checks and secondary analysis

# Prints 'RTAComplete: <date>' with the last-modified time of the
# 'RTAComplete.txt' file inside the given data folder.
#   $1 = data folder of the flow cell
#   $2 = format string passed to 'date'
# Prints nothing when the data folder is empty/unknown (otherwise we
# would look at '/RTAComplete.txt' on the file system root) or when
# the file does not exist.
report_rta_complete_date() {
  if [ -n "${1-}" ] && [ -f "$1/RTAComplete.txt" ]; then
    echo "RTAComplete: $(date +"${2-}" -r "$1/RTAComplete.txt")"
  fi
}

report_rta_complete_date "${DATA_FOLDER}" "${DATE_FORMAT}"