6675 |
13 Apr 22 |
nicklas |
#!/bin/bash |
6675 |
13 Apr 22 |
nicklas |
2 |
## |
6675 |
13 Apr 22 |
nicklas |
## Pipeline script for the Legacy Tophat/Cufflinks pipeline. |
6675 |
13 Apr 22 |
nicklas |
4 |
## |
6675 |
13 Apr 22 |
nicklas |
5 |
## |
6675 |
13 Apr 22 |
nicklas |
## Environment variables that should be defined (in job.sh) before calling this script |
6675 |
13 Apr 22 |
nicklas |
## -FastqFolder: Path to folder with FASTQ files |
6675 |
13 Apr 22 |
nicklas |
## -CufflinksFolder: Folder for output. Will be created or cleared from existing files |
6675 |
13 Apr 22 |
nicklas |
## -BOWTIE: Path to BowTie program |
6675 |
13 Apr 22 |
nicklas |
## -RMidx: Path to reference genome used for masking |
6675 |
13 Apr 22 |
nicklas |
## -BowTieOptions: Other options for the BowTie program (when masking) |
6675 |
13 Apr 22 |
nicklas |
## -TOPHAT: Path to Tophat program |
6675 |
13 Apr 22 |
nicklas |
## -ATidx: Path to the reference transcriptome used for alignment |
6675 |
13 Apr 22 |
nicklas |
## -AGidx: Path to the reference genome used for alignment |
6675 |
13 Apr 22 |
nicklas |
## -TophatOptions: Other options for the Tophat program |
6675 |
13 Apr 22 |
nicklas |
## -JAVA: Path to java |
6675 |
13 Apr 22 |
nicklas |
## -PICARD: Path to Picard JAR file |
6675 |
13 Apr 22 |
nicklas |
## -MarkDuplicatesOptions: Options to the Picard MarkDuplicates step |
6675 |
13 Apr 22 |
nicklas |
## -SAMTOOLS: Path to the Samtools program |
6675 |
13 Apr 22 |
nicklas |
## -CUFFLINKS: Path to Cufflinks program |
6675 |
13 Apr 22 |
nicklas |
## -CGTF: Path to the GTF file used by Cufflinks |
6675 |
13 Apr 22 |
nicklas |
## -CGidx: Path to reference genome used by Cufflinks |
6675 |
13 Apr 22 |
nicklas |
## -CufflinksOptions: Other options for the Cufflinks program |
6675 |
13 Apr 22 |
nicklas |
## -IdRemap: Path to text file for re-mapping transcript ids |
6675 |
13 Apr 22 |
nicklas |
## -TMPDIR: Path to a local temporary working directory |
6675 |
13 Apr 22 |
nicklas |
26 |
## |
6675 |
13 Apr 22 |
nicklas |
27 |
|
6675 |
13 Apr 22 |
nicklas |
28 |
## Abort the whole pipeline as soon as any command returns a failure status
set -o errexit

## Import utility functions. The rg_* helpers called throughout this script
## are not defined in this file, so they presumably come from here.
. ./reggie-utils.sh

## Verify settings in options: directories, then files, then plain values
rg_var_isdir FastqFolder TMPDIR WD
rg_var_isfile BOWTIE TOPHAT JAVA PICARD SAMTOOLS CUFFLINKS CGTF CGidx IdRemap
rg_var_isset RMidx AGidx ATidx CufflinksFolder
6675 |
13 Apr 22 |
nicklas |
37 |
|
6675 |
13 Apr 22 |
nicklas |
## Move to the temporary working directory and create the subdirectories
## used by the steps below. The 'done' directory holds one marker file per
## step; a step is skipped when its marker already exists.
## TMPDIR is quoted so that a path containing whitespace is not word-split.
cd -- "${TMPDIR}"
mkdir -p fastq masked aligned cufflinks done
6675 |
13 Apr 22 |
nicklas |
45 |
|
6675 |
13 Apr 22 |
nicklas |
## Copy FASTQ files to local working directory
## Skipped when the done/copy.done marker already exists.
if [[ ! -f done/copy.done ]]; then
  rg_progress 10 "Copying FASTQ files"
  # Quote the folder (it may contain whitespace) but keep the glob outside
  # the quotes so that it is still expanded. '--' stops option parsing in
  # case a path starts with a dash.
  cp -- "${FastqFolder}"/*.fastq.gz fastq/
  touch done/copy.done
fi
6675 |
13 Apr 22 |
nicklas |
52 |
|
6675 |
13 Apr 22 |
nicklas |
## Run Bowtie2 (masking step)
## Aligns the paired FASTQ files against the masking reference (RMidx).
## The alignments themselves are discarded (-S /dev/null); the files written
## by --un-conc-gz (masked/R1.fastq.gz and masked/R2.fastq.gz) are the input
## to the Tophat step. Skipped when done/mask.done already exists.
if [[ ! -f done/mask.done ]]; then
  rg_progress 20 "Running Bowtie2 (${NumThreads} threads)"
  ## Find R1 and R2 FASTQ files (-quit: only the first match is used)
  FASTQ1=$(find fastq -name "*_R1.fastq.gz" -print -quit 2> /dev/null)
  FASTQ2=$(find fastq -name "*_R2.fastq.gz" -print -quit 2> /dev/null)
  rg_var_isfile "FASTQ1" "FASTQ2"
  # BowTieOptions may hold several space-separated options, so it is
  # intentionally left unquoted to be word-split. All paths are quoted.
  # shellcheck disable=SC2086
  "${WD}/stdwrap.sh" "${BOWTIE}" \
    -p "${NumThreads}" \
    ${BowTieOptions} \
    --un-conc-gz masked/R%.fastq.gz \
    -x "${RMidx}" \
    -1 "${FASTQ1}" \
    -2 "${FASTQ2}" \
    -S /dev/null \
    > masked/masked.out
  touch done/mask.done
fi
6675 |
13 Apr 22 |
nicklas |
71 |
|
6675 |
13 Apr 22 |
nicklas |
## Run Tophat
## Aligns the masked read pairs against the reference genome (AGidx) using
## the transcriptome index (ATidx). Output goes to the 'aligned' directory;
## stdout is captured in aligned/aligned.out.
## Skipped when done/align.done already exists.
if [[ ! -f done/align.done ]]; then
  rg_progress 30 "Running Tophat (${NumThreads} threads)"
  # TophatOptions may hold several space-separated options, so it is
  # intentionally left unquoted to be word-split. All paths are quoted.
  # shellcheck disable=SC2086
  "${WD}/stdwrap.sh" "${TOPHAT}" \
    -p "${NumThreads}" \
    -o aligned \
    ${TophatOptions} \
    --transcriptome-index "${ATidx}" \
    "${AGidx}" \
    masked/R1.fastq.gz \
    masked/R2.fastq.gz \
    > aligned/aligned.out
  # The logs directory is not among the files archived at the end
  rm -rf aligned/logs
  touch done/align.done
fi
6675 |
13 Apr 22 |
nicklas |
87 |
|
6675 |
13 Apr 22 |
nicklas |
## MarkDuplicates
## Runs Picard MarkDuplicates on aligned/accepted_hits.bam. The result is
## written to a temporary file first and only replaces the input BAM after
## Picard has finished successfully (the script runs under 'set -e').
## Skipped when done/markduplicates.done already exists.
if [[ ! -f done/markduplicates.done ]]; then
  rg_progress 50 "Running picard MarkDuplicates"
  # JavaOptions and MarkDuplicatesOptions may hold several space-separated
  # options (or be empty), so they are intentionally left unquoted.
  # shellcheck disable=SC2086
  "${WD}/stdwrap.sh" "${JAVA}" \
    -Dpicard.useLegacyParser=false ${JavaOptions} \
    -jar "${PICARD}" MarkDuplicates \
    -INPUT aligned/accepted_hits.bam \
    -OUTPUT aligned/accepted_hits.bam.tmp_picard \
    -METRICS_FILE aligned/accepted_hits_picardmetrics.csv \
    ${MarkDuplicatesOptions} \
    > aligned/picard_MarkDuplicates.out
  mv -f aligned/accepted_hits.bam.tmp_picard aligned/accepted_hits.bam
  touch done/markduplicates.done
fi
6675 |
13 Apr 22 |
nicklas |
102 |
|
6675 |
13 Apr 22 |
nicklas |
103 |
## Alignment statistics
## Writes aligned/alignment_statistics.out and aligned/fragments.out from the
## aligned BAM files. Skipped when done/statistics.done already exists.
if [[ ! -f done/statistics.done ]]; then
  rg_progress 55 "Calculating alignment statistics"
  "${WD}/alignment_statistics.sh" aligned/accepted_hits.bam aligned/unmapped.bam \
    > aligned/alignment_statistics.out
  # NOTE(review): pipefail is not enabled, so only the exit status of the
  # awk script is checked here; a failure in 'samtools view' goes unnoticed.
  "${SAMTOOLS}" view aligned/accepted_hits.bam \
    | "${WD}/singlecolumnaverager.awk" > aligned/fragments.out
  touch done/statistics.done
fi
6675 |
13 Apr 22 |
nicklas |
109 |
|
6675 |
13 Apr 22 |
nicklas |
110 |
|
6675 |
13 Apr 22 |
nicklas |
## Cufflinks
## Runs Cufflinks on the duplicate-marked alignments using the GTF file
## (CGTF) and the reference genome (CGidx). Output goes to the 'cufflinks'
## directory; stdout is captured in cufflinks/cufflinks.out.
## Skipped when done/cufflinks.done already exists.
if [[ ! -f done/cufflinks.done ]]; then
  rg_progress 60 "Running Cufflinks (${NumThreads} threads)"
  # CufflinksOptions may hold several space-separated options, so it is
  # intentionally left unquoted to be word-split. All paths are quoted.
  # shellcheck disable=SC2086
  "${WD}/stdwrap.sh" "${CUFFLINKS}" \
    -p "${NumThreads}" \
    -o cufflinks \
    --GTF "${CGTF}" \
    --frag-bias-correct "${CGidx}" \
    ${CufflinksOptions} \
    aligned/accepted_hits.bam \
    > cufflinks/cufflinks.out
  touch done/cufflinks.done
fi
6675 |
13 Apr 22 |
nicklas |
124 |
|
6675 |
13 Apr 22 |
nicklas |
## Remap tracking_id
## Filters cufflinks/isoforms.fpkm_tracking through
## fix_cufflinks_tracking_id.sh with the IdRemap file as argument. The result
## is written to a temporary file and then moved over the original, because
## a file cannot be safely read and rewritten in the same redirection.
## Skipped when done/remap.done already exists.
if [[ ! -f done/remap.done ]]; then
  "${WD}/fix_cufflinks_tracking_id.sh" \
    "${IdRemap}" \
    < cufflinks/isoforms.fpkm_tracking \
    > cufflinks/isoforms.fpkm_tracking.tmp
  mv -f cufflinks/isoforms.fpkm_tracking.tmp cufflinks/isoforms.fpkm_tracking
  touch done/remap.done
fi
6675 |
13 Apr 22 |
nicklas |
134 |
|
6675 |
13 Apr 22 |
nicklas |
135 |
## Copy result files to the project archive
## Log/metrics files go to WD; the isoforms table goes to CufflinksFolder,
## which is created if missing and emptied of existing files first.
rg_progress 95 "Copying result files to project archive"
cp -- masked/*.out "${WD}"
cp -- aligned/*.out "${WD}"
cp -- aligned/accepted_hits_picardmetrics.csv "${WD}"
cp -- cufflinks/*.out "${WD}"

mkdir -p -- "${CufflinksFolder}"
# ':?' aborts if CufflinksFolder is unset or empty, so that this can never
# expand to 'rm -rf /*'. The glob stays outside the quotes to be expanded.
rm -rf -- "${CufflinksFolder:?}"/*
cp -- cufflinks/isoforms.fpkm_tracking "${CufflinksFolder}"
# Append the list of archived files to files.out in WD
ls -1 -- "${CufflinksFolder}"/* >> "${WD}/files.out"

rg_progress 99 "Analysis completed, cleaning up..."
6675 |
13 Apr 22 |
nicklas |
147 |
|