6873 |
17 Nov 22 |
nicklas |
#!/usr/bin/env Rscript |
6873 |
17 Nov 22 |
nicklas |
2 |
# |
6873 |
17 Nov 22 |
nicklas |
# A script for generating TSV files with beta-values for a |
6873 |
17 Nov 22 |
nicklas |
# single pair of IDAT files. |
6886 |
24 Nov 22 |
nicklas |
5 |
# |
6886 |
24 Nov 22 |
nicklas |
# This script is an adaption for Reggie of "normalizeEPIC_minfi_EA_20211005.R" |
6886 |
24 Nov 22 |
nicklas |
# originally developed by Mattias Aine (mattias.aine@med.lu.se), and |
6886 |
24 Nov 22 |
nicklas |
# modified by Elsa Arbajian (elsa.arbajian@med.lu.se) |
6886 |
24 Nov 22 |
nicklas |
9 |
# |
6873 |
17 Nov 22 |
nicklas |
10 |
|
6873 |
17 Nov 22 |
nicklas |
args <- commandArgs(trailingOnly=TRUE) |
6873 |
17 Nov 22 |
nicklas |
idatBase <- args[1] # Base path to a pair of IDAT files (eg. path without '_Red/Grn.idat' |
6873 |
17 Nov 22 |
nicklas |
outDir <- args[2] # Path to directory where files are saved |
6873 |
17 Nov 22 |
nicklas |
manifestFile <- args[3] # Path to EPIC.hg38.manifest.tsv |
6873 |
17 Nov 22 |
nicklas |
maskFile <- args[4] # Path to EPIC.hg38.mask.tsv |
6873 |
17 Nov 22 |
nicklas |
sampleName <- args[5] # External name of Methylation items |
6874 |
17 Nov 22 |
nicklas |
progress <- args[6] # Path to file for reporting progress (Reggie-style) |
6873 |
17 Nov 22 |
nicklas |
18 |
|
6874 |
17 Nov 22 |
nicklas |
cat('15 Initializing R/Minfi', file=progress) |
6873 |
17 Nov 22 |
nicklas |
library("minfi") |
6873 |
17 Nov 22 |
nicklas |
21 |
|
6873 |
17 Nov 22 |
nicklas |
# Read IDAT files |
6874 |
17 Nov 22 |
nicklas |
cat('20 Reading IDAT files', file=progress) |
6873 |
17 Nov 22 |
nicklas |
mIdat <- read.metharray(idatBase) |
6873 |
17 Nov 22 |
nicklas |
25 |
|
6873 |
17 Nov 22 |
nicklas |
# Get QC |
6874 |
17 Nov 22 |
nicklas |
cat('30 Checking QC', file=progress) |
6873 |
17 Nov 22 |
nicklas |
mRaw <- preprocessRaw(mIdat) |
6873 |
17 Nov 22 |
nicklas |
qc <- getQC(mRaw) |
6875 |
18 Nov 22 |
nicklas |
detP <- detectionP(mIdat) |
6873 |
17 Nov 22 |
nicklas |
31 |
|
6878 |
18 Nov 22 |
nicklas |
FLAG <- NULL |
6878 |
18 Nov 22 |
nicklas |
if ((qc$mMed + qc$uMed)/2 <10.5) FLAG <- 'Mean signal<10.5' |
6878 |
18 Nov 22 |
nicklas |
if (is.null(FLAG)) |
6878 |
18 Nov 22 |
nicklas |
35 |
{ |
6878 |
18 Nov 22 |
nicklas |
if (sum(detP>0.01) / nrow(mRaw) > 0.01) FLAG <- '>1% probes with p>0.01' |
6878 |
18 Nov 22 |
nicklas |
37 |
} |
6878 |
18 Nov 22 |
nicklas |
38 |
|
6873 |
17 Nov 22 |
nicklas |
# Noob Normalization since we want to use single-sample normalization |
6874 |
17 Nov 22 |
nicklas |
cat('40 Normalizing with NOOB', file=progress) |
6873 |
17 Nov 22 |
nicklas |
mNoob <- preprocessNoob(mIdat, dyeMethod='single') |
6873 |
17 Nov 22 |
nicklas |
betaData <- getBeta(mNoob) |
6873 |
17 Nov 22 |
nicklas |
43 |
|
6873 |
17 Nov 22 |
nicklas |
# Load probe manifest and mask information |
6874 |
17 Nov 22 |
nicklas |
cat('60 Loading EPIC-850K manifest', file=progress) |
6873 |
17 Nov 22 |
nicklas |
hm.manifest <- read.table(manifestFile, sep='\t', header=TRUE, stringsAsFactors=FALSE) |
6873 |
17 Nov 22 |
nicklas |
rownames(hm.manifest) <- hm.manifest$Probe_ID |
6873 |
17 Nov 22 |
nicklas |
hm.mask <- read.table(maskFile, sep='\t', header=TRUE, stringsAsFactors=FALSE) |
6873 |
17 Nov 22 |
nicklas |
rownames(hm.mask) <- hm.mask$probeID |
6873 |
17 Nov 22 |
nicklas |
50 |
|
6873 |
17 Nov 22 |
nicklas |
# Only keep data for probes that are intersecting |
6873 |
17 Nov 22 |
nicklas |
isect <- intersect(rownames(betaData), rownames(hm.manifest)) |
6873 |
17 Nov 22 |
nicklas |
betaData <- betaData[isect,,drop=FALSE] |
6873 |
17 Nov 22 |
nicklas |
hm.mask <- hm.mask[isect,] |
6873 |
17 Nov 22 |
nicklas |
hm.manifest <- hm.manifest[isect,] |
6873 |
17 Nov 22 |
nicklas |
56 |
|
6873 |
17 Nov 22 |
nicklas |
# Remove data for probes that have MASK_general=TRUE |
6873 |
17 Nov 22 |
nicklas |
keep <- ! hm.mask$MASK_general |
6873 |
17 Nov 22 |
nicklas |
betaData <- betaData[keep,,drop=FALSE] |
6873 |
17 Nov 22 |
nicklas |
hm.manifest <- hm.manifest[keep,] |
6873 |
17 Nov 22 |
nicklas |
61 |
|
6874 |
17 Nov 22 |
nicklas |
cat('80 Adjusting beta-values', file=progress) |
6873 |
17 Nov 22 |
nicklas |
63 |
|
6873 |
17 Nov 22 |
nicklas |
##Beta adjustment script below adapted from Markus Ringner's (Oncology & Pathology, LU) script "beta_peak_normalize.R" |
6873 |
17 Nov 22 |
nicklas |
##Citation: Holm et al. 2016 (Breast Cancer Res) |
6873 |
17 Nov 22 |
nicklas |
66 |
## --------------------------------------------- |
6873 |
17 Nov 22 |
nicklas |
##updated function for handling samples with non-canonical beta distributions |
6873 |
17 Nov 22 |
nicklas |
findMaxima <- function(x, granularity=512, bw=.02, adjust=1, from=0, to=1, kernel="epanechnikov", verbose=FALSE) |
6873 |
17 Nov 22 |
nicklas |
69 |
{ |
6873 |
17 Nov 22 |
nicklas |
aa<-density(x,kernel=kernel,n=granularity,bw=bw,adjust=adjust,from=from,to=to,na.rm=T) |
6873 |
17 Nov 22 |
nicklas |
res<-vector(length=length(aa$x)) |
6873 |
17 Nov 22 |
nicklas |
for(i in 2:length(res[-1])) { |
6873 |
17 Nov 22 |
nicklas |
res[i]<-aa$y[i]>aa$y[i-1] & aa$y[i]>aa$y[i+1] |
6873 |
17 Nov 22 |
nicklas |
74 |
} |
6873 |
17 Nov 22 |
nicklas |
y<-aa$x[res][order(aa$y[res],decreasing=T)] |
6873 |
17 Nov 22 |
nicklas |
if (verbose) |
6873 |
17 Nov 22 |
nicklas |
77 |
{ |
6873 |
17 Nov 22 |
nicklas |
cat("found peaks at:",y,"\n") |
6873 |
17 Nov 22 |
nicklas |
cat("choosing min/max:",range(y),"\n") ###CHANGED here to pick out min-max peaks as these are typically the correct ones |
6873 |
17 Nov 22 |
nicklas |
80 |
} |
6873 |
17 Nov 22 |
nicklas |
range(y) |
6873 |
17 Nov 22 |
nicklas |
82 |
} |
6873 |
17 Nov 22 |
nicklas |
83 |
|
6873 |
17 Nov 22 |
nicklas |
# Separate lists for Infinium I/II probes |
6873 |
17 Nov 22 |
nicklas |
probes_I <- hm.manifest$type=="I" |
6873 |
17 Nov 22 |
nicklas |
probes_II <- hm.manifest$type=="II" |
6873 |
17 Nov 22 |
nicklas |
87 |
|
6873 |
17 Nov 22 |
nicklas |
##define results matrix |
6873 |
17 Nov 22 |
nicklas |
meth.cal <- matrix(0, ncol=ncol(betaData), nrow=nrow(betaData)) |
6873 |
17 Nov 22 |
nicklas |
rownames(meth.cal) <- rownames(betaData) |
6873 |
17 Nov 22 |
nicklas |
colnames(meth.cal) <- colnames(betaData) |
6873 |
17 Nov 22 |
nicklas |
92 |
|
6873 |
17 Nov 22 |
nicklas |
for (i in 1:ncol(betaData)) |
6873 |
17 Nov 22 |
nicklas |
94 |
{ |
6873 |
17 Nov 22 |
nicklas |
sampleId <- colnames(betaData)[i] |
6873 |
17 Nov 22 |
nicklas |
96 |
|
6873 |
17 Nov 22 |
nicklas |
##do pre-calibaration I vs II plot |
6874 |
17 Nov 22 |
nicklas |
pdf(paste0(outDir, '/adjustmentPlots.pdf'), paper='a4', width=8, height=11, title=paste0('Beta-value adjustment: ', sampleName)) |
6873 |
17 Nov 22 |
nicklas |
par(mfrow=c(2,1),font=2,font.sub=2,font.lab=2,font.axis=2,las=1,mar=c(4,3,2,2)) |
6873 |
17 Nov 22 |
nicklas |
plot(density(betaData[probes_I,i],kernel="epanechnikov",bw=0.02,from=0,to=1,na.rm=T),main=sampleName,xlim=c(-.05,1.05),col="orange",lwd=3,xlab="pre-adjustment beta",cex.main=.9,ylim=c(0,8)) |
6873 |
17 Nov 22 |
nicklas |
lines(density(betaData[probes_II,i],kernel="epanechnikov",bw=0.02,from=0,to=1,na.rm=T),col="darkgreen",lwd=3) |
6873 |
17 Nov 22 |
nicklas |
102 |
|
6873 |
17 Nov 22 |
nicklas |
##define maxima |
6873 |
17 Nov 22 |
nicklas |
tempMax_I <- findMaxima(betaData[probes_I,i]) |
6873 |
17 Nov 22 |
nicklas |
tempMax_II <- findMaxima(betaData[probes_II,i]) |
6873 |
17 Nov 22 |
nicklas |
##function not oriented so peak near zero can be in vector slot 2 -> fix |
6873 |
17 Nov 22 |
nicklas |
tempMax_I <- tempMax_I[order(tempMax_I)] |
6873 |
17 Nov 22 |
nicklas |
tempMax_II <- tempMax_II[order(tempMax_II)] |
6873 |
17 Nov 22 |
nicklas |
##add plot "FLAG" if peaks not well separated and close to right place.. |
6878 |
18 Nov 22 |
nicklas |
if (is.null(FLAG)) |
6873 |
17 Nov 22 |
nicklas |
111 |
{ |
6878 |
18 Nov 22 |
nicklas |
if (tempMax_I[2] <.8 | tempMax_II[2] <.8) |
6878 |
18 Nov 22 |
nicklas |
113 |
{ |
6878 |
18 Nov 22 |
nicklas |
FLAG <- 'Peak Meth <0.8' |
6878 |
18 Nov 22 |
nicklas |
115 |
} |
6878 |
18 Nov 22 |
nicklas |
else if (tempMax_I[1] >.2 | tempMax_II[1] >.2) |
6878 |
18 Nov 22 |
nicklas |
117 |
{ |
6878 |
18 Nov 22 |
nicklas |
FLAG <- 'Peak UMeth >0.2' |
6878 |
18 Nov 22 |
nicklas |
119 |
} |
6873 |
17 Nov 22 |
nicklas |
120 |
} |
6873 |
17 Nov 22 |
nicklas |
121 |
|
6873 |
17 Nov 22 |
nicklas |
##add to plot |
6873 |
17 Nov 22 |
nicklas |
abline(v=tempMax_I,col="orange",lwd=2,lty=2) |
6873 |
17 Nov 22 |
nicklas |
abline(v=tempMax_II,col="darkgreen",lwd=2,lty=2) |
6873 |
17 Nov 22 |
nicklas |
text(x=.5,y=c(3.5,3),labels=c("Infinium_I ","Infinium_II"),col=c("orange","darkgreen"),font=2,cex=1) |
6878 |
18 Nov 22 |
nicklas |
if (!is.null(FLAG)) text(x=.5,y=c(6,5.25),labels=c("FLAGGED", FLAG),col=2,font=2,cex=1.5) |
6873 |
17 Nov 22 |
nicklas |
127 |
|
6873 |
17 Nov 22 |
nicklas |
##do calibration scale both to same range.. |
6873 |
17 Nov 22 |
nicklas |
meth.cal[probes_I,i] <- (betaData[probes_I,i]-tempMax_I[1])/(tempMax_I[2]-tempMax_I[1]) |
6873 |
17 Nov 22 |
nicklas |
meth.cal[probes_II,i] <- (betaData[probes_II,i]-tempMax_II[1])/(tempMax_II[2]-tempMax_II[1]) |
6873 |
17 Nov 22 |
nicklas |
131 |
|
6873 |
17 Nov 22 |
nicklas |
##cap ends so that nothing over 1 or below zero.. |
6873 |
17 Nov 22 |
nicklas |
meth.cal[probes_I,i][meth.cal[probes_I,i] < 0 ] <- 0 |
6873 |
17 Nov 22 |
nicklas |
meth.cal[probes_I,i][meth.cal[probes_I,i] > 1 ] <- 1 |
6873 |
17 Nov 22 |
nicklas |
meth.cal[probes_II,i][meth.cal[probes_II,i] < 0 ] <- 0 |
6873 |
17 Nov 22 |
nicklas |
meth.cal[probes_II,i][meth.cal[probes_II,i] > 1 ] <- 1 |
6873 |
17 Nov 22 |
nicklas |
137 |
|
6873 |
17 Nov 22 |
nicklas |
##do post-calibration I vs II plot |
6873 |
17 Nov 22 |
nicklas |
plot(density(meth.cal[probes_I,i],kernel="epanechnikov",bw=0.02,from=0,to=1,na.rm=T),main='',xlim=c(-.05,1.05),col="orange",lwd=3,xlab="post-adjustment beta",cex.main=.9,ylim=c(0,8)) |
6873 |
17 Nov 22 |
nicklas |
lines(density(meth.cal[probes_II,i],kernel="epanechnikov",bw=0.02,from=0,to=1,na.rm=T),col="darkgreen",lwd=3) |
6873 |
17 Nov 22 |
nicklas |
text(x=.5,y=c(3.5,3),labels=c("Infinium_I ","Infinium_II"),col=c("orange","darkgreen"),font=2,cex=1) |
6873 |
17 Nov 22 |
nicklas |
abline(v=findMaxima(meth.cal[probes_I,i],verbose=F),col="orange",lwd=2,lty=2) |
6873 |
17 Nov 22 |
nicklas |
abline(v=findMaxima(meth.cal[probes_II,i],verbose=F),col="darkgreen",lwd=2,lty=2) |
6873 |
17 Nov 22 |
nicklas |
dev.off() |
6873 |
17 Nov 22 |
nicklas |
145 |
} |
6873 |
17 Nov 22 |
nicklas |
146 |
|
6873 |
17 Nov 22 |
nicklas |
147 |
|
6873 |
17 Nov 22 |
nicklas |
## End of Beta adjustment script |
6873 |
17 Nov 22 |
nicklas |
149 |
## ----------------------------------------------- |
6873 |
17 Nov 22 |
nicklas |
150 |
|
6873 |
17 Nov 22 |
nicklas |
##save and exit |
6874 |
17 Nov 22 |
nicklas |
cat('90 Saving beta-values to TSV', file=progress) |
6873 |
17 Nov 22 |
nicklas |
for (i in 1:ncol(betaData)) |
6873 |
17 Nov 22 |
nicklas |
154 |
{ |
6873 |
17 Nov 22 |
nicklas |
##normalized beta matrix |
6875 |
18 Nov 22 |
nicklas |
write.table(betaData[,i], file=gzfile(paste0(outDir, '/beta_normalized.tsv.gz')), sep='\t', row.names=TRUE, col.names=FALSE, quote=FALSE) |
6873 |
17 Nov 22 |
nicklas |
157 |
|
6873 |
17 Nov 22 |
nicklas |
##adjusted beta matrix |
6875 |
18 Nov 22 |
nicklas |
write.table(meth.cal[,i], file=gzfile(paste0(outDir, '/beta_adjusted.tsv.gz')), sep='\t', row.names=TRUE, col.names=FALSE, quote=FALSE) |
6873 |
17 Nov 22 |
nicklas |
160 |
} |
6875 |
18 Nov 22 |
nicklas |
161 |
|
6875 |
18 Nov 22 |
nicklas |
sink(paste0(outDir, '/stats.out')) |
6875 |
18 Nov 22 |
nicklas |
cat('MedianMeth', qc[1, 1], fill=TRUE) |
6875 |
18 Nov 22 |
nicklas |
cat('MedianUnmeth', qc[1, 2], fill=TRUE) |
6875 |
18 Nov 22 |
nicklas |
cat('ProbesRaw', nrow(mRaw), fill=TRUE) |
6875 |
18 Nov 22 |
nicklas |
cat('ProbesBad', sum(detP>0.01), fill=TRUE) |
6875 |
18 Nov 22 |
nicklas |
cat('ProbesI', sum(probes_I), fill=TRUE) |
6875 |
18 Nov 22 |
nicklas |
cat('ProbesII', sum(probes_II), fill=TRUE) |
6876 |
18 Nov 22 |
nicklas |
cat('PeakUnmethI', tempMax_I[1], fill=TRUE) |
6876 |
18 Nov 22 |
nicklas |
cat('PeakMethI', tempMax_I[2], fill=TRUE) |
6876 |
18 Nov 22 |
nicklas |
cat('PeakUnmethII', tempMax_II[1], fill=TRUE) |
6876 |
18 Nov 22 |
nicklas |
cat('PeakMethII', tempMax_II[2], fill=TRUE) |
6878 |
18 Nov 22 |
nicklas |
if (!is.null(FLAG)) cat('Flag', FLAG, fill=TRUE) |
6875 |
18 Nov 22 |
nicklas |
sink() |
6875 |
18 Nov 22 |
nicklas |
175 |
|