5049 |
23 Oct 18 |
nicklas |
1 |
# |
5049 |
23 Oct 18 |
nicklas |
# R script for generating a plot with mBAF data |
5049 |
23 Oct 18 |
nicklas |
# The plot is divided into two parts: |
5049 |
23 Oct 18 |
nicklas |
# 1) Average mBAF per region for a sample. Regions that differ too much (pval < 0.01) |
5049 |
23 Oct 18 |
nicklas |
# compared to the normal reference samples are highlighted |
5049 |
23 Oct 18 |
nicklas |
# 2) Distribution of average mBAF values compared to a set of reference normal samples |
5049 |
23 Oct 18 |
nicklas |
7 |
# |
5049 |
23 Oct 18 |
nicklas |
# Parameters |
5049 |
23 Oct 18 |
nicklas |
# sampleName: Name of the sample (alignment) |
5049 |
23 Oct 18 |
nicklas |
# mBafFile: Path to tab-separated file with mBAF data. The file should have the following |
5049 |
23 Oct 18 |
nicklas |
# columns: Region, Count, AvgMBaf, PVal |
5049 |
23 Oct 18 |
nicklas |
# datadir: Directory with reference data for the normal samples |
5049 |
23 Oct 18 |
nicklas |
# plotFile: Name of file to create |
5049 |
23 Oct 18 |
nicklas |
# width, height: Size of the plot |
5049 |
23 Oct 18 |
nicklas |
# pval: p-value limit for regions that should be highlighted |
5051 |
24 Oct 18 |
nicklas |
# minCount: If the number of SNPs in a region is below this value that bar is not colored or highlighted |
5049 |
23 Oct 18 |
nicklas |
17 |
# |
5051 |
24 Oct 18 |
nicklas |
mBafPlot = function(sampleName, mBafFile="mbafdata.txt", datadir=".", plotFile="mbafplot.png", width=1600, height=1000, pval=0.01, minCount=20) |
5049 |
23 Oct 18 |
nicklas |
19 |
{ |
5049 |
23 Oct 18 |
nicklas |
# Load mBAF data |
5049 |
23 Oct 18 |
nicklas |
# Tab-separated with headers: Region, Count, AvgMBaf, PVal |
5049 |
23 Oct 18 |
nicklas |
mbafdata <- read.table(mBafFile, header=T, sep="\t", check.names=F, stringsAsFactors=F); |
5049 |
23 Oct 18 |
nicklas |
23 |
|
5049 |
23 Oct 18 |
nicklas |
# Load mBAF data for normal references |
5049 |
23 Oct 18 |
nicklas |
# Tab-separated with headers: Sample, 1p, 1q, ... |
5049 |
23 Oct 18 |
nicklas |
normalrefs <- read.table(paste(datadir, "normal-refs.txt", sep="/"), header=T, sep="\t", check.names=F, stringsAsFactors=F); |
5049 |
23 Oct 18 |
nicklas |
# We only need the mBAF values as one long list of values |
5049 |
23 Oct 18 |
nicklas |
normalrefs <- na.omit(unlist(normalrefs[,2:length(names(normalrefs))], use.names=F)); |
5049 |
23 Oct 18 |
nicklas |
29 |
|
5049 |
23 Oct 18 |
nicklas |
png(plotFile, width=width, height=height); |
5049 |
23 Oct 18 |
nicklas |
31 |
|
5049 |
23 Oct 18 |
nicklas |
# Prepare for plottting 2 plots in a single window above each other |
5049 |
23 Oct 18 |
nicklas |
# and make room for adding title |
5049 |
23 Oct 18 |
nicklas |
par(mfrow=c(2, 1), oma=c(0,0,2,0), mar=c(4.5, 4.5, 2.0, 2.5)); |
5049 |
23 Oct 18 |
nicklas |
35 |
|
5049 |
23 Oct 18 |
nicklas |
# *** Plot 1: Bar plot with Region on X axis and average mBAF on Y axis |
5051 |
24 Oct 18 |
nicklas |
# Regions with low p-value are highlighted (but only if we have enough SNPs) |
5051 |
24 Oct 18 |
nicklas |
barcolors <- barcolors <- ifelse(mbafdata$Count < minCount, "white", ifelse(mbafdata$PVal < pval, "darkblue", "gray")); |
5051 |
24 Oct 18 |
nicklas |
barborders <- ifelse(mbafdata$Count < minCount, "gray", "black"); |
5051 |
24 Oct 18 |
nicklas |
bp <- barplot(mbafdata$AvgMBaf, main="Average mBAF per region", cex.main=1.25, font.main=1, ylab="mBAF", ylim=c(0.5,0.9), names.arg=mbafdata$Region, col=barcolors, border=barborders, xpd=F, xaxs = "i", xlim=c(0, 1.2*length(mbafdata$AvgMBaf))); |
5049 |
23 Oct 18 |
nicklas |
# Add Count (of SNPS making up the average) above each bar |
5051 |
24 Oct 18 |
nicklas |
text(x=bp, y=ifelse(is.na(mbafdata$AvgMBaf), 0.5, mbafdata$AvgMBaf), label=mbafdata$Count, cex=0.85, pos=3); |
5049 |
23 Oct 18 |
nicklas |
# Draw x-axis at y=0.5 |
5049 |
23 Oct 18 |
nicklas |
axis(1, pos=0.5, at=c(0,1.2*length(mbafdata$AvgMBaf)), tick=T, labels=F); |
5049 |
23 Oct 18 |
nicklas |
45 |
|
5049 |
23 Oct 18 |
nicklas |
# *** Plot 2: Density plot of the average mBAF values |
5051 |
24 Oct 18 |
nicklas |
densitySample = density(na.omit(mbafdata$AvgMBaf)); |
5049 |
23 Oct 18 |
nicklas |
densityNormal = density(normalrefs); |
5049 |
23 Oct 18 |
nicklas |
maxY <- max(densitySample$y, densityNormal$y); |
5049 |
23 Oct 18 |
nicklas |
plot(densitySample, main="Distribution of mBAF in sample vs. normals", cex.main=1.25, font.main=1, xlab="mBAF", ylab="", yaxt="n", bty="n", xaxs = "i", xlim=c(0.5, 0.9), ylim=c(0, maxY), lwd=2); |
5049 |
23 Oct 18 |
nicklas |
# Fill sample distribution with semi-transparant gray |
5049 |
23 Oct 18 |
nicklas |
polygon(densitySample, col=adjustcolor("lightgray", alpha.f=0.5)); |
5049 |
23 Oct 18 |
nicklas |
# Draw normal reference distribution with dashed line |
5049 |
23 Oct 18 |
nicklas |
lines(densityNormal, col="red", lty=2, lwd=2); |
5049 |
23 Oct 18 |
nicklas |
# Mark individual mBAF values with small ticks on the x-axis |
5049 |
23 Oct 18 |
nicklas |
rug(mbafdata$AvgMBaf) |
5049 |
23 Oct 18 |
nicklas |
57 |
|
5049 |
23 Oct 18 |
nicklas |
# Add title |
5049 |
23 Oct 18 |
nicklas |
title(main=sampleName, outer=T, cex.main=1.75); |
5049 |
23 Oct 18 |
nicklas |
off <- dev.off(); |
5049 |
23 Oct 18 |
nicklas |
61 |
} |