Jun 15, 2011

#----------------------------------------------------------------
# genetic stratification
#----------------------------------------------------------------


# this script calculates:
# (1) fraction of genetic variance explained by each multidimennsional scaling (MDS) dimension
# (2) MDS plot


# files required
# *.mds file (from PLINK)
# a file with phenotypic information (e.g. breed, affection status, sex, etc.), with FID and IID fields




# -------- edit this ---------------------
# specify *.mds and file path 
mds.path <- "D:/jqo/Rscripts/disease.mds"
pheno.path <- "D:/jqo/Rscripts/gwas.pheno"
# ----------------------------------------




#----------------------------------------------------------------
# prepare files
#----------------------------------------------------------------


# import
mds.file <- read.delim(mds.path, sep = "")
pheno.file <- read.delim(pheno.path, sep = "")


# merge files
mds <- merge(pheno.file, mds.file, by = c("FID", "IID"))


# number of samples and columns where MDS dimensions start
samples <- length(mds$IID)
# pheno.file adds n colums + 1 from the "SOL" field in the mds.file
start.dimensions <- ncol(pheno.file) + 2


# variance for each dimension
var.mds <- data.frame(apply(mds[, start.dimensions:(start.dimensions + samples - 1)], 2, 
function(x)
{c(variance = var(x, na.rm = TRUE))}))
names(var.mds) <- "varC"


# total variance
all.var.mds <- sum(var.mds[, 1])


# relative variance
var.mds <- transform(var.mds, rel.varC = varC / all.var.mds)




#----------------------------------------------------------------
# plots
#----------------------------------------------------------------


# relative genetic fraction
windows()
plot(var.mds[, 2] * 100,
type = "b",
ylab = "Relative genetic fraction (%)",
xlab = "MDS dimension (Ci)")


# MDS plot for C1 and C2
rel.var.C1 <- paste("C1 (", round(var.mds[1, 2], digit = 3) * 100 , "%)")
rel.var.C2 <- paste("C2 (", round(var.mds[2, 2], digit = 3) * 100 , "%)")
windows()
plot(mds$C1, mds$C2, 
col = mds$disease,
pch = 16,
xlab = rel.var.C1,
ylab = rel.var.C2)