# Setup code -------


## @knitr sessionSetUp
library(knitr)
opts_chunk$set(warning=FALSE, echo=FALSE, message=FALSE, cache = TRUE, cache.lazy = FALSE)
system("mkdir -p ./figures/")


## @knitr loadLibraries
library(ggplot2)
library(reshape2)
library(snm)
library(corpcor)
library(xtable)
library(magrittr)
library(openxlsx)
library(plyr)
library(minfi)
library(RosMapMethylBMIQ)



## @knitr sourceFunctions
source("code/methylation_functions/fs.R")
source("code/methylation_functions/anovaDF.R")
source("code/methylation_functions/eigenVariable.R")
source("code/methylation_functions/variablesRelation.R")
source("code/methylation_functions/metadata_summary.R")
source("code/methylation_functions/varsInfluence.R")
source("code/methylation_functions/percentVariance.R")
source("code/methylation_functions/matrixOutliers.R")
source("code/methylation_functions/pcScatter.R")


# Tab1 Code ----


## @knitr loadData
data("rosmap.methyl.bmiq")


## @knitr oneWayStats
metadata <- as.data.frame(pData(rosmap.methyl.bmiq))
temp <- do.call("rbind", lapply(metadata, summaryTable))
write.xlsx(temp, file = "results/metadata_descriptive_stats.xlsx",
           row.names = TRUE)


## @knitr twoWayStats
# remove irrelevant variables, such as order id
metadata <- droplevels(metadata[, -c(1:3)])
var.relation <- variablesRelation(metadata)
write.xlsx(var.relation$table,
           file = "results/metadata_variables_relation.xlsx",
           row.names  = FALSE)


## @knitr twoWayStatsPlot
plot(var.relation$plot)


## @knitr PositionVsID
# arrays were run in 2 different batches
ggplot(metadata, aes(Sentrix_ID, Sentrix_Position, fill = batch)) +
  geom_tile(color = "white") +
  theme(axis.text.x = element_text(angle = 90)) +
  scale_fill_brewer(palette = "Set1")


## @knitr PlateVsID
# samples were also amplified in 2 different batches
ggplot(metadata, aes(Sample_Plate, Sentrix_ID, fill = batch)) +
  geom_tile(color = "white") +
  theme(axis.text.x = element_text(angle = 90)) +
  scale_fill_brewer(palette = "Set1")


## @knitr PlateVsWell
ggplot(metadata, aes(Sample_Row, Sentrix_ID, fill = Sample_Plate)) +
  geom_tile(color = "white") +
  theme(axis.text.x = element_text(angle = 90)) +
  scale_fill_brewer(palette = "Paired")


## Tab2 code ----


## @knitr SVDint
data.raw <- getBeta(rosmap.methyl.bmiq)
data.int <- snm(data.raw, bio.var = NULL, adj.var = NULL,
                int.var = data.frame(as.factor(1:ncol(data.raw))))
u.int <- fs(data.int$norm.dat)
temp <- data.frame(variance = u.int$d, index = seq_len(length(u.int$d)))
ggplot2::ggplot(temp, aes(index, variance)) + ggplot2::geom_point(shape = 20) +
  ggtitle("Percent variance explained in BMIQ data")



## @knitr SVDout
matrixOutliers(data.int$norm.dat, u.int$v[,1])


## @knitr rawVSdata
res <- eigenVariable(u.int$v[,1:5], metadata)


## @knitr rawVSdataTable
print(xtable(res$result, display = c("s",rep("E", 5), "s")),
      type="html",
      html.table.attributes=c('class="table table-striped"'),
      comment=FALSE,
      timestamp=NULL, include.rownames = FALSE)


## @knitr rawVSdataPlot
res$plot


## @knitr rawPC1vsPC2batch
scatter.res <- pcScatter(u.int$v, metadata, c(1,2), "batch")
scatter.res$plot



## @knitr rawPC1SentrixByBatch
ggplot(scatter.res$eigen.meta, aes(Sentrix_ID, Eigen1, fill = batch)) +
  geom_boxplot() +
  facet_grid(.~batch, scales = "free") +
  theme(axis.text.x = element_text(angle = 90))


## @knitr rawPC2SentrixByBatch
ggplot(scatter.res$eigen.meta, aes(Sentrix_ID, Eigen2, fill = batch)) +
  geom_boxplot() +
  facet_grid(.~batch, scales = "free") +
  theme(axis.text.x = element_text(angle = 90))


## @knitr rawPC1NNLS
varsInfluence(u.int$v, metadata, 1, "NNLS")


## @knitr rawPC1SPbyBatch
ggplot(scatter.res$eigen.meta, aes(Sample_Plate, Eigen1, fill = batch)) +
  geom_boxplot() +
  facet_grid(.~batch, scales = "free") +
  theme(axis.text.x = element_text(angle = 90))


## @knitr rawPC2SPbyBatch
ggplot(scatter.res$eigen.meta, aes(Sample_Plate, Eigen2, fill = batch)) +
  geom_boxplot() +
  facet_grid(.~batch, scales = "free") +
  theme(axis.text.x = element_text(angle = 90))



## Tab3 code ----

## @knitr removeMissing
data.raw <- getBeta(rosmap.methyl.bmiq)
# Remove missing values from CETS, amyloid, pmi and cogn_global_lv
idx <- c(which(is.na(metadata$CETS)), which(is.na(metadata$pmi)),
         which(is.na(metadata$amyloid)), which(is.na(metadata$cogn_global_lv)))
data.small <- data.raw[, -idx]
meta.small <- metadata[-idx, ]
# No outliers after removing the missing values so lets go ahead and
# fit the model


## @knitr normMod
bio.var <- model.matrix(~ amyloid + cogn_global_lv + factor(braaksc), data = meta.small)
adj.var <- model.matrix(~factor(Sentrix_ID) + factor(sex) + age_death + pmi +
                           CETS, data = meta.small)
data.norm <- snm(data.small, bio.var = bio.var, adj.var = adj.var,
                  int.var = data.frame(as.factor(1:ncol(data.small))), rm.adj=TRUE,
                 verbose = FALSE,
                 diagnose = FALSE)

## @knitr varExplainedMod
u.mod <- fs(data.norm$norm.dat)
temp <- data.frame(variance = u.mod$d, index = seq_len(length(u.mod$d)))
ggplot2::ggplot(temp, aes(index, variance)) + ggplot2::geom_point(shape = 20) +
  ggtitle("Percent variance after the snm")


## @knitr outliersMod
matrixOutliers(data.norm$norm.dat, u.mod$v[,1])


## @knitr tableMod
res.norm <- eigenVariable(u.mod$v[,1:5], meta.small)
write.xlsx(res.norm$result, file = "./results/PC-variables_normalization_snm.xlsx")


## @knitr plotMod
res.norm$plot


## @knitr modPC1vsPC2batch
scatter.res <- pcScatter(u.mod$v, meta.small, c(1,2), "batch")
scatter.res$plot


## @knitr modPC2SentrixbyBatch
ggplot(scatter.res$eigen.meta, aes(Sentrix_ID, Eigen2, fill = batch)) +
  geom_boxplot() +
  facet_grid(.~batch, scales = "free") +
  theme(axis.text.x = element_text(angle = 90))


## @knitr modPC2SPbyBatch
ggplot(scatter.res$eigen.meta, aes(Sample_Plate, Eigen2, fill = batch)) +
  geom_boxplot() +
  facet_grid(.~batch, scales = "free") +
  theme(axis.text.x = element_text(angle = 90))


## @knitr normMod2
adj.var2 <- model.matrix(~factor(Sentrix_ID) + factor(Sentrix_Position) +
                          factor(sex) + age_death + pmi +
                          CETS, data = meta.small)
data.norm2 <- snm(data.small, bio.var = bio.var, adj.var = adj.var2,
                 int.var = data.frame(as.factor(1:ncol(data.small))), rm.adj=TRUE,
                 verbose = FALSE,
                 diagnose = FALSE)

## @knitr varExplainedMod2
u.mod2 <- fs(data.norm2$norm.dat)
temp <- data.frame(variance = u.mod2$d, index = seq_len(length(u.mod2$d)))
ggplot2::ggplot(temp, aes(index, variance)) + ggplot2::geom_point(shape = 20) +
  ggtitle("Percent variance after the snm")


## @knitr outliersMod2
matrixOutliers(data.norm2$norm.dat, u.mod2$v[,1])


## @knitr tableMod2
res.norm2 <- eigenVariable(u.mod2$v[,1:5], meta.small)
write.xlsx(res.norm2$result, file = "./results/PC-variables_normalization_snm2.xlsx")


## @knitr plotMod2
res.norm2$plot


## @knitr modPC1vsPC2batch2
scatter.res2 <- pcScatter(u.mod2$v, meta.small, c(1,2), "batch")
scatter.res2$plot


## @knitr modPC2SentrixbyBatch2
ggplot(scatter.res2$eigen.meta, aes(Sentrix_ID, Eigen2, fill = batch)) +
  geom_boxplot() +
  facet_grid(.~batch, scales = "free") +
  theme(axis.text.x = element_text(angle = 90))


## @knitr modPC2SPbyBatch2
ggplot(scatter.res2$eigen.meta, aes(Sample_Plate, Eigen2, fill = batch)) +
  geom_boxplot() +
  facet_grid(.~batch, scales = "free") +
  theme(axis.text.x = element_text(angle = 90))


## @knitr normMod3
adj.var3 <- model.matrix(~factor(Sentrix_ID) + factor(Sentrix_Position) +
                           factor(sex) + age_death + pmi +
                           CETS + NNLS, data = meta.small)
data.norm3 <- snm(data.small, bio.var = bio.var, adj.var = adj.var3,
                  int.var = data.frame(as.factor(1:ncol(data.small))), rm.adj=TRUE,
                  verbose = FALSE,
                  diagnose = FALSE)


## @knitr varExplainedMod3
u.mod3 <- fs(data.norm3$norm.dat)
temp <- data.frame(variance = u.mod3$d, index = seq_len(length(u.mod3$d)))
ggplot2::ggplot(temp, aes(index, variance)) + ggplot2::geom_point(shape = 20) +
  ggtitle("Percent variance after the snm")


## @knitr outliersMod3
matrixOutliers(data.norm3$norm.dat, u.mod3$v[,1])


## @knitr tableMod3
res.norm3 <- eigenVariable(u.mod3$v[,1:5], meta.small)
write.xlsx(res.norm3$result, file = "./results/PC-variables_normalization_snm3.xlsx")


## @knitr plotMod3
res.norm3$plot


## @knitr modPC1vsPC2batch3
scatter.res3 <- pcScatter(u.mod3$v, meta.small, c(1,2), "batch")
scatter.res3$plot


## @knitr modPC2SentrixbyBatch3
ggplot(scatter.res3$eigen.meta, aes(Sentrix_ID, Eigen2, fill = batch)) +
  geom_boxplot() +
  facet_grid(.~batch, scales = "free") +
  theme(axis.text.x = element_text(angle = 90))


## @knitr modPC2SPbyBatch3
ggplot(scatter.res3$eigen.meta, aes(Sample_Plate, Eigen2, fill = batch)) +
  geom_boxplot() +
  facet_grid(.~batch, scales = "free") +
  theme(axis.text.x = element_text(angle = 90))


