## ----include = FALSE----------------------------------------------------------
startTime <- Sys.time()
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  crop = NULL
)

library(dnaEPICO)

can_run_preprocessing <- requireNamespace("minfiData", quietly = TRUE) &&
  requireNamespace("IlluminaHumanMethylation450kmanifest", quietly = TRUE) &&
  requireNamespace("IlluminaHumanMethylation450kanno.ilmn12.hg19", quietly = TRUE)

can_run_sva <- can_run_preprocessing
can_run_glm <- requireNamespace(
  "IlluminaHumanMethylation450kanno.ilmn12.hg19",
  quietly = TRUE
)
can_run_glmm <- can_run_glm && requireNamespace("lmerTest", quietly = TRUE)

## ----eval = FALSE-------------------------------------------------------------
# if (!requireNamespace("BiocManager", quietly = TRUE)) {
#   install.packages("BiocManager")
# }
# 
# BiocManager::install("dnaEPICO")
# 
# BiocManager::valid()

## ----eval = FALSE, message = FALSE--------------------------------------------
# library("dnaEPICO")

## ----makefile-example---------------------------------------------------------
makefilePath <- extractMake(destDir = tempdir(), overwrite = TRUE)
basename(makefilePath)

## ----preprocessing-pipeline-inputs, eval = can_run_preprocessing, message = FALSE, warning = FALSE----
preprocessing_inputs <- dnaEPICO:::exampleMinfiIdatInputsDnaEpico(n = 6L)

names(preprocessing_inputs)
preprocessing_inputs$tempDir
basename(preprocessing_inputs$phenoFile)
head(basename(list.files(preprocessing_inputs$idatFolder, full.names = TRUE)), 4)
basename(preprocessing_inputs$crossReactivePath)

## ----preprocessing-pipeline, eval = can_run_preprocessing, message = FALSE, warning = FALSE----
preprocessPipelineResult <- preprocessingMinfiEwasWater(
  phenoFile = preprocessing_inputs$phenoFile,
  idatFolder = preprocessing_inputs$idatFolder,
  outputLogs = file.path(preprocessing_inputs$tempDir, "logs"),
  nSamples = 6,
  SampleID = "Sample_Name",
  arrayType = preprocessing_inputs$arrayType,
  annotationVersion = preprocessing_inputs$annotationVersion,
  scriptLabel = "preprocessingMinfiEwasWater",
  baseDataFolder = file.path(preprocessing_inputs$tempDir, "rData"),
  figureBaseDir = file.path(preprocessing_inputs$tempDir, "figures"),
  detPThreshold = 1,
  normMethods = "quantile",
  sexColumn = "Sex",
  pvalThreshold = 1,
  chrToRemove = "",
  snpsToRemove = "SBE",
  mafThreshold = 1,
  crossReactivePath = preprocessing_inputs$crossReactivePath,
  plotGroupVar = "Sex",
  lcRef = "saliva",
  phenoOrder = "Sample_Name;Sex;Basename;Sentrix_ID;Sentrix_Position",
  lcPhenoDir = file.path(
    preprocessing_inputs$tempDir,
    "data",
    "preprocessingMinfiEwasWater"
  ),
  display = FALSE,
  verbose = FALSE,
  logs = TRUE,
  saveOutputs = TRUE
)

preprocess_paths <- c(
  phenoLC = file.path(
    preprocessing_inputs$tempDir,
    "data",
    "preprocessingMinfiEwasWater",
    "phenoLC.csv"
  ),
  rgset = file.path(
    preprocessing_inputs$tempDir,
    "rData",
    "preprocessingMinfiEwasWater",
    "objects",
    "RGSet.RData"
  ),
  beta = file.path(
    preprocessing_inputs$tempDir,
    "rData",
    "preprocessingMinfiEwasWater",
    "metrics",
    "beta_NomFilt_MSetF_Flt_Rxy_Ds_Rc.RData"
  ),
  qc = file.path(
    preprocessing_inputs$tempDir,
    "figures",
    "preprocessingMinfiEwasWater",
    "qc",
    "quality_control(MSet).tiff"
  )
)

class(preprocessPipelineResult)
basename(preprocess_paths)
file.exists(preprocess_paths)

## ----sva-pipeline-inputs, eval = can_run_sva, message = FALSE, warning = FALSE----
sva_targets_file <- file.path(
  preprocessing_inputs$tempDir,
  "data",
  "preprocessingMinfiEwasWater",
  "phenoLC.csv"
)
sva_rgset_file <- file.path(
  preprocessing_inputs$tempDir,
  "rData",
  "preprocessingMinfiEwasWater",
  "objects",
  "RGSet.RData"
)

basename(c(sva_targets_file, sva_rgset_file))
file.exists(c(sva_targets_file, sva_rgset_file))

## ----sva-pipeline, eval = can_run_sva, message = FALSE, warning = FALSE-------
svaPipelineResult <- svaEnmix(
  phenoFile = sva_targets_file,
  rgsetData = sva_rgset_file,
  outputLogs = file.path(preprocessing_inputs$tempDir, "logs"),
  SampleID = "Sample_Name",
  arrayType = "IlluminaHumanMethylation450k",
  annotationVersion = "ilmn12.hg19",
  SentrixIDColumn = "Sentrix_ID",
  SentrixPositionColumn = "Sentrix_Position",
  ctrlSvaPercVar = 0.90,
  ctrlSvaFlag = 1,
  scriptLabel = "svaEnmix",
  dataBaseDir = file.path(preprocessing_inputs$tempDir, "data"),
  rBaseDir = file.path(preprocessing_inputs$tempDir, "rData"),
  figureBaseDir = file.path(preprocessing_inputs$tempDir, "figures"),
  display = FALSE,
  verbose = FALSE,
  logs = TRUE,
  saveOutputs = TRUE
)

class(svaPipelineResult$savedFiles)
names(svaPipelineResult$savedFiles)
basename(unlist(svaPipelineResult$savedFiles, use.names = FALSE))

## ----preprocessingPheno-pipeline-inputs, message = FALSE, warning = FALSE-----
pheno_inputs <- dnaEPICO:::examplePreprocessingPhenoStateDnaEpico()

names(pheno_inputs)
pheno_inputs$tempDir
basename(c(
  pheno_inputs$phenoPath,
  pheno_inputs$betaPath,
  pheno_inputs$mPath,
  pheno_inputs$cnPath
))

## ----preprocessingPheno-pipeline, message = FALSE, warning = FALSE------------
phenoPipelineResult <- preprocessingPheno(
  phenoFile = pheno_inputs$phenoPath,
  betaPath = pheno_inputs$betaPath,
  mPath = pheno_inputs$mPath,
  cnPath = pheno_inputs$cnPath,
  SampleID = "Sample_Name",
  timeVar = "Timepoint",
  timepoints = "1,2",
  combineTimepoints = "1,2",
  outputPheno = file.path(pheno_inputs$tempDir, "data", "preprocessingPheno"),
  outputRData = file.path(
    pheno_inputs$tempDir,
    "rData",
    "preprocessingPheno",
    "metrics"
  ),
  outputRDataMerge = file.path(
    pheno_inputs$tempDir,
    "rData",
    "preprocessingPheno",
    "mergeData"
  ),
  sexColumn = "Sex",
  outputLogs = file.path(pheno_inputs$tempDir, "logs"),
  outputDir = file.path(pheno_inputs$tempDir, "clockFoundation"),
  verbose = FALSE,
  logs = TRUE,
  saveOutputs = TRUE
)

names(phenoPipelineResult$savedFiles)
names(phenoPipelineResult$savedFiles$timepoints)
basename(unlist(phenoPipelineResult$savedFiles$timepoints[["1"]]))
basename(unlist(phenoPipelineResult$savedFiles[c(
  "combinedPheno",
  "combinedPhenoBeta",
  "betaCSV",
  "phenoCF"
)]))

## ----glm-pipeline-inputs, eval = can_run_glm, message = FALSE, warning = FALSE----
glm_inputs <- dnaEPICO:::exampleMethylationGLMStateDnaEpico()

names(glm_inputs)
glm_inputs$tempDir
basename(glm_inputs$inputPath)
file.exists(glm_inputs$inputPath)

## ----glm-pipeline, eval = can_run_glm, message = FALSE, warning = FALSE-------
glmPipelineResult <- methylationGLM_T1(
  inputPheno = glm_inputs$inputPath,
  phenotypes = "status",
  covariates = "sex,age",
  factorVars = "status,sex",
  cpgLimit = 2,
  nCores = 1,
  outputLogs = file.path(glm_inputs$tempDir, "logs"),
  outputRData = file.path(glm_inputs$tempDir, "rData", "models"),
  outputPlots = file.path(glm_inputs$tempDir, "figures"),
  significantCpGDir = file.path(glm_inputs$tempDir, "significant"),
  summaryTxtDir = file.path(glm_inputs$tempDir, "summaries"),
  summaryPval = 1,
  significantCpGPval = 1,
  annotationPackage = "IlluminaHumanMethylation450kanno.ilmn12.hg19",
  annotationCols = "Name,chr,pos",
  annotatedGLMOut = file.path(glm_inputs$tempDir, "annotated"),
  display = FALSE,
  verbose = FALSE,
  logs = TRUE,
  saveOutputs = TRUE
)

class(glmPipelineResult$savedFiles)
names(glmPipelineResult$savedFiles)

## ----glmm-pipeline-inputs, eval = can_run_glmm, message = FALSE, warning = FALSE----
glmm_inputs <- dnaEPICO:::exampleMethylationGLMMStateDnaEpico()

names(glmm_inputs)
glmm_inputs$tempDir
basename(glmm_inputs$inputPath)
file.exists(glmm_inputs$inputPath)

## ----glmm-pipeline, eval = can_run_glmm, message = FALSE, warning = FALSE-----
glmmPipelineResult <- methylationGLMM_T1T2(
  inputPheno = glmm_inputs$inputPath,
  outputLogs = file.path(glmm_inputs$tempDir, "logs"),
  outputRData = file.path(glmm_inputs$tempDir, "rData", "models"),
  outputPlots = file.path(glmm_inputs$tempDir, "figures"),
  personVar = "person",
  timeVar = "Timepoint",
  phenotypes = "score",
  covariates = "sex",
  factorVars = "sex,Timepoint",
  cpgLimit = 2,
  nCores = 1,
  summaryPval = 1,
  saveSignificantInteractions = TRUE,
  significantInteractionDir = file.path(
    glmm_inputs$tempDir,
    "results",
    "cpgs",
    "methylationGLMM_T1T2"
  ),
  significantInteractionPval = 1,
  saveTxtSummaries = TRUE,
  summaryTxtDir = file.path(
    glmm_inputs$tempDir,
    "results",
    "summary",
    "methylationGLMM_T1T2"
  ),
  annotationPackage = "IlluminaHumanMethylation450kanno.ilmn12.hg19",
  annotationCols = "Name,chr,pos",
  annotatedLMEOut = file.path(glmm_inputs$tempDir, "annotated"),
  display = FALSE,
  verbose = FALSE,
  logs = TRUE,
  saveOutputs = TRUE
)

class(glmmPipelineResult$savedFiles)
names(glmmPipelineResult$savedFiles)

## ----quarto-check-r, eval = FALSE---------------------------------------------
# Sys.which("quarto")
# system2("quarto", "--version")

## ----quarto-path-r, eval = FALSE----------------------------------------------
# Sys.setenv(QUARTO_BIN = "/full/path/to/quarto")

## ----dnamReport-pipeline-example, eval = FALSE--------------------------------
# report_log_dir <- file.path(preprocessing_inputs$tempDir, "logs", "report")
# dir.create(report_log_dir, recursive = TRUE, showWarnings = FALSE)
# 
# report_logs <- c(
#   file.path(
#     preprocessing_inputs$tempDir,
#     "logs",
#     "log_preprocessingMinfiEwasWater.txt"
#   ),
#   file.path(pheno_inputs$tempDir, "logs", "log_preprocessingPheno.txt"),
#   file.path(preprocessing_inputs$tempDir, "logs", "log_svaEnmix.txt"),
#   file.path(glm_inputs$tempDir, "logs", "log_methylationGLM_T1.txt"),
#   file.path(glmm_inputs$tempDir, "logs", "log_methylationGLMM_T1T2.txt")
# )
# 
# existing_logs <- report_logs[file.exists(report_logs)]
# if (length(existing_logs)) {
#   file.copy(existing_logs, report_log_dir, overwrite = TRUE)
# }
# 
# reportResult <- dnamReport(
#   outputDir = file.path(preprocessing_inputs$tempDir, "reports", "example"),
#   phenoTab = file.path(
#     preprocessing_inputs$tempDir,
#     "data",
#     "preprocessingMinfiEwasWater",
#     "phenoLC.csv"
#   ),
#   enmixTab = file.path(
#     preprocessing_inputs$tempDir,
#     "figures",
#     "preprocessingMinfiEwasWater",
#     "enmix"
#   ),
#   qcTab = file.path(
#     preprocessing_inputs$tempDir,
#     "figures",
#     "preprocessingMinfiEwasWater",
#     "qc"
#   ),
#   svaTab = file.path(preprocessing_inputs$tempDir, "figures", "svaEnmix"),
#   metricTab = file.path(
#     preprocessing_inputs$tempDir,
#     "figures",
#     "preprocessingMinfiEwasWater",
#     "metrics"
#   ),
#   glmTab = glmPipelineResult$savedFiles$annotatedGLM,
#   lmerTab = glmmPipelineResult$savedFiles$annotatedLME,
#   logTab = report_log_dir,
#   detPPath = file.path(
#     preprocessing_inputs$tempDir,
#     "rData",
#     "preprocessingMinfiEwasWater",
#     "qc",
#     "detP_RGSet.RData"
#   ),
#   detPThreshold = 0.01,
#   verbose = FALSE,
#   logs = TRUE
# )
# 
# reportResult$outputFile

## ----makefile-template-full, echo = FALSE, results = "asis"-------------------
makefile_lines <- readLines(makefilePath, warn = FALSE)
cat("```make\n", paste(makefile_lines, collapse = "\n"), "\n```\n", sep = "")

## ----echo = FALSE-------------------------------------------------------------
Sys.time()

## ----echo = FALSE-------------------------------------------------------------
totalTime <- diff(c(startTime, Sys.time()))
round(totalTime, digits = 3)

## ----echo = FALSE-------------------------------------------------------------
sessionInfo()

