## ----style, echo = FALSE, results = 'asis'----------------
BiocStyle::markdown()
options(width = 60, max.print = 1000)
knitr::opts_chunk$set(
    eval = as.logical(Sys.getenv("KNITR_EVAL", "TRUE")),
    cache = as.logical(Sys.getenv("KNITR_CACHE", "TRUE")),
    tidy.opts = list(width.cutoff = 60), tidy = TRUE
)

## ----setup, echo=FALSE, message=FALSE, warning=FALSE, eval=TRUE----
suppressPackageStartupMessages({
    library(systemPipeR)
})

## ----generate_workenvir, eval=FALSE-----------------------
# library(systemPipeRdata)
# genWorkenvir(workflow = "varseq", mydirname = "varseq")
# setwd("varseq")

## ----load_targets_file, eval=TRUE-------------------------
targetspath <- system.file("extdata", "targetsPE.txt", package = "systemPipeR")
targets <- read.delim(targetspath, comment.char = "#")
targets[1:4,-c(5,6)]

## ----project_varseq, eval=FALSE---------------------------
# library(systemPipeR)
# sal <- SPRproject()
# sal <- importWF(sal, file_path = "systemPipeVARseq.Rmd", verbose = FALSE)
# sal

## ----run_varseq, eval=FALSE-------------------------------
# sal <- runWF(sal)

## ----plot_varseq, eval=FALSE------------------------------
# plotWF(sal)

## ----varseq-toplogy, eval=TRUE, warning= FALSE, echo=FALSE, out.width="100%", fig.align = "center", fig.cap= "Toplogy graph of VAR-Seq workflow.", warning=FALSE----
knitr::include_graphics("results/plotwf_varseq.png")

## ----report_varseq, eval=FALSE----------------------------
# # Scientific report
# sal <- renderReport(sal)
# rmarkdown::render("systemPipeVARseq.Rmd", clean = TRUE, output_format = "BiocStyle::html_document")
# 
# # Technical (log) report
# sal <- renderLogs(sal)

## ----status_varseq, eval=FALSE----------------------------
# statusWF(sal)

## ----load_SPR, message=FALSE, eval=FALSE, spr=TRUE--------
# # Some samples in the test dataset do not work well in VARseq, and VARseq workflow
# # takes long time to process each sample. To better test and speed up the test workflow,
# # sample set is reduced to the first 13 samples.
# # Please REMOVE the next two lines in your real analysis
# cat(crayon::red$bold("Some samples in targets are removed for test workflow. Please change the template to disable this in your real analysis.\n"))
# writeLines(readLines("targetsPE.txt")[1:13], "targetsPE.txt")
# 
# cat(crayon::blue$bold("To use this workflow, following R packages are expected:\n"))
# cat(c("'GenomicFeatures", "VariantAnnotation", "GenomicFeatures", "ggbio", "ggplot2'\n"), sep = "', '")
# ###pre-end
# appendStep(sal) <- LineWise(
#     code = {
#         library(systemPipeR)
#         },
#     step_name = "load_SPR"
# )

## ----fastq_report_pre, eval=FALSE, message=FALSE, spr=TRUE----
# appendStep(sal) <- LineWise(
#     code = {
#         targets <- read.delim("targetsPE.txt", comment.char = "#")
#         updateColumn(sal, step = "load_SPR", position = "targetsWF") <- targets
#         fq_files <- getColumn(sal, "load_SPR", "targetsWF", column = 1)
#         fqlist <- seeFastq(fastq = fq_files, batchsize = 10000, klength = 8)
#         png("./results/fastqReport.png", height = 162, width = 288 * length(fqlist))
#         seeFastqPlot(fqlist)
#         dev.off()
#     },
#     step_name = "fastq_report_pre",
#     dependency = "load_SPR"
# )

## ----trimmomatic, eval=FALSE, spr=TRUE--------------------
# appendStep(sal) <- SYSargsList(
#     step_name = "trimmomatic",
#     targets = "targetsPE.txt",
#     wf_file = "trimmomatic/trimmomatic-pe.cwl",
#     input_file = "trimmomatic/trimmomatic-pe.yml",
#     dir_path = "param/cwl",
#     inputvars = c(
#         FileName1 = "_FASTQ_PATH1_",
#         FileName2 = "_FASTQ_PATH2_",
#         SampleName = "_SampleName_"
#     ),
#     dependency = c("fastq_report_pre"),
#     run_step = "optional"
# )

## ----preprocessing, message=FALSE, eval=FALSE, spr=TRUE----
# appendStep(sal) <- SYSargsList(
#     step_name = "preprocessing",
#     targets = "targetsPE.txt", dir = TRUE,
#     wf_file = "preprocessReads/preprocessReads-pe.cwl",
#     input_file = "preprocessReads/preprocessReads-pe.yml",
#     dir_path = "param/cwl",
#     inputvars = c(
#         FileName1 = "_FASTQ_PATH1_",
#         FileName2 = "_FASTQ_PATH2_",
#         SampleName = "_SampleName_"
#     ),
#     dependency = c("fastq_report_pre"),
#     run_step = "optional"
# )

## ----custom_preprocessing_function, eval=FALSE------------
# appendStep(sal) <- LineWise(
#     code = {
#         filterFct <- function(fq, cutoff = 20, Nexceptions = 0) {
#             qcount <- rowSums(as(quality(fq), "matrix") <= cutoff, na.rm = TRUE)
#             # Retains reads where Phred scores are >= cutoff with N exceptions
#             fq[qcount <= Nexceptions]
#         }
#         save(list = ls(), file = "param/customFCT.RData")
#     },
#     step_name = "custom_preprocessing_function",
#     dependency = "preprocessing"
# )

## ----editing_preprocessing, message=FALSE, eval=FALSE-----
# yamlinput(sal, "preprocessing")$Fct
# yamlinput(sal, "preprocessing", "Fct") <- "'filterFct(fq, cutoff=20, Nexceptions=0)'"
# yamlinput(sal, "preprocessing")$Fct ## check the new function
# cmdlist(sal, "preprocessing", targets = 1) ## check if the command line was updated with success

## ----fastq_report_pos, eval=FALSE, message=FALSE, spr=TRUE----
# appendStep(sal) <- LineWise(
#     code = {
#         fq_files <- getColumn(sal, "preprocessing", "outfiles", column = 1) ## get outfiles path
#         fqlist <- seeFastq(fastq = fq_files, batchsize = 10000, klength = 8)
#         png("./results/fastqReport_pos.png", height = 18, width = 4 * length(fqlist))
#         seeFastqPlot(fqlist)
#         dev.off()
#     },
#     step_name = "fastq_report_pos",
#     dependency = "trimmomatic",
#     run_step = "optional"
# )

## ----bwa_index, eval=FALSE, spr=TRUE----------------------
# appendStep(sal) <- SYSargsList(
#     step_name = "bwa_index",
#     dir = FALSE, targets = NULL,
#     wf_file = "gatk/workflow_bwa-index.cwl",
#     input_file = "gatk/gatk.yaml",
#     dir_path = "param/cwl",
#     dependency = "load_SPR"
# )

## ----fasta_index, eval=FALSE, spr=TRUE--------------------
# appendStep(sal) <- SYSargsList(
#     step_name = "fasta_index",
#     dir = FALSE, targets = NULL,
#     wf_file = "gatk/workflow_fasta_dict.cwl",
#     input_file = "gatk/gatk.yaml",
#     dir_path = "param/cwl",
#     dependency = "bwa_index"
# )

## ----faidx_index, eval=FALSE, spr=TRUE--------------------
# appendStep(sal) <- SYSargsList(
#     step_name = "faidx_index",
#     dir = FALSE, targets = NULL,
#     wf_file = "gatk/workflow_fasta_faidx.cwl",
#     input_file = "gatk/gatk.yaml",
#     dir_path = "param/cwl",
#     dependency = "fasta_index"
# )

## ----bwa_alignment, eval=FALSE, spr=TRUE------------------
# appendStep(sal) <- SYSargsList(
#     step_name = "bwa_alignment",
#     targets = "targetsPE.txt",
#     wf_file = "gatk/workflow_bwa-pe.cwl",
#     input_file = "gatk/gatk.yaml",
#     dir_path = "param/cwl",
#     inputvars = c(
#         FileName1 = "_FASTQ_PATH1_",
#         FileName2 = "_FASTQ_PATH2_",
#         SampleName = "_SampleName_"
#     ),
#     dependency = c("faidx_index")
# )

## ----align_stats, eval=FALSE, spr=TRUE--------------------
# appendStep(sal) <- LineWise(
#     code = {
#         bampaths <- getColumn(sal, step = "bwa_alignment", "outfiles", column = "samtools_sort_bam")
#         fqpaths <- getColumn(sal, step = "bwa_alignment", "targetsWF", column = "FileName1")
#         read_statsDF <- alignStats(args = bampaths, fqpaths = fqpaths, pairEnd = TRUE)
#         write.table(read_statsDF, "results/alignStats.xls", row.names = FALSE, quote = FALSE, sep = "\t")
#     },
#     step_name = "align_stats",
#     dependency = "bwa_alignment",
#     run_step = "optional"
# )

## ----bam_urls, eval=FALSE, spr=TRUE-----------------------
# appendStep(sal) <- LineWise(
#     code = {
#         bampaths <- getColumn(sal, step = "bwa_alignment", "outfiles", column = "samtools_sort_bam")
#         symLink2bam(
#             sysargs = bampaths, htmldir = c("~/.html/", "somedir/"),
#             urlbase = "http://cluster.hpcc.ucr.edu/~tgirke/",
#             urlfile = "./results/IGVurl.txt"
#         )
#     },
#     step_name = "bam_urls",
#     dependency = "bwa_alignment",
#     run_step = "optional"
# )

## ----fastq2ubam, eval=FALSE, spr=TRUE---------------------
# appendStep(sal) <- SYSargsList(
#     step_name = "fastq2ubam",
#     targets = "targetsPE.txt",
#     wf_file = "gatk/workflow_gatk_fastq2ubam.cwl",
#     input_file = "gatk/gatk.yaml",
#     dir_path = "param/cwl",
#     inputvars = c(
#         FileName1 = "_FASTQ_PATH1_",
#         FileName2 = "_FASTQ_PATH2_",
#         SampleName = "_SampleName_"
#     ),
#     dependency = c("faidx_index")
# )

## ----merge_bam, eval=FALSE, spr=TRUE----------------------
# appendStep(sal) <- SYSargsList(
#     step_name = "merge_bam",
#     targets = c("bwa_alignment", "fastq2ubam"),
#     wf_file = "gatk/workflow_gatk_mergebams.cwl",
#     input_file = "gatk/gatk.yaml",
#     dir_path = "param/cwl",
#     inputvars = c(
#         bwa_men_sam = "_bwasam_",
#         ubam = "_ubam_",
#         SampleName = "_SampleName_"
#     ),
#     rm_targets_col = c("preprocessReads_1", "preprocessReads_2"),
#     dependency = c("bwa_alignment", "fastq2ubam")
# )

## ----sort, eval=FALSE, spr=TRUE---------------------------
# appendStep(sal) <- SYSargsList(
#     step_name = "sort",
#     targets = "merge_bam",
#     wf_file = "gatk/workflow_gatk_sort.cwl",
#     input_file = "gatk/gatk.yaml",
#     dir_path = "param/cwl",
#     inputvars = c(merge_bam = "_mergebam_", SampleName = "_SampleName_"),
#     rm_targets_col = c(
#         "bwa_men_sam", "ubam", "SampleName_fastq2ubam",
#         "Factor_fastq2ubam", "SampleLong_fastq2ubam",
#         "Experiment_fastq2ubam", "Date_fastq2ubam"
#     ),
#     dependency = c("merge_bam")
# )

## ----mark_dup, eval=FALSE, spr=TRUE-----------------------
# appendStep(sal) <- SYSargsList(
#     step_name = "mark_dup",
#     targets = "sort",
#     wf_file = "gatk/workflow_gatk_markduplicates.cwl",
#     input_file = "gatk/gatk.yaml",
#     dir_path = "param/cwl",
#     inputvars = c(sort_bam = "_sort_", SampleName = "_SampleName_"),
#     rm_targets_col = c("merge_bam"),
#     dependency = c("sort")
# )

## ----fix_tag, eval=FALSE, spr=TRUE------------------------
# appendStep(sal) <- SYSargsList(
#     step_name = "fix_tag",
#     targets = "mark_dup",
#     wf_file = "gatk/workflow_gatk_fixtag.cwl",
#     input_file = "gatk/gatk.yaml",
#     dir_path = "param/cwl",
#     inputvars = c(mark_bam = "_mark_", SampleName = "_SampleName_"),
#     rm_targets_col = c("sort_bam"),
#     dependency = c("mark_dup")
# )

## ----hap_caller, eval=FALSE, spr=TRUE---------------------
# appendStep(sal) <- SYSargsList(
#     step_name = "hap_caller",
#     targets = "fix_tag",
#     wf_file = "gatk/workflow_gatk_haplotypecaller.cwl",
#     input_file = "gatk/gatk.yaml",
#     dir_path = "param/cwl",
#     inputvars = c(fixtag_bam = "_fixed_", SampleName = "_SampleName_"),
#     rm_targets_col = c("mark_bam"),
#     dependency = c("fix_tag")
# )

## ----import, eval=FALSE, spr=TRUE-------------------------
# appendStep(sal) <- SYSargsList(
#     step_name = "import",
#     targets = NULL, dir = FALSE,
#     wf_file = "gatk/workflow_gatk_genomicsDBImport.cwl",
#     input_file = "gatk/gatk.yaml",
#     dir_path = "param/cwl",
#     dependency = c("hap_caller")
# )

## ----call_variants, eval=FALSE, spr=TRUE------------------
# appendStep(sal) <- SYSargsList(
#     step_name = "call_variants",
#     targets = NULL, dir = FALSE,
#     wf_file = "gatk/workflow_gatk_genotypeGVCFs.cwl",
#     input_file = "gatk/gatk.yaml",
#     dir_path = "param/cwl",
#     dependency = c("import")
# )

## ----filter, eval=FALSE, spr=TRUE-------------------------
# appendStep(sal) <- SYSargsList(
#     step_name = "filter",
#     targets = NULL, dir = FALSE,
#     wf_file = "gatk/workflow_gatk_variantFiltration.cwl",
#     input_file = "gatk/gatk.yaml",
#     dir_path = "param/cwl",
#     dependency = c("call_variants")
# )

## ----create_vcf, eval=FALSE, spr=TRUE---------------------
# appendStep(sal) <- SYSargsList(
#     step_name = "create_vcf",
#     targets = "hap_caller",
#     wf_file = "gatk/workflow_gatk_select_variant.cwl",
#     input_file = "gatk/gatk.yaml",
#     dir_path = "param/cwl",
#     inputvars = c(SampleName = "_SampleName_"),
#     dependency = c("hap_caller", "filter")
# )

## ----create_vcf_BCFtool, eval=FALSE, spr=TRUE-------------
# appendStep(sal) <- SYSargsList(
#     step_name = "create_vcf_BCFtool",
#     targets = "bwa_alignment", dir = TRUE,
#     wf_file = "workflow-bcftools/workflow_bcftools.cwl",
#     input_file = "workflow-bcftools/bcftools.yml",
#     dir_path = "param/cwl",
#     inputvars = c(bwa_men_sam = "_bwasam_", SampleName = "_SampleName_"),
#     rm_targets_col = c("preprocessReads_1", "preprocessReads_2"),
#     dependency = "bwa_alignment",
#     run_step = "optional"
# )

## ----inspect_vcf, eval=FALSE------------------------------
# library(VariantAnnotation)
# vcf_raw <- getColumn(sal, "create_vcf")
# vcf <- readVcf(vcf_raw[1], "A. thaliana")
# vcf
# vr <- as(vcf, "VRanges")
# vr

## ----filter_vcf, eval=FALSE, spr=TRUE---------------------
# appendStep(sal) <- LineWise(
#     code = {
#         vcf_raw <- getColumn(sal, "create_vcf")
#         library(VariantAnnotation)
#         filter <- "totalDepth(vr) >= 2 & (altDepth(vr) / totalDepth(vr) >= 0.8)"
#         vcf_filter <- suppressWarnings(filterVars(vcf_raw, filter, organism = "A. thaliana", out_dir = "results/vcf_filter"))
#         # dump the filtered path variable to running enviornment so
#         # other sysArg steps can get its values
#         updateColumn(sal, 'create_vcf', "outfiles") <- data.frame(vcf_filter=vcf_filter)
#     },
#     step_name = "filter_vcf",
#     dependency = "create_vcf"
# )

## ----filter_vcf_BCFtools, eval=FALSE, spr=TRUE------------
# appendStep(sal) <- LineWise(
#     code = {
#         vcf_raw <- getColumn(sal, step = "create_vcf_BCFtool",
#                              position = "outfiles", column = "bcftools_call")
#         library(VariantAnnotation)
#         filter <- "rowSums(vr) >= 2 & (rowSums(vr[,3:4])/rowSums(vr[,1:4]) >= 0.8)"
#         vcf_filter_bcf <- suppressWarnings(filterVars(vcf_raw, filter, organism = "A. thaliana", out_dir = "results/vcf_filter_BCFtools", varcaller = "bcftools"))
# 
#         updateColumn(sal, 'create_vcf', "outfiles") <- data.frame(vcf_filter_bcf=vcf_filter_bcf)
#     },
#     step_name = "filter_vcf_BCFtools",
#     dependency = "create_vcf_BCFtool",
#     run_step = "optional"
# )

## ----check_filter, eval=FALSE-----------------------------
# copyEnvir(sal, "vcf_raw", globalenv())
# copyEnvir(sal, "vcf_filter", globalenv())
# length(as(readVcf(vcf_raw[1], genome = "Ath"), "VRanges")[, 1])
# length(as(readVcf(vcf_filter[1], genome = "Ath"), "VRanges")[, 1])

## ----annotate_basics, eval=FALSE--------------------------
# library("GenomicFeatures")
# # comment the next line if optional step "filter_vcf" is included
# vcf_filter <- getColumn(sal, "create_vcf")
# # uncomment the next line if optional step "filter_vcf" is included
# # copyEnvir(sal, "vcf_filter", globalenv())
# txdb <- loadDb("./data/tair10.sqlite")
# vcf <- readVcf(vcf_filter[1], "A. thaliana")
# locateVariants(vcf, txdb, CodingVariants())

## ----annotate_basics_non-synon, eval=FALSE----------------
# fa <- FaFile("data/tair10.fasta")
# predictCoding(vcf, txdb, seqSource = fa)

## ----annotate_vcf, eval=FALSE, spr=TRUE-------------------
# appendStep(sal) <- LineWise(
#     code = {
#         # get the filtered vcf path from R running environment
#         copyEnvir(sal, "vcf_filter", globalenv())
#         library("GenomicFeatures")
#         txdb <- loadDb("./data/tair10.sqlite")
#         fa <- FaFile("data/tair10.fasta")
#         vcf_anno <- suppressMessages(suppressWarnings(variantReport(vcf_filter, txdb = txdb, fa = fa, organism = "A. thaliana", out_dir = "results/vcf_anno")))
#     },
#     step_name = "annotate_vcf",
#     dependency = "filter_vcf"
# )

## ----view_annotation, eval=FALSE--------------------------
# copyEnvir(sal, "vcf_anno", globalenv())
# read.delim(vcf_anno[1])[38:40, ]

## ----combine_var, eval=FALSE, spr=TRUE--------------------
# appendStep(sal) <- LineWise(
#     code = {
#         combineDF <- combineVarReports(vcf_anno, filtercol = c(Consequence = "nonsynonymous"))
#         write.table(combineDF, "./results/combineDF_nonsyn.tsv", quote = FALSE, row.names = FALSE, sep = "\t")
#     },
#     step_name = "combine_var",
#     dependency = "annotate_vcf"
# )

## ----summary_var, eval=FALSE, spr=TRUE--------------------
# appendStep(sal) <- LineWise(
#     code = {
#         write.table(varSummary(vcf_anno), "./results/variantStats.tsv", quote = FALSE, col.names = NA, sep = "\t")
#     },
#     step_name = "summary_var",
#     dependency = "combine_var"
# )

## ----venn_diagram, eval=FALSE, spr=TRUE-------------------
# appendStep(sal) <- LineWise(
#     code = {
#         ## make a list of first three samples
#         varlist <- sapply(names(vcf_anno[1:3]), function(x) as.character(read.delim(vcf_anno[x])$VARID))
#         vennset <- overLapper(varlist, type = "vennsets")
#         png("./results/vennplot_var.png")
#         vennPlot(list(vennset), mymain = "Venn Plot of First 3 Samples", mysub = "", colmode = 2, ccol = c("red", "blue"))
#         dev.off()
#     },
#     step_name = "venn_diagram",
#     dependency = "annotate_vcf"
# )

## ----plot_variant, eval=FALSE, spr=TRUE-------------------
# appendStep(sal) <- LineWise(
#     code = {
#         # get the filtered vcf path from R running environment
#         copyEnvir(sal, "vcf_filter", globalenv())
#         library(ggbio)
#         library(VariantAnnotation)
#         mychr <- "ChrM"
#         mystart <- 19000
#         myend <- 21000
#         bams <- getColumn(sal, "fix_tag")
#         vcf <- suppressWarnings(readVcf(vcf_filter["M6B"], "A. thaliana"))
#         ga <- readGAlignments(bams["M6B"], use.names = TRUE, param = ScanBamParam(which = GRanges(mychr, IRanges(mystart, myend))))
#         p1 <- autoplot(ga, geom = "rect")
#         p2 <- autoplot(ga, geom = "line", stat = "coverage")
#         p3 <- autoplot(vcf[seqnames(vcf) == mychr], type = "fixed") +
#             xlim(mystart, myend) +
#             theme(legend.position = "none", axis.text.y = element_blank(), axis.ticks.y = element_blank())
#         p4 <- autoplot(loadDb("./data/tair10.sqlite"), which = GRanges(mychr, IRanges(mystart, myend)), names.expr = "gene_id")
#         p1_4 <- tracks(Reads = p1, Coverage = p2, Variant = p3, Transcripts = p4, heights = c(0.3, 0.2, 0.1, 0.35)) + ylab("")
#         ggbio::ggsave(p1_4, filename = "./results/plot_variant.png", units = "in")
#     },
#     step_name = "plot_variant",
#     dependency = "filter_vcf"
# )

## ----sessionInfo, eval=FALSE, spr=TRUE--------------------
# appendStep(sal) <- LineWise(
#     code = {
#         sessionInfo()
#         },
#     step_name = "sessionInfo",
#     dependency = "plot_variant")

## ----runWF, eval=FALSE------------------------------------
# sal <- runWF(sal)

## ----runWF_cluster, eval=FALSE----------------------------
# # wall time in mins, memory in MB
# resources <- list(conffile=".batchtools.conf.R",
#                   template="batchtools.slurm.tmpl",
#                   Njobs=18,
#                   walltime=120,
#                   ntasks=1,
#                   ncpus=4,
#                   memory=1024,
#                   partition = "short"
#                   )
# sal <- addResources(sal, c("hisat2_mapping"), resources = resources)
# sal <- runWF(sal)

## ----plotWF, eval=FALSE-----------------------------------
# plotWF(sal, rstudio = TRUE)

## ----statusWF, eval=FALSE---------------------------------
# sal
# statusWF(sal)

## ----logsWF, eval=FALSE-----------------------------------
# sal <- renderLogs(sal)

## ----list_tools-------------------------------------------
if(file.exists(file.path(".SPRproject", "SYSargsList.yml"))) {
    local({
        sal <- systemPipeR::SPRproject(resume = TRUE)
        systemPipeR::listCmdTools(sal)
        systemPipeR::listCmdModules(sal)
    })
} else {
    cat(crayon::blue$bold("Tools and modules required by this workflow are:\n"))
    cat(c("trimmomatic/0.39", "samtools/1.14", "gatk/4.2.0.0", "bcftools/1.15", 
          "bwa/0.7.17"), sep = "\n")
}

## ----report_session_info, eval=TRUE-----------------------
sessionInfo()

