## ----knitr, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  cache = FALSE
)

## ----setup, eval = FALSE------------------------------------------------------
# # install from bioconductor
# if (!require(BiocManager)) {
#   install.packages('BiocManager')
#   BiocManager::install('leapR')
# }
# 

## ----libraries----------------------------------------------------------------
# load the library
library(leapR)
library(gplots)
library(rmarkdown)

## ----pathdb-------------------------------------------------------------------
data(ncipid)

## ----omicsdata, message=FALSE, warning=FALSE----------------------------------
url <- "https://api.figshare.com/v2/file/download/56536217"
pdata <- download.file(url, method = "libcurl", destfile = "protData.rda")
#  as.matrix()
load("protData.rda")

p <- file.remove("protData.rda")

url <- "https://api.figshare.com/v2/file/download/56536214"
tdata <- download.file(url, method = "libcurl", destfile = "transData.rda")
load("transData.rda")
p <- file.remove("transData.rda")

url <- "https://api.figshare.com/v2/file/download/56536211"
phdata <- download.file(url, method = "libcurl", destfile = "phosData.rda")
load("phosData.rda")
p <- file.remove("phosData.rda")

## ----ptlists------------------------------------------------------------------
data(shortlist)
data(longlist)

## columns that we want to use for results

cols_to_display <- c("ingroup_n", "outgroup_n", "background_n", 
                     "pvalue", "BH_pvalue")

## ----abundance, echo=TRUE, warning=FALSE, message=FALSE-----------------------
# in this example we lump a bunch of patients together (the 'short survivors')
# and compare them to another group (the 'long survivors')

### using enrichment_wrapper function
protdata.enrichment.svl <- leapR::leapR(
  geneset = ncipid,
  enrichment_method = "enrichment_comparison",
  eset = pset,
  assay_name = "proteomics",
  primary_columns = shortlist,
  secondary_columns = longlist
)

or <- order(unlist(protdata.enrichment.svl[, "pvalue"]))
rmarkdown::paged_table(protdata.enrichment.svl[or, cols_to_display])
# another application is to compare just one patient against another
# (this would be the  equivalent of comparing one time point to another)

### using enrichment_wrapper function
protdata.enrichment.svl.ovo <- leapR::leapR(
  geneset = ncipid,
  enrichment_method = "enrichment_comparison",
  eset = pset,
  assay_name = "proteomics",
  primary_columns = shortlist[1],
  secondary_columns = longlist[1]
)
or <- order(unlist(protdata.enrichment.svl.ovo[, "pvalue"]))
rmarkdown::paged_table(protdata.enrichment.svl.ovo[or, cols_to_display])

## ----fishers, echo=TRUE, warning=FALSE, message=FALSE-------------------------
# for this example we will construct a list of genes from the expression data
#     to emulate what you might be inputting
genelist <- rownames(pset)[which(SummarizedExperiment::assay(pset, 
                                                    "proteomics")[, 1] > 0.5)]

protdata.enrichment.sets.test <- leapR::leapR(
  geneset = ncipid,
  enrichment_method = "enrichment_in_sets",
  eset = pset,
  assay_name = "proteomics",
  targets = genelist
)
or <- order(protdata.enrichment.sets.test[, "pvalue"])
rmarkdown::paged_table(protdata.enrichment.sets.test[or, cols_to_display])

### using enrichment_wrapper function
protdata.enrichment.order <- leapR::leapR(
  geneset = ncipid,
  enrichment_method = "enrichment_in_order",
  eset = pset, 
  assay_name = "proteomics",
  primary_columns = "TCGA-13-1484"
)

or <- order(protdata.enrichment.order[, "pvalue"])
rmarkdown::paged_table(protdata.enrichment.order[or, cols_to_display])

# in this example we construct some modules from the hierarchical clustering
#   of the   data
protdata_naf <- SummarizedExperiment::assay(pset, "proteomics")

# hierarchical clustering is not too happy with lots of missing values
#    so we'll do a zero fill on this to get the modules
protdata_naf[which(is.na(protdata_naf))] <- 0

# construct the hierarchical clustering using the 'wardD' method, which
#    seems to give more even sized modules
protdata_hc <- hclust(dist(protdata_naf), method = "ward.D2")

# arbitrarily we'll chop the clusters into 5 modules
modules <- cutree(protdata_hc, k = 5)

## sara: created list
clusters <- lapply(unique(modules), function(x) names(which(modules == x)))

# modules is a named list of values where each value is a module
#         number and the name is the gene name

# To do enrichment for one module (module 1 in this case) do this
protdata.enrichment.sets.module_1 <- leapR::leapR(
  geneset = ncipid,
  enrichment_method = "enrichment_in_sets",
  eset = pset,
  assay_name = "proteomics",
  targets = names(modules[which(modules == 1)])
)

# To do enrichment on all modules and return the list of enrichment results
protdata.enrichment.sets.modules <- do.call(rbind, 
                                            leapR::cluster_enrichment(
                                                eset = pset,
                                                assay_name= 'proteomics',
                                                geneset = ncipid,
                                                clusters = clusters, 
                                                sigfilter = 0.25))
## nothing is enriched
rmarkdown::paged_table(protdata.enrichment.sets.modules[, cols_to_display])

## ----ks, echo=TRUE, warning = FALSE, message=FALSE----------------------------
# This is how you calculate enrichment in a ranked list 
# (for example from topology)
### using enrichment_wrapper function
protdata.enrichment.sets <- leapR::leapR(
  geneset = ncipid, "enrichment_in_order",
  eset = pset,
  assay_name = "proteomics",
  primary_columns = shortlist[1]
)


or <- order(protdata.enrichment.sets[, "pvalue"])
rmarkdown::paged_table(protdata.enrichment.sets[or, cols_to_display])

## ----correlation, echo=TRUE, warning=FALSE, message=FALSE---------------------
### using enrichment_wrapper function
protdata.enrichment.correlation <- leapR::leapR(
  geneset = ncipid,
  enrichment_method = "correlation_enrichment",
  assay_name = "proteomics",
  eset = pset
)

or <- order(protdata.enrichment.correlation[, "pvalue"])
rmarkdown::paged_table(head(protdata.enrichment.correlation[or, 
                                                      cols_to_display]))

protdata.enrichment.correlation.short <- leapR::leapR(
  geneset = ncipid,
  enrichment_method = "correlation_enrichment",
  assay_name = "proteomics",
  eset = pset[, shortlist]
)
or <- order(protdata.enrichment.correlation.short[, "pvalue"])
rmarkdown::paged_table(head(protdata.enrichment.correlation.short[or, 
                                                        cols_to_display]))

protdata.enrichment.correlation.long <- leapR::leapR(
  geneset = ncipid,
  enrichment_method = "correlation_enrichment",
  assay_name = "proteomics",
  eset = pset[, longlist]
)
or <- order(protdata.enrichment.correlation.long[, "pvalue"])
rmarkdown::paged_table(head(protdata.enrichment.correlation.long[or, 
                                                            cols_to_display]))

## ----phosphoproteomics, echo=TRUE, warning=FALSE, message=FALSE---------------
data("kinasesubstrates")

# for an individual tumor calculate the Kinase-Substrate 
# Enrichment (similar to KSEA)
#     This uses the site-specific phosphorylation data to determine 
# which kinases
#     might be active by assessing the enrichment of the 
# phosphorylation of their known substrates

phosphodata.ksea.order <- leapR::leapR(
  geneset = kinasesubstrates,
  enrichment_method = "enrichment_in_order",
  assay_name = "phosphoproteomics",
  eset = phset,
  primary_columns = "TCGA-13-1484")

or <- order(phosphodata.ksea.order[, "pvalue"])
rmarkdown::paged_table(phosphodata.ksea.order[or, cols_to_display])


# now do the same thing but use a threshold
phosphodata.sets.order <- leapR::leapR(
  geneset = kinasesubstrates,
  enrichment_method = "enrichment_in_sets",
  eset = phset,
  assay_name = "phosphoproteomics",
  threshold = 0.5,
  primary_columns = "TCGA-13-1484"
)
or <- order(phosphodata.sets.order[, "pvalue"])
rmarkdown::paged_table(phosphodata.sets.order[or, cols_to_display])

## ----figure_2, echo=TRUE, warning=FALSE, message=FALSE, error=FALSE-----------
# load the single omic and multi-omic pathway databases
data("krbpaths")
data("mo_krbpaths")

# comparison enrichment in transcriptional data
transdata.comp.enrichment.svl <- leapR::leapR(
  geneset = krbpaths,
  enrichment_method = "enrichment_comparison",
  eset = tset,
  assay_name = "transcriptomics",
  primary_columns = shortlist,
  secondary_columns = longlist
)

# comparison enrichment in proteomics data
# this is the same code used above, just repeated here for clarity
protdata.comp.enrichment.svl <- leapR::leapR(
  geneset = krbpaths,
  enrichment_method = "enrichment_comparison",
  eset = pset,
  assay_name = "proteomics",
  primary_columns = shortlist,
  secondary_columns = longlist
)

# comparison enrichment in phosphoproteomics data
phosphodata.comp.enrichment.svl <- leapR::leapR(
  geneset = krbpaths,
  enrichment_method = "enrichment_comparison",
  eset = phset,
  assay_name = "phosphoproteomics",
  primary_columns = shortlist,
  secondary_columns = longlist, id_column = "hgnc_id"
)


# set enrichment in transcriptomics data
# perform the comparison t tes
tset <- leapR::calcTTest(tset, assay_name = "transcriptomics", 
                         shortlist, longlist)


## now we need to run enrichment in sets with target list, not eset
transdata.set.enrichment.svl <- leapR::leapR(
  geneset = krbpaths,
  eset = tset,
  assay_name = "transcriptomics",
  enrichment_method = "enrichment_in_sets",
  primary_columns = "pvalue",
  greaterthan = FALSE, threshold = 0.05
)


pset <- leapR::calcTTest(pset, assay_name = "proteomics",
                         shortlist, longlist)

protdata.set.enrichment.svl <- leapR::leapR(
  geneset = krbpaths,
  eset = pset,
  assay_name = "proteomics",
  enrichment_method = "enrichment_in_sets",
  primary_columns = "pvalue",
  greaterthan = FALSE, threshold = 0.05
)


phset <- leapR::calcTTest(phset, assay_name = "phosphoproteomics", 
                          shortlist, longlist)

phosphodata.set.enrichment.svl <- leapR::leapR(
  geneset = krbpaths,
  enrichment_method = "enrichment_in_sets",
  id_column = "hgnc_id",
  assay_name = "phosphoproteomics",
  eset = phset, primary_columns = "pvalue",
  greaterthan = FALSE, threshold = 0.05
)

# order enrichment in transcriptomics data
transdata.order.enrichment.svl <- leapR::leapR(
  geneset = krbpaths,
  enrichment_method = "enrichment_in_order",
  eset = tset,
  assay_name = "transcriptomics",
  primary_columns = "difference"
)

# order enrichment in proteomics data
protdata.order.enrichment.svl <- leapR::leapR(
  geneset = krbpaths,
  enrichment_method = "enrichment_in_order",
  eset = pset,
  assay_name = "proteomics",
  primary_columns = "difference"
)

# order enrichment in phosphoproteomics data


phosphodata.order.enrichment.svl <- leapR::leapR(
  geneset = krbpaths,
  enrichment_method = "enrichment_in_order",
  id_column = "hgnc_id",
  eset = phset,
  assay_name = "phosphoproteomics",
  primary_columns = "difference"
)

# correlation difference in transcriptomics data
transdata.corr.enrichment.svl <- leapR::leapR(
  geneset = krbpaths,
  enrichment_method = "correlation_comparison",
  eset = tset,
  assay_name = "transcriptomics",
  primary_columns = shortlist,
  secondary_columns = longlist
)
# correlation difference in proteomics data
protdata.corr.enrichment.svl <- leapR::leapR(
  geneset = krbpaths,
  enrichment_method = "correlation_comparison",
  eset = pset,
  assay_name = "proteomics",
  primary_columns = shortlist,
  secondary_columns = longlist
)
# correlation difference in phosphoproteomics data
phosphodata.corr.enrichment.svl <- leapR::leapR(
  geneset = krbpaths,
  enrichment_method = "correlation_comparison",
  eset = phset,
  assay_name = "phosphoproteomics",
  primary_columns = shortlist,
  secondary_columns = longlist, id_column = "hgnc_id"
)

# combine the omics data into one with prefix tags
comboset <- leapR::combine_omics(list(pset, phset, tset), 
                                 c(NA, "hgnc_id", NA))

# comparison enrichment for combodata
# when we use expression set, we do not need to use the mo_krbpaths 
#since the  id mapping column is used
combodata.enrichment.svl <- leapR::leapR(
  geneset = krbpaths, # mo_krbpaths,
  enrichment_method = "enrichment_comparison",
  eset = comboset,
  assay_name = "combined",
  primary_columns = shortlist,
  secondary_columns = longlist, id_column = "id"
)


# set enrichment in combo data
# perform the comparison t test
comboset <- leapR::calcTTest(comboset,
                             assay_name = "combined", 
                             shortlist, longlist)

combodata.set.enrichment.svl <- leapR::leapR(
  geneset = krbpaths,
  enrichment_method = "enrichment_in_sets",
  eset = comboset, primary_columns = "pvalue",
  assay_name = "combined",
  id_column = "id",
  greaterthan = FALSE, threshold = 0.05
)

# order enrichment in combo data
combodata.order.enrichment.svl <- leapR::leapR(
  geneset = krbpaths,
  enrichment_method = "enrichment_in_order",
  assay_name = "combined",
  eset = comboset, primary_columns = "difference",
  id_column = "id"
)

# correlation difference in combo data
combodata.corr.enrichment.svl <- leapR::leapR(
  geneset = krbpaths,
  enrichment_method = "correlation_comparison",
  eset = comboset,
  assay_name = "combined",
  primary_columns = shortlist,
  id_column = "id",
  secondary_columns = longlist
)


# now take all these results and combine them into one figure
all_results <- list(
  transdata.comp.enrichment.svl,
  protdata.comp.enrichment.svl,
  phosphodata.comp.enrichment.svl,
  combodata.enrichment.svl,
  transdata.set.enrichment.svl,
  protdata.set.enrichment.svl,
  phosphodata.set.enrichment.svl,
  combodata.set.enrichment.svl,
  transdata.order.enrichment.svl,
  protdata.order.enrichment.svl,
  phosphodata.order.enrichment.svl,
  combodata.order.enrichment.svl,
  transdata.corr.enrichment.svl,
  protdata.corr.enrichment.svl,
  phosphodata.corr.enrichment.svl,
  combodata.corr.enrichment.svl
)

pathways_of_interest <- c(
  "KEGG_APOPTOSIS",
  "KEGG_CELL_CYCLE",
  "KEGG_ERBB_SIGNALING_PATHWAY",
  "KEGG_FOCAL_ADHESION",
  "KEGG_INSULIN_SIGNALING_PATHWAY",
  "KEGG_MAPK_SIGNALING_PATHWAY",
  "KEGG_MISMATCH_REPAIR",
  "KEGG_MTOR_SIGNALING_PATHWAY",
  "KEGG_OXIDATIVE_PHOSPHORYLATION",
  "KEGG_P53_SIGNALING_PATHWAY",
  "KEGG_PATHWAYS_IN_CANCER",
  "KEGG_PROTEASOME",
  "KEGG_RIBOSOME",
  "KEGG_VEGF_SIGNALING_PATHWAY",
  "KEGG_WNT_SIGNALING_PATHWAY"
)


results.frame <- data.frame(
  pathway = pathways_of_interest,
  td.comp = all_results[[1]][pathways_of_interest, "BH_pvalue"] < 0.05,
  pd.comp = all_results[[2]][pathways_of_interest, "BH_pvalue"] < 0.05,
  fd.comp = all_results[[3]][pathways_of_interest, "BH_pvalue"] < 0.05,
  cd.comp = all_results[[4]][pathways_of_interest, "BH_pvalue"] < 0.05,
  td.set = all_results[[5]][pathways_of_interest, "BH_pvalue"] < 0.05,
  pd.set = all_results[[6]][pathways_of_interest, "BH_pvalue"] < 0.05,
  fd.set = all_results[[7]][pathways_of_interest, "BH_pvalue"] < 0.05,
  cd.set = all_results[[8]][pathways_of_interest, "BH_pvalue"] < 0.05,
  td.order = all_results[[9]][pathways_of_interest, "BH_pvalue"] < 0.05,
  pd.order = all_results[[10]][pathways_of_interest, "BH_pvalue"] < 0.05,
  fd.order = all_results[[11]][pathways_of_interest, "BH_pvalue"] < 0.05,
  cd.order = all_results[[12]][pathways_of_interest, "BH_pvalue"] < 0.05,
  td.corr = all_results[[13]][pathways_of_interest, "BH_pvalue"] < 0.05,
  pd.corr = all_results[[14]][pathways_of_interest, "BH_pvalue"] < 0.05,
  fd.corr = all_results[[15]][pathways_of_interest, "BH_pvalue"] < 0.05,
  cd.corr = all_results[[16]][pathways_of_interest, "BH_pvalue"] < 0.05
)

results.frame.or <- data.frame(
  pathway = pathways_of_interest,
  td.comp = all_results[[1]][pathways_of_interest, "oddsratio"],
  pd.comp = all_results[[2]][pathways_of_interest, "oddsratio"],
  fd.comp = all_results[[3]][pathways_of_interest, "oddsratio"],
  cd.comp = all_results[[4]][pathways_of_interest, "oddsratio"],
  td.set = log(all_results[[5]][pathways_of_interest, "oddsratio"], 2),
  pd.set = log(all_results[[6]][pathways_of_interest, "oddsratio"], 2),
  fd.set = log(all_results[[7]][pathways_of_interest, "oddsratio"], 2),
  cd.set = log(all_results[[8]][pathways_of_interest, "oddsratio"], 2),
  td.order = all_results[[9]][pathways_of_interest, "oddsratio"],
  pd.order = all_results[[10]][pathways_of_interest, "oddsratio"],
  fd.order = all_results[[11]][pathways_of_interest, "oddsratio"],
  cd.order = all_results[[12]][pathways_of_interest, "oddsratio"],
  td.corr = all_results[[13]][pathways_of_interest, "oddsratio"],
  pd.corr = all_results[[14]][pathways_of_interest, "oddsratio"],
  fd.corr = all_results[[15]][pathways_of_interest, "oddsratio"],
  cd.corr = all_results[[16]][pathways_of_interest, "oddsratio"]
)

rownames(results.frame) <- results.frame[, 1]
rownames(results.frame.or) <- results.frame.or[, 1]
results.frame.sig <- results.frame[, 2:17] * results.frame.or[, 2:17]

heatmap.2(as.matrix(results.frame.sig[, c(1:4, 9:16)]), Colv = NA, 
          trace = "none", breaks = c(-1, -.1, -0.0001, 0, 0.1, 1), 
          col = c("blue", "lightblue", "grey", "pink", "red"), 
          dendrogram = "none")

## ----figure_3, warning=FALSE, message=FALSE-----------------------------------
# this comparison of abundance in substrates between case and control
#     is lopsided in the sense that phosphorylation levels were previously
#     reported to be overall higher in the short survivors. Thus the
#     results are not terribly interesting (all kinases are in the same 
# direction)
phosphodata.ksea.comp.svl <- leapR::leapR(
  geneset = kinasesubstrates,
  enrichment_method = "enrichment_comparison",
  eset = phset,
  assay_name = "phosphoproteomics",
  primary_columns = shortlist, secondary_columns = longlist
)


# thus for the example we'll look at correlation between known substrates 
#   in the   case v control conditions
phosphodata.ksea.corr.svl <- leapR::leapR(
  geneset = kinasesubstrates,
  enrichment_method = "correlation_comparison",
  eset = phset,
  assay_name = "phosphoproteomics",
  primary_columns = shortlist,
  secondary_columns = longlist
)

# for the example we are using an UNCORRECTED PVALUE
#     which will allow us to plot more values, but
#     for real applications it's necessary to use the
#     CORRECTED PVALUE

# here are all the kinases *significant (*uncorrected) from the analysis
or <- order(phosphodata.ksea.corr.svl[, "pvalue"])
ksea_result <- phosphodata.ksea.corr.svl[or, ][1:9, ]
ksea_cols <- rep("grey", 9)
ksea_cols[which(ksea_result[, "oddsratio"] > 0)] <- "black"

# plot left panel: correlation significance of top most significant kinases
barplot(ksea_result[, "oddsratio"],
  horiz = TRUE, xlim = c(-1, 0.5),
  names.arg = rownames(ksea_result), las = 1, col = ksea_cols
)

# plot right panel: abundance comparison results of the same kinases
barplot(phosphodata.ksea.comp.svl[rownames(ksea_result), "oddsratio"],
  horiz = TRUE, names.arg = rownames(ksea_result), las = 1, col = "black"
)

## ----session------------------------------------------------------------------
sessionInfo()

