#' Identify SLPs in screen hits
#'
#' Identify whether screen hits are SLPs of mutations deteced in both patients and cell lines, based on
#' predicted SLPs in \code{\link{corr_slp}} and \code{\link{comp_slp}}.
#'
#' @param cell a cell line.
#' @param screen_data a data.table of genomic screen results with three columns, "screen_entrez", "screen_symbol" and "cell_line".
#' @param cell_mut cell line mutation data.
#' @param tumour_slp merged SLPs.
#' @return A data.table.
#'   \describe{
#'     \item{cell_line}{Name of cell lines.}
#'     \item{screen_entrez}{Entrez ids of hits.}
#'     \item{screen_symbol}{Gene symbols of hits.}
#'     \item{mut_entrez}{Entrez ids of mutations.}
#'     \item{mut_symbol}{Gene symbols of mutations.}
#'     \item{is_slp}{Whether the targeted gene is a SLP.}
#'     \item{pvalue}{p_value from \code{\link[RankProd]{RankProducts}}.}
#'     \item{fdr}{"BH" adjusted pvalue via \code{\link[stats]{p.adjust}}.}
#'     \item{im}{The importance value returned by \code{\link{genie3}}.}
#'     \item{dualhit}{Whether the slp is identified by \code{\link{corr_slp}} and \code{\link{comp_slp}}.}
#' }
#' @examples
#' require(future)
#' require(doFuture)
#' plan(multisession, workers = 2)
#' library(magrittr)
#' library(data.table)
#' data(example_compSLP)
#' data(example_corrSLP)
#' merged_res <- merge_slp(example_compSLP, example_corrSLP)
#'
#' #- Toy hits data.
#' screen_1 <- merged_res[, .(slp_entrez, slp_symbol)] %>%
#'     unique %>%
#'     .[sample(nrow(.), round(.8 * nrow(.)))] %>%
#'     setnames(c(1, 2), c("screen_entrez", "screen_symbol")) %>%
#'     .[, cell_line := "cell_1"]
#'
#' screen_2 <- merged_res[, .(slp_entrez, slp_symbol)] %>%
#'     unique %>%
#'     .[sample(nrow(.), round(.8 * nrow(.)))] %>%
#'     setnames(c(1, 2), c("screen_entrez", "screen_symbol")) %>%
#'     .[, cell_line := "cell_2"]
#'
#' screen_hit <- rbind(screen_1, screen_2)
#'
#' #- Toy mutations data.
#' mut_1 <- merged_res[, .(mut_entrez)] %>%
#'     unique %>%
#'     .[sample(nrow(.), round(.8 * nrow(.)))] %>%
#'     .[, cell_line := "cell_1"]
#'
#' mut_2 <- merged_res[, .(mut_entrez)] %>%
#'     unique %>%
#'     .[sample(nrow(.), round(.8 * nrow(.)))] %>%
#'     .[, cell_line := "cell_2"]
#'
#' mut_info <- rbind(mut_1, mut_2)
#'
#' #- Hits that are identified as SLPs.
#' scr_res <- lapply(c("cell_1", "cell_2"), scr_slp, screen_hit, mut_info, merged_res)
#' scr_res[lengths(scr_res) == 0] <- NULL
#' scr_res <- rbindlist(scr_res)
#' plan(sequential)
#' @export
scr_slp <- function(cell, screen_data, cell_mut, tumour_slp) {
  stopifnot(is.data.table(screen_data))

  mut_entrez <- im <- pvalue <- slp_symbol <- is_slp <- cell_line <- NULL

  if (!(cell %in% screen_data$cell_line)) stop("(EE) ", cell, " is not found in RSA data.")
  if (!(cell %in% cell_mut$cell_line)) stop("(EE) ", cell, " is not found in cell line mutation data.")

  cell_mut[, mut_entrez := as.character(mut_entrez)]

  #- Cell sepecific screen data.
  screen_lite <- unique(screen_data[cell_line == cell])
  #- Mutation found in the both SLP prediction data and cell line.
  comm_mut    <- intersect(cell_mut[cell_line == cell, mut_entrez], tumour_slp$mut_entrez)

  if (length(comm_mut) > 0) {
    screen_slp <- merge(screen_lite, tumour_slp[mut_entrez %in% comm_mut], by.x = "screen_entrez", by.y = "slp_entrez", all.x = TRUE) %>%
      .[, is_slp := fifelse(is.na(pvalue) & is.na(im), "NO", "YES")] %>%
      .[, slp_symbol := NULL] %>%
      setcolorder(c("cell_line", "screen_entrez", "screen_symbol", "mut_entrez", "mut_symbol", "is_slp")) %>%
      .[order(-is_slp)]

    return(screen_slp)
  }
}

#' Identify consensus SLPs
#'
#' Identify consensus SLPs based on Cohen's Kappa or hypergeometric test.
#'
#' @param screen_slp screen hits data annotated with SLPs information, generated by \code{\link{scr_slp}}.
#' @param tumour_slp the merged SLPs data predicted by \code{\link{corr_slp}} and \code{\link{comp_slp}}.
#' @details
#'   Consensus SLPs are enriched screen hits that are SLPs of same mutations in different cell lines.
#'   For each common mutation, the SLPs predicted from human tumour data are used as the total sets.
#'   We used either Cohen's Kappa coefficient on a confusion matrix, or Hypergeometric test, to test
#'   the signficance of overlapping of screen hits.
#' @return A data.table.
#'   \describe{
#'     \item{mut_entrez}{Entrez ids of mutations.}
#'     \item{mut_symbol}{Gene symbols of mutations.}
#'     \item{cons_slp_entrez}{Entrez ids of consensus SLPs.}
#'     \item{cons_slp_symbol}{Gene symbols of Consensus SLPs.}
#'     \item{cell_1, cell_2}{From which pair of cell lines the consensus SLPs predicted.}
#'     \item{judgement}{Judgement based on Cohen's Kappa.}
#'     \item{kappa_value}{Cohen's Kappa coefficient}
#'     \item{pvalue}{pvalue for Cohen's Kappa coefficient.}
#'     \item{fdr}{"BH" adjusted pvalue via \code{\link[stats]{p.adjust}}.}
#' }
#' @references
#'   Landis JR, Koch GG (1977) The measurement of observer agreement for categorical data. Biomet-rics, 33: 159-174.
#' @examples
#' #- See the examples in the vignette.
#' if (FALSE) k_res <- cons_slp(scr_res, merged_res)
#' @export
cons_slp <- function(screen_slp, tumour_slp) {
  padj <- pvalue <- cons_slp_entrez <- N <- mut_entrez <- is_slp <- i <- NULL

  screen_lite <- screen_slp[is_slp == "YES"]
  rec_mut     <- screen_lite[, c("cell_line", "mut_entrez")] %>%
    unique %>%
    .[, .N, by = mut_entrez] %>%
    .[N >= 2] %>%
    extract2("mut_entrez")

  if (length(rec_mut) > 0) {
    suppressPackageStartupMessages(
      allres <- foreach(i = rec_mut) %dopar% {
        sub_screen <- screen_lite[mut_entrez == i]
        comb_cell  <- combn(unique(sub_screen$cell_line), 2, simplify = FALSE)
        res        <- lapply(comb_cell, fn_sub_cons_slp, z = i, screen_slp = sub_screen, tumour_slp = tumour_slp)

        res[lengths(res) == 0] <- NULL
        if (length(res) > 0) res <- rbindlist(res)

        return(res)
      })

    allres[lengths(allres) == 0] <- NULL

    if (length(allres) > 0) {
      allres <- rbindlist(allres) %>%
        na.omit %>%
        .[, padj := p.adjust(pvalue, method = "BH")]

      #- Expand the cons_slp_entrez to each SLP per row.
      bycol  <- setdiff(names(allres), "cons_slp_entrez")
      allres <- allres[, .(cons_slp_entrez = unlist(tstrsplit(cons_slp_entrez, "_"))), by = bycol] %>%
        merge(unique(screen_lite[, c("screen_entrez", "screen_symbol")]), by.x = "cons_slp_entrez", by.y = "screen_entrez") %>%
        setnames("screen_symbol", "cons_slp_symbol") %>%
        setcolorder(c("mut_entrez", "mut_symbol", "cons_slp_entrez", "cons_slp_symbol"))
    } else {
      message("(II) No consensus SLPs.")
    }
  } else {
    message("(II) No recurrent mutations among cell lines.")
  }
}

fn_sub_cons_slp <- function(paircell, z, screen_slp, tumour_slp) {
  slp_entrez <- cell_line <- mut_entrez <- screen_entrez <- NULL

  x <- paircell[1]
  y <- paircell[2]

  x_slp <- screen_slp[cell_line == x, screen_entrez]
  y_slp <- screen_slp[cell_line == y, screen_entrez]
  c_slp <- intersect(x_slp, y_slp)

  if (length(c_slp) > 0) {
    t_slp <- tumour_slp[mut_entrez == z, slp_entrez]
    mtx   <- fn_confusion_mtx(x_slp, y_slp, t_slp)
    k_res <- fmsb::Kappa.test(mtx)
    res   <- data.table(mut_entrez      = z,
                        mut_symbol      = unique(screen_slp$mut_symbol),
                        cell_1          = x,
                        cell_2          = y,
                        cons_slp_entrez = paste(c_slp, collapse = "_"),
                        judgement       = k_res$Judgement,
                        kappa_value     = k_res$Result$estimate,
                        pvalue          = k_res$Result$p.value)
    return(res)
  }
}

fn_confusion_mtx <- function(x, y, t) {
  #- Confusion matrix.
  #-    y   noy
  #- x
  #- nox
  nox <- setdiff(t, x)
  noy <- setdiff(t, y)
  n1  <- intersect(x, y) %>% length
  n2  <- intersect(x, noy) %>% length
  n3  <- intersect(nox, y) %>% length
  n4  <- intersect(nox, noy) %>% length
  mtx <- matrix(c(n1, n2, n3, n4), nrow = 2, byrow = TRUE)
  return(mtx)
}
