#' Define the consensus dicercall for each sRNA cluster
#'
#' @description The sRNA dicercall represents the length in nucleotides of the 
#' most abundant sRNA sequence within a cluster. The function calculates the 
#' consensus dicercall classification.
#'
#' @details
#' For each sample, the alignment/clustering step predicted the sRNA 
#' dicercall for each cluster. This value is stored in the columns starting 
#' with "DicerCall_".  This value represents the length of nucleotides of 
#' the most abundant sRNA within the cluster. For some clusters, there is no 
#' particular sRNA which is more abundant than another, hence, it is stated as 
#' "NA" or "N", which is referred to as unclassified. The `RNAdicercall()` 
#' function calculates the consensus dicercall for each sRNA cluster based on 
#' the values across replicates. There are several parameters which will alter
#' the output, including the handling of ties and the method to draw the 
#' consensus from. 
#'  
#' When ties.method = "random", ties are broken at random (note that the 
#' default is "exclude"). 
#' In this case, the determination of a tie assumes that the entries are 
#' probabilities: there is a relative tolerance of 1e-5, relative to the 
#' largest (in magnitude, omitting infinity) entry in the row.
#' 
#' When ties.method = "exclude", ties between sRNA classification are ruled as 
#' unclassified ("N"). However, when there is a tie between the choice of a 
#' class or unclassified result the `exclude` option will always select the 
#' class choice over the unclassified result.
#'  
#' If users are working with a chimeric system, utilise the `chimeric=TRUE`
#' parameter and state `genome.ID` and `controls` parameter variables. This will
#' remove any potential mapping errors which could introduce false interpretation. 
#' 
#' To remove excess data noise, `tidy=TRUE` can be used to remove unclassified 
#' ("N") sRNA clusters, resulting in a reduced data set size. 
#' 
#' @param data data.frame; originally generated by [mobileRNA::RNAimport()].
#'
#' @param conditions character; vector containing sample replicate names. When 
#' supplied, the data from the named replicates will be the only ones used to 
#' calculate the dicercall consensus for each sRNA cluster. Each 
#' string should represent a sample name present in the dataframe supplied to
#' the `data` argument.
#'
#' @param tidy logical; tidy-up data by removing sRNA clusters with an unknown 
#' or unclassified result. Default setting \code{tidy=FALSE} keeps all 
#' clusters; use \code{tidy=TRUE} to remove excess background noise. 
#'  
#' @param ties.method character; string specifying how ties are handled, choose
#' either "exclude" or "random". When using `random`, if there is a tie one of 
#' the classes will be chosen at random. While, when using `exclude` if there is 
#' a tie the class is set to undefined, however, if there is a tie between an 
#' undefined and a known class, the known class takes precedence (eg 3x24-nt 
#' and 3xN-nt, then it will be classed as 24nt). Default setting 
#' `ties.method="exclude"`. 
#' 
#' 
#'@param chimeric logical; state whether the system is chimeric and contains 
#'multiple genomes/genotypes. 
#'
#'@param controls character; vector of control condition sample names. 
#'
#'@param genome.ID character; chromosome identifier of the genome representing 
#'either the origin of mobile molecules or the other genome in the chimeric 
#'system.
#'
#'@return The original input data with two additional columns appended known as
#'`DicerCounts` and `DicerConsensus`. The `DicerCounts` column stores the number 
#'of replicates that contributed to defining the consensus dicer-derived sRNA 
#'class. Note that when utilising the `exclude` ties methods, the `DicerCounts` 
#'will be represented as 0 when a tie is identified. While, the `DicerConsensus`
#'stores the consensus dicercall. 
#' 
#'
#'
#' @examples
#'  # load data 
#'  data("sRNA_data")
#'
#' # define consensus sRNA classes.
#' conditions <- c("heterograft_1", "heterograft_2", "heterograft_3")
#'
#' # Run function to define sRNA class for each cluster.
#' sRNA_data_dicercall <- RNAdicercall(data = sRNA_data,
#'                                   conditions = conditions,
#'                                   tidy=TRUE)
#'
#' @export
#' @importFrom dplyr %>%
#' @importFrom dplyr mutate
#' @importFrom dplyr filter
#' @importFrom tidyr replace_na
RNAdicercall <- function(data, conditions = NULL, ties.method = NULL, 
                         tidy = FALSE, chimeric = FALSE, controls = NULL, 
                         genome.ID = NULL) {
  # ---- Input validation ----
  if (base::missing(data)) {
    stop("data is missing. data must be an object of class matrix, data.frame, 
         DataFrame")
  }
  if (!base::inherits(data, c("matrix", "data.frame", "DataFrame"))) {
    stop("data must be an object of class matrix, data.frame, DataFrame")
  }
  # NULL resolves to the first choice ("exclude"). An unrecognised method now
  # fails here with a clear error rather than later with
  # "object 'new_df' not found".
  ties.method <- match.arg(ties.method, c("exclude", "random"))

  # A matrix passes the class check above but cannot be indexed by column
  # name with `[`, so it is converted before any column-wise work.
  if (is.matrix(data)) {
    data <- as.data.frame(data, stringsAsFactors = FALSE)
  }

  # Missing dicercalls are treated as unclassified ("N").
  # NOTE(review): this replaces NA in every column of `data`, not only the
  # "DicerCall_" columns; kept as-is because callers may rely on it.
  data[is.na(data)] <- "N"

  # remove mapping errors:
  if (chimeric) {
    data <- .remove_mapping_errors_V2(data = data, controls = controls, 
                                      genome.ID = genome.ID)
  }

  # ---- Select the dicercall columns to draw the consensus from ----
  class_colnames <- grep("DicerCall_", colnames(data), value = TRUE)

  if (is.null(conditions)) {
    message("Calculating consensus dicercall based on all replicates...")
    onlyconditions <- class_colnames
  } else {
    message("Calculating consensus dicercall based on the select replicates...")
    # NOTE(review): `conditions` are matched as regular-expression substrings,
    # so "sample_1" also selects "DicerCall_sample_10".
    onlyconditions <- base::unique(grep(paste(conditions, collapse = "|"), 
                                        class_colnames, value = TRUE))
  }
  if (length(onlyconditions) == 0L) {
    stop("No 'DicerCall_' columns were found in data for the requested ",
         "replicates")
  }

  # ---- Count, per cluster, how many replicates report each dicercall ----
  # The tallies live in a separate matrix (one column per distinct call)
  # instead of temporary "nt_*" columns in `data`, so user columns whose names
  # start with "nt" are neither counted nor dropped from the output.
  calls <- data[onlyconditions]
  unique_vals <- as.character(unique(unlist(calls, use.names = FALSE)))
  unique_vals <- unique_vals[!is.na(unique_vals)]
  n_rows <- nrow(data)
  row_index <- seq_len(n_rows)
  counts <- matrix(0, nrow = n_rows, ncol = length(unique_vals))
  for (j in seq_along(unique_vals)) {
    counts[, j] <- rowSums(calls == unique_vals[j], na.rm = TRUE)
  }

  # Defaults: unclassified with a zero count (also the result for empty data).
  consensus <- rep("N", n_rows)
  dicer_counts <- numeric(n_rows)

  if (ties.method == "random") {
    message("---The consensus dicercall will be choose at random in the case of a tie.")
    if (length(unique_vals) > 0L) {
      # max.col() breaks ties between equally frequent calls at random.
      pick <- max.col(counts, ties.method = "random")
      dicer_counts <- counts[cbind(row_index, pick)]
      consensus <- unique_vals[pick]
      # A cluster without any call at all stays unclassified.
      consensus[rowSums(counts) == 0] <- "N"
    }
  } else {
    message("---The consensus dicercall will be excluded in the case of a tie.")
    if (length(unique_vals) > 0L) {
      is_class <- unique_vals != "N"
      # Number of replicates that are unclassified for each cluster.
      n_count <- if (all(is_class)) numeric(n_rows) else counts[, !is_class]
      # Baseline: unclassified wins and its replicate count is reported.
      dicer_counts <- n_count
      if (any(is_class)) {
        class_vals <- unique_vals[is_class]
        class_counts <- counts[, is_class, drop = FALSE]
        # Most frequent real class per cluster and how many classes share
        # that top frequency.
        top <- max.col(class_counts, ties.method = "first")
        top_n <- class_counts[cbind(row_index, top)]
        n_top <- rowSums(class_counts == top_n)
        # A class is only in contention when it is at least as frequent as
        # "N": a class/unclassified tie is resolved in favour of the class.
        contends <- top_n > 0 & top_n >= n_count
        class_wins <- contends & n_top == 1
        # Two or more classes sharing the top frequency cannot be resolved:
        # the cluster is unclassified and the count is reported as 0.
        class_tie <- contends & n_top > 1
        consensus[class_wins] <- class_vals[top[class_wins]]
        dicer_counts[class_wins] <- top_n[class_wins]
        dicer_counts[class_tie] <- 0
      }
    }
  }

  data$DicerCounts <- as.numeric(dicer_counts)
  data$DicerConsensus <- consensus

  if (tidy) {
    message("---Removing sRNA clusters with no consensus dicercall")
    return(dplyr::filter(data, DicerConsensus != "N"))
  }
  message("Complete!")
  data
}

