% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/gsvaNewAPI.R
\name{deduplicateGeneSets}
\alias{deduplicateGeneSets}
\title{Handling of Duplicated Gene Set Names}
\usage{
deduplicateGeneSets(
  geneSets,
  deduplUse = c("first", "drop", "union", "smallest", "largest")
)
}
\arguments{
\item{geneSets}{A named list of gene sets represented as character vectors
of gene IDs, e.g., as returned by \code{\link{readGMT}}.}

\item{deduplUse}{A character vector of length 1 specifying one of several
methods to handle duplicated gene set names.
Duplicated gene set names are explicitly forbidden by the
\href{https://software.broadinstitute.org/cancer/software/gsea/wiki/index.php/Data_formats}{GMT file format specification}
but can nevertheless be encountered in the wild.
The available choices are:
\itemize{
\item \code{first} (the default): drops all gene sets whose names are duplicated
according to the base R function \code{\link[base]{duplicated}} and retains
only the first occurrence of a gene set name.
\item \code{drop}: removes \emph{all} gene sets that have a duplicated name, including
the first occurrence of each such name.
\item \code{union}: replaces gene sets with duplicated names by a single gene set
containing the union of all their gene IDs.
\item \code{smallest}: drops gene sets with duplicated names and retains only the
smallest of them, i.e. the one with the fewest gene IDs.  If there are
several smallest gene sets, the first will be selected.
\item \code{largest}: drops gene sets with duplicated names and retains only the
largest of them, i.e. the one with the most gene IDs.  If there are
several largest gene sets, the first will be selected.
}}
}
\value{
A named list of gene sets represented as character vectors of
gene IDs.
}
\description{
Offers a choice of ways for handling duplicated gene set names,
which may not be suitable as input to other gene set analysis functions.
}
