% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/AllGenerics.R, R/agglomerate.R,
%   R/splitByRanks.R
\name{agglomerateByRank}
\alias{agglomerateByRank}
\alias{agglomerateByVariable}
\alias{agglomerateByModule}
\alias{agglomerateByRanks}
\alias{unsplitByRanks}
\alias{agglomerate-methods}
\alias{agglomerateByRank,TreeSummarizedExperiment-method}
\alias{agglomerateByRank,SingleCellExperiment-method}
\alias{agglomerateByRank,SummarizedExperiment-method}
\alias{agglomerateByVariable,TreeSummarizedExperiment-method}
\alias{agglomerateByVariable,SummarizedExperiment-method}
\alias{agglomerateByModule,SummarizedExperiment-method}
\alias{agglomerateByRanks,SummarizedExperiment-method}
\alias{agglomerateByRanks,SingleCellExperiment-method}
\alias{agglomerateByRanks,TreeSummarizedExperiment-method}
\alias{splitByRanks}
\alias{unsplitByRanks,SingleCellExperiment-method}
\alias{unsplitByRanks,TreeSummarizedExperiment-method}
\title{Agglomerate data using taxonomic information or other grouping}
\usage{
agglomerateByRank(x, ...)

agglomerateByVariable(x, ...)

agglomerateByModule(x, ...)

agglomerateByRanks(x, ...)

unsplitByRanks(x, ...)

\S4method{agglomerateByRank}{TreeSummarizedExperiment}(
  x,
  rank = taxonomyRanks(x)[1],
  update.tree = agglomerateTree,
  agglomerate.tree = agglomerateTree,
  agglomerateTree = TRUE,
  ...
)

\S4method{agglomerateByRank}{SingleCellExperiment}(
  x,
  rank = taxonomyRanks(x)[1],
  altexp = NULL,
  altexp.rm = strip_altexp,
  strip_altexp = TRUE,
  ...
)

\S4method{agglomerateByRank}{SummarizedExperiment}(
  x,
  rank = taxonomyRanks(x)[1],
  empty.rm = TRUE,
  empty.fields = c(NA, "", " ", "\\t", "-", "_"),
  ...
)

\S4method{agglomerateByVariable}{TreeSummarizedExperiment}(
  x,
  by,
  group = f,
  f,
  update.tree = mergeTree,
  mergeTree = TRUE,
  ...
)

\S4method{agglomerateByVariable}{SummarizedExperiment}(x, by, group = f, f, ...)

\S4method{agglomerateByModule}{SummarizedExperiment}(x, by, group, na.rm = FALSE)

\S4method{agglomerateByRanks}{SummarizedExperiment}(
  x,
  ranks = taxonomyRanks(x),
  na.rm = TRUE,
  as.list = FALSE,
  ...
)

\S4method{agglomerateByRanks}{SingleCellExperiment}(
  x,
  ranks = taxonomyRanks(x),
  na.rm = TRUE,
  as.list = FALSE,
  ...
)

\S4method{agglomerateByRanks}{TreeSummarizedExperiment}(
  x,
  ranks = taxonomyRanks(x),
  na.rm = TRUE,
  as.list = FALSE,
  ...
)

splitByRanks(x, ...)

\S4method{unsplitByRanks}{SingleCellExperiment}(
  x,
  ranks = taxonomyRanks(x),
  keep.dimred = keep_reducedDims,
  keep_reducedDims = FALSE,
  ...
)

\S4method{unsplitByRanks}{TreeSummarizedExperiment}(
  x,
  ranks = taxonomyRanks(x),
  keep.dimred = keep_reducedDims,
  keep_reducedDims = FALSE,
  ...
)
}
\arguments{
\item{x}{\code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}}.}

\item{...}{arguments passed to \code{agglomerateByRank} function for
\code{SummarizedExperiment} objects and other functions.
See \code{\link[=agglomerate-methods]{agglomerateByRank}} for more details.}

\item{rank}{\code{Character scalar}. Defines a taxonomic rank. Must be a
value of \code{taxonomyRanks()} function.}

\item{update.tree}{\code{Logical scalar}. Should
\code{rowTree()} also be merged? (Default: \code{TRUE})}

\item{agglomerate.tree}{Deprecated. Use \code{update.tree} instead.}

\item{agglomerateTree}{Deprecated. Use \code{update.tree} instead.}

\item{altexp}{\code{Character scalar} or \code{integer scalar}.
Specifies an alternative experiment containing the input data.}

\item{altexp.rm}{\code{Logical scalar}. Should alternative
experiments be removed prior to agglomeration? This prevents too many
nested alternative experiments by default. (Default:
\code{TRUE})}

\item{strip_altexp}{Deprecated. Use \code{altexp.rm} instead.}

\item{empty.rm}{\code{Logical scalar}. Defines whether rows including
\code{empty.fields} in specified \code{rank} will be excluded.
(Default: \code{TRUE})}

\item{empty.fields}{\code{Character vector}. Defines which values should be
regarded as empty. (Default: \code{c(NA, "", " ", "\t", "-", "_")}). They
will be removed if \code{empty.rm = TRUE} before agglomeration.}

\item{by}{\code{Character scalar}. Determines if data is merged
row-wise / for features ('rows') or column-wise / for samples ('cols').
Must be \code{'rows'} or \code{'cols'}.}

\item{group}{\code{Character scalar}, \code{character vector} or
\code{factor vector}. A column name from \code{rowData(x)} or
\code{colData(x)} or alternatively a vector specifying how the merging is
performed. If vector, the value must be the same length as
\code{nrow(x)/ncol(x)}. Rows or columns corresponding to the same level will
be merged. If \code{length(levels(group)) == nrow(x)/ncol(x)}, \code{x} will
be returned unchanged. For \code{agglomerateByModule}, \code{group} should
specify one or several names of logical or numeric binary variables from the
\code{rowData(x)/colData(x)} by which to agglomerate rows or columns.}

\item{f}{Deprecated. Use \code{group} instead.}

\item{mergeTree}{Deprecated. Use \code{update.tree} instead.}

\item{na.rm}{\code{Logical scalar}. Should NA values be omitted?
(Default: \code{TRUE} for \code{agglomerateByRanks}, \code{FALSE} for
\code{agglomerateByModule})}

\item{ranks}{\code{Character vector}. Defines taxonomic ranks. Must all be
values of \code{taxonomyRanks()} function.}

\item{as.list}{\code{Logical scalar}. Should the list of
\code{SummarizedExperiment} objects be returned by the function
\code{agglomerateByRanks} as a SimpleList or stored in altExps?
(Default: \code{FALSE})}

\item{keep.dimred}{\code{Logical scalar}. Should the
\code{reducedDims(x)} be transferred to the result? Please note that this
breaks the link between the reduced dims and the data used to calculate them.
(Default: \code{FALSE})}

\item{keep_reducedDims}{Deprecated. Use \code{keep.dimred} instead.}
}
\value{
\code{agglomerateByRank} returns a taxonomically-agglomerated,
optionally-pruned object of the same class as \code{x}.
\code{agglomerateByVariable} and \code{agglomerateByModule} return an object
of the same class as \code{x} with the specified entries merged into one
entry in all relevant components.

For \code{agglomerateByRanks}:
If \code{as.list = TRUE} : \code{SummarizedExperiment} objects in a
\code{SimpleList}
If \code{as.list = FALSE} : The \code{SummarizedExperiment} passed as a
parameter and now containing the \code{SummarizedExperiment} objects in its
altExps

For \code{unsplitByRanks}: \code{x}, with \code{rowData} and \code{assay}
data replaced by the unsplit data. \code{colData} of \code{x} is kept,
whereas any existing \code{rowTree} is dropped, since existing
\code{rowLinks} are not valid anymore.
}
\description{
Agglomeration functions can be used to sum up data based on specific criteria
such as taxonomic ranks, variables or prevalence.

\code{agglomerateByRank} can be used to sum up data based on associations
with certain taxonomic ranks, as defined in \code{rowData}. Only available
\code{\link{taxonomyRanks}} can be used.

\code{agglomerateByVariable} and \code{agglomerateByModule} merge data on
rows or columns of a \code{SummarizedExperiment} as defined by a
\code{factor} alongside the chosen dimension. These functions allow
agglomeration of data based on variables other than taxonomic ranks.
Metadata from the \code{rowData} or \code{colData} are
retained as defined by \code{archetype}.
\code{\link[SummarizedExperiment:SummarizedExperiment-class]{assay}} are
agglomerated, i.e. summed up. If the assay contains values other than counts
or absolute values, this can lead to meaningless values being produced.

\code{agglomerateByRanks} takes a \code{SummarizedExperiment}, splits it
along the
taxonomic ranks, aggregates the data per rank, converts the input to a
\code{SingleCellExperiment} object and stores the aggregated data as
alternative experiments. \code{unsplitByRanks} takes these alternative
experiments and flattens them again into a single
\code{SummarizedExperiment}.
}
\details{
Agglomeration sums up the values of assays at the specified taxonomic level.
With certain assays, e.g. those that include binary or negative values, this
summing can produce meaningless values. In those cases, consider performing
agglomeration first, and then applying the transformation afterwards.

\code{agglomerateByVariable} works similarly to
\code{\link[scuttle:sumCountsAcrossFeatures]{sumCountsAcrossFeatures}}.
However, additional support for \code{TreeSummarizedExperiment} was added and
science field agnostic names were used. In addition, the \code{archetype}
argument lets the user select how to preserve row or column data. For
merging the assay data, functions from \code{scuttle} are used.

\code{agglomerateByModule} allows agglomerating features or samples based
on one or multiple variables of logical or numeric binary (0/1) type. It is
particularly useful for agglomerating by taxonomic or functional modules,
each defined by a logical or binary variable in the \code{rowData}, as
features can belong to several modules.

\code{agglomerateByRanks} will use by default all available taxonomic ranks,
but
this can be controlled by setting \code{ranks} manually. \code{NA} values
are removed by default, since they would not make sense, if the result
should be used for \code{unsplitByRanks} at some point. The input data
remains unchanged in the returned \code{SingleCellExperiment} objects.

\code{unsplitByRanks} will remove any \code{NA} value on each taxonomic rank
so that no ambiguous data is created. In addition, a column
\code{taxonomicLevel} is created or overwritten in the \code{rowData} to
specify which alternative experiment each row originates from. This can also
be used for \code{\link[SingleCellExperiment:splitAltExps]{splitAltExps}} to
split the result along the same factor again. The input data from the base
objects is not returned, only the data from the \code{altExp()}. Be aware
that
changes to \code{rowData} of the base object are not returned; only
the \code{colData} of the base object is kept.
}
\examples{

### Agglomerate data based on taxonomic information

data(GlobalPatterns)
# print the available taxonomic ranks
colnames(rowData(GlobalPatterns))
taxonomyRanks(GlobalPatterns)

# agglomerate at the Family taxonomic rank
x1 <- agglomerateByRank(GlobalPatterns, rank="Family")
## How many taxa before/after agglomeration?
nrow(GlobalPatterns)
nrow(x1)

# Do not agglomerate the tree
x2 <- agglomerateByRank(
    GlobalPatterns, rank="Family", update.tree = FALSE)
nrow(x2) # same number of rows, but
rowTree(x1) # ... different
rowTree(x2) # ... tree

# If assay contains binary or negative values, summing might lead to
# meaningless values, and you will get a warning. In these cases, you might
# want to apply the transformation again after agglomeration.
tse <- transformAssay(GlobalPatterns, method = "pa")
tse <- agglomerateByRank(tse, rank = "Genus")
tse <- transformAssay(tse, method = "pa")

# Removing empty labels by setting empty.rm = TRUE
sum(is.na(rowData(GlobalPatterns)$Family))
x3 <- agglomerateByRank(GlobalPatterns, rank="Family", empty.rm = TRUE)
nrow(x3) # different from x2

# Because all the rownames are from the same rank, rownames do not include
# prefixes, in this case "Family:".
print(rownames(x3[1:3,]))

# To add them, use getTaxonomyLabels function.
rownames(x3) <- getTaxonomyLabels(x3, with.rank = TRUE)
print(rownames(x3[1:3,]))

# use 'empty.ranks.rm' to remove columns that include only NAs
x4 <- agglomerateByRank(
    GlobalPatterns, rank="Phylum", empty.ranks.rm = TRUE)
head(rowData(x4))

# If the assay contains NAs, you might want to specify na.rm=TRUE,
# since summing up NAs leads to NA
x5 <- GlobalPatterns
# Replace first value with NA
assay(x5)[1,1] <- NA
x6 <- agglomerateByRank(x5, "Kingdom")
head( assay(x6) )
# Use na.rm=TRUE
x6 <- agglomerateByRank(x5, "Kingdom", na.rm = TRUE)
head( assay(x6) )

## Look at enterotype dataset...
data(enterotype)
## Print the available taxonomic ranks. Shows only 1 available rank,
## not useful for agglomerateByRank
taxonomyRanks(enterotype)

### Merge TreeSummarizedExperiments on rows and columns

data(esophagus)
esophagus
plot(rowTree(esophagus))
# Get a factor for merging
f <- factor(regmatches(rownames(esophagus),
    regexpr("^[0-9]*_[0-9]*",rownames(esophagus))))
merged <- agglomerateByVariable(
    esophagus, by = "rows", f, update.tree = TRUE)
plot(rowTree(merged))
#
data(GlobalPatterns)
GlobalPatterns
merged <- agglomerateByVariable(
    GlobalPatterns, by = "cols", colData(GlobalPatterns)$SampleType)
merged

## Agglomerate by multiple modules

# Generate 30 random modules
N_module <- 30L
modules <- sample(
    c(TRUE, FALSE),
    size = nrow(tse) * N_module,
    prob = c(0.2, 0.8),
    replace = TRUE
)

# Convert modules to matrix
modules <- matrix(modules, nrow = nrow(tse))

# Add module names as colnames
colnames(modules) <- paste0("module_", seq_len(ncol(modules)))

# Add modules to rowData
rowData(tse) <- cbind(rowData(tse), modules)

# Extract module columns
module_columns <- grep("module_", colnames(rowData(tse)), value = TRUE)

# Agglomerate based on modules
tse_module <- agglomerateByModule(tse, by = 1, group = module_columns)

# Optionally, store results into altExp slot
altExp(tse, "modules") <- tse_module

data(GlobalPatterns)
# print the available taxonomic ranks
taxonomyRanks(GlobalPatterns)

# agglomerateByRanks
# 
tse <- agglomerateByRanks(GlobalPatterns)
altExps(tse)
altExp(tse,"Kingdom")
altExp(tse,"Species")

# unsplitByRanks
tse <- unsplitByRanks(tse)
tse

}
\seealso{
\code{\link[=splitOn]{splitOn}},
\code{\link[=unsplitOn]{unsplitOn}},
\code{\link[=agglomerate-methods]{agglomerateByVariable}},
\code{\link[scuttle:sumCountsAcrossFeatures]{sumCountsAcrossFeatures}},
\code{\link[=agglomerate-methods]{agglomerateByRank}},
\code{\link[SingleCellExperiment:altExps]{altExps}},
\code{\link[SingleCellExperiment:splitAltExps]{splitAltExps}}
}
