% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/clusteringbase.R
\docType{data}
\name{geva.cluster}
\alias{geva.cluster}
\alias{options.cluster.method}
\alias{options.cl.score.method}
\alias{options.distance}
\title{GEVA Cluster Analysis}

\usage{
geva.cluster(
  sv,
  cluster.method = options.cluster.method,
  cl.score.method = options.cl.score.method,
  resolution = 0.3,
  distance.method = options.distance,
  ...,
  grouped.return = FALSE
)

options.cluster.method
# c("hierarchical", "density", "quantiles")

options.cl.score.method
# c("auto", "hclust.height", "density", "centroid")

options.distance
# c("euclidean", "manhattan")
}
\arguments{
\item{sv}{a \code{numeric} \code{\linkS4class{SVTable}} object (usually \code{\linkS4class{GEVASummary}})}

\item{cluster.method}{\code{character}, one of the main grouping methods (see `Details')}

\item{cl.score.method}{\code{character}, method used to calculate the cluster scores for each point. Ignored if \code{cluster.method} is \code{"quantiles"}}

\item{resolution}{\code{numeric} (\code{0} to \code{1}), used as a "zoom" parameter for cluster detection. A zero value returns the minimum number of clusters that can be detected by the \code{cluster.method}, while \code{1} returns the maximum number of clusters. Ignored if \code{cluster.method} is \code{"quantiles"}}

\item{distance.method}{\code{character}, two-point distance calculation method. Options are \code{"euclidean"} or \code{"manhattan"} distances}

\item{...}{further arguments passed to \code{\link[=geva.dcluster]{geva.dcluster()}}, \code{\link[=geva.hcluster]{geva.hcluster()}}, or \code{\link[=geva.quantiles]{geva.quantiles()}}.
\cr In addition, the following arguments are accepted:
\itemize{
\item{\code{eps} : \code{numeric}, defines the \emph{epsilon} coefficient for density clustering (see 'Details')}
\item{\code{mink.p} : \code{numeric}, parameter for the Minkowski metric used in hierarchical clustering. Used as the \code{p} argument for \code{\link[fastcluster:hclust.vector]{fastcluster::hclust.vector()}}}
\item{\code{verbose} : \code{logical}, whether to print the current progress (default is \code{TRUE})}
}}

\item{grouped.return}{\code{logical}, whether to concatenate the clustered and summarized data into a single object}
}
\value{
This function produces a \code{\linkS4class{GEVAGroupSet}}-derived object, particularly a \code{\linkS4class{GEVACluster}} for the \code{"hierarchical"} and \code{"density"} cluster methods or a \code{\linkS4class{GEVAQuantiles}} for the \code{"quantiles"} method.

However, if \code{grouped.return} is \code{TRUE} and \code{sv} is a \code{\linkS4class{GEVASummary}} object, the produced \code{GEVAGroupSet} data will be concatenated to the input and returned as a \code{\linkS4class{GEVAGroupedSummary}}.
}
\description{
Performs a cluster analysis from summarized data.
}
\details{
The \code{cluster.method} determines which grouping subroutine is used to classify the summarized data points based on distance and partitioning. Each option has an equivalent function that can be called directly: \code{"density"} uses \code{\link[=geva.dcluster]{geva.dcluster()}}; \code{"hierarchical"} uses \code{\link[=geva.hcluster]{geva.hcluster()}}; and \code{"quantiles"} calls \code{\link[=geva.quantiles]{geva.quantiles()}}. However, this wrapper function can also be used to join \code{GEVASummary} and \code{GEVAGroupSet} objects into a single \code{GEVAGroupedSummary} object by setting \code{grouped.return} to \code{TRUE}.

The \code{cl.score.method} argument defines how scores are calculated for each SV point (row in \code{sv}) that was assigned to a cluster (\emph{i.e.}, excluding non-clustered points). If specified as \code{"auto"}, the parameter will be selected based on the \code{cluster.method}: \code{"density"} (rate of neighbor points) for the density method; and \code{"hclust.height"} (local hierarchy height) for the hierarchical method. The \code{"centroid"} method calculates the scores based on the proportional distance from each point to its cluster's centroid. Note that the \code{cl.score.method} argument is ignored if \code{cluster.method} is \code{"quantiles"}, since quantile scores are always based on their local centroid distances.

The \code{resolution} value is a more accessible way to define the cluster separation threshold used in density and hierarchical clustering methods. Density clustering uses an \emph{epsilon} value that represents the minimum distance of separation, whereas hierarchical clusters are defined by cutting the hierarchy tree wherever there is a minimum distance between two hierarchies. In this sense, \code{resolution} translates a value between \code{0} and \code{1} to a proportional value for \emph{epsilon} or hierarchical height (depending on the \code{cluster.method}) that would result in the lowest possible number of clusters for \code{0} and the highest number for \code{1}. Nevertheless, if \emph{epsilon} is specified as \code{eps} in the optional arguments, its value is used and \code{resolution} is ignored.
}
\examples{
## Cluster analysis from a randomly generated input 

# Preparing the data
ginput <- geva.ideal.example()      # Generates a random input example
gsummary <- geva.summarize(ginput)  # Summarizes with the default parameters

# Hierarchical clustering
gclust <- geva.cluster(gsummary, cluster.method="hierarchical")
plot(gclust)

# Density clustering
gclust <- geva.cluster(gsummary, cluster.method="density")
plot(gclust)

# Density clustering with slightly more resolution
gclust <- geva.cluster(gsummary,
                       cluster.method="density",
                       resolution=0.35)
plot(gclust)

}
\seealso{
Other geva.cluster: 
\code{\link{geva.dcluster}()},
\code{\link{geva.hcluster}()},
\code{\link{geva.quantiles}()}
}
\concept{geva.cluster}
\keyword{datasets}
