% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/AllGenerics.R, R/methods-matchRanges.R
\name{matchRanges}
\alias{matchRanges}
\alias{matchRanges,DF_OR_df_OR_dt,DF_OR_df_OR_dt,formula,character_OR_missing,logical_OR_missing-method}
\alias{matchRanges,GRanges,GRanges,formula,character_OR_missing,logical_OR_missing-method}
\alias{matchRanges,GInteractions,GInteractions,formula,character_OR_missing,logical_OR_missing-method}
\title{Generate a covariate-matched control set of ranges}
\usage{
matchRanges(focal, pool, covar, method = "nearest", replace = TRUE, ...)

\S4method{matchRanges}{DF_OR_df_OR_dt,DF_OR_df_OR_dt,formula,character_OR_missing,logical_OR_missing}(focal, pool, covar, method, replace)

\S4method{matchRanges}{GRanges,GRanges,formula,character_OR_missing,logical_OR_missing}(focal, pool, covar, method, replace)

\S4method{matchRanges}{GInteractions,GInteractions,formula,character_OR_missing,logical_OR_missing}(focal, pool, covar, method, replace)
}
\arguments{
\item{focal}{A data.frame, data.table, DataFrame, GRanges, or GInteractions
object containing the focal data to match.}

\item{pool}{A data.frame, data.table, DataFrame, GRanges, or GInteractions
object containing the pool from which to select matches.}

\item{covar}{A one-sided (right-hand side) formula specifying the covariates
on which to match, e.g. \code{~feature2 + feature3}.}

\item{method}{A character describing which matching method to use.
Supported options are 'nearest', 'rejection', or 'stratified'.}

\item{replace}{TRUE/FALSE describing whether to select matches with or without
replacement.}

\item{...}{Additional arguments.}
}
\value{
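A covariate-matched control set of data, returned as a
\code{MatchedDataFrame}, \code{MatchedGRanges}, or \code{MatchedGInteractions}
object depending on the class of the inputs.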
}
\description{
\code{matchRanges()} uses a propensity score-based method to
generate a covariate-matched control set of DataFrame,
GRanges, or GInteractions objects.
}
\details{
Available inputs for \code{focal} and \code{pool} include \code{data.frame},
\code{data.table}, \code{DataFrame}, \code{GRanges}, or \code{GInteractions}.
\code{data.frame} and \code{data.table} inputs are coerced to \code{DataFrame}
objects and returned as \code{MatchedDataFrame} while \code{GRanges} and
\code{GInteractions} objects are returned as \code{MatchedGRanges} or
\code{MatchedGInteractions}, respectively.
}
\section{Methodology}{

\code{matchRanges} uses
\href{https://en.wikipedia.org/wiki/Propensity_score_matching}{propensity scores}
to perform subset selection on the \code{pool} set such that the resulting \code{matched}
set contains similar distributions of covariates to those of the \code{focal} set.
A propensity score is the conditional probability of assigning an element
(in our case, a genomic range) to a particular outcome (\code{Y}) given a set of
covariates. Propensity scores are estimated using a logistic regression model
where the outcome \code{Y=1} for \code{focal} and \code{Y=0} for \code{pool}, over the provided
covariates \code{covar}.
}

\section{Matching methods}{

\itemize{
\item \code{method = 'nearest'}: (Default) Nearest neighbor matching
with replacement. Finds the nearest neighbor by using a
rolling join with \code{data.table}. Matching without replacement
is not currently supported.
\item \code{method = 'rejection'}: Rejection sampling
with or without replacement. Uses a probability-based approach
to select options in the \code{pool} that match the \code{focal} distribution.
\item \code{method = 'stratified'}: Iterative stratified sampling
with or without replacement. Bins \code{focal} and \code{pool} propensity
scores by value and selects matches within bins until all \code{focal}
items have a corresponding match in \code{pool}.
}
}

\examples{
## Match with DataFrame
set.seed(123)
x <- makeExampleMatchedDataSet(type = 'DataFrame')
matchRanges(focal = x[x$feature1,],
            pool = x[!x$feature1,],
            covar = ~feature2 + feature3)

## Match with GRanges
set.seed(123)
x <- makeExampleMatchedDataSet(type = "GRanges")
matchRanges(focal = x[x$feature1,],
            pool = x[!x$feature1,],
            covar = ~feature2 + feature3)

## Match with GInteractions
set.seed(123)
x <- makeExampleMatchedDataSet(type = "GInteractions")
matchRanges(focal = x[x$feature1,],
            pool = x[!x$feature1,],
            covar = ~feature2 + feature3)

## Nearest neighbor matching with replacement
set.seed(123)
x <- makeExampleMatchedDataSet(type = 'DataFrame')
matchRanges(focal = x[x$feature1,],
            pool = x[!x$feature1,],
            covar = ~feature2 + feature3,
            method = 'nearest',
            replace = TRUE)

## Rejection sampling without replacement
set.seed(123)
x <- makeExampleMatchedDataSet(type = 'DataFrame')
matchRanges(focal = x[x$feature1,],
            pool = x[!x$feature1,],
            covar = ~feature2 + feature3,
            method = 'rejection',
            replace = FALSE)

## Stratified sampling without replacement
set.seed(123)
x <- makeExampleMatchedDataSet(type = 'DataFrame')
matchRanges(focal = x[x$feature1,],
            pool = x[!x$feature1,],
            covar = ~feature2 + feature3,
            method = 'stratified',
            replace = FALSE)

}
\references{
matchRanges manuscript:

Eric S. Davis, Wancen Mu, Stuart Lee, Mikhail G. Dozmorov,
Michael I. Love, Douglas H. Phanstiel. 2023.
"matchRanges: Generating null hypothesis genomic ranges
via covariate-matched sampling."
Bioinformatics. doi: 10.1093/bioinformatics/btad197
}
