% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/auxiliary.R
\name{establish_overlap1d}
\alias{establish_overlap1d}
\title{Establish m:n Mapping Between Peaks from Replicate 1 and 2}
\usage{
establish_overlap1d(
  rep1_df,
  rep2_df,
  ambiguity_resolution_method = c("overlap", "midpoint", "value"),
  max_gap = -1L
)
}
\arguments{
\item{rep1_df}{data frame of observations (i.e., genomic peaks) of
replicate 1, with at least the following columns (the position of the
columns matters, the column names are irrelevant):
\tabular{rll}{
  column 1:  \tab \code{chr} \tab character; genomic location of peak -
  chromosome (e.g., \code{"chr3"})\cr
  column 2:  \tab \code{start} \tab integer; genomic location of peak -
  start coordinate\cr
  column 3:  \tab \code{end} \tab integer; genomic location of peak -
  end coordinate\cr
  column 4:  \tab \code{value} \tab numeric; p-value, FDR, or heuristic used
  to rank the peaks
}}

\item{rep2_df}{data frame of observations (i.e., genomic peaks) of
replicate 2, with at least the following columns (the position of the
columns matters, the column names are irrelevant):
\tabular{rll}{
  column 1:  \tab \code{chr} \tab character; genomic location of peak -
  chromosome (e.g., \code{"chr3"})\cr
  column 2:  \tab \code{start} \tab integer; genomic location of peak -
  start coordinate\cr
  column 3:  \tab \code{end} \tab integer; genomic location of peak -
  end coordinate\cr
  column 4:  \tab \code{value} \tab numeric; p-value, FDR, or heuristic used
  to rank the peaks
}}

\item{ambiguity_resolution_method}{defines how ambiguous assignments
(when one peak in replicate 1 overlaps with multiple peaks in
replicate 2 or vice versa)
are resolved. Available methods:
\tabular{rl}{
  \code{"value"} \tab peaks are prioritized by ascending or descending
  \code{value} column (see \code{sorting_direction}), e.g., if two
  peaks in replicate 1 overlap with one peak in replicate 2,
  the peak from replicate 1 is chosen which has a lower (if
  \code{sorting_direction} is \code{"ascending"}) or higher (if
  \code{"descending"}) value \cr
  \code{"overlap"} \tab the peak pair is chosen which has the highest
  relative overlap, i.e., overlap in nucleotides of the replicate 1 peak
  and the replicate 2 peak, normalized by their lengths\cr
  \code{"midpoint"} \tab the peak pair is chosen which has the
  smallest distance between their midpoints, i.e., distance from the
  midpoint of the replicate 1 peak to the midpoint of the
  replicate 2 peak
}}

\item{max_gap}{integer; maximum gap in nucleotides allowed between two
peaks for them to be considered as overlapping
(defaults to -1, i.e., peaks have to overlap)}
}
\value{
data frame with the following columns:
\tabular{rll}{
  column 1:  \tab \code{rep1_idx} \tab index of peak in replicate 1
  (i.e., row index in \code{rep1_df})\cr
  column 2:  \tab \code{rep2_idx} \tab index of peak in replicate 2
  (i.e., row index in \code{rep2_df})\cr
  column 3:  \tab \code{arv} \tab ambiguity resolution value used to turn
  the m:n mapping into a 1:1 mapping. Pairs with lower \code{arv}
  are prioritized.
}
}
\description{
This method returns all overlapping peaks between two replicates.
For each pair of overlapping peaks, the
\emph{ambiguity resolution value} (ARV) is calculated, which helps to reduce
the m:n mapping to a 1:1 mapping. The semantics of the ARV depend on the
specified \code{ambiguity_resolution_method}, but in general peak
pairs with lower ARVs have priority over peak pairs with higher ARVs
when the bijective mapping is established.
}
\examples{
# fetch the peak tables of both replicates from the bundled `chipseq` data
rep1_df <- idr2d:::chipseq$rep1_df
rep2_df <- idr2d:::chipseq$rep2_df

# transform the value columns with the "log_additive_inverse" method
rep1_df$value <- preprocess(rep1_df$value, "log_additive_inverse")
rep2_df$value <- preprocess(rep2_df$value, "log_additive_inverse")

# permute the rows so that any preexisting order is lost
rep1_perm <- sample.int(nrow(rep1_df))
rep1_df <- rep1_df[rep1_perm, ]
rep2_perm <- sample.int(nrow(rep2_df))
rep2_df <- rep2_df[rep2_perm, ]

# order both replicates by their value column
rep1_df <- dplyr::arrange(rep1_df, value)
rep2_df <- dplyr::arrange(rep2_df, value)

# map peaks between the two replicates, keeping the default arguments
pairs_df <- establish_overlap1d(rep1_df, rep2_df)

}
