% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/bsfind.R
\name{BSFind}
\alias{BSFind}
\title{RBP binding site definition for iCLIP data}
\usage{
BSFind(
  object,
  bsSize = NULL,
  cutoff.geneWiseFilter = NULL,
  cutoff.globalFilter = 0.01,
  est.bsResolution = "medium",
  est.geneResolution = "medium",
  est.maxBsWidth = 13,
  est.minimumStepGain = 0.02,
  est.maxSites = Inf,
  est.subsetChromosome = "chr1",
  est.minWidth = 2,
  est.offset = 1,
  est.sensitive = FALSE,
  est.sensitive.size = 5,
  est.sensitive.minWidth = 2,
  merge.minWidth = 2,
  merge.minCrosslinks = 2,
  merge.minClSites = 1,
  merge.CenterIsClSite = TRUE,
  merge.CenterIsSummit = TRUE,
  repro.cutoff = NULL,
  repro.nReps = NULL,
  repro.minCrosslinks = 1,
  overlaps.geneWiseFilter = "keepSingle",
  overlaps.geneAssignment = "frequency",
  overlaps.rule.geneAssignment = NULL,
  overlaps.TranscriptRegions = "frequency",
  overlaps.rule.TranscriptRegions = NULL,
  stf.flank = "bs",
  stf.flank.size = NULL,
  match.score = "score",
  match.geneID = "gene_id",
  match.geneName = "gene_name",
  match.geneType = "gene_type",
  match.ranges.score = NULL,
  match.option.score = "max",
  anno.annoDB = NULL,
  anno.genes = NULL,
  anno.transcriptRegionList = NULL,
  quiet = FALSE,
  veryQuiet = FALSE,
  ...
)
}
\arguments{
\item{object}{a \code{\link{BSFDataSet}} object with stored ranges}

\item{bsSize}{an odd integer value specifying the size of the output
binding sites}

\item{cutoff.geneWiseFilter}{numeric; defines the cutoff for which sites to
remove in function \code{\link{pureClipGeneWiseFilter}}. The smallest step
is 1\% (0.01). A cutoff of 5\% will remove the lowest 5\% sites, given their
score, on each gene, thus keeping the strongest 95\%.}

\item{cutoff.globalFilter}{numeric; defines the cutoff for which sites to
keep in function \code{\link{pureClipGlobalFilter}}. The smallest step is
1\% (0.01).}

\item{est.bsResolution}{character; level of resolution of the binding site
width in function \code{\link{estimateBsWidth}}}

\item{est.geneResolution}{character; level of resolution of the gene-wise
filtering in function \code{\link{estimateBsWidth}}}

\item{est.maxBsWidth}{numeric; the largest binding site width which should
be considered in the testing}

\item{est.minimumStepGain}{numeric; the minimum additional gain in the score
in percent the next binding site width has to have, to be selected as best option}

\item{est.maxSites}{numeric; maximum number of PureCLIP sites that are used}

\item{est.subsetChromosome}{character; define on which chromosome the
estimation should be done in function \code{\link{estimateBsWidth}}}

\item{est.minWidth}{the minimum size of regions that are subjected to the
iterative merging routine, after the initial region concatenation.}

\item{est.offset}{constant added to the flanking count in the signal-to-flank
ratio calculation to avoid division by zero}

\item{est.sensitive}{logical; whether to enable sensitive pre-filtering before
binding site merging or not}

\item{est.sensitive.size}{numeric; the size (in nucleotides) of the merged
sensitive region}

\item{est.sensitive.minWidth}{numeric; the minimum size (in nucleotides) of the
merged sensitive region}

\item{merge.minWidth}{the minimum size of regions that are subjected to the
iterative merging routine, after the initial region concatenation.}

\item{merge.minCrosslinks}{the minimal number of positions to overlap with at least
one crosslink event in the final binding sites}

\item{merge.minClSites}{the minimal number of crosslink sites that have to
overlap a final binding site}

\item{merge.CenterIsClSite}{logical, whether the center of a final binding
site must be covered by an initial crosslink site}

\item{merge.CenterIsSummit}{logical, whether the center of a final binding
site must exhibit the highest number of crosslink events}

\item{repro.cutoff}{numeric; percentage cutoff to be used for the
reproducibility quantile filtering}

\item{repro.nReps}{numeric; number of replicates that must meet the cutoff
defined in \code{repro.cutoff} for a binding site to be called reproducible.
Defaults to N-1.}

\item{repro.minCrosslinks}{numeric; minimal number of crosslinks a binding
site needs to have to be called reproducible. Acts as a lower boundary for
\code{repro.cutoff}. Defaults to 1.}

\item{overlaps.geneWiseFilter}{character; how overlaps should be handled in
\code{\link{pureClipGeneWiseFilter}}}

\item{overlaps.geneAssignment}{character; how overlaps should be handled in
\code{\link{assignToGenes}}}

\item{overlaps.rule.geneAssignment}{character vector; a vector of gene types
that should be used to handle overlaps if option 'hierarchy' is selected
for \code{\link{assignToGenes}}. The order of the vector is the order of
the hierarchy.}

\item{overlaps.TranscriptRegions}{character; how overlaps should be handled in
\code{\link{assignToTranscriptRegions}}}

\item{overlaps.rule.TranscriptRegions}{character vector; a vector of transcript
region names that should be used to handle overlaps if option 'hierarchy' is
selected for \code{\link{assignToTranscriptRegions}}. The order of the vector is
the order of the hierarchy.}

\item{stf.flank}{character; how the flanking region should be set. Options are
'bs', 'manual'}

\item{stf.flank.size}{numeric; if \code{stf.flank = 'manual'}, provide the desired flanking size}

\item{match.score}{character; meta column name of the crosslink site}

\item{match.geneID}{character; meta column name of the genes}

\item{match.geneName}{character; meta column name of the gene name}

\item{match.geneType}{character; meta column name of the gene type}

\item{match.ranges.score}{a GRanges object, with numeric column for the score
to match in function \code{\link{annotateWithScore}}}

\item{match.option.score}{character; option that defines how the score is
matched to the binding sites (default 'max') in function
\code{\link{annotateWithScore}}}

\item{anno.annoDB}{an object of class \code{OrganismDbi} that contains
the gene annotation !!! Experimental !!!}

\item{anno.genes}{an object of class \code{\link{GenomicRanges}} that represents
the gene ranges directly}

\item{anno.transcriptRegionList}{an object of class \code{\link{CompressedGRangesList}}
that holds the ranges for each transcript region}

\item{quiet}{logical; whether to print messages}

\item{veryQuiet}{logical; whether to suppress all messages}

\item{...}{additional arguments passed to \code{\link{estimateBsWidth}},
\code{\link{makeBindingSites}} and \code{\link{reproducibilityFilter}}}
}
\value{
an object of class \code{\link{BSFDataSet}} with ranges merged into
binding sites given the inputs.
}
\description{
This is the main function that performs the binding site definition analysis
through the following steps:
\enumerate{
\item Filter PureCLIP sites by their score distribution: \code{\link{pureClipGlobalFilter}}
\item Estimate the appropriate binding site width together with the optimal gene-wise filter level: \code{\link{estimateBsWidth}}
\item Filter PureCLIP sites by their score distribution per gene: \code{\link{pureClipGeneWiseFilter}}
\item Define equally sized binding sites: \code{\link{makeBindingSites}}
\item Perform replicate reproducibility filter: \code{\link{reproducibilityFilter}}
\item Assign binding sites to their hosting genes: \code{\link{assignToGenes}}
\item Assign binding sites to their hosting transcript regions: \code{\link{assignToTranscriptRegions}}
\item Re-assign PureCLIP scores to binding sites: \code{\link{annotateWithScore}}
\item Calculation of signal-to-flank ratio: \code{\link{calculateSignalToFlankScore}}
}
}
\details{
If only the annotation is provided (\code{anno.genes} and
\code{anno.transcriptRegionList}), then the binding site size (\code{bsSize})
and gene-wise cutoff (\code{cutoff.geneWiseFilter}) are estimated using
\code{\link{estimateBsWidth}}. To avoid this behavior one has to provide
input values for the arguments \code{bsSize} and \code{cutoff.geneWiseFilter}.

If no binding site size is provided through \code{bsSize}, then
\code{\link{estimateBsWidth}} is called to estimate the optimal size for the
given data-set. The result of this estimation can be looked at with
\code{\link{estimateBsWidthPlot}} and arguments can be adjusted if needed.

Use the \code{\link{processingStepsFlowChart}} function to get an overview
of all steps carried out by the function.

For complete details on each step, see the manual pages of the respective
functions. The \code{BSFind} function returns a \code{\link{BSFDataSet}}
with ranges merged into binding sites. A full flowchart for the entire process
can be visualized with \code{\link{processingStepsFlowChart}}. For each of the
individual steps, dedicated diagnostic plots exist. Further information can be
found in our Bioconductor vignette:
\url{https://www.bioconductor.org/packages/release/bioc/html/BindingSiteFinder.html}
}
\examples{
# load clip data
files <- system.file("extdata", package="BindingSiteFinder")
load(list.files(files, pattern = ".rda$", full.names = TRUE))
# Load genes
load(list.files(files, pattern = ".rds$", full.names = TRUE)[1])
# load transcript regions
load(list.files(files, pattern = ".rds$", full.names = TRUE)[2])
BSFind(object = bds, bsSize = 9, anno.genes = gns,
 anno.transcriptRegionList = regions, est.subsetChromosome = "chr22")

}
\seealso{
\code{\link{BSFDataSet}}, \code{\link{estimateBsWidth}},
\code{\link{pureClipGlobalFilter}}, \code{\link{pureClipGeneWiseFilter}},
\code{\link{assignToGenes}}, \code{\link{assignToTranscriptRegions}},
\code{\link{annotateWithScore}}, \code{\link{reproducibilityFilter}},
\code{\link{calculateSignalToFlankScore}},
\code{\link{processingStepsFlowChart}}
}
