% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/methods.R
\name{identify_outliers}
\alias{identify_outliers}
\title{Identify transcripts that include outlier biological replicates}
\usage{
identify_outliers(
  .data,
  formula = ~1,
  .sample,
  .transcript,
  .abundance,
  .significance,
  .do_check,
  .scaling_factor = NULL,
  percent_false_positive_genes = 1,
  how_many_negative_controls = 500,
  approximate_posterior_inference = TRUE,
  approximate_posterior_analysis = TRUE,
  draws_after_tail = 10,
  save_generated_quantities = FALSE,
  additional_parameters_to_save = c(),
  cores = detect_cores(),
  pass_fit = FALSE,
  do_check_only_on_detrimental = length(parse_formula(formula)) > 0,
  tol_rel_obj = 0.01,
  just_discovery = FALSE,
  seed = sample(seq_len(length.out = 999999), size = 1),
  adj_prob_theshold_2 = NULL
)
}
\arguments{
\item{.data}{A tibble including a transcript name column | sample name column | read counts column | covariate columns | Pvalue column | a significance column}

\item{formula}{A formula. The sample formula used to perform the differential transcript abundance analysis}

\item{.sample}{A column name as symbol. The sample identifier}

\item{.transcript}{A column name as symbol. The transcript identifier}

\item{.abundance}{A column name as symbol. The transcript abundance (read count)}

\item{.significance}{A column name as symbol. A column with the Pvalue, or other significance measure (preferred Pvalue over false discovery rate)}

\item{.do_check}{A column name as symbol. A column with a boolean indicating whether a transcript was identified as differentially abundant}

\item{.scaling_factor}{A scaling factor to be used as provided, instead of being calculated (TMM method) from the input data. It is useful, for example, for pseudobulk single-cell data, where the scaling might depend on the sample sequencing depth for all cells rather than for a particular cell type.}

\item{percent_false_positive_genes}{A real between 0 and 100. It is the target percentage of transcripts that are false positives. For example, percent_false_positive_genes = 1 means that 1 percent of the transcripts called as containing outliers actually have no outliers.}

\item{how_many_negative_controls}{An integer. How many transcripts from the bottom of the non-significant ones should be taken for inferring the mean-overdispersion trend.}

\item{approximate_posterior_inference}{A boolean. Whether the inference of the joint posterior distribution should be approximated with variational Bayes. It confers an execution time advantage.}

\item{approximate_posterior_analysis}{A boolean. Whether the calculation of the credible intervals should be done semi-analytically, rather than with pure sampling from the posterior. It confers execution time and memory advantage.}

\item{draws_after_tail}{An integer. How many draws should, on average, fall beyond the tail, so as to inform the credible interval (CI).}

\item{save_generated_quantities}{A boolean. Used for development and testing purposes}

\item{additional_parameters_to_save}{A character vector. Used for development and testing purposes}

\item{cores}{An integer. How many cores to use for parallel calculations.}

\item{pass_fit}{A boolean. Used for development and testing purposes}

\item{do_check_only_on_detrimental}{A boolean. Whether to test only for detrimental outliers (same direction as the fold change). This allows testing fewer transcript/sample pairs and therefore using a higher probability threshold.}

\item{tol_rel_obj}{A real. Used for development and testing purposes}

\item{just_discovery}{A boolean. Used for development and testing purposes}

\item{seed}{An integer. Used for development and testing purposes}

\item{adj_prob_theshold_2}{A boolean. Used for development and testing purposes}
}
\value{
A nested tibble \code{tbl} with transcript-wise information: \code{sample_wise_data} | plot | \verb{ppc samples failed} | \verb{tot deleterious_outliers}
}
\description{
This function runs the data modeling and statistical test for the hypothesis that a transcript includes outlier biological replicates.

\lifecycle{maturing}
}
\examples{

# Attach dplyr so that the pipe operator is available
library(dplyr)

# Load the `counts` example dataset
data("counts")

# Run the analysis only when the operating system is Linux
if (Sys.info()[['sysname']] == "Linux") {
  result =
    counts \%>\%
    # Flag the transcripts that should be checked for outliers
    dplyr::mutate(is_significant = symbol \%in\% c("SLC16A12", "CYP1A1", "ART3")) \%>\%
    ppcseq::identify_outliers(
      formula = ~ Label,
      .sample = sample,
      .transcript = symbol,
      .abundance = value,
      .significance = PValue,
      .do_check = is_significant,
      percent_false_positive_genes = 1,
      tol_rel_obj = 0.01,
      approximate_posterior_inference = TRUE,
      approximate_posterior_analysis = TRUE,
      how_many_negative_controls = 50,
      cores = 1
    )
}

}
