% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/impute.R
\name{msImpute}
\alias{msImpute}
\title{Function for imputing missing values in data.}
\usage{
msImpute(
  data,
  imputeMethod = c("halfmin", "bpca", "knn", "rf"),
  kKnn = 5,
  nPcs = 3,
  maxIterRf = 10,
  nTreeRf = 100,
  compoundsAsNeighbors = FALSE,
  compVars = c("mz", "rt"),
  sampleVars = c("subject_id"),
  colExtraText = NULL,
  separator = NULL,
  missingValue = NA,
  returnToSE = FALSE,
  returnToDF = FALSE
)
}
\arguments{
\item{data}{Data set as either a data frame or `SummarizedExperiment`.}

\item{imputeMethod}{String specifying imputation method. 
Options are "halfmin" (half the minimum value), "bpca" (Bayesian PCA), 
"knn" (k-nearest neighbors), and "rf" (random forest).}

\item{kKnn}{Number of nearest neighbors used for 'knn' method.}

\item{nPcs}{Number of principal components used for re-estimation for 
'bpca' method.}

\item{maxIterRf}{Maximum number of iterations to be performed given the
stopping criterion is not met beforehand for 'rf' method.}

\item{nTreeRf}{Number of trees to grow in each forest for 'rf' method.}

\item{compoundsAsNeighbors}{For KNN imputation. If TRUE, compounds will be 
used as neighbors rather than samples. Note that using compounds as 
neighbors is significantly slower than using samples.}

\item{compVars}{Vector of the columns which identify compounds. If a 
`SummarizedExperiment` is used for `data`, row variables will be used.}

\item{sampleVars}{Vector of the ordered sample variables found in each sample
column.}

\item{colExtraText}{Any extra text to ignore at the beginning of the sample 
column names. Unused for `SummarizedExperiment`.}

\item{separator}{Character or text separating each sample variable in sample
columns. Unused for `SummarizedExperiment`.}

\item{missingValue}{Specifies the abundance value which indicates missing 
data. May be a numeric or `NA`.}

\item{returnToSE}{Logical value indicating whether to return as 
`SummarizedExperiment`.}

\item{returnToDF}{Logical value indicating whether to return as data frame.}
}
\value{
A data frame or `SummarizedExperiment` with missing data imputed.
Default return type is set to match the data input but may be altered with 
the `returnToSE` or `returnToDF` arguments.
}
\description{
Replaces missing values with non-zero estimates calculated using a
selected method.
}
\examples{
# Load, tidy, summarize, and filter example dataset
data(msquant)

summarizedDF <- msSummarize(msquant,
                            compVars = c("mz", "rt"),
                            sampleVars = c("spike", "batch", "replicate", 
                            "subject_id"),
                            cvMax = 0.50,
                            minPropPresent = 1/3,
                            colExtraText = "Neutral_Operator_Dif_Pos_",
                            separator = "_",
                            missingValue = 1)
                            
filteredDF <- msFilter(summarizedDF,
                       filterPercent = 0.8,
                       compVars = c("mz", "rt"),
                       sampleVars = c("spike", "batch", "subject_id"),
                       separator = "_")

                           
# Impute dataset using 3 of the 4 available methods
hmImputedDF <- msImpute(filteredDF, imputeMethod = "halfmin",
                        compVars = c("mz", "rt"),
                        sampleVars = c("spike", "batch", "subject_id"),
                        separator = "_",
                        missingValue = 0)

bpcaImputedDF <- msImpute(filteredDF, imputeMethod = "bpca",
                          nPcs = 3,
                          compVars = c("mz", "rt"),
                          sampleVars = c("spike", "batch", "subject_id"),
                          separator = "_",
                          missingValue = 0)

knnImputedDF <- msImpute(filteredDF, imputeMethod = "knn",
                         kKnn = 5,
                         compVars = c("mz", "rt"),
                         sampleVars = c("spike", "batch", "subject_id"),
                         separator = "_",
                         missingValue = 0)                                

}
\references{
Oba, S. et al. (2003) A Bayesian missing value estimation for gene
  expression profile data. Bioinformatics, 19, 2088-2096.

  Stacklies, W. et al. (2007) pcaMethods - a Bioconductor package providing
  PCA methods for incomplete data. Bioinformatics, 23, 1164-1167.
  
  A. Kowarik, M. Templ (2016) Imputation with R package VIM. Journal of 
  Statistical Software, 74(7), 1-16.
  
  Stekhoven D. J., & Buehlmann, P. (2012). MissForest - non-parametric 
  missing value imputation for mixed-type data. Bioinformatics, 28(1), 
  112-118.
}
