% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/nmr_data_analysis.R
\name{bp_kfold_VIP_analysis}
\alias{bp_kfold_VIP_analysis}
\title{K-fold bootstrap and permutation over PLS-VIP}
\usage{
bp_kfold_VIP_analysis(dataset, y_column, k = 4, ncomp = 3, nbootstrap = 300)
}
\arguments{
\item{dataset}{An \link{nmr_dataset_family} object}

\item{y_column}{A string with the name of the y column (present in the
metadata of the dataset)}

\item{k}{Number of folds, recommended between 4 and 10}

\item{ncomp}{number of components for the bootstrap models}

\item{nbootstrap}{Number of bootstrap datasets}
}
\value{
A list with the following elements:
\itemize{
\item \code{important_vips}: A list with the important vips selected
\item \code{relevant_vips}: List of vips with some relevance
\item \code{wilcoxon_vips}: List of VIPs that pass a Wilcoxon test
\item \code{vip_means}: Means of the vips scores
\item \code{vip_score_plot}: plot of the vips scores
\item \code{kfold_resuls}: results of the k \link{bp_VIP_analysis}
\item \code{kfold_index}: list of index of partitions of the folds
}
}
\description{
Bootstrap and permutation over PLS-VIP in AlpsNMR can be performed on both
\link{nmr_dataset_1D} full spectra and \link{nmr_dataset_peak_table} peak tables.
}
\details{
Uses bootstrap and permutation methods to obtain a more robust
variable importance in projection (VIP) metric for partial least
squares (PLS) regression, within a k-fold cross-validation.
}
\examples{
# Data analysis for a table of integrated peaks
#
# This example simulates a small peak table with two conditions, shifts two
# of the peaks for one condition, and then runs the k-fold bootstrap and
# permutation VIP analysis on it.
set.seed(42) # fixed seed so the simulated data are reproducible

## Generate an artificial nmr_dataset_peak_table:
### Generate artificial metadata:
num_samples <- 64 # use an even number in this example
num_peaks <- 10
metadata <- data.frame(
    NMRExperiment = as.character(seq_len(num_samples)),
    # Half of the samples are labelled "A" and half "B", in random order
    Condition = sample(rep(c("A", "B"), times = num_samples / 2), num_samples)
)

### The matrix with peaks
### Each peak (column) gets its own mean and standard deviation, and every
### sample (row) is drawn from the corresponding normal distribution.
peak_means <- runif(n = num_peaks, min = 300, max = 600)
peak_sd <- runif(n = num_peaks, min = 30, max = 60)
peak_matrix <- mapply(function(mu, sd) rnorm(num_samples, mu, sd),
    mu = peak_means, sd = peak_sd
)
colnames(peak_matrix) <- paste0("Peak", seq_len(num_peaks))
rownames(peak_matrix) <- paste0("Sample", seq_len(num_samples))

## Artificial differences depending on the condition:
## samples in condition "A" get Peak2 raised by 70 and Peak6 lowered by 60.
peak_matrix[metadata$Condition == "A", "Peak2"] <-
    peak_matrix[metadata$Condition == "A", "Peak2"] + 70

peak_matrix[metadata$Condition == "A", "Peak6"] <-
    peak_matrix[metadata$Condition == "A", "Peak6"] - 60

### The nmr_dataset_peak_table
peak_table <- new_nmr_dataset_peak_table(
    peak_table = peak_matrix,
    metadata = list(external = metadata)
)

## We will use the bootstrap and permutation method for VIPs selection
## in a k-fold cross validation.
## The values below are smaller than the defaults (k = 4, ncomp = 3,
## nbootstrap = 300), presumably to keep the example quick to run.
bp_results <- bp_kfold_VIP_analysis(peak_table, # Data to be analyzed
    y_column = "Condition", # Label
    k = 2,
    ncomp = 1,
    nbootstrap = 5
)

## message() concatenates its arguments without any separator, so the selected
## VIPs are joined explicitly to keep the individual names readable.
message(
    "Selected VIPs are: ",
    paste(bp_results$important_vips, collapse = ", ")
)

}
