% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/load_data.R
\name{extract_variants}
\alias{extract_variants}
\title{Extract variants from multiple objects}
\usage{
extract_variants(
  inputs,
  id = NULL,
  rename = NULL,
  sample_field = NULL,
  filename_as_id = FALSE,
  strip_extension = c(".vcf", ".vcf.gz", ".gz"),
  filter = TRUE,
  multiallele = c("expand", "exclude"),
  fix_vcf_errors = TRUE,
  extra_fields = NULL,
  chromosome_col = "chr",
  start_col = "start",
  end_col = "end",
  ref_col = "ref",
  alt_col = "alt",
  sample_col = "sample",
  verbose = TRUE
)
}
\arguments{
\item{inputs}{A vector or list of objects or file names. Objects can be
\linkS4class{CollapsedVCF}, \linkS4class{ExpandedVCF}, \linkS4class{MAF},
an object that inherits from \code{matrix} or \code{data.frame}, or
character strings that denote the path to a vcf or maf file.}

\item{id}{A character vector the same length as \code{inputs} denoting
the sample to extract from a vcf.
See \code{\link{extract_variants_from_vcf}} for more details.
Only used if the input is a vcf object or file. Default \code{NULL}.}

\item{rename}{A character vector the same length as \code{inputs} denoting
what the sample will be renamed to.
See \code{\link{extract_variants_from_vcf}} for more details.
Only used if the input is a vcf object or file. Default \code{NULL}.}

\item{sample_field}{Some algorithms will save the name of the
sample in the ##SAMPLE portion of the header in the VCF.
See \code{\link{extract_variants_from_vcf}} for more details.
Default \code{NULL}.}

\item{filename_as_id}{If set to \code{TRUE}, the file name will be used
as the sample name.
See \code{\link{extract_variants_from_vcf_file}} for more details.
Only used if the input is a vcf file. Default \code{FALSE}.}

\item{strip_extension}{Only used if \code{filename_as_id} is set to
\code{TRUE}. If set to \code{TRUE}, the file extension will be stripped
from the filename before setting the sample name.
See \code{\link{extract_variants_from_vcf_file}} for more details.
Only used if the input is a vcf file.
Default \code{c(".vcf",".vcf.gz",".gz")}}

\item{filter}{Exclude variants that do not have a \code{PASS} in the
\code{FILTER} column of VCF inputs. Default \code{TRUE}.}

\item{multiallele}{Multialleles are when multiple alternative variants
are listed in the same row in the vcf.
See \code{\link{extract_variants_from_vcf}} for more details.
Only used if the input is a vcf object or file. Default \code{"expand"}.}

\item{fix_vcf_errors}{Attempt to automatically fix VCF file
formatting errors.
See \code{\link{extract_variants_from_vcf_file}} for more details.
Only used if the input is a vcf file. Default \code{TRUE}.}

\item{extra_fields}{Optionally extract additional fields from all input
objects. Default \code{NULL}.}

\item{chromosome_col}{The name of the column that contains the chromosome
reference for each variant. Only used if the input is a matrix or data.frame.
Default \code{"chr"}.}

\item{start_col}{The name of the column that contains the start
position for each variant. Only used if the input is a matrix or data.frame.
Default \code{"start"}.}

\item{end_col}{The name of the column that contains the end
position for each variant. Only used if the input is a matrix or data.frame.
Default \code{"end"}.}

\item{ref_col}{The name of the column that contains the reference
base(s) for each variant. Only used if the input is a matrix or data.frame.
Default \code{"ref"}.}

\item{alt_col}{The name of the column that contains the alternative
base(s) for each variant. Only used if the input is a matrix or data.frame.
Default \code{"alt"}.}

\item{sample_col}{The name of the column that contains the sample
id for each variant. Only used if the input is a matrix or data.frame.
Default \code{"sample"}.}

\item{verbose}{Show progress of variant extraction. Default \code{TRUE}.}
}
\value{
Returns a data.table of variants extracted from all of the inputs.
}
\description{
Chooses the correct function to extract variants from input based on
the class of the object or the file extension. Different types of objects
can be mixed within the list. For example, the list can include VCF files
and maf objects. Certain parameters such as \code{id} and \code{rename}
only apply to VCF objects or files and need to be individually specified
for each VCF. Therefore, these parameters should be supplied as a vector
that is the same length as the number of inputs. If other types of
objects are in the input list, then the value of \code{id} and \code{rename}
will be ignored for these items.
}
\examples{
# Get locations of the example files shipped with musicatk: one LUAD vcf
# file, the SKCM (melanoma) vcf files, and one LUSC maf file
luad_vcf_file <- system.file("extdata", "public_LUAD_TCGA-97-7938.vcf",
  package = "musicatk"
)
lusc_maf_file <- system.file("extdata", "public_TCGA.LUSC.maf",
  package = "musicatk"
)
melanoma_vcfs <- list.files(system.file("extdata", package = "musicatk"),
  pattern = glob2rx("*SKCM*vcf"), full.names = TRUE
)

# Read all files in at once; vcf and maf paths can be mixed in one vector
inputs <- c(luad_vcf_file, melanoma_vcfs, lusc_maf_file)
variants <- extract_variants(inputs = inputs)
# Tabulate the number of extracted variants per sample
table(variants$sample)

# Run again but renaming samples in first four vcfs. The rename vector must
# be the same length as inputs; the trailing NA is the placeholder for the
# maf input, where rename is ignored
new_name <- c(paste0("Sample", 1:4), NA)
variants <- extract_variants(inputs = inputs, rename = new_name)
table(variants$sample)

}
