% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/02_data_preprocessing.R
\name{check_input}
\alias{check_input}
\title{Check if input objects are ready for further analyses}
\usage{
check_input(seq = NULL, annotation = NULL, gene_field = "gene_id")
}
\arguments{
\item{seq}{A list of AAStringSet objects, each list element containing
protein sequences for a given species. This list must have names
(not NULL), and names of each list element must match the names of
list elements in \strong{annotation}.}

\item{annotation}{A GRangesList, CompressedGRangesList, or list of
GRanges with the annotation for the sequences in \strong{seq}. This list must
have names (not NULL), and names of each list element must match the names
of list elements in \strong{seq}.}

\item{gene_field}{Character, name of the column in the GRanges objects
that contains gene IDs. Default: "gene_id".}
}
\value{
TRUE if the objects pass the check.
}
\description{
Check if input objects are ready for further analyses
}
\details{
This function checks the input data for three required conditions:
\enumerate{
\item Names of \strong{seq} list (i.e., \code{names(seq)}) match
the names of \strong{annotation} GRangesList/CompressedGRangesList
(i.e., \code{names(annotation)}).
\item For each species (i.e., each list element), the number of sequences
in \strong{seq} is not greater than the number of genes
in \strong{annotation}. This is a way to ensure users do not input
the translated sequences for multiple isoforms of the same gene (generated
by alternative splicing). Ideally, the number of sequences in \strong{seq}
should be equal to the number of genes in \strong{annotation}, but
this may not always hold true because the annotation can also include
non-protein-coding genes.
\item For each species, sequence names (i.e., \code{names(seq[[x]])},
equivalent to FASTA headers) match gene names in \strong{annotation}.
}
}
\examples{
data(annotation) 
data(proteomes)
check_input(proteomes, annotation)
}
