% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/class_RegspliceData.R
\docType{class}
\name{RegspliceData-class}
\alias{RegspliceData-class}
\alias{RegspliceData}
\alias{assays,RegspliceData-method}
\alias{countsData}
\alias{countsData,RegspliceData-method}
\alias{weightsData}
\alias{weightsData,RegspliceData-method}
\alias{rowData,RegspliceData-method}
\alias{colData,RegspliceData-method}
\alias{[,RegspliceData,ANY,ANY,ANY-method}
\title{RegspliceData objects}
\usage{
RegspliceData(counts, gene_IDs = NULL, n_exons = NULL, condition = NULL)

\S4method{assays}{RegspliceData}(x, withDimnames, ..., value)

countsData(x)

\S4method{countsData}{RegspliceData}(x)

weightsData(x)

\S4method{weightsData}{RegspliceData}(x)

\S4method{rowData}{RegspliceData}(x)

\S4method{colData}{RegspliceData}(x, ..., value)

\S4method{[}{RegspliceData,ANY,ANY,ANY}(x, i, j)
}
\arguments{
\item{counts}{RNA-seq read counts or exon microarray intensities (matrix or data 
frame). Rows are exons, and columns are biological samples. Alternatively,
\code{counts} also accepts a \code{SummarizedExperiment} input object containing all
required input data, which may be useful when running \code{regsplice} as part of a
pipeline with other packages.}

\item{gene_IDs}{Vector of gene IDs (character vector). Length is equal to the number 
of genes.}

\item{n_exons}{Vector of exon lengths (numeric vector of integers), i.e. the number of
exon bins per gene. Length is equal to the number of genes.}

\item{condition}{Experimental condition for each biological sample (character or 
numeric vector, or factor).}

\item{x}{\code{RegspliceData} object (for accessor or subsetting functions).}

\item{withDimnames}{See \code{SummarizedExperiment::assays()}.}

\item{...}{Additional arguments in the signatures of the \code{assays} and \code{colData} methods (see Usage), for replacement with \code{`[<-`}.}

\item{value}{Value argument in the signatures of the \code{assays} and \code{colData} methods (see Usage), for replacement with \code{`[<-`}.}

\item{i}{Gene names (character vector) or row numbers (numeric vector) for subsetting 
genes or exons. Note that when subsetting whole genes, gene names (character vector)
should be provided instead of row numbers, to avoid possible errors due to selecting
incorrect row numbers. Row numbers may be provided to subset individual exons.}

\item{j}{Column numbers (numeric vector) for subsetting biological samples.}
}
\value{
Returns a \code{RegspliceData} object.
}
\description{
\code{RegspliceData} objects contain data in the format required by functions in the 
\code{regsplice} analysis pipeline.
}
\details{
The \code{RegspliceData} format is based on the 
\code{\link[SummarizedExperiment]{SummarizedExperiment}} container. Initially, objects
contain raw data along with meta-data for rows (genes and exons) and columns 
(biological samples). During subsequent steps in the \code{regsplice} analysis 
pipeline, the data values are modified, and additional data and meta-data are added to
the object. Final results are stored in a \code{\linkS4class{RegspliceResults}} 
object.

\code{RegspliceData} objects are created with the constructor function 
\code{RegspliceData()}.

Required inputs for the constructor function are \code{counts} (matrix or data frame
of RNA-seq read counts or exon microarray intensities), \code{gene_IDs} (vector of
gene IDs), \code{n_exons} (vector of exon lengths, i.e. number of exon bins per gene),
and \code{condition} (vector of experimental conditions for each biological sample).

Alternatively, the inputs can be provided as a \code{SummarizedExperiment} object, 
which will be parsed to extract each of these components. This may be useful when 
running \code{regsplice} as part of a pipeline together with other packages.

See the vignette for an example showing how to construct \code{gene_IDs} and 
\code{n_exons} from a column of gene:exon IDs.

Exon microarray intensities should be log2-transformed, which is usually done during 
pre-processing of microarray data. (RNA-seq counts will be transformed automatically
during the \code{regsplice} analysis pipeline; see \code{\link{runVoom}}.)

After creating a \code{RegspliceData} object, the wrapper function 
\code{\link{regsplice}} can be used to run the analysis pipeline with a single
command. Alternatively, you can run the individual functions for each step in the
pipeline, beginning with \code{\link{filterZeros}} (see vignette for details).
}
\section{Fields}{

\describe{
\item{\code{counts}}{Matrix of RNA-seq read counts or exon microarray intensities. Rows are 
exons, and columns are biological samples.}

\item{\code{weights}}{(Optional) Matrix of observation-level weights. Rows are exons, and 
columns are biological samples. Created by the \code{\link{runVoom}} function.}

\item{\code{rowData}}{\code{DataFrame} of row meta-data. This should contain two columns: 
\code{gene_IDs} and \code{exon_IDs}, which are created by the \code{RegspliceData}
constructor function.}

\item{\code{colData}}{\code{DataFrame} of column meta-data. This contains the experimental 
condition and (optionally) normalization factors for each biological sample. 
Normalization factors are created by the \code{\link{runVoom}} function.}
}}

\section{Accessor functions}{


\itemize{
\item \code{countsData()}: Accesses the \code{counts} data matrix.
\item \code{weightsData()}: Accesses the (optional) \code{weights} data matrix.
\item \code{rowData()}: Accesses the \code{DataFrame} of row meta-data. This should
contain two columns: \code{gene_IDs} and \code{exon_IDs}.
\item \code{colData()}: Accesses the \code{DataFrame} of column meta-data. This
contains the experimental condition and (optionally) normalization factors for each
biological sample.
}
}

\section{Subsetting}{


Subsetting of \code{RegspliceData} objects is performed with square brackets, 
\code{x[i, j]}, where \code{x} is the name of the object. The subsetting operations
are designed to keep data and meta-data in sync.

For subsetting by rows, there are two possibilities:
\itemize{
\item Subsetting genes: To subset whole genes, provide a character vector of gene 
names to the argument \code{i}. The returned object will contain all rows 
corresponding to these genes. Row numbers should not be used when subsetting whole
genes, since this risks potential errors due to selecting incorrect rows.
\item Subsetting exons: To subset individual exons, provide the corresponding row
numbers to the argument \code{i}.
}

For subsetting by columns (biological samples), provide the corresponding column
numbers to the argument \code{j}.
}

\examples{
# ---------
# Example 1
# ---------

# Simulated data: 14 exons (rows) from 5 genes, 6 biological samples (columns).
# The seed is fixed so that the example output is reproducible between runs.
set.seed(123)
counts <- matrix(sample(100:200, 14 * 6, replace = TRUE), nrow = 14, ncol = 6)
gene_IDs <- paste0("gene", 1:5)
# number of exons per gene; sums to 14, the number of rows in counts
n_exons <- c(3, 2, 3, 1, 5)
# two conditions with three samples each
condition <- rep(c(0, 1), each = 3)

rs_data <- RegspliceData(counts, gene_IDs, n_exons, condition)

# show the object and access its components
rs_data
countsData(rs_data)
rowData(rs_data)
colData(rs_data)

# subset an individual exon by row number, optionally also subsetting samples
rs_data[1, ]
rs_data[1, 1:3]

# subset a whole gene by gene name, optionally also subsetting samples
rs_data["gene1", ]
rs_data["gene1", 1:3]


# --------------------
# Example 2 (Vignette)
# --------------------

# read the example counts file installed with the regsplice package
file_counts <- system.file("extdata/vignette_counts.txt", package = "regsplice")
data <- read.table(file_counts, header = TRUE, sep = "\t", stringsAsFactors = FALSE)
head(data)

# columns 2 to 7 are used as the counts for the 6 samples
counts <- data[, 2:7]
# take the gene part of each gene:exon ID, then tabulate exons per gene;
# vapply is used instead of sapply so the result is always a character vector
gene_part <- vapply(strsplit(data$exon, ":"), function(s) s[[1]], character(1))
tbl_exons <- table(gene_part)
gene_IDs <- names(tbl_exons)
n_exons <- unname(tbl_exons)
condition <- rep(c("untreated", "treated"), each = 3)

rs_data <- RegspliceData(counts, gene_IDs, n_exons, condition)

# show the object and access its components
rs_data
head(countsData(rs_data))
rowData(rs_data)
colData(rs_data)

# subset an individual exon by row number, optionally also subsetting samples
rs_data[1, ]
rs_data[1, 1:3]

# subset a whole gene by gene name, optionally also subsetting samples
rs_data["ENSG00000000003", ]
rs_data["ENSG00000000003", 1:3]

}
\seealso{
\code{\link{regsplice}} \code{\link{filterZeros}}
}
