% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/createTADdata.R
\name{createTADdata}
\alias{createTADdata}
\title{Function to create a data matrix used for building a predictive model to
classify boundary regions from functional genomic elements}
\usage{
createTADdata(
  bounds.GR,
  resolution,
  genomicElements.GR,
  featureType = "distance",
  resampling,
  trainCHR,
  predictCHR = NULL,
  genome = "hg19"
)
}
\arguments{
\item{bounds.GR}{a GRanges object with chromosomal coordinates of TAD
boundaries used to identify positive cases (can be obtained using
\code{\link{extractBoundaries}}). Required.}

\item{resolution}{Numeric, the width to bin the genome at, should match the
resolution that TADs were called at. Required.}

\item{genomicElements.GR}{a GRangesList object containing GRanges objects
for each ChIP-seq data set to leverage in the random forest model (can be
obtained using \code{\link{bedToGRangesList}}). Required.}

\item{featureType}{Character, controls how the feature space is constructed
(one of either "binary" (overlap yes/no), "oc" (overlap counts, the number
of overlaps), "op" (overlap percent, the percent of bin width covered by the
genomic annotation), or "distance" (log2-transformed distance from the center
of the nearest genomic annotation to the center of the bin); default is
"distance"). Required.}

\item{resampling}{Character, controls if and how the data should be
resampled to create balanced classes of boundary vs. nonboundary regions (one
of either "none" - no re-sampling, "ros" - Random Over-Sampling, or "rus" -
Random Under-Sampling). Required.}

\item{trainCHR}{Character vector of chromosomes to use to build the binned
data matrix for training. Required.}

\item{predictCHR}{Character vector of chromosomes to use to build the binned
data matrix for testing. Default is NULL, indicating no test data is created.
If trainCHR = predictCHR, then a 7:3 split is created.}

\item{genome}{Character, version of the human genome assembly. Used to filter
out bases overlapping centromeric regions. Accepted values are "hg19"
(default) or "hg38".}
}
\value{
A list object containing two data.frames: 1) the training data, 2)
the test data (only if predictCHR is not NULL, otherwise it is NA). "y" is
an indicator of whether the corresponding bin is a TAD boundary, and the
subsequent columns have the association measures between bins and the
genomic annotations.
}
\description{
Function to create a data matrix used for building a predictive model to
classify boundary regions from functional genomic elements
}
\examples{
# Create training data for CHR21 and testing data for CHR22 with
# 5 kb binning, oc-type predictors from 26 different transcription factor
# binding sites from the GM12878 cell line, and random under-sampling

# Read in ARROWHEAD-called TADs at 5kb
data(arrowhead_gm12878_5kb)

# Extract unique boundaries
bounds.GR <- extractBoundaries(domains.mat = arrowhead_gm12878_5kb,
                               filter = FALSE,
                               CHR = c("CHR21", "CHR22"),
                               resolution = 5000)

# Read in GRangesList of 26 TFBS
data(tfbsList)

tadData <- createTADdata(bounds.GR = bounds.GR,
                         resolution = 5000,
                         genomicElements.GR = tfbsList,
                         featureType = "oc",
                         resampling = "rus",
                         trainCHR = "CHR21",
                         predictCHR = "CHR22")
}
