% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/GFFFile-class.R, R/utilities.R
\docType{class}
\name{GFFFile-class}
\alias{GFFFile-class}
\alias{class:GFFFile}
\alias{class:GFF1File}
\alias{class:GFF2File}
\alias{class:GFF3File}
\alias{class:GVFFile}
\alias{class:GTFFile}
\alias{GFF1File-class}
\alias{GFF2File-class}
\alias{GFF3File-class}
\alias{GVFFile-class}
\alias{GTFFile-class}
\alias{GFFFile}
\alias{GFF1File}
\alias{GFF2File}
\alias{GFF3File}
\alias{GVFFile}
\alias{GTFFile}
\alias{import,GFFFile,ANY,ANY-method}
\alias{import.gff}
\alias{import.gff1}
\alias{import.gff2}
\alias{import.gff3}
\alias{import.gff,ANY-method}
\alias{import.gff1,ANY-method}
\alias{import.gff2,ANY-method}
\alias{import.gff3,ANY-method}
\alias{export,ANY,GFFFile,ANY-method}
\alias{export,GenomicRanges,GFFFile,ANY-method}
\alias{export,GenomicRangesList,GFFFile,ANY-method}
\alias{export,GRangesList,GFFFile,ANY-method}
\alias{export,GRangesList,GTFFile,ANY-method}
\alias{export.gff}
\alias{export.gff,ANY-method}
\alias{export.gff1}
\alias{export.gff1,ANY-method}
\alias{export.gff2}
\alias{export.gff2,ANY-method}
\alias{export.gff3}
\alias{export.gff3,ANY-method}
\alias{genome,GFFFile-method}
\alias{export,CompressedGRangesList,GFFFile,ANY-method}
\alias{export,SimpleGRangesList,GFFFile,ANY-method}
\title{GFFFile objects}
\usage{
GFFFile(resource, version = c("", "1", "2", "3"))

export.gff(object, con, ...)

\S4method{export.gff}{ANY}(object, con, ...)

\S4method{export}{ANY,GFFFile,ANY}(object, con, format, ...)

\S4method{export}{CompressedGRangesList,GFFFile,ANY}(object, con, format, ...)

\S4method{export}{GenomicRanges,GFFFile,ANY}(
  object,
  con,
  format,
  version = c("1", "2", "3"),
  source = "Bioc.gff",
  append = FALSE,
  index = FALSE
)

\S4method{export}{SimpleGRangesList,GFFFile,ANY}(object, con, format, ...)

export.gff1(object, con, ...)

\S4method{export.gff1}{ANY}(object, con, ...)

export.gff2(object, con, ...)

\S4method{export.gff2}{ANY}(object, con, ...)

export.gff3(object, con, ...)

\S4method{export.gff3}{ANY}(object, con, ...)

\S4method{import}{GFFFile,ANY,ANY}(
  con,
  format,
  text,
  version = c("", "1", "2", "3"),
  genome = NA,
  colnames = NULL,
  which = NULL,
  feature.type = NULL,
  sequenceRegionsAsSeqinfo = FALSE
)

import.gff1(con, ...)

\S4method{import.gff1}{ANY}(con, ...)

import.gff2(con, ...)

\S4method{import.gff2}{ANY}(con, ...)

import.gff3(con, ...)

\S4method{import.gff3}{ANY}(con, ...)

\S4method{genome}{GFFFile}(x)
}
\arguments{
\item{resource}{\code{character(1)} or \code{connection} A low-level resource typically
a path, URL, or connection.}

\item{version}{If the format is given as "gff", i.e., it does not specify a
version, then this should indicate the GFF version as one of \code{""} (for
import only; the version is then taken from the \code{gff-version} directive
in the file, or "1" if there is none), "1", "2" or "3".}

\item{object}{The object to export, should be a \code{GRanges} or something
coercible to a \code{GRanges}. If the object has a method for \code{asGFF}, it is
called prior to coercion. This makes it possible to export a \code{GRangesList}
or \code{TxDb} in a way that preserves the hierarchical structure. For exporting
multiple tracks, in the UCSC track line metaformat, pass a
\code{GenomicRangesList}, or something coercible to one.}

\item{con}{A path, URL, connection or \code{GFFFile} object. For the functions
ending in \code{.gff}, \code{.gff1}, etc., the file format is indicated by the
function name. For the base \code{export} and \code{import} functions, the format
must be indicated another way. If \code{con} is a path, URL or connection,
either the file extension or the \code{format} argument needs to be one of
"gff", "gff1", "gff2", "gff3", "gvf", or "gtf". Compressed files ("gz",
"bz2" and "xz") are handled transparently.}

\item{...}{Arguments to pass down to other methods. For import,
the flow eventually reaches the \code{GFFFile} method on \code{import}. When
\code{trackLine} is \code{TRUE} or the target format is BED15, the arguments are
passed through \code{export.ucsc}, so track line parameters are supported.}

\item{format}{If not missing, should be one of "gff", "gff1", "gff2", "gff3",
"gvf", or "gtf".}

\item{source}{The value for the source column in GFF. This is typically the
name of the package or algorithm that generated the feature.}

\item{append}{If \code{TRUE}, and \code{con} points to a file path, the data is
appended to the file. Obviously, if \code{con} is a connection, the data is
always appended.}

\item{index}{If \code{TRUE}, automatically compress and index the output file with
bgzf and tabix. Note that tabix indexing will sort the data by chromosome
and start. Tabix supports a single track in a file.}

\item{text}{If \code{con} is missing, a character vector to use as the input.}

\item{genome}{The identifier of a genome, or a \code{Seqinfo}, or \code{NA} if unknown.
Typically, this is a UCSC identifier like "hg19". An attempt will be made
to derive the \code{Seqinfo} on the return value using either an installed
BSgenome package or UCSC, if network access is available.}

\item{colnames}{A character vector naming the columns to parse. These should
name either fixed fields, like \code{source} or \code{type}, or, for GFF2 and GFF3,
any attribute.}

\item{which}{A \code{GRanges} or other range-based object supported by
\code{\link[IRanges]{findOverlaps}}. Only the intervals in the file
overlapping the given ranges are returned. This is much more efficient when
the file is indexed with the tabix utility.}

\item{feature.type}{\code{NULL} (the default) or a character vector of valid
feature types. If not \code{NULL}, then only the features of the specified
type(s) are imported.}

\item{sequenceRegionsAsSeqinfo}{If \code{TRUE}, attempt to infer the \code{Seqinfo}
(\code{seqlevels} and \code{seqlengths}) from the \dQuote{##sequence-region}
directives as specified by GFF3.}

\item{x}{A \code{GFFFile} object.}
}
\value{
A \code{GRanges} with the metadata columns described in the details.
}
\description{
These functions support the import and export of the GFF format,
of which there are three versions and several flavors.
}
\details{
The Generic Feature Format (GFF) is a tab-separated table of
intervals. There are three different versions of GFF, and they all have the
same number of columns. In GFF1, the last column is a grouping factor,
whereas in the later versions the last column holds application-specific
attributes, with some conventions defined for those commonly used. This
attribute support facilitates specifying extensions to the format. These
include GTF (Gene Transfer Format, an extension of GFF2) and GVF (Genome
Variation Format, an extension of GFF3).  The \code{Bioc.gff} package recognizes
the "gtf" and "gvf" extensions and parses the extra attributes
into columns of the result; however, it does not perform any
extension-specific processing. Both GFF1 and GFF2 have been proclaimed
obsolete; however, the UCSC Genome Browser only supports GFF1 (and GTF), and
GFF2 is still in broad use.

GFF is distinguished from the simpler BED format by its flexible attribute
support and its hierarchical structure, as specified by the \code{group}
column in GFF1 (only one level of grouping) and the \code{Parent} attribute
in GFF3. GFF2 does not specify a convention for representing hierarchies,
although its GTF extension provides this for gene structures. The
combination of support for hierarchical data and arbitrary descriptive
attributes makes GFF(3) the preferred format for representing gene models.

Although GFF features a \code{score} column, large quantitative data belong
in a format like \code{BigWig} and alignments from
high-throughput experiments belong in \link[Rsamtools:BamFile]{BAM}. For
variants, the VCF format (supported by the VariantAnnotation package) seems
to be more widely adopted than the GVF extension.

A note on the UCSC track line metaformat: track lines are a means for
passing hints to visualization tools like the UCSC Genome Browser and the
Integrated Genome Browser (IGB), and they allow multiple tracks to be
concatenated in the same file. Since GFF is not a UCSC format, it is not
common to annotate GFF data with track lines, but \code{Bioc.gff} still
supports it. To export or import GFF data in the track line format, call
\code{export.ucsc} or \code{import.ucsc}.

The following is the mapping of GFF elements to a \code{GRanges} object.  NA
values are allowed only where indicated.  These appear as a "." in
the file. GFF requires that all columns are included, so \code{export}
generates defaults for missing columns.

\describe{
\item{seqid, start, end}{the \code{ranges} component.}
\item{source}{character vector in the \code{source} column; defaults to
"Bioc.gff" on export.}
\item{type}{character vector in the \code{type} column; defaults to
"sequence_feature" in the output, i.e., SO:0000110.}
\item{score}{numeric vector (NA's allowed) in the \code{score} column,
accessible via the \code{score} accessor; defaults to \code{NA} upon export.}
\item{strand}{strand factor (NA's allowed) in the \code{strand} column,
accessible via the \code{strand} accessor; defaults to \code{NA} upon export.}
\item{phase}{integer vector, either 0, 1 or 2 (NA's allowed); defaults to
\code{NA} upon export.}
\item{group}{a factor (GFF1 only); defaults to the \code{seqid} (e.g.,
chromosome) on export.}
}

In GFF versions 2 and 3, attributes map to arbitrary columns in the result.
In GFF3, some attributes (\code{Parent}, \code{Alias}, \code{Note}, \code{DBxref} and
\code{Ontology_term}) can have multiple, comma-separated values; these columns are
thus always \code{CharacterList} objects.
}
\section{Functions}{
\itemize{
\item \code{export.gff()}: 

\item \code{export.gff(ANY)}: 

\item \code{export(object = ANY, con = GFFFile, format = ANY)}: 

\item \code{export(object = CompressedGRangesList, con = GFFFile, format = ANY)}: 

\item \code{export(object = GenomicRanges, con = GFFFile, format = ANY)}: 

\item \code{export(object = SimpleGRangesList, con = GFFFile, format = ANY)}: 

\item \code{export.gff1()}: 

\item \code{export.gff1(ANY)}: 

\item \code{export.gff2()}: 

\item \code{export.gff2(ANY)}: 

\item \code{export.gff3()}: 

\item \code{export.gff3(ANY)}: 

\item \code{import(con = GFFFile, format = ANY, text = ANY)}: 

\item \code{import.gff1()}: 

\item \code{import.gff1(ANY)}: 

\item \code{import.gff2()}: 

\item \code{import.gff2(ANY)}: 

\item \code{import.gff3()}: 

\item \code{import.gff3(ANY)}: 

\item \code{genome(GFFFile)}: Gets the genome identifier from the "genome-build"
header directive.

}}
\section{GFFFile objects}{
 The \code{GFFFile} class extends
\code{\link[BiocIO:BiocFile-class]{BiocFile}} and is a formal
representation of a resource in the GFF format.  To cast a path, URL or
connection to a \code{GFFFile}, pass it to the \code{GFFFile} constructor. The
\code{GFF1File}, \code{GFF2File}, \code{GFF3File}, \code{GVFFile} and \code{GTFFile} classes all
extend \code{GFFFile} and indicate a particular version of the format.
}

\examples{

test_gff3 <- system.file(
    "extdata", "genes.gff3", package = "Bioc.gff", mustWork = TRUE
)

## basic import
test <- import(test_gff3)
test

## import.gff functions
import.gff(test_gff3)
import.gff3(test_gff3)

## GFFFile derivatives
test_gff_file <- GFF3File(test_gff3)
import(test_gff_file)
test_gff_file <- GFFFile(test_gff3)
import(test_gff_file)
test_gff_file <- GFFFile(test_gff3, version = "3")
import(test_gff_file)

## from connection
test_gff_con <- file(test_gff3)
test <- import(test_gff_con, format = "gff")

## various arguments
import(test_gff3, genome = "hg19")
import(test_gff3, colnames = character())
import(test_gff3, colnames = c("type", "geneName"))

## 'which'
library(GenomicRanges)
which <- GRanges("chr10:90000-93000")
import(test_gff3, which = which)

## 'append'
test_gff3_out <- file.path(tempdir(), "genes.gff3")

export(test[seqnames(test) == "chr10"], test_gff3_out)
export(test[seqnames(test) == "chr12"], test_gff3_out, append = TRUE)
import(test_gff3_out)

## 'index'
export(test, test_gff3_out, index = TRUE)
test_bed_gz <- paste(test_gff3_out, ".bgz", sep = "")
import(test_bed_gz, which = which)

## cleanup
file.remove(
    test_gff3_out, test_bed_gz, paste(test_bed_gz, "tbi", sep = ".")
)

}
\references{
\itemize{
\item GFF1, GFF2: \url{http://www.sanger.ac.uk/resources/software/gff/spec.html}
\item GFF3: \url{http://www.sequenceontology.org/gff3.shtml}
\item GVF: \url{http://www.sequenceontology.org/resources/gvf.html}
\item GTF: \url{http://mblab.wustl.edu/GTF22.html}
}
}
\author{
Michael Lawrence
}
\keyword{classes}
\keyword{methods}
