\name{BSgenome.Hsapiens.UCSC.hg19.masked}
\docType{package}

\alias{BSgenome.Hsapiens.UCSC.hg19.masked-package}
\alias{BSgenome.Hsapiens.UCSC.hg19.masked}

\title{Full masked genome sequences for Homo sapiens (UCSC version hg19, based on GRCh37.p13)}

\description{
  Full genome sequences for Homo sapiens (Human) as provided by UCSC
  (hg19, based on GRCh37.p13) and stored in Biostrings objects.
  The sequences are the same as in \pkg{BSgenome.Hsapiens.UCSC.hg19},
  except that each of them has the following 4 masks on top:
  (1) the mask of assembly gaps (AGAPS mask),
  (2) the mask of intra-contig ambiguities (AMB mask),
  (3) the mask of repeats from RepeatMasker (RM mask), and
  (4) the mask of repeats from Tandem Repeats Finder (TRF mask).
  Only the AGAPS and AMB masks are \dQuote{active} by default.
}

\details{
  See the Note section below for the source data files that were used
  to make the masks.
}

\note{
  The masks in this BSgenome data package were made from the following
  source data files:
  \preformatted{
AGAPS masks: gap.txt.gz, downloaded from https://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/ on March 24, 2020
RM masks: hg19.fa.out.gz, downloaded from https://hgdownload.cse.ucsc.edu/goldenPath/hg19/bigZips/latest/ on March 24, 2020
TRF masks: hg19.trf.bed.gz, downloaded from https://hgdownload.cse.ucsc.edu/goldenPath/hg19/bigZips/latest/ on March 24, 2020
  }

  See \code{?\link[BSgenome.Hsapiens.UCSC.hg19]{BSgenome.Hsapiens.UCSC.hg19}} in the
  \pkg{BSgenome.Hsapiens.UCSC.hg19} package for information about how the sequences
  were obtained.

  See \code{?\link[BSgenome]{BSgenomeForge}} and the BSgenomeForge
  vignette (\code{vignette("BSgenomeForge")}) in the \pkg{BSgenome}
  software package for how to make a BSgenome data package.
}

\author{The Bioconductor Dev Team}

\seealso{
  \itemize{
    \item \code{\link[BSgenome.Hsapiens.UCSC.hg19]{BSgenome.Hsapiens.UCSC.hg19}}
          in the \pkg{BSgenome.Hsapiens.UCSC.hg19} package
          for information about how the sequences were obtained.
    \item \link[BSgenome]{BSgenome} objects and the
          \code{\link[BSgenome]{available.genomes}} function
          in the \pkg{BSgenome} software package.
    \item \link[Biostrings]{MaskedDNAString} objects in the \pkg{Biostrings}
          package.
    \item The BSgenomeForge vignette (\code{vignette("BSgenomeForge")})
          in the \pkg{BSgenome} software package for how to make a BSgenome
          data package.
  }
}

\examples{
## Display the masked genome object:
BSgenome.Hsapiens.UCSC.hg19.masked
## Bind it to a shorter name for the rest of the examples:
genome <- BSgenome.Hsapiens.UCSC.hg19.masked
## Lengths of the first few sequences:
head(seqlengths(genome))
genome$chr1  # a MaskedDNAString object!
## To get rid of the masks altogether:
unmasked(genome$chr1)  # same as BSgenome.Hsapiens.UCSC.hg19$chr1

if ("AGAPS" \%in\% masknames(genome)) {

  ## Check that the assembly gaps of a masked sequence contain only Ns.
  ## The seq argument is a masked sequence carrying an AGAPS mask, as
  ## returned by genome[[seqname]]. An error is raised if a letter
  ## other than N is found in the assembly gaps.
  checkOnlyNsInGaps <- function(seq)
  {
    ## Replace all masks by the inverted AGAPS mask (i.e. everything
    ## except the assembly gaps gets masked)
    masks(seq) <- gaps(masks(seq)["AGAPS"])
    unique_letters <- uniqueLetters(seq)
    if (any(unique_letters != "N"))
        stop("assembly gaps contain more than just Ns")
  }

  ## A message will be printed each time a sequence is removed
  ## from the cache. The previous option values are saved so they
  ## can be restored once the check is done:
  old_options <- options(verbose=TRUE)

  ## The finally clause restores the options even if a check fails:
  tryCatch({
    for (seqname in seqnames(genome)) {
      cat("Checking sequence", seqname, "... ")
      seq <- genome[[seqname]]
      checkOnlyNsInGaps(seq)
      cat("OK\n")
    }
  }, finally=options(old_options))
}

## Genome-wide motif searching with Biostrings and the BSgenome data
## packages is illustrated in the GenomeSearching vignette of the
## BSgenome software package:
if (interactive()) {
  vignette("GenomeSearching", package="BSgenome")
}
}

\keyword{package}
\keyword{data}
