### =========================================================================
### SparseArray subsetting
### -------------------------------------------------------------------------


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### .subset_SVT_by_Lindex()
### .subset_SVT_by_Mindex()
###
### Both return a vector (atomic or list) of the same type() as 'x'.
###

propagate_names_if_1D <- function(ans, x_dimnames, index)
{
    ## Puts names on 'ans' when the subsetted object is 1D and has names
    ## along its unique dimension. In any other case 'ans' is returned
    ## unchanged.
    ##   ans:        the vector obtained by subsetting the object;
    ##   x_dimnames: the dimnames of the subsetted object (NULL or a list);
    ##   index:      the subscript that was used to produce 'ans'.
    is_1D <- length(x_dimnames) == 1L
    if (!is_1D)
        return(ans)
    stopifnot(is.list(x_dimnames))
    x_names <- x_dimnames[[1L]]
    if (!is.null(x_names)) {
        ## 'ans' must have one element per element in 'index', otherwise
        ## the names picked up below would not line up with 'ans'.
        stopifnot(is.character(x_names),
                  identical(length(ans), length(index)))
        names(ans) <- x_names[index]
    }
    ans
}

### 'Lindex' must be a numeric vector (integer or double), possibly a long one.
### NA indices are accepted.
.subset_SVT_by_Lindex <- function(x, Lindex)
{
    ## Linear subsetting of SVT_SparseArray object 'x' by numeric
    ## subscript 'Lindex'. The extraction itself is delegated to the
    ## C_subset_SVT_by_Lindex() entry point. If 'x' is 1D and has names
    ## along its unique dimension, they get propagated to the result.
    stopifnot(is(x, "SVT_SparseArray"))
    check_svt_version(x)
    stopifnot(is.vector(Lindex))
    stopifnot(is.numeric(Lindex))
    x_dimnames <- dimnames(x)
    ## Registered before the C call so that it runs even if the call fails.
    ## NOTE(review): presumably this releases a global buffer used by the
    ## C code -- confirm.
    on.exit(free_global_OPBufTree())
    extracted <- SparseArray.Call("C_subset_SVT_by_Lindex",
                                  x@dim, x@type, x@SVT, FALSE, Lindex)
    propagate_names_if_1D(extracted, x_dimnames, Lindex)
}

### Use .subset_SVT_by_Lindex() as the "subset_Array_by_Lindex" method for
### SVT_SparseArray objects.
setMethod("subset_Array_by_Lindex", "SVT_SparseArray", .subset_SVT_by_Lindex)

### Alright, '.subset_SVT_by_Mindex(x, Mindex)' could just have done:
###
###     .subset_SVT_by_Lindex(x, Mindex2Lindex(Mindex, dim(x)))
###
### However, the C code in C_subset_SVT_by_Mindex() avoids the Mindex2Lindex()
### step and so should be slightly more efficient, at least in theory. But is
### it? Some quick testing suggests that there's actually no significant
### difference!
### TODO: Investigate this more.
.subset_SVT_by_Mindex <- function(x, Mindex)
{
    ## Subsetting of SVT_SparseArray object 'x' by matrix subscript
    ## 'Mindex'. Only numeric matrices are supported: any other type of
    ## matrix is rejected with an error. The extraction itself is
    ## delegated to the C_subset_SVT_by_Mindex() entry point. If 'x' is 1D
    ## and has names along its unique dimension, they get propagated to
    ## the result.
    stopifnot(is(x, "SVT_SparseArray"))
    check_svt_version(x)
    stopifnot(is.matrix(Mindex))
    x_dimnames <- dimnames(x)
    if (is.character(Mindex)) {
        if (is.null(x_dimnames))
            stop(wmsg("SparseArray object to subset has no dimnames"))
        ## Subsetting an ordinary array with dimnames on it by a character
        ## matrix is supported in base R but we don't support this yet for
        ## SparseArray objects.
        stop("subsetting a SparseArray object by a character matrix ",
             "is not supported at the moment")
    }
    if (!is.numeric(Mindex))
        stop(wmsg("invalid matrix subscript type \"", type(Mindex), "\""))
    ## Registered before the C call so that it runs even if the call fails.
    ## NOTE(review): presumably this releases a global buffer used by the
    ## C code -- confirm.
    on.exit(free_global_OPBufTree())
    extracted <- SparseArray.Call("C_subset_SVT_by_Mindex",
                                  x@dim, x@type, x@SVT, FALSE, Mindex)
    propagate_names_if_1D(extracted, x_dimnames, Mindex)
}

### Use .subset_SVT_by_Mindex() as the "subset_Array_by_Mindex" method for
### SVT_SparseArray objects.
setMethod("subset_Array_by_Mindex", "SVT_SparseArray", .subset_SVT_by_Mindex)


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### .subset_SVT_as_Rarray()
###
### Exomorphic N-dimensional subsetting of an SVT_SparseArray object.
###
### Equivalent to 'as.array(.subset_SVT_as_SVT(x, Nindex))' but slightly
### more efficient.

.subset_SVT_as_Rarray <- function(x, Nindex, ignore.dimnames=FALSE)
{
    ## Subsets SVT_SparseArray object 'x' by N-index 'Nindex' and returns
    ## whatever the C_subset_SVT_as_Rarray() entry point produces. Unless
    ## 'ignore.dimnames' is TRUE, the dimnames of 'x' are subsetted by
    ## 'Nindex' and set on the result.
    stopifnot(is(x, "SVT_SparseArray"), isTRUEorFALSE(ignore.dimnames))
    check_svt_version(x)

    ## NOTE(review): 'new_dim' is not used below. The call is kept in case
    ## get_Nindex_lengths() also validates 'Nindex' -- confirm before
    ## removing it.
    new_dim <- S4Arrays:::get_Nindex_lengths(Nindex, x@dim)
    Noffs <- Nindex2Noffs(Nindex)
    dense_ans <- SparseArray.Call("C_subset_SVT_as_Rarray",
                                  x@dim, x@type, x@SVT, FALSE, Noffs)
    if (ignore.dimnames)
        return(dense_ans)
    dimnames(dense_ans) <-
        S4Arrays:::subset_dimnames_by_Nindex(x@dimnames, Nindex)
    dense_ans
}


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### .subset_SVT_as_SVT()
###
### Endomorphic N-dimensional subsetting of an SVT_SparseArray object.
###
### Like the 'index' argument in 'extract_array()', the 'Nindex' argument
### must be a **normalized** N-index, that is, a list with one list element
### per dimension in 'x' where each list element is either a NULL or an
### integer vector of valid indices along the corresponding dimension in 'x'.
###
### Returns an SVT_SparseArray object of the same type() as 'x' (endomorphism).

.subset_SVT_as_SVT <- function(x, Nindex, ignore.dimnames=FALSE)
{
    ## Subsets SVT_SparseArray object 'x' by N-index 'Nindex'. The result
    ## is 'x' with its 'dim', 'dimnames', and 'SVT' slots replaced. The
    ## new SVT is computed by the C_subset_SVT_as_SVT() entry point.
    stopifnot(is(x, "SVT_SparseArray"), isTRUEorFALSE(ignore.dimnames))
    check_svt_version(x)

    ans_dim <- S4Arrays:::get_Nindex_lengths(Nindex, x@dim)
    Noffs <- Nindex2Noffs(Nindex)
    ans_SVT <- SparseArray.Call("C_subset_SVT_as_SVT",
                                x@dim, x@type, x@SVT, Noffs)

    ## The dimnames of 'x' are subsetted only when there is something to
    ## propagate and the caller did not ask to ignore them. Otherwise use
    ## a list of NULLs with one list element per dimension in 'x'.
    if (!is.null(dimnames(x)) && !ignore.dimnames) {
        ans_dimnames <- S4Arrays:::subset_dimnames_by_Nindex(x@dimnames,
                                                             Nindex)
    } else {
        ans_dimnames <- vector("list", length(x@dim))
    }

    ## The slots are replaced with 'check=FALSE'.
    BiocGenerics:::replaceSlots(x, dim=ans_dim,
                                   dimnames=ans_dimnames,
                                   SVT=ans_SVT,
                                   check=FALSE)
}


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### subset_Array_by_Nindex(), extract_array(), and extract_sparse_array()
### methods for SVT_SparseArray objects
###

setMethod("subset_Array_by_Nindex", "SVT_SparseArray",
    function(x, Nindex, drop=TRUE)
    {
        ## Two code paths:
        ##  - 'drop' is TRUE and the result has at most one dimension with
        ##    an extent different from 1: subset via
        ##    .subset_SVT_as_Rarray(), then drop the dimensions;
        ##  - otherwise: subset via .subset_SVT_as_SVT(), and call drop()
        ##    on the result if 'drop' is TRUE.
        ans_dim <- S4Arrays:::get_Nindex_lengths(Nindex, x@dim)
        num_effective_dims <- sum(ans_dim != 1L)
        if (drop && num_effective_dims <= 1L) {
            dense_ans <- .subset_SVT_as_Rarray(x, Nindex)
            return(S4Arrays:::drop_even_if_1D(dense_ans))
        }
        svt_ans <- .subset_SVT_as_SVT(x, Nindex)
        if (drop) drop(svt_ans) else svt_ans
    }
)

setMethod("extract_array", "SVT_SparseArray",
    function(x, index)
    {
        ## The dimnames are deliberately not propagated.
        .subset_SVT_as_Rarray(x, index, ignore.dimnames=TRUE)
    }
)

### No need to propagate the dimnames.
setMethod("extract_sparse_array", "SVT_SparseArray",
    function(x, index)
    {
        ## Endomorphic subsetting, with the dimnames left out.
        .subset_SVT_as_SVT(x, index, ignore.dimnames=TRUE)
    }
)


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### extract_sparse_array() and extract_array() methods for COO_SparseArray
### objects
###

### IMPORTANT NOTE: The returned COO_SparseArray object is guaranteed to be
### **correct** ONLY if the subscripts in 'index' do NOT contain duplicates!
### If they contain duplicates, the correct COO_SparseArray object to return
### should contain repeated nonzero data. However, in order to keep it as
### efficient as possible, the code below does NOT repeat the nonzero data
### that corresponds to duplicate subscripts. It does not check for
### duplicates in 'index' either because this check could have a
### significant cost.
### All this is OK because .extract_COO_SparseArray_subset() should
### always be used in a context where 'index' does NOT contain duplicates.
### The only situation where 'index' CAN contain duplicates is when
### .extract_COO_SparseArray_subset() is called by
### .extract_array_from_COO_SparseArray(), in which case the
### missing nonzero data are added later.
.extract_COO_SparseArray_subset <- function(x, index)
{
    ## Returns a COO_SparseArray object made of the nonzero data in 'x'
    ## that is hit by the subscripts in 'index'. Each list element in
    ## 'index' is either NULL or the subscript along the corresponding
    ## dimension in 'x'.
    stopifnot(is(x, "COO_SparseArray"))
    ans_dim <- S4Arrays:::get_Nindex_lengths(index, dim(x))

    ## Along each dimension with a non-NULL subscript, replace the
    ## coordinates of the nonzero data with their positions in the
    ## subscript. match() returns the position of the first match only,
    ## and NA for the coordinates that are not in the subscript.
    nzcoo <- x@nzcoo
    for (along in seq_along(ans_dim)) {
        subscript <- index[[along]]
        if (!is.null(subscript))
            nzcoo[ , along] <- match(nzcoo[ , along], subscript)
    }

    ## Only keep the nonzero data with no NA in their new coordinates.
    ## Note that calling rowAnyNAs() on ordinary matrix 'nzcoo' would
    ## also work as it would call the rowAnyNAs() S4 generic defined in
    ## MatrixGenerics, and the latter would eventually dispatch on
    ## matrixStats::rowAnyNAs(). However, calling matrixStats::rowAnyNAs()
    ## should be slightly more efficient. Also note that this call is the
    ## only reason why we list matrixStats in the Imports field.
    has_NA <- matrixStats::rowAnyNAs(nzcoo)
    keep_idx <- which(!has_NA)
    COO_SparseArray(ans_dim,
                    nzcoo[keep_idx, , drop=FALSE],
                    x@nzdata[keep_idx],
                    check=FALSE)
}
### Use .extract_COO_SparseArray_subset() as the "extract_sparse_array" method
### for COO_SparseArray objects.
setMethod("extract_sparse_array", "COO_SparseArray",
    .extract_COO_SparseArray_subset
)

.extract_array_from_COO_SparseArray <- function(x, index)
{
    ## Step 1: Sparse extraction followed by conversion to an ordinary
    ## array. If the subscripts in 'index' contain duplicates, the result
    ## of the sparse extraction is "incomplete" in the sense that it does
    ## not contain the nonzero data that should have been repeated
    ## according to the duplicates in the subscripts.
    incomplete_coo <- .extract_COO_SparseArray_subset(x, index)
    ans0 <- as.array(incomplete_coo)

    ## Step 2: "Complete" 'ans0' by repeating the nonzero data according
    ## to the duplicates present in 'index'. This is easy and cheap to do
    ## at this point because 'ans0' uses a dense representation (it's an
    ## ordinary array). This would be harder to do **natively** on the
    ## COO_SparseArray form (i.e. without converting to dense first then
    ## back to sparse).

    ## Maps each element in subscript 'i' to the position of its first
    ## occurrence in 'i'. Returns NULL if 'i' is NULL or if the mapping
    ## satisfies isSequence().
    ## NOTE(review): presumably isSequence() is TRUE when the mapping is
    ## 1, 2, ..., length(i), that is, when 'i' has no duplicates --
    ## confirm.
    map_to_first_occurrence <- function(i) {
        if (is.null(i))
            return(NULL)
        sm <- match(i, i)
        if (isSequence(sm)) NULL else sm
    }
    sm_index <- lapply(index, map_to_first_occurrence)

    nothing_to_repeat <- all(S4Vectors:::sapply_isNULL(sm_index))
    if (nothing_to_repeat)
        return(ans0)
    S4Arrays:::subset_by_Nindex(ans0, sm_index)
}
### Use .extract_array_from_COO_SparseArray() as the "extract_array" method
### for COO_SparseArray objects.
setMethod("extract_array", "COO_SparseArray",
    .extract_array_from_COO_SparseArray
)

