params <-
list(test = FALSE)

## ----setup, include=FALSE, message=FALSE--------------------------------------
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
library(BiocStyle)

## ----eval=FALSE---------------------------------------------------------------
# if (!require("BiocManager", quietly = TRUE)) {
#   install.packages("BiocManager")
# }
# 
# BiocManager::install("spicyWorkflow")

## ----load libraries, echo=FALSE, results="hide", warning=FALSE----------------
suppressPackageStartupMessages({
  library(cytomapper)
  library(dplyr)
  library(ggplot2)
  library(simpleSeg)
  library(FuseSOM)
  library(ggpubr)
  library(scater)
  library(spicyR)
  library(ClassifyR)
  library(lisaClust)
  library(Statial)
  library(tidySingleCellExperiment)
  library(SpatialExperiment)
  library(SpatialDatasets)
})

## ----eval=FALSE---------------------------------------------------------------
# library(cytomapper)
# library(dplyr)
# library(ggplot2)
# library(simpleSeg)
# library(FuseSOM)
# library(ggpubr)
# library(scater)
# library(spicyR)
# library(ClassifyR)
# library(lisaClust)
# library(Statial)
# library(tidySingleCellExperiment)
# library(SpatialExperiment)
# library(SpatialDatasets)

## ----set parameters-----------------------------------------------------------
use_mc <- TRUE

if (use_mc) {
  nCores <- max(parallel::detectCores()/2, 1)
} else {
  nCores <- 2
}
BPPARAM <- simpleSeg:::generateBPParam(nCores)

theme_set(theme_classic())

## ----load images--------------------------------------------------------------
pathToImages <- SpatialDatasets::Ferguson_Images()
tmp <- tempfile()
unzip(pathToImages, exdir = tmp)

# Store images in a CytoImageList on_disk as h5 files to save memory.
images <- cytomapper::loadImages(
  tmp,
  single_channel = TRUE,
  on_disk = TRUE,
  h5FilesPath = HDF5Array::getHDF5DumpDir(),
  BPPARAM = BPPARAM
)

mcols(images) <- S4Vectors::DataFrame(imageID = names(images))

## -----------------------------------------------------------------------------

cn <- channelNames(images) # Read in channel names
head(cn)

cn <- sub(".*_", "", cn) # Remove preceding letters
cn <- sub(".ome", "", cn) # Remove the .ome
head(cn)

channelNames(images) <- cn # Reassign channel names


## -----------------------------------------------------------------------------
head(names(images))

nam <- sapply(strsplit(names(images), "_"), `[`, 3)
head(nam)

names(images) <- nam # Reassigning image names
mcols(images)[["imageID"]] <- nam # Reassigning image names

## -----------------------------------------------------------------------------
masks <- simpleSeg(images,
                   nucleus = c("HH3"),
                   pca = TRUE,
                   cellBody = "dilate",
                   discSize = 3,
                   sizeSelection = 20,
                   transform = "sqrt",
                   tissue = c("panCK", "CD45", "HH3"),
                   cores = nCores
                   )

## ----visualise segmentation---------------------------------------------------
EBImage::display(colorLabels(masks[[1]]))

## -----------------------------------------------------------------------------
plotPixels(image = images["F3"], 
           mask = masks["F3"],
           img_id = "imageID", 
           colour_by = c("HH3"), 
           display = "single",
           colour = list(HH3 = c("black","blue")),
           legend = NULL,
           bcg = list(
             HH3 = c(1, 1, 2)
           ))

## -----------------------------------------------------------------------------
plotPixels(image = images["F3"], 
           mask = masks["F3"],
           img_id = "imageID", 
           colour_by = c("HH3", "CD31", "FX111A"), 
           display = "single",
           colour = list(HH3 = c("black","blue"),
                         CD31 = c("black", "red"),
                         FX111A = c("black", "green") ),
           legend = NULL,
           bcg = list(
             HH3 = c(1, 1, 2),
             CD31 = c(0, 1, 2),
             FX111A = c(0, 1, 1.5)
           ))

## -----------------------------------------------------------------------------
# Summarise the expression of each marker in each cell
cells <- cytomapper::measureObjects(masks,
                                    images,
                                    img_id = "imageID",
                                    return_as = "spe",
                                    BPPARAM = BPPARAM)

spatialCoordsNames(cells) <- c("x", "y")

## -----------------------------------------------------------------------------
clinical <- read.csv(
  system.file(
    "extdata/clinicalData_TMA1_2021_AF.csv",
    package = "spicyWorkflow"
  )
)

rownames(clinical) <- clinical$imageID
clinical <- clinical[names(images), ]


## -----------------------------------------------------------------------------
colData(cells) <- cbind(colData(cells), clinical[cells$imageID, ])

## ----eval=FALSE---------------------------------------------------------------
# save(cells, file = "spe_Ferguson_2022.rda")

## ----eval=FALSE---------------------------------------------------------------
# load(system.file("extdata/cells.rda", package = "spicyWorkflow"))

## ----fig.width=5, fig.height=5------------------------------------------------
# Plot densities of CD3 for each image.
cells |> 
  join_features(features = rownames(cells), shape = "wide", assay = "counts") |> 
  ggplot(aes(x = CD3, colour = imageID)) + 
  geom_density() + 
  theme(legend.position = "none")

## -----------------------------------------------------------------------------
# Usually we specify a subset of the original markers which are informative to separating out distinct cell types for the UMAP and clustering.
ct_markers <- c("podoplanin", "CD13", "CD31",
                "panCK", "CD3", "CD4", "CD8a",
                "CD20", "CD68", "CD16", "CD14", "HLADR", "CD66a")

# ct_markers <- c("podoplanin", "CD13", "CD31",
#                 "panCK", "CD3", "CD4", "CD8a",
#                 "CD20", "CD68", "CD14", "CD16",
#                 "CD66a")

set.seed(51773)
# Perform dimension reduction using UMAP.
cells <- scater::runUMAP(
  cells,
  subset_row = ct_markers,
  exprs_values = "counts"
)

# Select a subset of images to plot.
someImages <- unique(cells$imageID)[c(1, 5, 10, 20, 30, 40)]

# UMAP by imageID.
scater::plotReducedDim(
  cells[, cells$imageID %in% someImages],
  dimred = "UMAP",
  colour_by = "imageID"
)

## ----fig.width=5, fig.height=5------------------------------------------------
# Leave out the nuclei markers from our normalisation process. 
useMarkers <- rownames(cells)[!rownames(cells) %in% c("DNA1", "DNA2", "HH3")]

# Transform and normalise the marker expression of each cell type.
cells <- normalizeCells(cells,
                        markers = useMarkers,
                        transformation = NULL,
                        method = c("trim99", "mean", "PC1"),
                        assayIn = "counts",
                        cores = nCores
)

# Plot densities of CD3 for each image
cells |> 
  join_features(features = rownames(cells), shape = "wide", assay = "norm") |> 
  ggplot(aes(x = CD3, colour = imageID)) + 
  geom_density() + 
  theme(legend.position = "none")

## -----------------------------------------------------------------------------
set.seed(51773)
# Perform dimension reduction using UMAP.
cells <- scater::runUMAP(
  cells,
  subset_row = ct_markers,
  exprs_values = "norm",
  name = "normUMAP"
)

someImages <- unique(cells$imageID)[c(1, 5, 10, 20, 30, 40)]

# UMAP by imageID.
scater::plotReducedDim(
  cells[, cells$imageID %in% someImages],
  dimred = "normUMAP",
  colour_by = "imageID"
)

## ----FuseSOM------------------------------------------------------------------
# Set seed.
set.seed(51773)

# Generate SOM and cluster cells into 10 groups
cells <- runFuseSOM(
  cells,
  markers = ct_markers,
  assay = "norm",
  numClusters = 10
)


## -----------------------------------------------------------------------------
cells <- estimateNumCluster(cells, kSeq = 2:30)
optiPlot(cells, method = "gap")

## -----------------------------------------------------------------------------
# Visualise marker expression in each cluster.
scater::plotGroupedHeatmap(
  cells,
  features = ct_markers,
  group = "clusters",
  exprs_values = "norm",
  center = TRUE,
  scale = TRUE,
  zlim = c(-3, 3),
  cluster_rows = FALSE,
  block = "clusters"
)

## -----------------------------------------------------------------------------
cells <- cells |>
  mutate(cellType = case_when(
    clusters == "cluster_1" ~ "GC", # Granulocytes
    clusters == "cluster_2" ~ "MC", # Myeloid cells
    clusters == "cluster_3" ~ "SC", # Squamous cells
    clusters == "cluster_4" ~ "EP", # Epithelial cells
    clusters == "cluster_5" ~ "SC", # Squamous cells
    clusters == "cluster_6" ~ "TC_CD4", # CD4 T cells
    clusters == "cluster_7" ~ "BC", # B cells
    clusters == "cluster_8" ~ "EC", # Endothelial cells
    clusters == "cluster_9" ~ "TC_CD8", # CD8 T cells
    clusters == "cluster_10" ~ "DC" # Dendritic cells
  ))

## -----------------------------------------------------------------------------
reducedDim(cells, "spatialCoords") <- spatialCoords(cells)

cells |> 
  filter(imageID == "F3") |> 
  plotReducedDim("spatialCoords", colour_by = "cellType")


## -----------------------------------------------------------------------------
# Check cell type frequencies.
cells$cellType |>
  table() |>
  sort()

## -----------------------------------------------------------------------------
# UMAP by cell type
scater::plotReducedDim(
  cells[, cells$imageID %in% someImages],
  dimred = "normUMAP",
  colour_by = "cellType"
)

## -----------------------------------------------------------------------------
# Perform simple student's t-tests on the columns of the proportion matrix.
testProp <- colTest(cells, 
                    condition = "group", 
                    feature = "cellType",
                    type = "ttest")

head(testProp)

## -----------------------------------------------------------------------------
prop <- getProp(cells, feature = "cellType")
prop[1:5, 1:5]

## -----------------------------------------------------------------------------
clusterToUse <- rownames(testProp)[1]

prop |>
  select(all_of(clusterToUse)) |>
  tibble::rownames_to_column("imageID") |>
  left_join(clinical, by = "imageID") |>
  ggplot(aes(x = group, y = .data[[clusterToUse]], fill = group)) +
  geom_boxplot()

## ----eval=FALSE---------------------------------------------------------------
# load(system.file("extdata/computed_cells.rda", package = "spicyWorkflow"))

## -----------------------------------------------------------------------------
spicyTest <- spicy(cells,
                   condition = "group",
                   cellTypeCol = "cellType",
                   imageIDCol = "imageID",
                   Rs = 1:10*10,
                   sigma = 50,
                   BPPARAM = BPPARAM)

topPairs(spicyTest, n = 10)


## -----------------------------------------------------------------------------
# Visualise which relationships are changing the most.
signifPlot(
  spicyTest,
  breaks = c(-1.5, 1.5, 0.5)
)

## -----------------------------------------------------------------------------
spicyBoxPlot(spicyTest, 
             from = "SC", 
             to = "GC")

## -----------------------------------------------------------------------------
spicyBoxPlot(spicyTest, 
             rank = 1)

## -----------------------------------------------------------------------------
set.seed(51773)

# Cluster cells into spatial regions with similar composition.
cells <- lisaClust(
  cells,
  k = 4,
  sigma = 50,
  cellType = "cellType",
  BPPARAM = BPPARAM
)

## ----fig.height=5, fig.width=5------------------------------------------------
# Visualise the enrichment of each cell type in each region
regionMap(cells, cellType = "cellType", limit = c(0.2, 2))

## ----message=FALSE, warning=FALSE---------------------------------------------
cells |> 
  filter(imageID == "F3") |> 
  plotReducedDim("spatialCoords", colour_by = "region")

## -----------------------------------------------------------------------------
# Use hatching to visualise regions and cell types.
hatchingPlot(
  cells,
  useImages = "F3",
  cellType = "cellType",
  nbp = 300
)

## -----------------------------------------------------------------------------
# Test if the proportion of each region is associated
# with progression status.
testRegion <- colTest(
  cells,
  feature = "region",
  condition = "group",
  type = "ttest"
)

testRegion

## -----------------------------------------------------------------------------
cells$m.cx <- spatialCoords(cells)[,"x"]
cells$m.cy <- spatialCoords(cells)[,"y"]

cells <- getDistances(cells,
  maxDist = 200,
  nCores = nCores,
  cellType = "cellType",
  spatialCoords = c("m.cx", "m.cy")
)

## -----------------------------------------------------------------------------
p <- plotStateChanges(
  cells = cells,
  cellType = "cellType",
  spatialCoords = c("m.cx", "m.cy"),
  type = "distances",
  image = "F3",
  from = "SC",
  to = "EP",
  marker = "podoplanin",
  size = 1,
  shape = 19,
  interactive = FALSE,
  plotModelFit = FALSE,
  method = "lm"
)

p

## -----------------------------------------------------------------------------
state_dist <- calcStateChanges(
  cells = cells,
  cellType = "cellType",
  type = "distances",
  assay = 2,
  nCores = nCores,
  minCells = 100
)

head(state_dist[state_dist$imageID == "F3",], n = 10)

## -----------------------------------------------------------------------------
# Preparing outcome vector
outcome <- cells$group[!duplicated(cells$imageID)]
names(outcome) <- cells$imageID[!duplicated(cells$imageID)]

# Preparing features for Statial
distMat <- prepMatrix(state_dist)

distMat <- distMat[names(outcome), ]

# Remove some very small values
distMat <- distMat[, colMeans(abs(distMat) > 0.0001) > .8]

survivalResults <- colTest(distMat, outcome, type = "ttest")

head(survivalResults)

## -----------------------------------------------------------------------------
fergusonTree <- treekoR::getClusterTree(t(assay(cells, "norm")),
                                        cells$cellType,
                                        hierarchy_method="hopach")

parent1 <- c("TC_CD8", "TC_CD4", "DC")
parent2 <- c("BC", "GC")
parent3 <- c(parent1, parent2)

parent4 <- c("MC", "EP", "SC")
parent5 <- c(parent4, "EC")

all = c(parent1, parent2, parent3, parent4, parent5)

treeDf = Statial::parentCombinations(all, parent1, parent2, parent3, parent4, parent5)

fergusonTree$clust_tree |> plot()

## -----------------------------------------------------------------------------
kontext <- Kontextual(
  cells = cells,
  cellType = "cellType",
  spatialCoords = c("m.cx", "m.cy"),
  parentDf = treeDf,
  r = 50,
  cores = nCores
)

## -----------------------------------------------------------------------------
# Converting Kontextual result into data matrix
kontextMat <- prepMatrix(kontext)

# Replace NAs with 0
kontextMat[is.na(kontextMat)] <- 0

survivalResults <- spicyR::colTest(kontextMat, outcome, type = "ttest")

head(survivalResults)


## -----------------------------------------------------------------------------
# Create list to store data.frames
data <- list()

# Add proportions of each cell type in each image
data[["Proportions"]] <- getProp(cells, "cellType")

# Add pair-wise associations
spicyMat <- bind(spicyTest)
spicyMat[is.na(spicyMat)] <- 0
spicyMat <- spicyMat |>
  select(!condition) |>
  tibble::column_to_rownames("imageID")

data[["SpicyR"]] <- spicyMat

# Add proportions of each region in each image
# to the list of dataframes.
data[["LisaClust"]] <- getProp(cells, "region")


# Add SpatioMark features
data[["SpatioMark"]] <- distMat

# Add Kontextual features
data[["Kontextual"]] <- kontextMat

## -----------------------------------------------------------------------------
# Set seed
set.seed(51773)

# Perform cross-validation of a random forest model
# with 100 repeats of 5-fold cross-validation.
cv <- crossValidate(
  measurements = data,
  outcome = outcome,
  classifier = "randomForest",
  nFolds = 5,
  nRepeats = 50,
  nCores = nCores
)

## -----------------------------------------------------------------------------
# Calculate AUC for each cross-validation repeat and plot.
performancePlot(
  cv,
  metric = "AUC",
  characteristicsList = list(x = "Assay Name"),
  orderingList = list("Assay Name" = c("Proportions", "SpicyR", "LisaClust", "Kontextual", "SpatioMark"))
)

## -----------------------------------------------------------------------------
samplesMetricMap(cv)

## -----------------------------------------------------------------------------
sessionInfo()