Load packages

library(LinQView)
library(Seurat)
library(cowplot)
library(tidyverse)
library(ggplot2)

Step 1 Load data from 10X folder

# Load in the RNA UMI matrix
cbmc.rna <- as.sparse(x = read.csv(file = "../Data/citeseq/GSE100866_CBMC_8K_13AB_10X-RNA_umi.csv.gz", 
    sep = ",", header = TRUE, row.names = 1))
cbmc.rna <- CollapseSpeciesExpressionMatrix(object = cbmc.rna)
# Load in the ADT UMI matrix
cbmc.adt <- as.sparse(x = read.csv(file = "../Data/citeseq/GSE100866_CBMC_8K_13AB_10X-ADT_umi.csv.gz", 
    sep = ",", header = TRUE, row.names = 1))
cbmc.adt <- cbmc.adt[setdiff(x = rownames(x = cbmc.adt), y = c("CCR5", "CCR7", "CD10")), 
    ]

Step 2 Create object

t1 <- Sys.time()
cbmc <- createObject(rna = cbmc.rna, adt = cbmc.adt)
t2 <- Sys.time()
t2 - t1

Step 3 Pre-process

1) Filter out unwanted cells (optional)

for this dataset, we don’t need to filter out unwanted cells

cbmc <- subset(cbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 
    softThreshold(cbmc))

2) Remove unwanted genes (optional)

for this dataset, we don’t need to filter out unwanted genes

# cbmc <- removeGene(object = cbmc,pattern = '^IG[HKL]')

3) Normalization

data Normalization for both ADT (CLR) and RNA (log)

t1 <- Sys.time()
cbmc <- dataNormalization(object = cbmc)
t2 <- Sys.time()
t2 - t1

4) Indentify HVGs for RNA data

Call seurat function to identify highly variable genes (HVG) for RNA data

t1 <- Sys.time()
cbmc <- FindVariableFeatures(object = cbmc)  # directly use Seurat Function
t2 <- Sys.time()
t2 - t1

5) Data scaling

Scale data for both ADT and RNA

t1 <- Sys.time()
cbmc <- dataScaling(object = cbmc)
t2 <- Sys.time()
t2 - t1

Step 4 Linear dimension reduction (PCA)

directly call Seurat function for linear dimension reduction (PCA)

t1 <- Sys.time()
cbmc <- RunPCA(cbmc, features = VariableFeatures(object = cbmc), verbose = FALSE)  # directly use Seurat Function
t2 <- Sys.time()
t2 - t1

Step 5 Determine number of PCs

call Seurat function JackStraw to determine number of PCs

# cbmc <- JackStraw(cbmc, num.replicate = 100) cbmc <- ScoreJackStraw(cbmc, dims
# = 1:20) JackStrawPlot(cbmc, dims = 1:15)
ElbowPlot(cbmc)

Step 6 Distance calculation and joint distance calculation

calculate cell-cell distances for RNA, ADT and joint. alpha was set to 0.5 as initial, number of PC was set to 20 by default.

t1 <- Sys.time()
cbmc <- jointDistance(object = cbmc, keep.rna = TRUE, keep.adt = TRUE)
t2 <- Sys.time()
t2 - t1

Step 7 Non-linear dimension reduction (UMAP and t-SNE)

run UMAP as Non-linear dimension reduction for RNA, ADT and joint analysis.

t1 <- Sys.time()
# cbmc <- umapFromDistane(object = cbmc, assay = 'All')
cbmc <- tsneFromDistane(object = cbmc, assay = "All")
t2 <- Sys.time()
t2 - t1

Step 8 Clustering

t1 <- Sys.time()
cbmc <- clusteringFromDistance(object = cbmc, assay = "All", resolution = c(0.9, 
    0.9, 0.9))
t2 <- Sys.time()
t2 - t1

# contribution of two modalities
distHeatMap(object = cbmc)

Step 9 Visualization ADT vs RNA vs Joint

1) Cell clusters

# gridDimPlot(cbmc, wide.rel = 1.5, legend = FALSE, reduction.prefix = 'tsne_',
# height.rel = 0.5)

plots <- generateGridDimPlot(cbmc, legend = FALSE, darkTheme = FALSE)
listPlot(object = plots, labels = "")

###### user also can only plot some of those plots by index, figure ident or figure
###### map info
listPlot(object = plots, fig.ident = "RNA", ncol = 2)

listPlot(object = plots, fig.ident = "RNA", fig.map = "RNA")

###### user can use plotInfo() function to get index, figure ident and figure map
###### information, then plot figures by index
plotInfo(plots)
listPlot(object = plots, fig.id = 1)

2) Heat maps

# Heatmap for joint clusters
heatMapPlot(object = cbmc, group.by = "jointClusterID", height.rel = 1, adt.label = TRUE)

# Heatmap for RNA clusters
heatMapPlot(object = cbmc, group.by = "rnaClusterID", height.rel = 1, adt.label = TRUE)

# Heatmap for ADT clusters
heatMapPlot(object = cbmc, group.by = "adtClusterID", height.rel = 1, adt.label = TRUE)

VlnPlot(cbmc, features = c("rna_CD8A", "adt_CD8", "rna_NCAM1", "adt_CD56", "rna_CD3G", 
    "adt_CD3"), group.by = "jointClusterID", pt.size = 0, ncol = 2)

ROGUE score + ADT score

joint.score.rna <- scoreRNA(cbmc, group.by = "jointClusterID", platform = "UMI")

joint.score.adt <- scoreADT(cbmc, group.by = "jointClusterID", k = "auto", rank = 2)

rna.score.rna <- scoreRNA(cbmc, group.by = "rnaClusterID", platform = "UMI")

rna.score.adt <- scoreADT(cbmc, group.by = "rnaClusterID", k = "auto", rank = 2)

adt.score.rna <- scoreRNA(cbmc, group.by = "adtClusterID", platform = "UMI")

adt.score.adt <- scoreADT(cbmc, group.by = "adtClusterID", k = "auto", rank = 2)

adt.data <- data.frame(score = c(as.numeric(adt.score.rna), as.numeric(adt.score.adt)), 
    label = c(rep("RNA score", 27), rep("ADT score", 27)), cluster = c(colnames(adt.score.rna), 
        colnames(adt.score.rna)))
rna.data <- data.frame(score = c(as.numeric(rna.score.rna), as.numeric(rna.score.adt)), 
    label = c(rep("RNA score", 20), rep("ADT score", 20)), cluster = c(colnames(rna.score.rna), 
        colnames(rna.score.rna)))
joint.data <- data.frame(score = c(as.numeric(joint.score.rna), as.numeric(joint.score.adt)), 
    label = c(rep("RNA score", 23), rep("ADT score", 23)), cluster = c(colnames(joint.score.rna), 
        colnames(joint.score.rna)))

adt.data$cluster <- factor(adt.data$cluster, levels = c(0:26))
rna.data$cluster <- factor(rna.data$cluster, levels = c(0:19))
joint.data$cluster <- factor(joint.data$cluster, levels = c(0:22))

p.adt <- ggplot(adt.data, aes(x = cluster, y = score, fill = label)) + geom_col(position = "dodge", 
    color = "black") + LightTheme() + ylab("") + xlab("") + ylim(0, 1) + scale_fill_brewer(palette = "Accent")
p.rna <- ggplot(rna.data, aes(x = cluster, y = score, fill = label)) + geom_col(position = "dodge", 
    color = "black") + LightTheme() + ylab("") + xlab("") + ylim(0, 1) + scale_fill_brewer(palette = "Accent")
p.joint <- ggplot(joint.data, aes(x = cluster, y = score, fill = label)) + geom_col(position = "dodge", 
    color = "black") + LightTheme() + ylab("") + xlab("") + ylim(0, 1) + scale_fill_brewer(palette = "Accent")

plot_grid(p.joint, p.rna, p.adt, ncol = 1, labels = c("Joint cluster", "RNA cluster", 
    "ADT cluster"), label_x = 0.3)

# export::graph2pdf(x = p.adt, file = './ADT_bar.pdf',width = 10, height = 4)
# export::graph2pdf(x = p.rna, file = './RNA_bar.pdf',width = 10, height = 4)
# export::graph2pdf(x = p.joint, file = './Joint_bar.pdf',width = 10, height = 4)

score <- c(as.numeric(joint.score.rna), as.numeric(joint.score.adt), as.numeric(rna.score.rna), 
    as.numeric(rna.score.adt), as.numeric(adt.score.rna), as.numeric(adt.score.adt))
name <- c(rep("RNA score", 23), rep("ADT score", 23), rep("RNA score", 20), rep("ADT score", 
    20), rep("RNA score", 27), rep("ADT score", 27))
Clustering <- c(rep("LinQ-View", 46), rep("RNA", 40), rep("ADT", 54))
data <- data.frame(score = score, name = name, Clustering = Clustering)

p <- ggplot(data, aes(x = name, y = score, fill = Clustering)) + geom_boxplot(outlier.alpha = 0) + 
    geom_dotplot(binaxis = "y", stackdir = "center", position = position_dodge(0.75), 
        binwidth = 0.01) + LightTheme() + theme(legend.position = "top") + ylab("Purity Score") + 
    xlab("")
p

# export::graph2pdf(x = p, file = './score_box.pdf',width = 4, height = 4)

R session info

sessionInfo()

## R version 3.6.3 (2020-02-29)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Mojave 10.14.6
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] forcats_0.5.0     stringr_1.4.0     dplyr_1.0.2       purrr_0.3.4      
##  [5] readr_1.4.0       tidyr_1.1.2       tibble_3.0.4      ggplot2_3.3.2    
##  [9] tidyverse_1.3.0   cowplot_1.1.0     Seurat_3.9.9.9002 LinQView_0.99.0  
## 
## loaded via a namespace (and not attached):
##   [1] Rtsne_0.15            colorspace_1.4-1      deldir_0.1-29        
##   [4] ellipsis_0.3.1        ggridges_0.5.2        fs_1.5.0             
##   [7] rstudioapi_0.12       spatstat.data_1.4-3   farver_2.0.3         
##  [10] leiden_0.3.3          listenv_0.8.0         ggrepel_0.8.2        
##  [13] fansi_0.4.1           lubridate_1.7.9       RSpectra_0.16-0      
##  [16] xml2_1.3.2            codetools_0.2-16      splines_3.6.3        
##  [19] knitr_1.30            polyclip_1.10-0       jsonlite_1.7.1       
##  [22] umap_0.2.6.0          broom_0.7.2           ica_1.0-2            
##  [25] cluster_2.1.0         dbplyr_1.4.4          png_0.1-7            
##  [28] uwot_0.1.8            shiny_1.5.0           sctransform_0.3.1    
##  [31] compiler_3.6.3        httr_1.4.2            backports_1.2.0      
##  [34] assertthat_0.2.1      Matrix_1.2-18         fastmap_1.0.1        
##  [37] lazyeval_0.2.2        limma_3.42.2          cli_2.1.0            
##  [40] later_1.1.0.1         formatR_1.7           htmltools_0.5.0      
##  [43] tools_3.6.3           rsvd_1.0.3            igraph_1.2.6         
##  [46] gtable_0.3.0          glue_1.4.2            RANN_2.6.1           
##  [49] reshape2_1.4.4        Rcpp_1.0.5            spatstat_1.64-1      
##  [52] cellranger_1.1.0      vctrs_0.3.4           nlme_3.1-150         
##  [55] lmtest_0.9-38         xfun_0.18             globals_0.13.1       
##  [58] rvest_0.3.6           mime_0.9              miniUI_0.1.1.1       
##  [61] lifecycle_0.2.0       irlba_2.3.3           goftest_1.2-2        
##  [64] future_1.19.1         MASS_7.3-53           zoo_1.8-8            
##  [67] scales_1.1.1          hms_0.5.3             promises_1.1.1       
##  [70] spatstat.utils_1.17-0 parallel_3.6.3        RColorBrewer_1.1-2   
##  [73] yaml_2.2.1            reticulate_1.18       pbapply_1.4-3        
##  [76] gridExtra_2.3         rpart_4.1-15          stringi_1.5.3        
##  [79] ROGUE_1.0             rlang_0.4.8           pkgconfig_2.0.3      
##  [82] matrixStats_0.57.0    evaluate_0.14         lattice_0.20-41      
##  [85] ROCR_1.0-11           tensor_1.5            labeling_0.4.2       
##  [88] patchwork_1.0.1       htmlwidgets_1.5.2     tidyselect_1.1.0     
##  [91] RcppAnnoy_0.0.16      plyr_1.8.6            magrittr_1.5         
##  [94] R6_2.5.0              generics_0.1.0        DBI_1.1.0            
##  [97] withr_2.3.0           haven_2.3.1           pillar_1.4.6         
## [100] mgcv_1.8-33           fitdistrplus_1.1-1    prettydoc_0.4.0      
## [103] survival_3.2-7        abind_1.4-5           future.apply_1.6.0   
## [106] modelr_0.1.8          crayon_1.3.4          KernSmooth_2.23-17   
## [109] plotly_4.9.2.1        rmarkdown_2.5         readxl_1.3.1         
## [112] grid_3.6.3            data.table_1.13.2     blob_1.2.1           
## [115] reprex_0.3.0          digest_0.6.27         xtable_1.8-4         
## [118] httpuv_1.5.4          openssl_1.4.3         munsell_0.5.0        
## [121] viridisLite_0.3.0     askpass_1.1