Load packages
Step 1 Load data from 10X folder
# Load in the RNA UMI matrix
cbmc.rna <- as.sparse(x = read.csv(file = "../Data/citeseq/GSE100866_CBMC_8K_13AB_10X-RNA_umi.csv.gz",
sep = ",", header = TRUE, row.names = 1))
cbmc.rna <- CollapseSpeciesExpressionMatrix(object = cbmc.rna)
# Load in the ADT UMI matrix
cbmc.adt <- as.sparse(x = read.csv(file = "../Data/citeseq/GSE100866_CBMC_8K_13AB_10X-ADT_umi.csv.gz",
sep = ",", header = TRUE, row.names = 1))
cbmc.adt <- cbmc.adt[setdiff(x = rownames(x = cbmc.adt), y = c("CCR5", "CCR7", "CD10")),
]
Step 2 Create object
Step 3 Pre-process
1) Filter out unwanted cells (optional)
for this dataset, we don’t need to filter out unwanted cells
2) Remove unwanted genes (optional)
for this dataset, we don’t need to filter out unwanted genes
3) Normalization
data Normalization for both ADT (CLR) and RNA (log)
4) Indentify HVGs for RNA data
Call seurat function to identify highly variable genes (HVG) for RNA data
5) Data scaling
Scale data for both ADT and RNA
Step 4 Linear dimension reduction (PCA)
directly call Seurat function for linear dimension reduction (PCA)
Step 5 Determine number of PCs
call Seurat function JackStraw to determine number of PCs
# cbmc <- JackStraw(cbmc, num.replicate = 100) cbmc <- ScoreJackStraw(cbmc, dims
# = 1:20) JackStrawPlot(cbmc, dims = 1:15)
ElbowPlot(cbmc)
Step 6 Distance calculation and joint distance calculation
calculate cell-cell distances for RNA, ADT and joint. alpha was set to 0.5 as initial, number of PC was set to 20 by default.
Step 7 Non-linear dimension reduction (UMAP and t-SNE)
run UMAP as Non-linear dimension reduction for RNA, ADT and joint analysis.
Step 8 Clustering
t1 <- Sys.time()
cbmc <- clusteringFromDistance(object = cbmc, assay = "All", resolution = c(0.9,
0.9, 0.9))
t2 <- Sys.time()
t2 - t1
Step 9 Visualization ADT vs RNA vs Joint
1) Cell clusters
# gridDimPlot(cbmc, wide.rel = 1.5, legend = FALSE, reduction.prefix = 'tsne_',
# height.rel = 0.5)
plots <- generateGridDimPlot(cbmc, legend = FALSE, darkTheme = FALSE)
listPlot(object = plots, labels = "")
###### user also can only plot some of those plots by index, figure ident or figure
###### map info
listPlot(object = plots, fig.ident = "RNA", ncol = 2)
###### user can use plotInfo() function to get index, figure ident and figure map
###### information, then plot figures by index
plotInfo(plots)
listPlot(object = plots, fig.id = 1)
2) Heat maps
# Heatmap for joint clusters
heatMapPlot(object = cbmc, group.by = "jointClusterID", height.rel = 1, adt.label = TRUE)
# Heatmap for RNA clusters
heatMapPlot(object = cbmc, group.by = "rnaClusterID", height.rel = 1, adt.label = TRUE)
# Heatmap for ADT clusters
heatMapPlot(object = cbmc, group.by = "adtClusterID", height.rel = 1, adt.label = TRUE)
VlnPlot(cbmc, features = c("rna_CD8A", "adt_CD8", "rna_NCAM1", "adt_CD56", "rna_CD3G",
"adt_CD3"), group.by = "jointClusterID", pt.size = 0, ncol = 2)
ROGUE score + ADT score
joint.score.adt <- scoreADT(cbmc, group.by = "jointClusterID", k = "auto", rank = 2)
rna.score.rna <- scoreRNA(cbmc, group.by = "rnaClusterID", platform = "UMI")
rna.score.adt <- scoreADT(cbmc, group.by = "rnaClusterID", k = "auto", rank = 2)
adt.score.rna <- scoreRNA(cbmc, group.by = "adtClusterID", platform = "UMI")
adt.data <- data.frame(score = c(as.numeric(adt.score.rna), as.numeric(adt.score.adt)),
label = c(rep("RNA score", 27), rep("ADT score", 27)), cluster = c(colnames(adt.score.rna),
colnames(adt.score.rna)))
rna.data <- data.frame(score = c(as.numeric(rna.score.rna), as.numeric(rna.score.adt)),
label = c(rep("RNA score", 20), rep("ADT score", 20)), cluster = c(colnames(rna.score.rna),
colnames(rna.score.rna)))
joint.data <- data.frame(score = c(as.numeric(joint.score.rna), as.numeric(joint.score.adt)),
label = c(rep("RNA score", 23), rep("ADT score", 23)), cluster = c(colnames(joint.score.rna),
colnames(joint.score.rna)))
adt.data$cluster <- factor(adt.data$cluster, levels = c(0:26))
rna.data$cluster <- factor(rna.data$cluster, levels = c(0:19))
joint.data$cluster <- factor(joint.data$cluster, levels = c(0:22))
p.adt <- ggplot(adt.data, aes(x = cluster, y = score, fill = label)) + geom_col(position = "dodge",
color = "black") + LightTheme() + ylab("") + xlab("") + ylim(0, 1) + scale_fill_brewer(palette = "Accent")
p.rna <- ggplot(rna.data, aes(x = cluster, y = score, fill = label)) + geom_col(position = "dodge",
color = "black") + LightTheme() + ylab("") + xlab("") + ylim(0, 1) + scale_fill_brewer(palette = "Accent")
p.joint <- ggplot(joint.data, aes(x = cluster, y = score, fill = label)) + geom_col(position = "dodge",
color = "black") + LightTheme() + ylab("") + xlab("") + ylim(0, 1) + scale_fill_brewer(palette = "Accent")
plot_grid(p.joint, p.rna, p.adt, ncol = 1, labels = c("Joint cluster", "RNA cluster",
"ADT cluster"), label_x = 0.3)
# export::graph2pdf(x = p.adt, file = './ADT_bar.pdf',width = 10, height = 4)
# export::graph2pdf(x = p.rna, file = './RNA_bar.pdf',width = 10, height = 4)
# export::graph2pdf(x = p.joint, file = './Joint_bar.pdf',width = 10, height = 4)
score <- c(as.numeric(joint.score.rna), as.numeric(joint.score.adt), as.numeric(rna.score.rna),
as.numeric(rna.score.adt), as.numeric(adt.score.rna), as.numeric(adt.score.adt))
name <- c(rep("RNA score", 23), rep("ADT score", 23), rep("RNA score", 20), rep("ADT score",
20), rep("RNA score", 27), rep("ADT score", 27))
Clustering <- c(rep("LinQ-View", 46), rep("RNA", 40), rep("ADT", 54))
data <- data.frame(score = score, name = name, Clustering = Clustering)
p <- ggplot(data, aes(x = name, y = score, fill = Clustering)) + geom_boxplot(outlier.alpha = 0) +
geom_dotplot(binaxis = "y", stackdir = "center", position = position_dodge(0.75),
binwidth = 0.01) + LightTheme() + theme(legend.position = "top") + ylab("Purity Score") +
xlab("")
p
R session info
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Mojave 10.14.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] forcats_0.5.0 stringr_1.4.0 dplyr_1.0.2 purrr_0.3.4
## [5] readr_1.4.0 tidyr_1.1.2 tibble_3.0.4 ggplot2_3.3.2
## [9] tidyverse_1.3.0 cowplot_1.1.0 Seurat_3.9.9.9002 LinQView_0.99.0
##
## loaded via a namespace (and not attached):
## [1] Rtsne_0.15 colorspace_1.4-1 deldir_0.1-29
## [4] ellipsis_0.3.1 ggridges_0.5.2 fs_1.5.0
## [7] rstudioapi_0.12 spatstat.data_1.4-3 farver_2.0.3
## [10] leiden_0.3.3 listenv_0.8.0 ggrepel_0.8.2
## [13] fansi_0.4.1 lubridate_1.7.9 RSpectra_0.16-0
## [16] xml2_1.3.2 codetools_0.2-16 splines_3.6.3
## [19] knitr_1.30 polyclip_1.10-0 jsonlite_1.7.1
## [22] umap_0.2.6.0 broom_0.7.2 ica_1.0-2
## [25] cluster_2.1.0 dbplyr_1.4.4 png_0.1-7
## [28] uwot_0.1.8 shiny_1.5.0 sctransform_0.3.1
## [31] compiler_3.6.3 httr_1.4.2 backports_1.2.0
## [34] assertthat_0.2.1 Matrix_1.2-18 fastmap_1.0.1
## [37] lazyeval_0.2.2 limma_3.42.2 cli_2.1.0
## [40] later_1.1.0.1 formatR_1.7 htmltools_0.5.0
## [43] tools_3.6.3 rsvd_1.0.3 igraph_1.2.6
## [46] gtable_0.3.0 glue_1.4.2 RANN_2.6.1
## [49] reshape2_1.4.4 Rcpp_1.0.5 spatstat_1.64-1
## [52] cellranger_1.1.0 vctrs_0.3.4 nlme_3.1-150
## [55] lmtest_0.9-38 xfun_0.18 globals_0.13.1
## [58] rvest_0.3.6 mime_0.9 miniUI_0.1.1.1
## [61] lifecycle_0.2.0 irlba_2.3.3 goftest_1.2-2
## [64] future_1.19.1 MASS_7.3-53 zoo_1.8-8
## [67] scales_1.1.1 hms_0.5.3 promises_1.1.1
## [70] spatstat.utils_1.17-0 parallel_3.6.3 RColorBrewer_1.1-2
## [73] yaml_2.2.1 reticulate_1.18 pbapply_1.4-3
## [76] gridExtra_2.3 rpart_4.1-15 stringi_1.5.3
## [79] ROGUE_1.0 rlang_0.4.8 pkgconfig_2.0.3
## [82] matrixStats_0.57.0 evaluate_0.14 lattice_0.20-41
## [85] ROCR_1.0-11 tensor_1.5 labeling_0.4.2
## [88] patchwork_1.0.1 htmlwidgets_1.5.2 tidyselect_1.1.0
## [91] RcppAnnoy_0.0.16 plyr_1.8.6 magrittr_1.5
## [94] R6_2.5.0 generics_0.1.0 DBI_1.1.0
## [97] withr_2.3.0 haven_2.3.1 pillar_1.4.6
## [100] mgcv_1.8-33 fitdistrplus_1.1-1 prettydoc_0.4.0
## [103] survival_3.2-7 abind_1.4-5 future.apply_1.6.0
## [106] modelr_0.1.8 crayon_1.3.4 KernSmooth_2.23-17
## [109] plotly_4.9.2.1 rmarkdown_2.5 readxl_1.3.1
## [112] grid_3.6.3 data.table_1.13.2 blob_1.2.1
## [115] reprex_0.3.0 digest_0.6.27 xtable_1.8-4
## [118] httpuv_1.5.4 openssl_1.4.3 munsell_0.5.0
## [121] viridisLite_0.3.0 askpass_1.1