Single cell clustering (IMC)
Given: - Cell-by-protein expression matrix. Each row is a single cell. Each column is a protein marker. Values are z-scores of log-normalized protein intensities. - single cell coordinates (X and Y). Input files can be obtained at https://github.com/bernard2012/TNBC.disparity. The file names are "Refined.matrix_v1_TNBC_Clinical_updated_v0.txt" and "Xcen_v0_TNBC_race_reprocessed.txt".
Step 1
K-means clustering in R using Giotto package:library(Matrix)
library(Giotto)
cytof<-data.table::fread(file="Refined.matrix_v1_TNBC_Clinical_updated_v0.txt")
pos<-data.table::fread(file="Xcen_v0_TNBC_race_reprocessed.txt")
expr=as.matrix(cytof[,-1])
rownames(expr) = cytof$cell
cytof_test=createGiottoObject(raw_exprs=t(expr), spatial_locs=pos[,.(X,Y)])
cytof_test@norm_expr=Matrix::as.matrix(t(expr))
cytof_test = doKmeans(gobject = cytof_test, expression_values = 'norm', centers = 20, nstart = 10000)
plotMetaDataHeatmap(cytof_test, expression_values="norm", metadata_cols=c("kmeans"), show_values="original")
km=pDataDT(cytof_test)
write.table(file="frequency.kmeans.k20iter10000", km, quot=F, sep="\t", row.names=F)
Results saved to a file named frequency.kmeans.k20iter10000.
cell_ID kmeans
Case1_ROI1_1 18
Case1_ROI1_2 6
Case1_ROI1_3 9
Case1_ROI1_4 6
Case1_ROI1_5 5
Case1_ROI1_6 6
Case1_ROI1_7 9
Case1_ROI1_8 6
Case1_ROI1_9 5
Case1_ROI1_10 5
Case1_ROI1_11 15
Case1_ROI1_12 12
Case1_ROI1_13 12
Case1_ROI1_14 20
Case1_ROI1_15 9
Case1_ROI1_16 12
Case1_ROI1_17 6
Case1_ROI1_18 18
Case1_ROI1_19 18
Case1_ROI1_20 5
...
Step 2: tSNE
kmeansk20<-read.table("frequency.kmeans.k20iter10000", head=T)
colnames(kmeansk20) <- c("cell_ID", "k20.10000")
cytof_test=addCellMetadata(cytof_test, new_metadata=kmeansk20, by_column=T, column_cell_ID="cell_ID")
cytof_test <- runPCA(gobject = cytof_test, scale_unit = F, center=T, method="factominer")
cytof_test <- runtSNE(cytof_test, dim_reduction_to_use=NULL)
plotTSNE(cytof_test, cell_color="k20.10000", show_center_label=F)
save(cytof_test, file="cytof_test.RData")
Step 3: Annotate cluster
load("cytof_test.RData")
kmeansk20<-read.table("frequency.kmeans.k20iter10000", head=T)
colnames(kmeansk20) <- c("cell_ID", "k20.10000")
cytof_test=addCellMetadata(cytof_test, new_metadata=kmeansk20, by_column=T, column_cell_ID="cell_ID")
clusters_cytof=c("1 CD16 CD163 PDL1", "2 PanCK", "3 CD11c pHH3", "4 Vimentin AR PDL1 PLK1", "5 CD11c FOXP3 pHH3 ECadherin", "6 HIF1a", "7 CD152 CD8a", "8 PLK1 PanCK Ki67", "9 GranzymeB CD152 HIF1a", "10 CD68", "11 CD3 CD45 CD4 CD8a CD45RO", "12 CD16 CD163 CD68", "13 CD31 Vimentin AR", "14 CD45 CD45RA CD45RO", "15 CD4 CD3", "16 PanCK VEGF ECadherin", "17 KIFC1 GranzymeB Ki67", "18 Ki67 KIFC1", "19 PLK1 PD1", "20 CD31 CD45RA")
names(clusters_cytof)=c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20)
cytof_test=annotateGiotto(cytof_test, annotation_vector=clusters_cytof, cluster_column="k20.10000", name="k20.10000_annot")
library(stringr)
cell_metadata<-pDataDT(cytof_test)
tt=sapply(cell_metadata$cell_ID, function(x) str_locate_all(pattern="_", x)[[1]][2,][1])
sample=substr(cell_metadata$cell_ID, 1, tt-1)
state<-cbind(sample, cell_metadata$cell_ID)
colnames(state)=c("sample", "cell_ID")
cytof_test=addCellMetadata(cytof_test, new_metadata=state, by_column=T, column_cell_ID="cell_ID")
Step 4: Cell-cell interaction
Spatial cell-cell interaction analysis per ROI. Be sure to have the `subset.giotto.R` file. The content looks like the following:do_one_subset <- function(samplex){
subset1=subsetGiotto(cytof_test, cell_ids=cell_metadata[sample==samplex][["cell_ID"]])
subset1=createSpatialNetwork(subset1, name="delauney_network")
#kmeans_annot
cell_proximities=cellProximityEnrichment(subset1, cluster_column="k20.10000_annot", spatial_network_name="delauney_network", number_of_simulations=1000, set_seed=FALSE)
write.table(file=paste0("mar23/prox.", samplex, ".txt"), cell_proximities$enrichm_res, quot=F, sep="\t")
}
This will perform the function `do_one_subset()` on one ROI. The input argument is the ROI ID.
Once the file is in place, load the script in R:
source("subset.giotto.R")
Then repeat over all ROIs:
do_one_subset("Case1_ROI1_1")
do_one_subset("Case1_ROI1_2")
do_one_subset("Case1_ROI1_3")
do_one_subset("Case1_ROI1_4")
do_one_subset("Case1_ROI1_5")
...
The output is stored in "mar23/prox.*".
Final results
Each prox.* file in directory mar23, will look like the following:unified_int type_int original simulations enrichm p_higher_orig p_lower_orig p.adj_higher p.adj_lower PI_value int_ranking
1 20 CD31 CD45RA--20 CD31 CD45RA homo 141 20.082 2.75180728604768 0 1 0 1 8.25542185814303 1
2 13 CD31 Vimentin AR--13 CD31 Vimentin AR homo 82 11.627 2.7165994209468 0 1 0 1 8.1497982628404 2
From this we can sort the results based on the enrichment and the PI_value columns.