visualize.EA.cluster.fig2e
from smfishHmrf.HMRFInstance import HMRFInstance
from smfishHmrf.DatasetMatrix import DatasetMatrix, DatasetMatrixSingleField, DatasetMatrixMultiField
from smfishHmrf.spatial import rank_transform_matrix, calc_silhouette_per_gene
import random
import sys
import math
import numpy as np
import scipy
import scipy.stats
from scipy.stats import zscore
from scipy.spatial.distance import euclidean, squareform, pdist
import smfishHmrf.reader as reader
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.lines import Line2D
from smfishHmrf.bias_correction import calc_bias_moving, do_pca, plot_pca
from scipy.cluster.vq import kmeans2
def norm_centroid(cent):
scale_factor = 2000/4
cent[:,0] = (cent[:,0] - float(1000)) / float(500)
cent[:,1] = (cent[:,1] - float(-1000)) / float(500)
return cent
def read_centroid(n):
f = open(n)
f.readline()
num_cell = 0
for l in f:
l = l.rstrip("\n")
num_cell+=1
f.close()
Xcen = np.empty((num_cell, 2), dtype="float32")
field = np.empty((num_cell), dtype="int32")
f = open(n)
f.readline()
ind = 0
for l in f:
l = l.rstrip("\n")
ll = l.split(",")
Xcen[ind,:] = [float(ll[-2]), float(ll[-1])]
field[ind] = int(ll[0])
ind+=1
f.close()
return Xcen, field
def read_position(n):
f = open(n)
f.readline()
Xcen = {}
order = []
for l in f:
l = l.rstrip("\n")
ll = l.split("\t")
x,y = float(ll[1]), float(ll[2])
t_id = ll[0]
Xcen[t_id] = (x,y)
order.append(t_id)
f.close()
cen = np.empty((len(order), 2), dtype="float32")
for i in range(len(order)):
cen[i,:] = Xcen[order[i]]
field = []
for i in range(len(order)):
t_id = order[i]
t_field = "_".join(t_id.split("_")[:2])
tt = t_field
field.append(tt)
field = np.array(field)
return cen, field
def read_cluster(n, cluster_label):
f = open(cluster_label)
m = []
for l in f:
l = l.rstrip("\n")
m.append(l)
f.close()
map_label = {}
for im,v in enumerate(m):
map_label[im+1] = v
f = open(n)
clust = []
for l in f:
l = l.rstrip("\n")
clust.append(map_label[int(l)])
f.close()
return np.array(clust)
'''
def read_metadata(n):
f = open(n)
h = f.readline().rstrip("\n").split("\t")
meta = {}
for l in f:
l = l.rstrip("\n")
ll = l.split("\t")
t_zip = dict(zip(h, ll[1:]))
t_id = t_zip["cell_ID"]
meta.setdefault("cell_ID",[])
meta["cell_ID"].append(t_id)
for i in ["0", "0.5", "1", "1.5", "2", "2.5", "3", "3.5", "4"]:
for j in ["10", "20", "30"]:
t_key = "hmrf_k.%s_b.%s" % (j, i)
tt = int(t_zip[t_key])
meta.setdefault(t_key, [])
meta[t_key].append(tt)
meta.setdefault("leiden_clus", [])
meta["leiden_clus"].append(int(t_zip["leiden_clus"]))
f.close()
for k in meta:
meta[k] = np.array(meta[k])
return meta
'''
def convert(n, vmin, vmax):
return (n - vmin) / (vmax - vmin + 1)
if __name__=="__main__":
Xcen, field = read_position("Xcen_v0_TNBC_race_reprocessed.txt")
#Xcen, field = read_position("Xcen.txt")
#meta = read_metadata("frequency.kmeans")
clust = read_cluster("frequency.kmeans.k20iter10000.1", "cell.cell.interact/uniq.clusters")
#clust = read_cluster("frequency.leiden")
#max_clust = np.max(clust)
print(clust)
uniq_field = np.unique(field)
print(uniq_field)
print(len(uniq_field))
#sys.exit(0)
dot_size = 50
ncol = 5
nrow = int(10 / ncol)
size_factor = 5
if 10%ncol>0:
nrow+=1
print(nrow, ncol)
f, axn = plt.subplots(nrow, ncol, figsize=(ncol * size_factor, nrow * size_factor))
plt.subplots_adjust(hspace=0.05, wspace=0.05)
ct = 0
#print(uniq_field)
#sys.exit(0)
#AA patients (selective)
# patient18, patient18, patient23 patient23 patient37 patient4 patient4 patient40 patient41
uniq_field = ["Case2_ROI9", "Case6_ROI7", "Case9_ROI4", "Case1_ROI2", "Case9_ROI12", "Case10_ROI14", "Case10_ROI2", "Case7_ROI6", \
"Case8_ROI4", "Case8_ROI10"]
#uniq_field = ["Case2_ROI10", "Case2_ROI9", "Case6_ROI7", "Case6_ROI8", "Case9_ROI4", "Case1_ROI1", "Case1_ROI2", "Case9_ROI12", "Case9_ROI13"]
#AA patients (all)
'''
uniq_field = ["Case1_ROI5", "Case1_ROI6", "Case2_ROI1", "Case2_ROI2", "Case2_ROI3", "Case2_ROI4", "Case2_ROI5", \
"Case2_ROI6", "Case2_ROI7", "Case2_ROI8", "Case2_ROI10", "Case2_ROI9", "Case1_ROI7", "Case1_ROI8", "Case6_ROI1", \
"Case6_ROI2", "Case6_ROI3", "Case6_ROI4", "Case6_ROI7", "Case6_ROI8", "Case1_ROI10", "Case1_ROI9", "Case9_ROI1", \
"Case9_ROI2", "Case9_ROI3", "Case9_ROI4", "Case9_ROI5", "Case9_ROI6", "Case9_ROI8", "Case9_ROI9", "Case1_ROI1", \
"Case1_ROI2", "Case9_ROI11", "Case9_ROI12", "Case9_ROI13", "Case9_ROI14", "Case6_ROI5", "Case6_ROI6", "Case1_ROI11", \
"Case9_ROI15", "Case9_ROI7", "Case9_ROI10", "Case1_ROI12", "Case1_ROI13", "Case1_ROI14", "Case1_ROI3", "Case1_ROI4"]
'''
#EA patients (all)
'''
uniq_field = ["Case10_ROI10", "Case10_ROI11", "Case10_ROI12", "Case10_ROI13", "Case10_ROI14", "Case10_ROI1", \
"Case10_ROI2", "Case10_ROI3", "Case10_ROI4", "Case10_ROI5", "Case10_ROI6", "Case10_ROI7", "Case10_ROI8", "Case10_ROI9", \
"Case4_ROI1", "Case4_ROI2", "Case4_ROI3" "Case4_ROI4", "Case4_ROI5", "Case4_ROI6", "Case7_ROI10", "Case7_ROI11", "Case7_ROI12", \
"Case7_ROI13", "Case7_ROI1", "Case7_ROI2" "Case7_ROI3", "Case7_ROI4", "Case7_ROI5", "Case7_ROI6", "Case7_ROI7", "Case7_ROI8", \
"Case7_ROI9", "Case8_ROI10", "Case8_ROI11" "Case8_ROI12", "Case8_ROI13", "Case8_ROI14", "Case8_ROI15" "Case8_ROI16", "Case8_ROI17",\
"Case8_ROI18", "Case8_ROI1", "Case8_ROI2" "Case8_ROI3", "Case8_ROI4", "Case8_ROI5", "Case8_ROI6", "Case8_ROI7", "Case8_ROI8", "Case8_ROI9"]
'''
print(len(uniq_field))
#uniq_field = ["Case10_ROI8", "Case10_ROI9", "Case7_ROI12", "Case7_ROI13", "Case10_ROI10", "Case10_ROI11", "Case7_ROI10", "Case8_ROI6", "Case10_ROI13", "Case10_ROI14"]
'''
uniq_field = ['Case1_Bone_ROI1', 'Case1_Bone_ROI2', 'Case1_Bone_ROI3', 'Case1_Bone_ROI4',
'Case1_Bone_ROI5', 'Case1_Breast_ROI1', 'Case1_Breast_ROI2',
'Case1_Breast_ROI3', 'Case1_Breast_ROI4', 'Case1_Breast_ROI5',
'Case2_Bone_ROI1', 'Case2_Bone_ROI2', 'Case2_Bone_ROI3', 'Case2_Bone_ROI4',
'Case2_Bone_ROI5', 'Case2_Breast_ROI1', 'Case2_Breast_ROI2']
'''
c1 = "18 Ki67 KIFC1"
c2 = "20 CD31 CD45RA"
c3 = "10 CD68"
c4 = "9 GranzymeB CD152 HIF1a"
c5 = "3 CD11c pHH3"
c6 = "5 CD11c FOXP3 pHH3 ECadherin"
c7 = "7 CD152 CD8a"
for fd in uniq_field:
#m = np.where(field==fd)[0]
#m = np.where((field==fd) & ((clust==1) | (clust==8) | (clust==7) | (clust==2) | (clust==5) | (clust==13) | (clust==14) | (clust==6) | (clust==15)))[0]
#m = np.where((field==fd) & ((clust==13) | (clust==14) | (clust==15)))[0]
#m = np.where((field==fd) & (clust==c1))[0]
m2 = np.where((field==fd) & (clust==c2))[0]
m3 = np.where((field==fd) & (clust==c3))[0]
m4 = np.where((field==fd) & (clust==c4))[0]
m5 = np.where((field==fd) & (clust==c5))[0]
#m6 = np.where((field==fd) & (clust==c6))[0]
m7 = np.where((field==fd) & (clust==c7))[0]
#m8 = np.where((field==fd) & ((clust!=c1) & (clust!=c2) & (clust!=c3) & (clust!=c4) & (clust!=c5) & (clust!=c6) & (clust!=7)))[0]
m8 = np.where((field==fd) & ((clust!=c2) & (clust!=c3) & (clust!=c4) & (clust!=c5) & (clust!=7)))[0]
#cl = meta["leiden_clus"]
#cl = meta["hmrf_k.20_b.1"]
tot_size = m2.shape[0] + m3.shape[0] + m4.shape[0] + m5.shape[0] + m7.shape[0]
tot_Xcen = np.empty((tot_size, 2), dtype="float32")
tot_Xcen[0:m2.shape[0],:] = Xcen[m2,:]
xt = m2.shape[0]
tot_Xcen[xt:xt+m3.shape[0],:] = Xcen[m3,:]
xt += m3.shape[0]
tot_Xcen[xt:xt+m4.shape[0],:] = Xcen[m4,:]
xt += m4.shape[0]
tot_Xcen[xt:xt+m5.shape[0],:] = Xcen[m5,:]
xt += m5.shape[0]
tot_Xcen[xt:xt+m7.shape[0],:] = Xcen[m7,:]
tot_color = []
tot_color.extend(["red" for i in range(m2.shape[0])])
tot_color.extend(["blue" for i in range(m3.shape[0])])
tot_color.extend(["orange" for i in range(m4.shape[0])])
tot_color.extend(["green" for i in range(m5.shape[0])])
tot_color.extend(["magenta" for i in range(m7.shape[0])])
tot_color = np.array(tot_color)
ind = [i for i in range(tot_size)]
random.shuffle(ind)
#colors = np.empty(m.shape, dtype="int32")
#for c in range(colors.shape[0]):
# colors[c] = 1
axn.flat[ct].scatter(Xcen[m8,0], Xcen[m8,1], s=dot_size*0.3, c="lightgray", edgecolors=None) #other
'''
#axn.flat[ct].scatter(Xcen[m,0], Xcen[m,1], s=dot_size, c="black", edgecolors="dimgray", cmap="RdYlBu", vmin=1, vmax=1) #ki67 kifc1
axn.flat[ct].scatter(Xcen[m2,0], Xcen[m2,1], s=dot_size, c="red", edgecolors="dimgray", cmap="RdYlBu", vmin=1, vmax=1) #cd31 cd45ra
axn.flat[ct].scatter(Xcen[m3,0], Xcen[m3,1], s=dot_size, c="blue", edgecolors="dimgray", cmap="RdYlBu", vmin=1, vmax=1) #cd68
axn.flat[ct].scatter(Xcen[m4,0], Xcen[m4,1], s=dot_size, c="orange", edgecolors="dimgray", cmap="RdYlBu", vmin=1, vmax=1) #gzmb cd152 hif1a
axn.flat[ct].scatter(Xcen[m5,0], Xcen[m5,1], s=dot_size, c="green", edgecolors="dimgray", cmap="RdYlBu", vmin=1, vmax=1) #cd11c phh3
#axn.flat[ct].scatter(Xcen[m6,0], Xcen[m6,1], s=dot_size, c="pink", edgecolors="dimgray", cmap="RdYlBu", vmin=1, vmax=1) #cd11c foxp3 ecad
axn.flat[ct].scatter(Xcen[m7,0], Xcen[m7,1], s=dot_size, c="magenta", edgecolors="dimgray", cmap="RdYlBu", vmin=1, vmax=1) #cd152 cd8a
'''
axn.flat[ct].scatter(tot_Xcen[ind,0], tot_Xcen[ind,1], s=dot_size, c=tot_color[ind], edgecolors="dimgray")
axn.flat[ct].title.set_text(fd)
#axn.flat[ct].title.set_visible(False)
axn.flat[ct].set_facecolor("white")
axn.flat[ct].axis("off")
ct+=1
#f.savefig("visual.clusters.EAclusters.AApatient.pdf",bbox_inches='tight')
f.savefig("visual.clusters.EAclusters.fig2e.pdf",bbox_inches='tight')
#plt.show()
sys.exit(0)
Running Command
python3 visualize.EA.cluster.fig2e.py
Results