stats_across_tumor_Tcellexh


import sys
import os
import re
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

def read_coord(n):
	f = open(n)
	Xcen = []
	cells = []
	for l in f:
		l = l.rstrip("\n")
		ll = l.split(",")
		cells.append(ll[0])
		Xcen.append((float(ll[-2]), float(ll[-1])))
	f.close()
	Xcen2 = np.empty((len(Xcen), 2), dtype="float32")
	for ind,(i,j) in enumerate(Xcen):
		Xcen2[ind, :] = [j,-1.0*i]
	Xcen = Xcen2
	return Xcen, cells

def read_expression(n):
	f = open(n)
	h = f.readline().rstrip("\n").split()
	h = [xh.replace(".", "-") for xh in h]
	num_gene = 0
	for l in f:
		l = l.rstrip("\n")
		num_gene+=1
	f.close()
	mat = np.empty((num_gene, len(h)), dtype="float32")
	f = open(n)
	f.readline()
	ig = 0
	genes = []
	for l in f:
		l = l.rstrip("\n")
		ll = l.split()
		gene = ll[0]
		values = [float(v) for v in ll[1:]]
		mat[ig,:] = values
		genes.append(gene)
		ig+=1
	f.close()
	return mat, h, genes

def read_list(n):
	f = open(n)
	m = []
	for l in f:
		l = l.rstrip("\n")
		m.append(l)
	f.close()
	return m

def do_one_sample(sample):
	#sample = sys.argv[1]
	mat, cells, genes = read_expression("../../dir_%s/Giotto_norm_expr.txt" % sample)
	Xcen, Xcells = read_coord("../../dir_%s/spatial/tissue_positions_list.csv" % sample)

	map_cell = {}
	for ic,c in enumerate(Xcells):
		map_cell[c] = ic

	good_cell_ind = np.array([map_cell[c] for c in cells])
	Xcen = Xcen[good_cell_ind, :]

	gene_lists = []
	'''
	gene_lists.append(("MKI67", "KIFC1", "PECAM1", "PTPRC"))
	gene_lists.append(("CD68", "GZMB", "CTLA4", "HIF1A"))
	gene_lists.append(("ITGAX", "GZMB", "CTLA4","HIF1A"))
	gene_lists.append(("PECAM1", "PTPRC", "ITGAX", "FOXP3", "CDH1"))
	gene_lists.append(("CTLA4", "GZMB", "CD8A", "HIF1A"))
	gene_lists.append(("PLK1", "PDCD1", "MKI67", "KRT8"))
	'''

	'''
	gene_lists.append(tuple(read_list("q1.EA.more.txt")))
	gene_lists.append(tuple(read_list("q2.EA.more.txt")))
	gene_lists.append(tuple(read_list("q3.EA.more.txt")))
	gene_lists.append(tuple(read_list("q4.EA.more.txt")))
	gene_lists.append(tuple(read_list("q5.EA.more.txt")))
	gene_lists.append(tuple(read_list("q6.EA.more.txt")))
	'''
	#gene_lists.append(("CD8A", "CD3D", "TRBC2", "LAG3", "HAVCR2", "CXCL13", "GZMB", "BATF", "CCL3", "CSF1"))
	gene_lists.append(("CD8A", "CD3D", "TRBC2", "LAG3", "HAVCR2", "CXCL13", "GZMB", "BATF", "CCL3", "CSF1", "TIGIT"))
	#gene_lists.append(("PTMS", "CAMK1", "HAVCR2", "CDK1", "CSF1", "GZMB", "PDCD1", "ITGAE", "MKI67", "IL13", "TNFRSF18", "IRF4", "TOX", "CCL3", "CXCR6", "GZMB", "FASLG", "TRBC2", "CD8A"))

	map_gene = {}
	for ig,g in enumerate(genes):
		map_gene[g] = ig

	averages = {}
	for ig,aG in enumerate(gene_lists):
		gene_ids = []
		for gx in aG:
			if not gx in map_gene:
				continue
			gene_ids.append(map_gene[gx])
		gene_ids = np.array(gene_ids)
		avg = np.mean(mat[gene_ids, :], axis=0)
		averages[ig] = avg
		#sc = axn.flat[ct].scatter(Xcen[:,0], Xcen[:,1], s=dot_size, c=avg, edgecolors=None, cmap="Reds", vmin=v1, vmax=v2)

	return gene_lists, averages, Xcen

if __name__=="__main__":
	choice = "ea"	
	#choice = "aa"	
	t_list = read_list("paper.%s.list" % choice)
	#t_list = read_list("curated.%s.list" % choice)
	#t_list = read_list("all.%s.list" % choice)

	list_g, list_avg, list_Xcen = [], [], []
	for gsm in t_list:
		glists, averages, Xcen = do_one_sample(gsm)
		list_g.append(glists)
		list_avg.append(averages)
		list_Xcen.append(Xcen)
	
	'''
	dot_size = 5
	ncol = 5
	nrow = int(10 / ncol)
	size_factor = 3
	if 10%ncol>0:
		nrow+=1
	print(nrow, ncol)
	f, axn = plt.subplots(nrow, ncol, figsize=(ncol * size_factor, nrow * size_factor))
	plt.subplots_adjust(hspace=0.1, wspace=0.1)
	'''
	ct = 0

	query_id = int(sys.argv[1]) - 1
	for ind in range(len(t_list)):
		Xcen = list_Xcen[ind]
		avg = list_avg[ind][query_id]
		'''
		if choice=="ea":
			if ind==0 or ind==6:
				avg = avg + 0.2
		'''
		v1 = 0
		v2 = 1.5
		#sc = axn.flat[ct].scatter(Xcen[:,0], Xcen[:,1], s=dot_size, c=avg, edgecolors=None, cmap="Reds", vmin=v1, vmax=v2)
		#axn.flat[ct].set_facecolor("white")
		#f.colorbar(sc, ax=axn.flat[ct])
		#axn.flat[ct].axis("off")
		#cutoff=1.0
		#cutoff=0.85
		cutoff=0.5
		mx = np.where(avg>cutoff)[0]
		print(ind, choice, mx.shape[0], Xcen.shape[0], mx.shape[0] / Xcen.shape[0])
		ct+=1
	#plt.savefig("all.EA.metagene.q%d.png" % (query_id+1))
	#plt.savefig("all.AA.metagene.q%d.png" % (query_id+1))

		

Running Command

python3 stats_across_tumor_Tcellexh.py 1

Results

0 aa 0 1289 0.0
1 aa 100 1654 0.060459492140266025
2 aa 2 1593 0.0012554927809165098
3 aa 0 1109 0.0
4 aa 0 1949 0.0
5 aa 0 1554 0.0
6 aa 0 1058 0.0
7 aa 0 1127 0.0
8 aa 44 2117 0.020784128483703354
9 aa 76 1178 0.06451612903225806

0 ea 13 3116 0.00417201540436457
1 ea 288 1511 0.19060225016545335
2 ea 0 844 0.0
3 ea 3 772 0.0038860103626943004
4 ea 14 1264 0.011075949367088608
5 ea 3 1325 0.0022641509433962265
6 ea 10 2088 0.004789272030651341
7 ea 14 850 0.01647058823529412
8 ea 0 558 0.0
9 ea 399 1518 0.2628458498023715