import numpy as np
import matplotlib.pyplot as plt
from pandas import *
import pandas as pd 
from patients import *
from sklearn.mixture import GaussianMixture

class ROI:
	"""Group ROI columns by lobe and compute per-lobe z-scores across patients.

	Every lobe (temporal, MTL, parietal, occipital, frontal) has an ``LH`` and
	an ``RH`` variant (presumably left/right hemisphere). For each variant the
	instance keeps:

	* a set of column names that belong to it (``<lobe>ROIs_LH`` / ``_RH``),
	  filled by :meth:`readReference`;
	* a list of per-patient mean values (``<lobe>_LH`` / ``_RH``), filled by
	  :meth:`processPatientLocal`, one entry per row that had at least one
	  matching column.
	"""

	def __init__(self):
		# ROIs in sets, to indicate the column names that are in these lobes
		self.temporalROIs_LH = set()
		self.temporalROIs_RH = set()
		self.MTLROIs_LH = set()
		self.MTLROIs_RH = set()
		self.parietalROIs_LH = set()
		self.parietalROIs_RH = set()
		self.occipitalROIs_LH = set()
		self.occipitalROIs_RH = set()
		self.frontalROIs_LH = set()
		self.frontalROIs_RH = set()

		# Arrays used to store the cells for each lobe (Global)
		self.temporal_LH = []
		self.temporal_RH = []
		self.MTL_LH = []
		self.MTL_RH = []
		self.parietal_LH = []
		self.parietal_RH = []
		self.occipital_LH = []
		self.occipital_RH = []
		self.frontal_LH = []
		self.frontal_RH = []

	def _lobeTable(self):
		"""Return ``(ROI-name set, per-patient means list)`` pairs, one per lobe variant."""
		return (
			(self.temporalROIs_LH, self.temporal_LH),
			(self.temporalROIs_RH, self.temporal_RH),
			(self.MTLROIs_LH, self.MTL_LH),
			(self.MTLROIs_RH, self.MTL_RH),
			(self.parietalROIs_LH, self.parietal_LH),
			(self.parietalROIs_RH, self.parietal_RH),
			(self.occipitalROIs_LH, self.occipital_LH),
			(self.occipitalROIs_RH, self.occipital_RH),
			(self.frontalROIs_LH, self.frontal_LH),
			(self.frontalROIs_RH, self.frontal_RH),
		)

	def removeNan(self, arr):
		"""Return the non-NaN entries of the NumPy array ``arr`` as a 1-D array."""
		return arr[~np.isnan(arr)]

	# Patient 1153 has NaNs, we omit them from this work
	def ridNan(self, arr, index=1153):
		"""Drop the entry at position ``index`` and any NaNs from ``arr``.

		The removal is done on a copy, so the caller's list (typically one of
		the per-lobe lists stored on this instance) is left untouched and the
		method can safely be called more than once on the same data.

		Args:
			arr: sequence of per-patient values.
			index: position to discard before NaN filtering (default 1153).

		Returns:
			A NumPy array of shape ``(n, 1)`` with the remaining values.

		Raises:
			IndexError: if ``index`` is outside ``arr``.
		"""
		remaining = list(arr)
		remaining.pop(index)
		return self.removeNan(np.array(remaining)).reshape(-1, 1)

	# Debugging purposes
	def printGlobalDistribution(self):
		"""Print every per-lobe list of patient means."""
		print("temporal_LH ", self.temporal_LH)
		print("temporal_RH", self.temporal_RH)
		print("MTL_LH ", self.MTL_LH)
		print("MTL_RH ", self.MTL_RH)
		print("parietal_LH", self.parietal_LH)
		print("parietal_RH", self.parietal_RH)
		print("occipital_LH", self.occipital_LH)
		print("occipital_RH", self.occipital_RH)
		print("frontal_LH", self.frontal_LH)
		print("frontal_RH", self.frontal_RH)

	def printROIList(self):
		"""Print every per-lobe set of ROI column names."""
		print("temporalROIs_LH ", self.temporalROIs_LH)
		print("temporalROIs_RH", self.temporalROIs_RH)
		print("MTL_ROIs_LH", self.MTLROIs_LH)
		print("MTL_ROIs_RH", self.MTLROIs_RH)
		print("parietalROIs_LH", self.parietalROIs_LH)
		print("parietalROIs_RH", self.parietalROIs_RH)
		print("occipitalROIs_LH", self.occipitalROIs_LH)
		print("occipitalROIs_RH", self.occipitalROIs_RH)
		print("frontalROIs_LH", self.frontalROIs_LH)
		print("frontalROIs_RH", self.frontalROIs_RH)

	@staticmethod
	def visualization(arr):
		"""Show a histogram of ``arr``.

		Declared static because it takes no ``self``; this keeps
		``ROI.visualization(arr)`` working and also allows calling it on an
		instance.
		"""
		plt.hist(np.array(arr))
		plt.show()

	def processPatientLocal(self, row):
		"""Average one patient's values per lobe and append the means.

		Args:
			row: mapping of column name to value (a pandas Series or a dict).

		For every lobe variant, the values of the columns listed in its ROI
		set are averaged with ``np.mean`` and appended to the matching list.
		A variant with no matching column in ``row`` gets no entry. A column
		registered in several sets (``readReference`` puts bilateral ROIs in
		both the LH and the RH set) contributes to each of them.
		"""
		table = self._lobeTable()
		buckets = [[] for _ in table]

		for key, value in row.items():
			name = str(key)
			# Independent checks rather than an elif chain, so a bilateral
			# ROI is counted for both hemispheres instead of only the first.
			for (roi_names, _), bucket in zip(table, buckets):
				if name in roi_names:
					bucket.append(value)

		for (_, lobe_means), bucket in zip(table, buckets):
			if bucket:
				lobe_means.append(np.mean(bucket))

	# read-in patients file and process it
	def readPatientsFile(self, file):
		"""Read the patients CSV, process every row, and return its ``RID`` column as a list."""
		patients = read_csv(file)
		for _, row in patients.iterrows():
			self.processPatientLocal(row)
		return patients["RID"].to_list()

	# read in the reference
	def readReference(self, file):
		"""Fill the ROI-name sets from the reference CSV.

		The first data row of each column holds a label: a lobe name
		(``Frontal``, ``Parietal``, ``Occipital``, ``Temporal``, ``MTL``)
		registers the column in both the LH and RH set of that lobe, while an
		``LH_`` / ``RH_`` prefixed label registers it in that set only.
		Columns with any other label are ignored.
		"""
		targets = {}
		for lobe, lh_set, rh_set in (
			("Frontal", self.frontalROIs_LH, self.frontalROIs_RH),
			("Parietal", self.parietalROIs_LH, self.parietalROIs_RH),
			("Occipital", self.occipitalROIs_LH, self.occipitalROIs_RH),
			("Temporal", self.temporalROIs_LH, self.temporalROIs_RH),
			("MTL", self.MTLROIs_LH, self.MTLROIs_RH),
		):
			targets[lobe] = (lh_set, rh_set)
			targets["LH_" + lobe] = (lh_set,)
			targets["RH_" + lobe] = (rh_set,)

		data = read_csv(file)
		for key in data.keys():
			label = data[key][0]
			# Only string labels can match; anything else (e.g. NaN) is skipped.
			if not isinstance(label, str):
				continue
			for roi_set in targets.get(label, ()):
				roi_set.add(key)

	# compute the mean and standard deviation for current lobe for population
	def gaussianMix(self, arr):
		"""Fit a two-component Gaussian mixture and describe its lower component.

		Args:
			arr: samples to fit; assumed to be of shape ``(n_samples, 1)``
				as produced by :meth:`ridNan`.

		Returns:
			``(mean, std)`` of the component whose mean is smaller (the first
			component on a tie).
		"""
		gm = GaussianMixture(n_components=2, random_state=0).fit(arr)
		means = gm.means_
		covs = gm.covariances_
		idx = 0 if means[0][0] <= means[1][0] else 1
		# With the default "full" covariance type, covariances_ has shape
		# (n_components, n_features, n_features), i.e. (2, 1, 1) here. The
		# variance of component idx is therefore covs[idx][0][0]; indexing
		# covs[idx][idx] goes out of bounds when idx == 1.
		return means[idx][0], np.sqrt(covs[idx][0][0])

	# Compute the z_score for each person
	def z_score(self, arrGlobal, gaussMean, gaussSTD):
		"""Return ``(arrGlobal - gaussMean) / gaussSTD``."""
		return (arrGlobal - gaussMean) / gaussSTD

	def lobeCompute(self, arrGlobal, indices):
		"""Compute z-scores for one lobe and drop the unwanted patients.

		Args:
			arrGlobal: per-patient means of one lobe variant.
			indices: positions to delete from the z-scores; they refer to the
				data after :meth:`ridNan` has been applied.

		Returns:
			1-D NumPy array of z-scores relative to the lower mixture
			component (``np.delete`` without an axis flattens its input).
		"""
		values = self.ridNan(arrGlobal)
		gaussM, gaussSTD = self.gaussianMix(values)
		return np.delete(self.z_score(values, gaussM, gaussSTD), indices)


def getIndices(l1, l2):
	"""Split ``l1`` by membership in ``l2``.

	Args:
		l1: sequence of items (patient RIDs in this file).
		l2: collection of items to keep.

	Returns:
		``(indices, rids)`` where ``indices`` are the positions in ``l1``
		whose item is not in ``l2`` and ``rids`` are the items of ``l1`` that
		are in ``l2``, in their original order.
	"""
	lookup = l2
	# A set makes each membership test O(1). It is only built for plain
	# lists/tuples of hashable items; any other container keeps its own
	# ``in`` semantics.
	if isinstance(l2, (list, tuple)):
		try:
			lookup = set(l2)
		except TypeError:
			# Unhashable entries: fall back to scanning l2 directly.
			lookup = l2

	indices = []
	rids = []
	for position, rid in enumerate(l1):
		if rid in lookup:
			rids.append(rid)
		else:
			indices.append(position)
	return indices, rids

def main():
	"""Build the per-lobe z-score table and write it to ``rois_zscores.csv``.

	Reads the ROI reference and the patients file, removes the patients that
	are not returned by ``sharedpatient()``, and stores one row per remaining
	RID with one column per lobe variant.
	"""
	roi = ROI()
	roi.readReference("ROIs.csv")
	### TODO: Need to change this to local directory before proceeding
	rid_list = roi.readPatientsFile("UCBERKELEYAV1451_11_16_21.csv")
	# Drop the RID at position 1153 so the RID list lines up with the lobe
	# data, from which ROI.ridNan removes the same position.
	rid_list.pop(1153)
	# Positions of patients not shared between SNP and image data, plus the
	# RIDs that are kept.
	drop_positions, kept_rids = getIndices(rid_list, sharedpatient())

	# Ordering is Temporal LH/RH, Parietal LH/RH, Occipital LH/RH, Frontal LH/RH, MTL LH/RH
	lobes = [
		("Temporal LH", roi.temporal_LH),
		("Temporal RH", roi.temporal_RH),
		("Parietal LH", roi.parietal_LH),
		("Parietal RH", roi.parietal_RH),
		("Occipital LH", roi.occipital_LH),
		("Occipital RH", roi.occipital_RH),
		("Frontal LH", roi.frontal_LH),
		("Frontal RH", roi.frontal_RH),
		("MTL LH", roi.MTL_LH),
		("MTL RH", roi.MTL_RH),
	]
	score_columns = [roi.lobeCompute(values, drop_positions) for _, values in lobes]

	df = pd.DataFrame(np.column_stack(score_columns))
	df.columns = [label for label, _ in lobes]
	df.index = kept_rids
	df.to_csv("rois_zscores.csv")

# Run the pipeline only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
	main()


