import os
import csv
import matplotlib.pyplot as plt
import numpy as np
from sklearn.manifold import TSNE


# Problem identifiers looked up for each group: 500 consecutive integers,
# 11..510 for group A and 521..1020 for group B.
problem_listA = list(range(11, 511))

problem_listB = list(range(521, 1021))


# Directories whose entries are read as per-subject CSV files, one directory
# per group.
pathA = "../1.subject_measurement_data/Group_A_human_data_main"
pathB = "../1.subject_measurement_data/Group_B_human_data_main"

# os.listdir() returns entries in arbitrary, platform-dependent order.  The
# position of a file in these lists becomes its row index in the data arrays
# and its numeric label in the scatter plots, so sort the names to make the
# numbering reproducible across runs and machines.
# NOTE(review): sorting is lexicographic ("s10" sorts before "s2"); confirm
# that this matches the intended subject numbering.
file_listA = sorted(os.listdir(pathA))
file_listB = sorted(os.listdir(pathB))


def _ordered_comparison_signs(keys, values):
    """Return three +1/-1 signs comparing *values* after ordering by *keys*.

    The three values are reordered by descending key (equal keys keep their
    original left-to-right order), giving ``v0, v1, v2``.  The result is the
    list ``[v0 >= v2, v1 >= v2, v0 >= v1]`` with a true comparison encoded
    as ``1`` and a false one as ``-1``.
    """
    # A stable sort on the negated key is equivalent to repeatedly taking the
    # first maximum and removing it: ties stay in their original order.
    order = sorted(range(len(keys)), key=lambda idx: -keys[idx])
    v0, v1, v2 = (values[idx] for idx in order)
    return [1 if left >= right else -1
            for left, right in ((v0, v2), (v1, v2), (v0, v1))]


def output_vector(file_list_, problem_list, path):
    """Build one +1/-1 vector per file from CSV rows matching *problem_list*.

    Each file ``path/<name>`` is parsed as CSV.  For every problem id in
    *problem_list* (in that order), every row whose column 1 equals
    ``str(problem_id)`` contributes three entries: columns 3-5 are parsed as
    ``int`` and used as ordering keys, columns 15-17 are parsed as ``float``,
    and the floats, reordered by descending key, are compared pairwise (see
    ``_ordered_comparison_signs``).  Column numbers are 0-based.

    Args:
        file_list_: Iterable of file names located inside *path*.
        problem_list: Iterable of problem ids; each is compared to column 1
            via ``str()``.
        path: Directory containing the files.

    Returns:
        ``np.array`` built from the per-file vectors, one row per file in
        the order of *file_list_*.  It is 2-D only when every file yields
        the same number of entries.

    Raises:
        IndexError: If a matching row has fewer than 18 columns.
        ValueError: If a key or value cell of a matching row is not numeric.
    """
    whole_vector_set = []
    for file_name in file_list_:
        # Index the rows by problem id once so each lookup below is O(1)
        # instead of rescanning the whole file for every problem.
        rows_by_problem = {}
        # The context manager closes the file even if parsing fails;
        # newline='' is what the csv module expects for file objects.
        with open(os.path.join(path, file_name), 'r', newline='') as f:
            for row in csv.reader(f):
                # Blank lines parse as []; rows this short carry no id.
                if len(row) < 2:
                    continue
                rows_by_problem.setdefault(row[1], []).append(row)

        subject_vector = []
        for problem in problem_list:
            # Duplicate rows for one id each contribute, in file order.
            for row in rows_by_problem.get(str(problem), ()):
                keys = [int(row[col]) for col in (3, 4, 5)]
                values = [float(row[col]) for col in (15, 16, 17)]
                subject_vector.extend(_ordered_comparison_signs(keys, values))

        whole_vector_set.append(subject_vector)
    return np.array(whole_vector_set)

# Encode every file of each group as a +1/-1 vector (one row per file).
whole_vector_setA = output_vector(file_listA, problem_listA, pathA)
whole_vector_setB = output_vector(file_listB, problem_listB, pathB)

# Centre each column on zero by subtracting its mean taken over the rows.
mean_centered_dataA = whole_vector_setA - whole_vector_setA.mean(axis=0)
mean_centered_dataB = whole_vector_setB - whole_vector_setB.mean(axis=0)



########################

# Each group gets its own estimator, configured identically: a 2-D embedding
# with a fixed random seed.
tsne_settings = dict(n_components=2, random_state=0)
tsne = TSNE(**tsne_settings)
tsne2 = TSNE(**tsne_settings)

# Fit and embed the two groups independently of each other.
projected_dataA = tsne.fit_transform(mean_centered_dataA)
projected_dataB = tsne2.fit_transform(mean_centered_dataB)



def _draw_projection_panel(points, first_label, title, tag):
    """Scatter *points* on the current axes and number them from *first_label*.

    *points* is indexed as ``points[:, 0]`` / ``points[:, 1]``; each point is
    annotated in gray with consecutive integers starting at *first_label*.
    *title* is the panel title and *tag* the text drawn at data
    coordinates (-4, -1).
    """
    plt.scatter(points[:, 0], points[:, 1], )
    for label, point in enumerate(points, start=first_label):
        plt.annotate(str(label),
                     xy=(point[0], point[1]), xycoords='data',
                     xytext=(5, -5), textcoords='offset points', color='gray')

    plt.title(title)
    plt.xlabel('Component 1')
    plt.ylabel('Component 2')
    plt.grid(True)
    plt.text(-4, -1, tag, fontsize=12, ha='center')


plt.figure(figsize=(10, 4))

# Left panel: group A, labels start at 1.
plt.subplot(1, 2, 1)
plt.subplots_adjust(left=0.07, right=0.96)
_draw_projection_panel(projected_dataA, 1,
                       '2D Projection of SPV using t-SNE (Group A)', '(a)')

print("projected B", len(projected_dataB))

# Right panel: group B, labels start at 63.
# NOTE(review): the offset 62 is presumably the number of group-A points, so
# that numbering continues across groups -- confirm against the data.
plt.subplot(1, 2, 2)
_draw_projection_panel(projected_dataB, 1 + 62,
                       '2D Projection of SPV using t-SNE (Group B)', '(b)')

plt.subplots_adjust(wspace=0.2)

plt.show()

############################################


# Per-column variance of each group's centred data, largest first.
varA = sorted(np.var(mean_centered_dataA, axis=0).tolist(), reverse=True)
varB = sorted(np.var(mean_centered_dataB, axis=0).tolist(), reverse=True)

# One stacked panel per group, sharing the same labels and y-range.
fig, axs = plt.subplots(2, figsize=(5, 4))
panel_titles = ('Variance of each component (Group A)',
                'Variance of each component (Group B)')
for ax, variances, panel_title in zip(axs, (varA, varB), panel_titles):
    # Take the x positions from the data itself: a hard-coded length makes
    # plot() fail with a shape mismatch whenever the vector length changes.
    ax.plot(range(len(variances)), variances, marker='', linestyle='-', )
    ax.set_title(panel_title)
    ax.set_xlabel('Component (Descending order)')
    ax.set_ylabel('Variance')
    ax.set_ylim(0.3, 1.1)

plt.tight_layout()
plt.show()

