import numpy as np
import pandas as pd
import torch
from scipy.spatial.distance import *
import torch.nn as nn
import torch.nn.functional as F
# print(feature.shape)


class PROCESS:
    """Build pairwise column-similarity matrices and graph inputs.

    Every column of ``df2`` is compared with every other column using
    several ``scipy.spatial.distance`` metrics. The resulting square
    matrices are exported as a data dictionary (``total``) and as
    thresholded edge lists (``save``).

    NOTE(review): every ``*_similarity`` method drops the first and the last
    entry of both vectors before comparing them. That looks like a leftover
    from a layout with header/footer entries; it is preserved here so the
    results stay unchanged - confirm whether it is intended.
    """

    def __init__(self, df2, label, feature):
        """Store the inputs and start with empty similarity matrices.

        Args:
            df2: Column-indexable table whose iteration yields column keys
                (the script passes a pandas DataFrame).
            label: Sequence of labels; ``total`` recognises the entries
                ``[0, 1]`` and ``[1, 0]``.
            feature: Stored untouched and returned by ``total`` under the
                ``'feature'`` key.
        """
        self.df2 = df2
        self.label = label
        self.feature = feature
        # Similarity threshold: ``save`` links two columns when their
        # similarity is strictly greater than this value.
        self.stand = 0.6
        self.cosine = []
        self.euclidean = []
        self.braycurtis = []
        self.Correlation = []
        self.canberra = []

    @staticmethod
    def _inner_values(values):
        """Return ``values`` without its first and last entry, as a list."""
        return [values[i] for i in range(1, len(values) - 1)]

    def _distance(self, metric, first, second):
        """Apply the scipy ``metric`` to the trimmed ``first`` and ``second``."""
        return metric(self._inner_values(first), self._inner_values(second))

    def _pairwise(self, measure):
        """Return ``measure(col_i, col_j)`` for all column pairs as nested lists."""
        keys = list(self.df2)
        return [
            [measure(self.df2[row_key], self.df2[col_key]) for col_key in keys]
            for row_key in keys
        ]

    def _edge_index(self, similarity):
        """Return a 2 x E long tensor of pairs ``i < j`` with similarity > ``stand``."""
        sources = []
        targets = []
        size = len(similarity)
        for i in range(size):
            # Only the upper triangle is scanned, so each pair appears once.
            for j in range(i + 1, size):
                if similarity[i][j] > self.stand:
                    sources.append(i)
                    targets.append(j)
        return torch.tensor([sources, targets], dtype=torch.long)

    def cosine_similarity(self, first, second):
        """Return ``1 - cosine distance`` of the trimmed vectors."""
        return 1 - self._distance(cosine, first, second)

    def similar1(self):
        """Fill ``self.cosine`` with the pairwise cosine similarity matrix."""
        self.cosine = np.array(self._pairwise(self.cosine_similarity))

    def euclidean_similarity(self, first, second):
        """Return the Euclidean *distance* of the trimmed vectors.

        Despite the name this is a raw distance; ``similar2`` turns it into
        a similarity.
        """
        return self._distance(euclidean, first, second)

    def similar2(self):
        """Fill ``self.euclidean`` with ``1 - distance / max_distance``.

        The largest pairwise distance maps to similarity 0 and identical
        columns map to similarity 1.
        """
        # Iterate self.df2 (not a module-level ``df2``) so the class also
        # works when it is imported from another module.
        distances = self._pairwise(self.euclidean_similarity)

        largest = 0
        for row in distances:
            for value in row:
                # NaN never compares greater, so NaN distances are ignored.
                if value > largest:
                    largest = value

        # When every distance is zero there is nothing to scale by; dividing
        # by 1 keeps those entries at similarity 1 instead of producing NaN.
        scale = largest if largest > 0 else 1
        self.euclidean = np.array(
            [[1 - value / scale for value in row] for row in distances]
        )

    def braycurtis_similarity(self, first, second):
        """Return ``1 - Bray-Curtis distance`` of the trimmed vectors."""
        return 1 - self._distance(braycurtis, first, second)

    def similar3(self):
        """Fill ``self.braycurtis`` with the pairwise Bray-Curtis similarities."""
        self.braycurtis = np.array(self._pairwise(self.braycurtis_similarity))

    def Correlation_similarity(self, first, second):
        """Return ``1 - correlation distance`` of the trimmed vectors."""
        return 1 - self._distance(correlation, first, second)

    def similar4(self):
        """Fill ``self.Correlation`` with the pairwise correlation similarities."""
        # Uses the correlation metric; previously this silently recomputed
        # the Bray-Curtis matrix.
        self.Correlation = np.array(self._pairwise(self.Correlation_similarity))

    def canberra_similarity(self, first, second):
        """Return ``1 - Canberra distance`` of the trimmed vectors.

        The result is not clamped, so it can be negative.
        """
        return 1 - self._distance(canberra, first, second)

    def similar5(self):
        """Fill ``self.canberra`` with the pairwise Canberra similarities."""
        self.canberra = np.array(self._pairwise(self.canberra_similarity))

    def total(self):
        """Split the labels into train/test indices and bundle all outputs.

        In index order, the first ``int(0.8 * n)`` entries labelled
        ``[0, 1]`` and the first ``int(0.8 * m)`` entries labelled ``[1, 0]``
        go to the training set, where ``n`` counts the ``[0, 1]`` labels and
        ``m`` is the number of all remaining labels. Every other index goes
        to the test set, which is also used as the validation set. Only
        these two label values are handled.

        Returns:
            dict with the keys ``'label'``, ``'euclidean'``,
            ``'braycurtis'``, ``'correlation'``, ``'train_idx'``,
            ``'val_idx'``, ``'test_idx'`` and ``'feature'``.
        """
        first_class = [0, 1]
        second_class = [1, 0]

        first_total = sum(1 for entry in self.label if entry == first_class)
        first_quota = int(0.8 * first_total)
        second_quota = int(0.8 * (len(self.label) - first_total))
        print(first_quota, second_quota)

        train_idx = []
        first_taken = second_taken = 0
        for position, entry in enumerate(self.label):
            if entry == first_class and first_taken < first_quota:
                first_taken += 1
                train_idx.append(position)
            elif entry == second_class and second_taken < second_quota:
                second_taken += 1
                train_idx.append(position)

        # Set membership keeps building the complement linear in len(label).
        in_train = set(train_idx)
        test_idx = [i for i in range(len(self.label)) if i not in in_train]

        train_idx = np.array(train_idx)
        test_idx = np.array(test_idx)
        print(test_idx)

        # Export the real correlation matrix once similar4() has filled it.
        # When it has not been computed, fall back to the Bray-Curtis matrix,
        # which is what was always stored under this key before.
        if len(self.Correlation) > 0:
            correlation_matrix = self.Correlation
        else:
            correlation_matrix = self.braycurtis

        data = {}
        data['label'] = np.array(self.label)
        data['euclidean'] = self.euclidean
        data['braycurtis'] = self.braycurtis
        data['correlation'] = correlation_matrix
        data['train_idx'] = train_idx
        data['val_idx'] = test_idx
        data['test_idx'] = test_idx
        data['feature'] = self.feature
        return data

    def save(self):
        """Write thresholded edge lists to ``edge_index{1,2,3}.pt``.

        The files hold the edges of the Euclidean, Bray-Curtis and
        Correlation matrices respectively, written to the current working
        directory with ``torch.save``. A matrix that has not been computed
        yields an empty edge list.
        """
        outputs = (
            (self.euclidean, 'edge_index1.pt'),
            (self.braycurtis, 'edge_index2.pt'),
            (self.Correlation, 'edge_index3.pt'),
        )
        for similarity, filename in outputs:
            torch.save(self._edge_index(similarity), filename)
if __name__ == '__main__':
    # Script entry point: load the series matrix, drop rows that are almost
    # entirely zero, derive one label per data column, build the similarity
    # matrices and write the edge lists plus a pickled data dictionary.
    import pickle

    from sklearn.preprocessing import LabelEncoder

    # A row is dropped when at least this many of its data columns are
    # below LOW_VALUE, or at least this many are exactly zero.
    MAX_LOW_COLUMNS = 120
    LOW_VALUE = 0.01

    def write_pkl(write_data, pkl_path):
        """Pickle ``write_data`` to ``pkl_path`` and close the file."""
        with open(pkl_path, 'wb') as handle:
            pickle.dump(write_data, handle)

    path = 'GSE185507_series_matrix.csv'
    df1 = pd.read_csv(path)

    encoder = LabelEncoder().fit(df1["!Sample_characteristics_ch1"])
    df1["sampleID_after"] = encoder.transform(df1["!Sample_characteristics_ch1"])

    # The data columns are everything except the first column and the
    # "sampleID_after" column appended above. Counting per row in one
    # vectorised pass replaces the per-cell positional row[i] lookups.
    values = df1.iloc[:, 1:-1]
    low_count = (values < LOW_VALUE).sum(axis=1)
    zero_count = (values == 0).sum(axis=1)
    keep = (low_count < MAX_LOW_COLUMNS) & (zero_count < MAX_LOW_COLUMNS)
    df1 = df1[keep].reset_index(drop=True)

    # NOTE: ``df2`` must stay a module-level name; PROCESS.similar2 has
    # historically read it as a global.
    df2 = df1.iloc[:, 1:-1]
    print(df2.shape)

    # Exactly one label per data column, so that ``label`` lines up with the
    # rows of ``feature``. Column names starting with 'n' get [0, 1], every
    # other column gets [1, 0]. (The labels used to be appended twice, which
    # made ``label`` twice as long as ``feature``.)
    label = []
    for column in df2.columns:
        if column.startswith('n'):
            label.append([0, 1])
        else:
            label.append([1, 0])

    # One row per data column of df2.
    feature = np.array(df2.values).transpose()

    path = 'data.pkl'
    a = PROCESS(df2, label, feature)
    # similar1 (cosine) and similar4 (correlation) are intentionally not run.
    a.similar2()
    a.similar3()
    data = a.total()

    a.save()
    write_pkl(data, path)

