import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import os
import glob
import Dataload
from tqdm import tqdm
import utils
# Seed NumPy's global random generator at import time so that the np.random
# draws made by the functions in this module follow a fixed sequence.
np.random.seed(42)

def Calc_Coefficients(standard, regression_spectrum):
    """Fit ``regression_spectrum`` as a linear combination of ``standard``.

    The Moore-Penrose pseudo-inverse of ``standard`` is multiplied by
    ``regression_spectrum``; the product is returned as the coefficients.
    """
    return np.dot(np.linalg.pinv(standard), regression_spectrum)

def has_duplicate(vector):
    """Return True when ``vector`` holds at least one repeated value.

    The number of distinct values found by ``np.unique`` is compared with
    the length of the first axis of ``vector`` (intended for 1-D arrays).
    """
    distinct_count = len(np.unique(vector))
    return distinct_count != vector.shape[0]


def EvaluateEnergyPoint(Standard_matrix, StandardEnergy, EnergyPoint, TryNum, components_num):
    """Estimate the coefficient-recovery error for one set of energy points.

    For each of ``TryNum`` trials, ``components_num`` distinct columns of
    ``Standard_matrix`` are picked at random, resampled onto ``EnergyPoint``
    with ``Dataload.calc_interpolate``, mixed with random coefficients that
    sum to one, perturbed with Gaussian noise, and fitted again with
    ``Calc_Coefficients``.

    Parameters
    ----------
    Standard_matrix : 2-D array
        One standard spectrum per column.
    StandardEnergy : 1-D array
        Energy axis passed to ``Dataload.calc_interpolate`` together with
        each standard column.
    EnergyPoint : 1-D array
        Energy points at which the standards are evaluated.
    TryNum : int
        Number of random trials.
    components_num : int
        Number of distinct standards mixed per trial; must satisfy
        ``1 <= components_num <= Standard_matrix.shape[1]``.

    Returns
    -------
    numpy.ndarray
        One value per trial: the root-mean-square difference between the
        fitted and the true mixing coefficients.

    Raises
    ------
    ValueError
        If ``components_num`` is outside the valid range.  Without this
        check a too-large value would make the distinct-index search loop
        forever.
    """
    standard_num = Standard_matrix.shape[1]
    if not 1 <= components_num <= standard_num:
        raise ValueError(
            "components_num must be between 1 and the number of standards "
            "({}), got {}".format(standard_num, components_num)
        )

    loss_list = []
    for _ in range(TryNum):

        # Rejection sampling: redraw until every chosen column is distinct.
        # Drawing before the first check keeps the indices integer-typed even
        # when components_num == 1.
        index_num = np.random.randint(0, standard_num, components_num)
        while has_duplicate(index_num):
            index_num = np.random.randint(0, standard_num, components_num)
        standard_use = Standard_matrix[:, index_num]

        # Evaluate each selected standard at the requested energy points.
        standard_use_interpolate = np.zeros((EnergyPoint.shape[0], standard_use.shape[1]))
        for column in range(components_num):
            standard_use_interpolate[:, column] = Dataload.calc_interpolate(
                StandardEnergy, standard_use[:, column], EnergyPoint)

        # Random mixing fractions, normalised to sum to one.
        coefficients = np.random.uniform(0, 1, components_num)
        coefficients = coefficients / np.sum(coefficients)

        regression_spectrum = np.dot(standard_use_interpolate, coefficients)

        # The value returned by utils.spectrum2mutnoise is used as the
        # per-point standard deviation of the added Gaussian noise.
        regression_spectrum_noise = utils.spectrum2mutnoise(regression_spectrum)
        noise_term = np.random.normal(0, regression_spectrum_noise, regression_spectrum_noise.shape[0])

        regression_spectrum = regression_spectrum + noise_term

        coefficients_result = Calc_Coefficients(standard_use_interpolate, regression_spectrum)

        loss = np.power(coefficients_result - coefficients, 2)

        loss_list.append(np.sqrt(np.mean(loss)))

    return np.array(loss_list)


def Evaluate_each_energy(Standard_matrix, StandardEnergy, EnergyPoint, initial_sampling_num, TryNum, components_num, save = True, save_path = 0):
    """Evaluate the fitting error for growing prefixes of ``EnergyPoint``.

    Starting with the first ``initial_sampling_num`` entries of
    ``EnergyPoint`` and adding one more entry per step, the mean of the
    losses returned by ``EvaluateEnergyPoint`` is recorded for every prefix
    length from ``initial_sampling_num`` up to ``EnergyPoint.shape[0] - 1``.

    Parameters
    ----------
    Standard_matrix, StandardEnergy, TryNum, components_num
        Forwarded unchanged to ``EvaluateEnergyPoint``.
    EnergyPoint : 1-D array
        Ordered sampling points; prefixes of this array are evaluated.
    initial_sampling_num : int
        Length of the first prefix.
    save : bool
        When true, the results are written as CSV to ``save_path``.
    save_path : str, os.PathLike or writable file-like object
        CSV destination; required when ``save`` is true (the default ``0``
        is not a usable destination).

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The prefix lengths and the matching mean losses.

    Raises
    ------
    ValueError
        If ``save`` is true and ``save_path`` is neither a path nor an
        object with a ``write`` method.
    """
    if save:
        # Check the destination before the (potentially long) evaluation so a
        # bad path cannot throw the results away at the very end.
        if isinstance(save_path, (str, os.PathLike)):
            parent_dir = os.path.dirname(os.fspath(save_path))
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)
        elif not hasattr(save_path, "write"):
            raise ValueError(
                "save=True requires save_path to be a file path or a writable "
                "file-like object, got {!r}".format(save_path)
            )

    rmse_RandomFitting = []
    sampling_num = []

    for i in tqdm(range(EnergyPoint.shape[0] - initial_sampling_num)):
        Energy_use = EnergyPoint[:initial_sampling_num + i]
        #Energy_use = np.linspace(7076.2, 7181.2, initial_sampling_num + i) #Use to evaluate uniform step sampling

        loss = EvaluateEnergyPoint(Standard_matrix, StandardEnergy, Energy_use, TryNum, components_num)

        rmse_RandomFitting.append(np.mean(loss))
        sampling_num.append(initial_sampling_num + i)

    if save:
        df = pd.DataFrame()
        df["SamplingNum"] = np.array(sampling_num)
        df["Mean_RandomFitting"] = np.array(rmse_RandomFitting)

        df.to_csv(save_path)

    return np.array(sampling_num), np.array(rmse_RandomFitting)


def TryOneEvaluate(path, components_num, save_path, *,
                   path_spectra="LinearRegressionStandard.csv",
                   path_energy="MDRStandard_energy.csv",
                   initial_sampling_num=2, TryNum=10):
    """Load the input CSV files and run one evaluation, saving the result.

    Parameters
    ----------
    path : str
        CSV file whose "SamplingPoints" column lists the energy points.
    components_num : int
        Number of standards mixed per trial.
    save_path : str
        Destination CSV for the evaluation results.
    path_spectra : str, keyword-only
        CSV file with the standard spectra; every column after the first is
        used as one standard.
    path_energy : str, keyword-only
        CSV file whose "Energy" column is the energy axis of the standards.
    initial_sampling_num : int, keyword-only
        Number of sampling points used in the first evaluation step.
    TryNum : int, keyword-only
        Number of random trials per evaluation step.

    The keyword-only parameters default to the values that were previously
    hard-coded, so existing three-argument calls behave as before.
    """
    # Drop the first CSV column and keep the remaining ones as the standards.
    Standard_matrix = pd.read_csv(path_spectra).values[:, 1:]
    print(Standard_matrix.shape)

    StandardEnergy = pd.read_csv(path_energy)["Energy"].values

    EnergyPoint = pd.read_csv(path)["SamplingPoints"].values

    Evaluate_each_energy(Standard_matrix, StandardEnergy, EnergyPoint,
                         initial_sampling_num, TryNum, components_num,
                         save=True, save_path=save_path)



if __name__ == "__main__":
    # Run the evaluation once per mixture size, writing each result to its
    # own CSV file inside save_folder.
    save_folder = "results/Eval"
    path_folder = "results/OptimalPoints/SamplingPoints.csv"

    runs = (
        (2, "/Components2.csv"),
        (3, "/Components3.csv"),
        (4, "/Components4.csv"),
        (5, "/Components5.csv"),
    )
    for components_num, file_suffix in runs:
        save_path = save_folder + file_suffix
        TryOneEvaluate(path_folder, components_num, save_path)

