#%%
from datasets import sine_curve
from stage3    import stage_embedding, evaluate_kendall_abs
from sklearn.manifold import TSNE
import time, numpy as np
from curve import RandomFourierCurve
from scipy.stats import rankdata
import umap
from experiment_utils import fiedler_permutation, spectral_ordering, gaussian_kernel
from pprint import pp
import pandas as pd

# Benchmark sweep: for every (dimension, noise level, sample size) setting,
# generate `n_runs` noisy point clouds sampled from a RandomFourierCurve and
# score how well five methods recover the sampling order.
n_runs = 10
samples = [500, 1000, 2000]
noise_stds = [.5, 1, 1.5, 2]
dims = [50, 100, 200]


def _order_estimators(sigma):
    """Return the benchmarked methods as an ordered ``{label: callable}`` dict.

    Each callable takes the data matrix ``X`` and returns that method's
    ordering estimate in the form passed to ``evaluate_kendall_abs``.
    ``sigma`` is forwarded as the ``sigma`` argument of ``fiedler_permutation``
    and ``spectral_ordering``; the other methods do not use it.
    The dict's insertion order fixes the column order of the score/runtime
    arrays filled in by the sweep below.
    """

    def umap_order(X):
        embedding = umap.UMAP(n_components=1).fit_transform(X)
        return rankdata(np.ravel(embedding))

    def fiedler_order(X):
        _, order = fiedler_permutation(X, sigma=sigma)
        return order

    def tsne_order(X):
        embedding = TSNE(n_components=1, perplexity=100).fit_transform(X)
        return rankdata(np.ravel(embedding))

    def recanati_order(X):
        return np.argsort(spectral_ordering(X, sigma=sigma))

    def stage_order(X):
        _, order = stage_embedding(X, k=50, pca_full_dim=True, embedding="linreg")
        return order

    return {
        "UMAP": umap_order,
        "Fiedler": fiedler_order,
        "t-SNE": tsne_order,
        "Recanati": recanati_order,
        "STAGE": stage_order,
    }


# results[d][noise_std][n_points][label] -> {"mean", "sd", "avg_run"}
results = {}
for d in dims:
    results[d] = {}
    for noise_std in noise_stds:
        results[d][noise_std] = {}
        # Kernel bandwidth handed to the two spectral methods.
        estimators = _order_estimators(sigma=np.sqrt(d) * noise_std)
        for n_points in samples:
            taus = np.empty((n_runs, len(estimators)))
            runtimes = np.empty((n_runs, len(estimators)))
            # perf_counter is monotonic, so elapsed times cannot be skewed by
            # wall-clock adjustments the way time.time() differences can.
            runs_start = time.perf_counter()
            for run in range(n_runs):
                curve = RandomFourierCurve(d=d, K=10, alpha=2.3, span=0.25, seed=run)
                # Rescale the curve with a curvature target of 2.0.
                smooth = curve.stretch_to_curvature(kappa_max=2.0)
                t, _ = smooth.unit_speed_grid(n_points)

                # Seed the shuffle and the noise per (run, d, n_points) so the
                # generated data is reproducible, like the curve itself.
                # NOTE: UMAP and t-SNE are still constructed without a
                # random_state, so their scores can vary between executions.
                rng = np.random.default_rng([run, d, n_points])
                rng.shuffle(t)
                X, true_order = smooth.c(t), rankdata(t, method='average')
                X += rng.normal(scale=noise_std, size=X.shape)

                for col, estimate in enumerate(estimators.values()):
                    start = time.perf_counter()
                    estimated_order = estimate(X)
                    # Stop the clock before scoring so the recorded runtime
                    # covers the method only, not the evaluation.
                    runtimes[run, col] = time.perf_counter() - start
                    taus[run, col] = evaluate_kendall_abs(true_order, estimated_order)
            print("N = ", n_points)
            print("Runs took ", time.perf_counter() - runs_start, " seconds")
            avg_tau = np.mean(taus, axis=0)
            print(avg_tau)
            std_tau = np.std(taus, axis=0)
            avg_run = np.mean(runtimes, axis=0)
            results[d][noise_std][n_points] = {
                label: {"mean": avg_tau[col], "sd": std_tau[col], "avg_run": avg_run[col]}
                for col, label in enumerate(estimators)
            }


# Report: for each (dimension, sample size) pair print two tables with one row
# per method and one column per noise level. The first shows
# "mean (sd)" of the score scaled by 100, the second the average runtime.
algorithms = ["UMAP", "Fiedler", "t-SNE", "Recanati", "STAGE"]
for d in dims:
    for n_points in samples:
        df1 = pd.DataFrame(index=algorithms, columns=noise_stds)
        df2 = pd.DataFrame(index=algorithms, columns=noise_stds)
        for noise_std in noise_stds:
            per_method = results[d][noise_std][n_points]
            for alg in algorithms:
                stats = per_method[alg]
                mean_pct = round(stats["mean"] * 100, 2)
                sd_pct = round(stats["sd"] * 100, 2)
                avg_runtime = round(stats["avg_run"], 4)
                df1.loc[alg, noise_std] = f"{mean_pct} ({sd_pct})"
                df2.loc[alg, noise_std] = f"{avg_runtime}"
        print("d =", d)
        print("N =", n_points)
        print(df1)
        print("Average Runtimes")
        print(df2)
