import numpy as np
import pandas as pd
from scipy.stats import skew, kurtosis
import os
from sklearn import metrics
import wandb

def get_skew_and_kurtosis(encodings, hypno):
    """Return the mean absolute skewness and mean kurtosis of the encodings.

    The rows of ``encodings`` are grouped by their label in ``hypno``. For
    every group, skewness and kurtosis are computed per column (``axis=0``)
    with ``scipy.stats.skew`` / ``scipy.stats.kurtosis`` using their default
    settings. The absolute skewness values and the kurtosis values are then
    averaged over all labels and columns.

    Args:
        encodings: 2-D array-like with one row per sample. Nested lists are
            accepted as well as ndarrays.
        hypno: 1-D array-like of labels, one per row of ``encodings``.

    Returns:
        A ``(mean_abs_skew, mean_kurtosis)`` tuple of scalars.
    """
    # Coerce both inputs: boolean-mask indexing with a trailing ``:`` only
    # works on ndarrays, so a plain nested list would otherwise raise.
    encodings = np.asarray(encodings)
    hypno = np.asarray(hypno)

    ks = []
    sks = []
    for label in np.unique(hypno):
        label_encodings = encodings[hypno == label, :]
        ks.append(kurtosis(label_encodings, axis=0))
        # Absolute value so that left- and right-skewed columns do not
        # cancel each other out in the final average.
        sks.append(np.abs(skew(label_encodings, axis=0)))
    return np.mean(sks), np.mean(ks)

# Load the per-hyperparameter metrics table and the matching encodings.
# NOTE(review): the metrics file carries a `.npy` extension but is parsed with
# `pd.read_csv` — confirm the file really is CSV text.
metrics_real_data = pd.read_csv('/mnt/home/tt1131/neighbor_vae_expts/results/metrics_hp_real_data_091723.npy')
# The encodings file stores a pickled object inside a 0-d array; `.item()`
# unwraps it. It is indexed below by hyperparameter string, and each value is
# iterated as a collection of per-repeat encodings.
encodings_real_data = np.load('/mnt/home/tt1131/neighbor_vae_expts/results/encodings_per_hp_real_data_091723.npy', allow_pickle=True).item()

# Fetch the test labels from the wandb dataset artifact.
run = wandb.init()
artifact = run.use_artifact('engellab/neighbor-vae/probe12_subject0_test0:v0', type='dataset')
artifact_dir = artifact.download()
test_hypno = np.load(os.path.join(artifact_dir, 'hypno.npy'))[0]

for i in range(metrics_real_data.shape[0]):
    # Rebuild the key under which this row's encodings were stored.
    query = list(metrics_real_data.loc[i, ['n_layers','layer_dims', 'kl', 'batch_size', 'lr']])
    query_str = "_".join(str(s) for s in query)
    encoding_repeats = encodings_real_data[query_str]

    silhouettes = []
    skews = []
    kurtosises = []
    for repeat in encoding_repeats:
        # Trim the labels to this repeat's length in a local variable.
        # Rebinding `test_hypno` itself would shorten it permanently, so a
        # later, longer repeat would be paired with too few labels.
        repeat_hypno = test_hypno[:len(repeat)]
        silhouettes.append(metrics.silhouette_score(repeat, repeat_hypno))
        sk, k = get_skew_and_kurtosis(repeat, repeat_hypno)
        skews.append(sk)
        kurtosises.append(k)

    # Summarise the repeats of this hyperparameter setting.
    metrics_real_data.loc[i, 'silhouette mean'] = np.mean(silhouettes)
    metrics_real_data.loc[i, 'silhouette std'] = np.std(silhouettes)
    metrics_real_data.loc[i,'skew mean'] = np.mean(skews)
    metrics_real_data.loc[i,'skew std'] = np.std(skews)
    metrics_real_data.loc[i,'kurtosis mean'] = np.mean(kurtosises)
    metrics_real_data.loc[i,'kurtosis std'] = np.std(kurtosises)

metrics_real_data.to_csv('metrics_real_data')
