from random import choices, seed
import jsonlines
from collections import defaultdict
import pickle
import numpy as np
import scipy
from scipy import stats
from typing import *
from matplotlib import pyplot as plt
import seaborn as sns
import sys

from os.path import dirname, realpath

# NOTE: realpath(".") resolves the current working directory, not the
# location of this file, so the path computed below depends on where the
# interpreter was launched from.
filepath = realpath(".")

# Walk three directory levels up from the working directory.
dir_of_file = dirname(filepath)
parent_dir_of_file = dirname(dir_of_file)
parents_parent_dir_of_file = dirname(parent_dir_of_file)

# Insert that ancestor directory at position 1 of the import search path
# before the `soe` imports further down are executed.
# presumably this is the directory that contains the `soe` package — TODO confirm
sys.path.insert(1, f'{parents_parent_dir_of_file}')
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
from typing import *
from collections import defaultdict,Counter
from soe.testing import StochasticOrderTesting
from soe.meanrisk import MRM
from soe.mvtesting import MVStochasticOrderTesting






def get_samples(og_dict, samples, iterations=2, population_size=5000):
    """Draw reproducible resamples (with replacement) of every score array.

    For iteration ``i`` the module-level ``random`` generator is re-seeded
    with ``i`` and ``samples`` indices are drawn with replacement from
    ``range(population_size)``.  The same index list is then applied to every
    array found in the two-level mapping, so all arrays of one iteration stay
    aligned with each other.

    Args:
        og_dict: Two-level mapping ``{outer_key: {inner_key: sequence}}``.
            Each sequence is converted with ``np.array`` before indexing.
        samples: Number of indices drawn per iteration.
        iterations: Number of resampled copies to produce; iteration ``i``
            uses seed ``i``.
        population_size: Exclusive upper bound of the drawn indices.  The
            default of 5000 is the value that used to be hard-coded; pass the
            real array length when the arrays are shorter or longer.

    Returns:
        A list with ``iterations`` dicts that mirror the layout of
        ``og_dict`` and hold the resampled ``numpy`` arrays.

    Raises:
        IndexError: If a drawn index is out of bounds for one of the arrays,
            i.e. an array has fewer than ``population_size`` elements.
    """
    resampled = []
    for i in range(iterations):
        # Re-seeding the global generator makes iteration i reproducible.
        seed(i)
        indices = choices(range(population_size), k=samples)
        resampled.append({
            outer_key: {
                inner_key: np.array(values)[indices]
                for inner_key, values in inner_map.items()
            }
            for outer_key, inner_map in og_dict.items()
        })
    return resampled
        


#### Portfolio

def metric_cdf_normalizer(list_of_scores):
    """Map scores to their empirical CDF value over the pooled scores.

    All score collections are pooled, and every score ``x`` is replaced by
    the fraction of pooled scores that are ``<= x``.  The result is split
    back into one array per input collection, in the original order.

    Args:
        list_of_scores: Iterable of score collections (one per model).
            Entries may be ``numpy`` arrays or plain sequences; a 2-D array
            is treated as one collection per row.

    Returns:
        A list with one 1-D float array per input collection holding the
        normalized scores, each in ``(0, 1]``.

    Raises:
        ValueError: If ``list_of_scores`` is empty (nothing to concatenate).
    """
    # Coerce up front so plain lists work as well as arrays.
    arrays = [np.asarray(scores) for scores in list_of_scores]
    lengths = [arr.size for arr in arrays]
    pooled = np.concatenate(arrays, axis=None)
    pooled_sorted = np.sort(pooled)
    # One vectorized lookup; side='right' counts ties as "<= x".
    counts = np.searchsorted(pooled_sorted, pooled, side='right')
    cdf = counts / pooled.size
    # Splitting at the interior boundaries yields exactly one chunk per input.
    return np.split(cdf, np.cumsum(lengths)[:-1])



def get_portfolio(w, norm_list_metrics_model):
    """Aggregate several metric score arrays into one score per sample.

    The arrays are stacked along a new last axis and reduced along axis 1
    with a (optionally weighted) geometric mean.

    Args:
        w: Weights forwarded to ``gmean`` as ``weights``; ``None`` gives an
            unweighted geometric mean.
        norm_list_metrics_model: Sequence of equally shaped score arrays, one
            per metric.  Assumes each array is 1-D with one entry per sample,
            so the stacked array is (nsamples, nmetrics) — TODO confirm.

    Returns:
        The geometric mean across metrics, as returned by
        ``scipy.stats.mstats.gmean``.
    """
    # Stack metrics side by side: one row per sample, one column per metric.
    stacked = np.stack(norm_list_metrics_model, axis=-1)
    # The original author's note labels this aggregation an Archimedean copula.
    return stats.mstats.gmean(stacked, axis=1, dtype=None, weights=w)
    
    

def build_portfolio(data):
    """Build one aggregated "portfolio" score array per model.

    For every metric the scores of all models are CDF-normalized jointly with
    ``metric_cdf_normalizer``; the normalized metrics of each model are then
    combined with ``get_portfolio`` using no weights.

    Args:
        data: Mapping ``{model_name: {metric_name: scores}}``.  The metric
            names are taken from the first model, so every model must provide
            those metrics.  Models may have different numbers of samples,
            but within one model all metrics must have the same length
            because they are stacked together.

    Returns:
        Dict ``{model_name: {"portfolio": array}}`` in the key order of
        ``data``.

    Raises:
        IndexError: If ``data`` is empty.
        KeyError: If a model lacks one of the first model's metrics.
    """
    model_names = list(data.keys())
    metric_names = list(data[model_names[0]].keys())

    normalized_by_model = defaultdict(list)
    for metric in metric_names:
        # Keep one array per model instead of wrapping them all in a single
        # np.array(...): that wrapper fails when sample counts differ.
        raw_scores = [np.asarray(data[model][metric]) for model in model_names]
        normalized = metric_cdf_normalizer(raw_scores)
        for model, scores in zip(model_names, normalized):
            normalized_by_model[model].append(scores)

    return {
        model: {"portfolio": get_portfolio(None, metric_scores)}
        for model, metric_scores in normalized_by_model.items()
    }




##### Relative testing computation

def get_testing(data):
    """Run relative and absolute stochastic-order tests for every metric.

    The per-model dicts in ``data`` are regrouped by metric with
    ``combined_dict``; for each metric a ``StochasticOrderTesting`` object is
    built with 1000 bootstrap rounds, the relative test is run at
    ``alpha=0.05`` and the absolute test at ``alpha=0.05, tau=0.25``.

    Args:
        data: Mapping ``{model_name: {metric_name: scores}}``.

    Returns:
        Dict keyed by metric name.  Each value holds the raw rankings
        (``"fo"``, ``"so"``, ``"abs_fo"``, ``"abs_so"``), the relative
        rankings translated to model names (``"list_rfo"``, ``"list_rso"``)
        and the same names as 1-indexed DataFrames (``"df_rfo"``,
        ``"df_rso"``).  Presumably "fo"/"so" stand for first/second order —
        TODO confirm against the ``soe`` package.
    """
    model_names = list(data.keys())

    def _named_ranking(ranks):
        # Translate rank indices to model names and a DataFrame indexed from 1.
        names = [model_names[rank] for rank in ranks]
        frame = pd.DataFrame(names)
        frame.index = np.arange(1, len(frame) + 1)
        return names, frame

    results = {}
    for metric, samples in combined_dict(list(data.values())).items():
        tester = StochasticOrderTesting(samples, n_bootstrap=1000)

        # Relative test first, then absolute, on the same tester object.
        rel_first, rel_second = tester.compute_relative_test(alpha=0.05)
        abs_first, abs_second = tester.compute_absolute_test(alpha=0.05, tau=0.25)

        names_first, frame_first = _named_ranking(rel_first)
        names_second, frame_second = _named_ranking(rel_second)

        results[metric] = {
            "fo": rel_first,
            "so": rel_second,
            "abs_fo": abs_first,
            "abs_so": abs_second,
            "df_rfo": frame_first,
            "df_rso": frame_second,
            "list_rfo": names_first,
            "list_rso": names_second,
        }
    return results



def combined_dict(dictonaries):
    """Group the values of several dicts by key, as ``numpy`` arrays.

    Args:
        dictonaries: Iterable of dicts.

    Returns:
        A ``defaultdict(list)`` mapping every key seen to the list of its
        values (each converted with ``np.array``), in the order the dicts
        were given.
    """
    grouped = defaultdict(list)
    for mapping in dictonaries:
        for name, values in mapping.items():
            bucket = grouped[name]
            bucket.append(np.array(values))
    return grouped


def combine_dicts_mrm(dictonaries):
    """Merge several dicts into one dict of flat lists.

    Values that are ``list`` instances are spliced into the target list
    element by element; any other value is appended as a single item.

    Args:
        dictonaries: Iterable of dicts.

    Returns:
        A ``defaultdict(list)`` mapping every key seen to its merged list.
    """
    merged = defaultdict(list)
    for mapping in dictonaries:
        for name, item in mapping.items():
            # Only real lists are flattened; tuples, arrays etc. stay whole.
            merged[name].extend(item if isinstance(item, list) else [item])
    return merged




def get_MRM_scores(data_dict,p=None):
    """Compute the ``MRM`` result for every array in a two-level mapping.

    Args:
        data_dict: Mapping ``{outer_key: {metric: array}}``.
        p: Forwarded to ``MRM`` as ``p_default`` when truthy; a falsy value
            (``None``, ``0``) leaves ``MRM`` on its own default.

    Returns:
        Dict with the same two-level layout whose leaves are the values
        returned by ``MRM(...).compute()``.
    """
    def _score(array):
        # Only pass p_default when the caller supplied a truthy p.
        model = MRM(array, p_default=p) if p else MRM(array)
        return model.compute()

    return {
        outer_key: {metric: _score(array) for metric, array in per_metric.items()}
        for outer_key, per_metric in data_dict.items()
    }



def get_max(score_dict,model_names):
    """Name the model(s) holding the maximum in every column of each table.

    Args:
        score_dict: Mapping from a key to a 2-D table of scores with one row
            per model and one column per score.
        model_names: Model names, indexed by table row.

    Returns:
        Dict with the keys of ``score_dict``.  Each value is a flat list of
        winning model names, column by column; ties contribute every tied
        model in row order.
    """
    winners_by_key = {}
    for key, rows in score_dict.items():
        table = np.array(rows)
        column_best = np.amax(table, axis=0)
        # Row indices that reach the column maximum, one array per column.
        tied_rows = [
            np.flatnonzero(table[:, col] == best)
            for col, best in enumerate(column_best)
        ]
        winners_by_key[key] = [model_names[row] for row in np.concatenate(tied_rows)]
    return winners_by_key



def get_mv_testing(data,loss="logistic",abs_test=False,abs_tau=0.25,beta=8):
    """Rank models with a multivariate stochastic-order test.

    Builds an ``MVStochasticOrderTesting`` object from the values of ``data``
    (1000 bootstrap rounds, Sinkhorn and CUDA enabled, verbose) and runs
    either the absolute or the relative test at ``alpha=0.05``.

    Args:
        data: Mapping ``{model_name: scores}``; key order defines the index
            each model is referred to by in the returned ranking.
        loss: Forwarded to the tester as ``cost``.
        abs_test: Run the absolute test when true, otherwise the relative one.
        abs_tau: ``tau`` for the absolute test; unused for the relative test.
        beta: Forwarded to the tester inside ``cost_kwargs``.

    Returns:
        A DataFrame of model names in ranked order, indexed from 1.
    """
    model_names = list(data.keys())
    tester = MVStochasticOrderTesting(
        list(data.values()),
        n_bootstrap=1000,
        use_sinkhorn=True,
        cost=loss,
        verbose=True,
        use_cuda=True,
        cost_kwargs={'beta': beta},
    )

    if not abs_test:
        print(f"Starting Relative Testing")
        ranks = tester.compute_relative_test(alpha=0.05)
    else:
        print(f"Starting Absolute Testing with tau={abs_tau}")
        ranks = tester.compute_absolute_test(alpha=0.05, tau=abs_tau)

    ranking = pd.DataFrame([model_names[rank] for rank in ranks])
    # Ranks are reported starting at 1 rather than 0.
    ranking.index = np.arange(1, len(ranking) + 1)
    return ranking




def cdf_normalization(data,log=False):
    """CDF-normalize every metric jointly across models.

    For each metric the scores of all models are pooled and normalized with
    ``metric_cdf_normalizer``; optionally the natural log of the normalized
    values is taken.

    Args:
        data: Mapping ``{model_name: {metric_name: scores}}``.  The metric
            names are taken from the first model, so every model must provide
            those metrics.  Models may have different numbers of samples.
        log: When true, return ``np.log`` of the normalized scores.

    Returns:
        A ``defaultdict(dict)`` of the form
        ``{model_name: {metric_name: normalized_array}}`` in the key order of
        ``data``.

    Raises:
        IndexError: If ``data`` is empty.
        KeyError: If a model lacks one of the first model's metrics.
    """
    model_names = list(data.keys())
    metric_names = list(data[model_names[0]].keys())

    normalized_by_model = defaultdict(dict)
    for metric in metric_names:
        # Keep one array per model instead of wrapping them all in a single
        # np.array(...): that wrapper fails when sample counts differ.
        raw_scores = [np.asarray(data[model][metric]) for model in model_names]
        normalized = metric_cdf_normalizer(raw_scores)
        if log:
            # Log each model's array separately so unequal lengths still work.
            normalized = [np.log(scores) for scores in normalized]
        for model, scores in zip(model_names, normalized):
            normalized_by_model[model][metric] = scores
    return normalized_by_model

    

