


import argparse
import time
from random import choices, seed
from collections import defaultdict
import pickle
import numpy as np
import scipy
from scipy import stats
import sys
from typing import *
from collections import defaultdict,Counter
from copulae import EmpiricalCopula, pseudo_obs
import pandas as pd
import re


   
#### Portfolio

def metric_cdf_normalizer(list_of_scores):
    """Replace pooled scores by their empirical CDF values, keeping the grouping.

    All groups are pooled into one sample. Each score is mapped to the
    fraction of pooled scores that are less than or equal to it, and the
    result is split back into the original groups.

    Args:
        list_of_scores: Sequence of score collections, one per group (the
            caller passes one entry per model). Entries may be NumPy arrays
            or plain sequences and may differ in length.

    Returns:
        A list with one float array per input group, in input order.

    Raises:
        ValueError: If ``list_of_scores`` is empty (raised by
            ``np.concatenate``).
    """
    # Coerce each group so plain lists work as well as arrays.
    groups = [np.asarray(group) for group in list_of_scores]
    lengths = [group.size for group in groups]
    scores = np.concatenate(groups, axis=None)
    scores_sorted = np.sort(scores)
    # side='right' counts the pooled entries <= x, so tied scores share one
    # CDF value. searchsorted is vectorised: one call handles every score.
    ranks = np.searchsorted(scores_sorted, scores, side='right')
    cdf = ranks / scores.size
    # Split at the interior boundaries only, so no trailing empty chunk
    # has to be discarded afterwards.
    return np.split(cdf, np.cumsum(lengths)[:-1])



def get_portfolio(w, norm_list_metrics_model):
    """Collapse one model's normalised metric scores into a score per sample.

    Args:
        w: Weights forwarded to the geometric mean; ``None`` weighs every
            metric equally.
        norm_list_metrics_model: Sequence of equally-shaped score arrays,
            one per metric.

    Returns:
        The (optionally weighted) geometric mean taken along axis 1 of the
        stacked scores.
    """
    # Metrics are stacked along a new last axis; for 1-D inputs this gives
    # an (nsamples, nmetrics) array, so axis 1 runs over the metrics.
    stacked = np.stack(norm_list_metrics_model, axis=-1)
    return stats.mstats.gmean(stacked, axis=1, dtype=None, weights=w)
    
    

def build_portfolio(data):
    """Build a per-sample portfolio score for every model.

    For each metric, the scores of all models are pooled and replaced by
    their empirical CDF values (``metric_cdf_normalizer``). Each model's
    normalised metrics are then combined with an unweighted geometric mean
    (``get_portfolio``).

    Args:
        data: Mapping ``{model_name: {metric_name: scores}}``. Metric names
            are read from the first model, so every model must provide those
            metrics. Different models may hold different numbers of scores,
            but one model needs the same number of scores for every metric
            because its metrics are stacked.

    Returns:
        Dict ``{model_name: {"portfolio": array}}`` in the iteration order
        of ``data``; empty when the first model has no metrics.

    Raises:
        IndexError: If ``data`` is empty.
    """
    model_names = list(data.keys())
    metric_names = list(data[model_names[0]].keys())
    if not metric_names:
        # Nothing to combine; matches the historical result for this case.
        return {}

    # One list of CDF-normalised arrays per metric, ordered like model_names.
    # Each model's scores stay a separate array instead of being packed into
    # a single np.array: ragged input (models with different sample counts)
    # raises ValueError on NumPy >= 1.24.
    normalized_by_metric = {
        metric: metric_cdf_normalizer(
            [np.asarray(data[model][metric]) for model in model_names]
        )
        for metric in metric_names
    }

    portfolio = {}
    for idx, model in enumerate(model_names):
        per_metric = [normalized_by_metric[metric][idx] for metric in metric_names]
        portfolio[model] = {"portfolio": get_portfolio(None, per_metric)}
    return portfolio


#### Dependent Copula
def build_portfolio_dependent_copula(data):
    """Build per-model portfolio scores from an empirical copula.

    The scores of all models are pooled per metric into one DataFrame column,
    converted to pseudo-observations, and used to construct an
    ``EmpiricalCopula`` with beta smoothing. The copula CDF evaluated at the
    copula's own data is then cut back into one chunk per model.

    Args:
        data: Mapping ``{model_name: {metric_name: scores}}``. Metric names
            are read from the first model.

    Returns:
        Dict ``{model_name: {"portfolio": chunk}}`` where ``chunk`` is that
        model's slice of the copula CDF values.
    """
    model_names = list(data.keys())
    metric_names = list(data[model_names[0]].keys())

    pooled_by_metric = {}
    sizes_by_metric = {}
    for metric in metric_names:
        per_model = [data[model][metric] for model in model_names]
        sizes_by_metric[metric] = [np.array(scores).size for scores in per_model]
        pooled_by_metric[metric] = np.concatenate(per_model)

    frame = pd.DataFrame.from_dict(pooled_by_metric)
    copula = EmpiricalCopula(pseudo_obs(frame), smoothing="beta")
    mixed_cdf = copula.cdf(copula.data)

    # Model boundaries come from the first metric's per-model sizes; the
    # final chunk produced by splitting at the total length is dropped.
    boundaries = np.cumsum(sizes_by_metric[metric_names[0]])
    chunks = np.split(mixed_cdf, boundaries)[:-1]
    return {
        model: {"portfolio": chunks[position]}
        for position, model in enumerate(model_names)
    }




def _timed_call(func, arg):
    """Call ``func(arg)`` and return ``(result, elapsed_seconds)``."""
    # perf_counter is monotonic, so the measured interval cannot be distorted
    # by system clock adjustments the way time.time() differences can.
    start = time.perf_counter()
    result = func(arg)
    return result, time.perf_counter() - start


def main(args):
    """Build both portfolio variants for every sample and pickle the results.

    Reads the pickle at ``args.datapath``, takes its ``'data'`` entry as the
    iterable of samples, and runs ``build_portfolio`` and
    ``build_portfolio_dependent_copula`` on each sample while timing both.

    Two pickles are written:
        * ``args.savepath + "_portfolios"``: dict with keys ``'data'``,
          ``'portfolios'`` and ``'copula_portfolios'``.
        * ``args.savepath + "_timing"``: dict with keys
          ``'portfolio_creation'`` and ``'copula_portfolio_creation'``,
          each a list of per-sample durations in seconds.

    Args:
        args: Namespace providing ``datapath`` and ``savepath``.
    """
    # NOTE: pickle.load can execute arbitrary code; only point --datapath at
    # files from a trusted source.
    with open(args.datapath, "rb") as handle:
        og_data = pickle.load(handle)
    # The input file is closed here; it is not needed during the computation.
    samples = og_data['data']

    portfolios = []
    portfolio_creation_time = []
    copula_portfolios = []
    copula_portfolio_creation_time = []

    for s in samples:
        result, elapsed = _timed_call(build_portfolio, s)
        portfolios.append(result)
        portfolio_creation_time.append(elapsed)

        result, elapsed = _timed_call(build_portfolio_dependent_copula, s)
        copula_portfolios.append(result)
        copula_portfolio_creation_time.append(elapsed)

    op = {
        'data': samples,
        'portfolios': portfolios,
        'copula_portfolios': copula_portfolios,
    }
    with open(args.savepath + "_portfolios", 'wb') as handle:
        pickle.dump(op, handle, protocol=pickle.HIGHEST_PROTOCOL)

    timing = {
        'portfolio_creation': portfolio_creation_time,
        'copula_portfolio_creation': copula_portfolio_creation_time,
    }
    with open(args.savepath + "_timing", 'wb') as handle:
        pickle.dump(timing, handle, protocol=pickle.HIGHEST_PROTOCOL)


















if __name__ == "__main__":
    # Command-line entry point: parse the input/output pickle paths and run.
    parser = argparse.ArgumentParser(description='Sample Based Ranks Script')
    # Both paths are mandatory. Without required=True a missing --savepath
    # would only fail after every portfolio had already been computed.
    parser.add_argument('--datapath', required=True, help='Pickle path')
    parser.add_argument('--savepath', required=True, help='Output Pickle path')
    args = parser.parse_args()
    main(args)
    