# !/usr/bin/env python
# coding: utf-8

# Importing python packages
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import os.path

import warnings
warnings.filterwarnings("ignore")

# Plotting functions
from plotting_functions import cumulative_regret_plotting_reverse

# Environment
from environment import problem_instance_fixed

# Online fair division algorithms
from learners import ofd_linear
from learners import ofd_uniform


# ### Varying number of agents ###
def varying_agents(dim, num_items, num_copies, rho_value, R, save_regret_data):
    """Run OFD-UCB and OFD-TS for several agent counts and plot the regret.

    For each algorithm, the regret data is loaded from
    ``results/data/<file_name>.npy`` when that file already exists.  Otherwise
    ``ofd_linear`` is run ``R`` times for every agent count in
    ``[5, 10, 15, 20, 25]`` and the returned regret sequences are collected as
    ``agents_regret[agent_count_index][run_index]``.  The data is then passed
    to ``cumulative_regret_plotting_reverse`` together with the target path
    ``results/plots/<file_name>.png``.

    Args:
        dim: Forwarded to ``problem_instance_fixed`` as its first argument.
        num_items: Forwarded to ``problem_instance_fixed``.
        num_copies: Forwarded to ``problem_instance_fixed``.
        rho_value: Forwarded to ``problem_instance_fixed``.
        R: Number of runs per agent count.
        save_regret_data: When true, freshly computed regret is written to the
            ``.npy`` file (the data directory is created if needed).
    """
    # (legend label, ofd_linear strategy); a tuple keeps the execution order fixed.
    algos = (('OFD-UCB', 'ucb'), ('OFD-TS', 'ts'))
    agents_num = [5, 10, 15, 20, 25]

    for alg, strategy in algos:
        # One legend entry per agent count
        alg_cases = [alg + r' (N = ' + str(k) + ')' for k in agents_num]

        # File to save or read
        file_name = F"vary_agents_{alg}_{num_items}_{dim}_{rho_value}_{R}"
        path_to_file = "results/data/{}.npy".format(file_name)

        if os.path.exists(path_to_file):
            # Reuse previously saved regret data instead of recomputing it
            agents_regret = np.load(path_to_file)
        else:
            agents_regret = []

            # Running for different number of agents
            for a in agents_num:
                algorithm_parameters = problem_instance_fixed(dim, num_items, num_copies, a, rho_value)
                print(F"Running for algorithm: {alg} with {a} agents")
                run_regret = []

                # Running for R runs
                for _ in tqdm(range(R)):
                    # In-place shuffle of the first component of the instance
                    # before every run.
                    np.random.shuffle(algorithm_parameters[0])
                    iter_regret, _stats = ofd_linear(algorithm_parameters, strategy=strategy)
                    run_regret.append(iter_regret)

                agents_regret.append(run_regret)

            # Save regret data
            if save_regret_data:
                # np.save does not create missing parent directories; without
                # this the whole computation would be lost on a fresh checkout.
                os.makedirs(os.path.dirname(path_to_file), exist_ok=True)
                np.save(path_to_file, agents_regret)

        # ### Plotting Regret ###
        file_to_save = "results/plots/{}.png".format(file_name)
        cumulative_regret_plotting_reverse(agents_regret, alg_cases, file_to_save, 'upper right')
   

# Variation of the above function: each regret value is multiplied by the number of agents
def varying_agents_scaled(dim, num_items, num_copies, rho_value, R, save_regret_data):
    """Same experiment as ``varying_agents`` but with agent-scaled regret.

    For each of OFD-UCB and OFD-TS, the regret data is loaded from
    ``results/data/<file_name>.npy`` when that file exists.  Otherwise
    ``ofd_linear`` is run ``R`` times for every agent count ``a`` in
    ``[5, 10, 15, 20, 25]``, and every element of the returned regret sequence
    is multiplied by ``a`` before being stored.  The data is then passed to
    ``cumulative_regret_plotting_reverse`` with the target path
    ``results/plots/<file_name>.png``.

    Args:
        dim: Forwarded to ``problem_instance_fixed`` as its first argument.
        num_items: Forwarded to ``problem_instance_fixed``.
        num_copies: Forwarded to ``problem_instance_fixed``.
        rho_value: Forwarded to ``problem_instance_fixed``.
        R: Number of runs per agent count.
        save_regret_data: When true, freshly computed regret is written to the
            ``.npy`` file (the data directory is created if needed).
    """
    # (legend label, ofd_linear strategy); a tuple keeps the execution order fixed.
    algos = (('OFD-UCB', 'ucb'), ('OFD-TS', 'ts'))
    agents_num = [5, 10, 15, 20, 25]

    for alg, strategy in algos:
        # One legend entry per agent count
        alg_cases = [alg + r' (N = ' + str(k) + ')' for k in agents_num]

        # File to save or read
        file_name = F"vary_agents_scaled_{alg}_{num_items}_{dim}_{rho_value}_{R}"
        path_to_file = "results/data/{}.npy".format(file_name)
        print(path_to_file)

        if os.path.exists(path_to_file):
            # Reuse previously saved regret data instead of recomputing it
            agents_regret = np.load(path_to_file)
        else:
            agents_regret = []

            # Running for different number of agents
            for a in agents_num:
                algorithm_parameters = problem_instance_fixed(dim, num_items, num_copies, a, rho_value)
                print(F"Running for algorithm: {alg} with {a} agents")
                run_regret = []

                # Running for R runs
                for _ in tqdm(range(R)):
                    # In-place shuffle of the first component of the instance
                    # before every run.
                    np.random.shuffle(algorithm_parameters[0])
                    iter_regret, _stats = ofd_linear(algorithm_parameters, strategy=strategy)
                    # Scale the regret sequence by the number of agents
                    run_regret.append(list(np.array(iter_regret) * a))

                agents_regret.append(run_regret)

            # Save regret data
            if save_regret_data:
                # np.save does not create missing parent directories; without
                # this the whole computation would be lost on a fresh checkout.
                os.makedirs(os.path.dirname(path_to_file), exist_ok=True)
                np.save(path_to_file, agents_regret)

        # ### Plotting Regret ###
        file_to_save = "results/plots/{}.png".format(file_name)
        cumulative_regret_plotting_reverse(agents_regret, alg_cases, file_to_save, 'upper right')
    

# Comparing characteristics of all algorithms for different numbers of agents
def compare_all_algos_agents(dim, num_items, num_copies, rho_value, R, save_regret_data):
    """Compare four OFD algorithms across agent counts and plot four metrics.

    For OFD-Uniform, OFD-Greedy, OFD-UCB and OFD-TS, and for every agent count
    in ``[5, 10, ..., 50]``, a fresh problem instance is built and the
    algorithm is run once per repetition (``R`` repetitions).  Per run the
    function records ``sum(iter_regret)`` and the last entries of
    ``all_stats[0]``, ``all_stats[1]`` and ``all_stats[2]`` (stored as total
    utility, Gini coefficient and min/total utility ratio respectively).
    Each collected metric is indexed as ``[algorithm][run][agent_count]``.

    Results are loaded from ``results/data/<file_name>.npz`` when that file
    exists, and each metric is passed to ``average_plotting_x_axis`` with a
    plot path under ``results/plots/``.

    Args:
        dim: Forwarded to ``problem_instance_fixed`` as its first argument.
        num_items: Forwarded to ``problem_instance_fixed``.
        num_copies: Forwarded to ``problem_instance_fixed``.
        rho_value: Forwarded to ``problem_instance_fixed``.
        R: Number of repetitions per algorithm; also passed to the plotter.
        save_regret_data: When true, freshly computed metrics are written to
            the ``.npz`` file (the data directory is created if needed).
    """
    # The original body used this name without importing it (NameError after
    # the whole computation).  Importing up front fails fast instead.
    # NOTE(review): assumed to live in plotting_functions next to
    # cumulative_regret_plotting_reverse -- confirm.
    from plotting_functions import average_plotting_x_axis

    algos = ['OFD-Uniform', 'OFD-Greedy', 'OFD-UCB', 'OFD-TS']
    # ofd_linear strategy for every algorithm that is not OFD-Uniform
    linear_strategies = {'OFD-Greedy': 'greedy', 'OFD-UCB': 'ucb', 'OFD-TS': 'ts'}
    agents_num = [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]

    # File to save or read.  The original interpolated the module-level `rho`
    # in the third slot; using the parameter removes the hidden global
    # dependency and yields the same name whenever the two values coincide.
    # NOTE(review): the cache key does not include `dim` or `num_copies`.
    file_name = F"compare_agents_{num_items}_{rho_value}_{rho_value}_{R}"
    path_to_file = "results/data/{}.npz".format(file_name)

    if os.path.exists(path_to_file):
        # Loading existing data; the context manager closes the archive
        with np.load(path_to_file) as load_data:
            overall_regret = load_data['overall_regret']
            overall_total_utility = load_data['overall_total_utility']
            overall_gini_coefficient = load_data['overall_gini_coefficient']
            overall_min_total_utility_ratio = load_data['overall_min_total_utility_ratio']

    else:
        overall_regret = []
        overall_total_utility = []
        overall_gini_coefficient = []
        overall_min_total_utility_ratio = []

        for alg in algos:
            algos_regret = []
            algos_total_utility = []
            algos_gini_coefficient = []
            algos_min_total_utility_ratio = []
            print(F"Running for algorithm: {alg}")

            for _ in tqdm(range(R)):
                run_regret = []
                run_total_utility = []
                run_gini_coefficient = []
                run_min_total_utility_ratio = []

                for a in agents_num:
                    # A new instance is generated for every (run, agent count)
                    algorithm_parameters = problem_instance_fixed(dim, num_items, num_copies, a, rho_value)
                    if alg == 'OFD-Uniform':
                        iter_regret, all_stats = ofd_uniform(algorithm_parameters)
                    else:
                        iter_regret, all_stats = ofd_linear(
                            algorithm_parameters, strategy=linear_strategies[alg]
                        )

                    run_regret.append(sum(iter_regret))
                    run_total_utility.append(all_stats[0][-1])
                    run_gini_coefficient.append(all_stats[1][-1])
                    run_min_total_utility_ratio.append(all_stats[2][-1])

                algos_regret.append(run_regret)
                algos_total_utility.append(run_total_utility)
                algos_gini_coefficient.append(run_gini_coefficient)
                algos_min_total_utility_ratio.append(run_min_total_utility_ratio)

            overall_regret.append(algos_regret)
            overall_total_utility.append(algos_total_utility)
            overall_gini_coefficient.append(algos_gini_coefficient)
            overall_min_total_utility_ratio.append(algos_min_total_utility_ratio)

        # Save regret data
        if save_regret_data:
            # np.savez does not create missing parent directories
            os.makedirs(os.path.dirname(path_to_file), exist_ok=True)
            np.savez(
                path_to_file,
                overall_regret=overall_regret,
                overall_total_utility=overall_total_utility,
                overall_gini_coefficient=overall_gini_coefficient,
                overall_min_total_utility_ratio=overall_min_total_utility_ratio,
            )

    # ### Plotting: (data, file suffix, y-axis label, fifth positional argument) ###
    x_label = r'Number of agents ($N$)'
    plots = [
        (overall_regret, 'regret', "Cumulative Regret", 'upper right'),
        (overall_total_utility, 'total_utility', "Total Utility", 'upper left'),
        (overall_gini_coefficient, 'gini_coefficient', "Gini Coefficient", 'upper left'),
        (overall_min_total_utility_ratio, 'min_total_utility_ratio',
         "Minimum Utility/Total Utility", 'lower left'),
    ]
    for data, suffix, y_label, loc in plots:
        file_to_save = "results/plots/{}_{}.png".format(file_name, suffix)
        # Pass the R parameter (the original read the module-level `runs`).
        average_plotting_x_axis(data, agents_num, algos, file_to_save, loc, R,
                                x_label=x_label, y_label=y_label)
    

# ### Varying dimension ###
def varying_dimensions(num_items, num_copies, num_agents, rho_value, R, save_regret_data):
    """Run OFD-UCB and OFD-TS for several dimensions and plot the regret.

    For each algorithm, the regret data is loaded from
    ``results/data/<file_name>.npy`` when that file already exists.  Otherwise
    ``ofd_linear`` is run ``R`` times for every ``d`` in
    ``[5, 10, 15, 20, 25]`` and the returned regret sequences are collected as
    ``agents_regret[dimension_index][run_index]``.  The data is then passed to
    ``cumulative_regret_plotting_reverse`` with the target path
    ``results/plots/<file_name>.png``.

    Args:
        num_items: Forwarded to ``problem_instance_fixed``.
        num_copies: Forwarded to ``problem_instance_fixed``.
        num_agents: Forwarded to ``problem_instance_fixed``.
        rho_value: Forwarded to ``problem_instance_fixed``.
        R: Number of runs per dimension.
        save_regret_data: When true, freshly computed regret is written to the
            ``.npy`` file (the data directory is created if needed).
    """
    # (legend label, ofd_linear strategy); a tuple keeps the execution order fixed.
    algos = (('OFD-UCB', 'ucb'), ('OFD-TS', 'ts'))
    dims = [5, 10, 15, 20, 25]

    for alg, strategy in algos:
        # Legend entries show 2*d while d is what is passed to the environment.
        # NOTE(review): presumably the effective feature dimension is twice
        # the value given to problem_instance_fixed -- confirm.
        alg_cases = [alg + r' (d = ' + str(2*d) + ')' for d in dims]

        # File to save or read
        file_name = F"vary_dims_{alg}_{num_items}_{num_agents}_{rho_value}_{R}"
        path_to_file = "results/data/{}.npy".format(file_name)

        if os.path.exists(path_to_file):
            # Reuse previously saved regret data instead of recomputing it
            agents_regret = np.load(path_to_file)
        else:
            agents_regret = []

            # Running for different dimensions
            for d in dims:
                algorithm_parameters = problem_instance_fixed(d, num_items, num_copies, num_agents, rho_value)
                print(F"Running for algorithm: {alg} with {d} dimensions")
                run_regret = []

                # Running for R runs
                for _ in tqdm(range(R)):
                    # In-place shuffle of the first component of the instance
                    # before every run.
                    np.random.shuffle(algorithm_parameters[0])
                    iter_regret, _stats = ofd_linear(algorithm_parameters, strategy=strategy)
                    run_regret.append(iter_regret)

                agents_regret.append(run_regret)

            # Save regret data
            if save_regret_data:
                # np.save does not create missing parent directories; without
                # this the whole computation would be lost on a fresh checkout.
                os.makedirs(os.path.dirname(path_to_file), exist_ok=True)
                np.save(path_to_file, agents_regret)

        # ### Plotting Regret ###
        file_to_save = "results/plots/{}.png".format(file_name)
        cumulative_regret_plotting_reverse(agents_regret, alg_cases, file_to_save, 'upper right')
        

# ### Varying number of copies for each item ###
def varying_item_copies(dim, num_items, num_agents, rho_value, R, save_regret_data):
    """Run OFD-UCB and OFD-TS for several copies-per-item values and plot regret.

    For each algorithm, the regret data is loaded from
    ``results/data/<file_name>.npy`` when that file already exists.  Otherwise,
    for every copy count ``c`` in ``[10, 25, 50, 100, 200]`` a problem
    instance is built with ``num_items // c`` items and ``c`` copies, and
    ``ofd_linear`` is run ``R`` times on it.  The regret sequences are
    collected as ``item_copies_regret[copy_count_index][run_index]`` and passed
    to ``cumulative_regret_plotting_reverse`` with the target path
    ``results/plots/<file_name>.png``.

    Args:
        dim: Forwarded to ``problem_instance_fixed`` as its first argument.
        num_items: Divided (integer division) by each copy count to obtain the
            item count passed to ``problem_instance_fixed``.
        num_agents: Forwarded to ``problem_instance_fixed``.
        rho_value: Forwarded to ``problem_instance_fixed``.
        R: Number of runs per copy count.
        save_regret_data: When true, freshly computed regret is written to the
            ``.npy`` file (the data directory is created if needed).
    """
    # (legend label, ofd_linear strategy); a tuple keeps the execution order fixed.
    algos = (('OFD-UCB', 'ucb'), ('OFD-TS', 'ts'))
    item_copies = [10, 25, 50, 100, 200]

    for alg, strategy in algos:
        # One legend entry per copy count
        alg_cases = [alg + r'(c = ' + str(c) + ')' for c in item_copies]

        # File to save or read
        file_name = F"vary_item_copies_{alg}_{num_items}_{num_agents}_{dim}_{rho_value}_{R}"
        path_to_file = "results/data/{}.npy".format(file_name)

        if os.path.exists(path_to_file):
            # Reuse previously saved regret data instead of recomputing it
            item_copies_regret = np.load(path_to_file)
        else:
            item_copies_regret = []

            # Running for different numbers of copies per item
            for c in item_copies:
                # Fewer distinct items as the number of copies grows
                num_items_c = num_items // c
                algorithm_parameters = problem_instance_fixed(dim, num_items_c, c, num_agents, rho_value)
                print(F"Running for algorithm: {alg} with {c} item copies")
                run_regret = []

                # Running for R runs
                for _ in tqdm(range(R)):
                    # In-place shuffle of the first component of the instance
                    # before every run.
                    np.random.shuffle(algorithm_parameters[0])
                    iter_regret, _stats = ofd_linear(algorithm_parameters, strategy=strategy)
                    run_regret.append(iter_regret)

                item_copies_regret.append(run_regret)

            # Save regret data
            if save_regret_data:
                # np.save does not create missing parent directories; without
                # this the whole computation would be lost on a fresh checkout.
                os.makedirs(os.path.dirname(path_to_file), exist_ok=True)
                np.save(path_to_file, item_copies_regret)

        # ### Plotting Regret ###
        file_to_save = "results/plots/{}.png".format(file_name)
        cumulative_regret_plotting_reverse(item_copies_regret, alg_cases, file_to_save, 'upper right')
        

# ########################### Bandit problem ###########################
# ### Problem Instance ###
# Values below are passed positionally to the experiment functions that follow.
d           = 40
items       = 1000
item_copies = 1
agents      = 10
rho         = 0.85  # 0: Max-min <= rho <= 1: Efficiency
runs        = 20

# Seed NumPy's global RNG before any experiment runs; save_data is passed as
# `save_regret_data` and controls whether newly computed regret is written to disk.
np.random.seed(0)
save_data = False

# Varying agents
varying_agents(d, items, item_copies, rho, runs, save_data)

# Varying dimensions
varying_dimensions(items, item_copies, agents, rho, runs, save_data)

# Varying number of copies per item
varying_item_copies(d, items, agents, rho, runs, save_data)

# Varying agents with scaled regret
# varying_agents_scaled(d, items, item_copies, rho, runs, save_data)

# Varying number of agents for all algorithms
# compare_all_algos_agents(d, items, item_copies, rho, runs, save_data)