import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Per-algorithm plot configuration, in drawing order:
# (tag used in the log file name, legend label, line / band color).
_ALGORITHMS = (
    ('ppo', 'PPO', 'red'),
    ('bkppo', 'BKPPO', 'orange'),
    ('fkppo', 'FKVPO', 'green'),
    ('fwppo', 'FWVPO', 'blue'),
)

# Run number of the log file plotted for each algorithm tag (Hopper-v2 runs).
# Run numbers noted by the original author for other environments, presumably
# in the order ppo, bkppo, fwppo, fkppo (confirm before use):
#   Humanoid-v2: 4, 10, 26, 0 | HalfCheetah-v2: 0, 0, 6, 0 | Walker2d-v2: 4, 5, 4, 18
_DEFAULT_RUN_NUMS = {'ppo': 13, 'bkppo': 27, 'fwppo': 164, 'fkppo': 0}


def _load_log(log_dir, tag, env_name, run_num):
    """Read the CSV log of one algorithm run and return it as a DataFrame."""
    log_f_name = os.path.join(log_dir, 'PPO_' + tag + '_' + env_name + "_log_" + str(run_num) + ".csv")
    print("loading data from : " + log_f_name)
    data = pd.read_csv(log_f_name)
    print("data_" + tag + " shape : ", data.shape)
    print("--------------------------------------------------------------------------------------------")
    return data


def _plot_run(ax, data, label, color, *, smooth_window, smooth_min_periods,
              var_window, var_min_periods, linewidth, line_alpha, band_alpha):
    """Draw the smoothed reward curve of one run plus its shaded spread band on ``ax``.

    Adds the columns ``reward_smooth_`` and ``reward_var_`` to ``data`` in place.
    """
    reward = data['reward']
    # Triangular-weighted rolling mean: the curve that is actually drawn.
    data['reward_smooth_'] = reward.rolling(window=smooth_window, win_type='triang',
                                            min_periods=smooth_min_periods).mean()
    # Rolling standard deviation over the (shorter) "var" window. Despite the
    # column name this is a std, and every algorithm must use the same window
    # so the shaded bands are comparable.
    data['reward_var_'] = reward.rolling(window=var_window, win_type='triang',
                                         min_periods=var_min_periods).std()

    data.plot(kind='line', x='timestep', y='reward_smooth_', ax=ax, color=color,
              linewidth=linewidth, alpha=line_alpha, linestyle='solid', label=label)

    # Band of total height one rolling std, centred on the smoothed curve.
    half_band = 0.5 * data['reward_var_']
    ax.fill_between(data['timestep'], data['reward_smooth_'] + half_band,
                    data['reward_smooth_'] - half_band,
                    facecolor=color, alpha=band_alpha)


def save_graph(env_name='Hopper-v2', fig_num=4, run_nums=None, show=True):
    """Plot smoothed reward curves of the PPO variants for one environment and save them as a PDF.

    Reads ``./PPO_logs/<env_name>/PPO_<tag>_<env_name>_log_<run>.csv`` for each
    tag in ``_ALGORITHMS`` (each CSV must provide ``timestep`` and ``reward``
    columns), draws one smoothed curve with a shaded spread band per algorithm,
    and writes the figure to
    ``./PPO_figs/<env_name>/PPO_<env_name>_fig_<fig_num>.pdf``.

    Args:
        env_name: Environment name; selects the log and figure sub-directories.
        fig_num: Suffix of the output file; change it to avoid overwriting an
            earlier figure in the same environment folder.
        run_nums: Optional mapping ``tag -> run number`` overriding entries of
            ``_DEFAULT_RUN_NUMS`` (whose values are the Hopper-v2 runs).
        show: When true, open the interactive window via ``plt.show()`` after
            saving.

    Raises:
        FileNotFoundError: If one of the expected log files does not exist
            (raised by ``pandas.read_csv``).
    """
    print("============================================================================================")

    # Merge caller overrides over the defaults without mutating either mapping.
    run_nums = {**_DEFAULT_RUN_NUMS, **(run_nums or {})}

    fig_width = 10
    fig_height = 8

    # Long window for the drawn curve, short window for the shaded band.
    window_len_smooth = 50  # 20
    min_window_len_smooth = 1
    linewidth_smooth = 1.5
    alpha_smooth = 1

    window_len_var = 5
    min_window_len_var = 1
    alpha_var = 0.25

    # Create ./PPO_figs/<env_name> (including parents) if it is missing.
    figures_dir = os.path.join("./PPO_figs", env_name)
    os.makedirs(figures_dir, exist_ok=True)
    fig_save_path = os.path.join(figures_dir, 'PPO_' + env_name + '_fig_' + str(fig_num) + '.pdf')

    log_dir = os.path.join("./PPO_logs", env_name)

    # Load every log before drawing so a missing file fails before any plotting.
    runs = []
    for tag, label, color in _ALGORITHMS:
        runs.append((_load_log(log_dir, tag, env_name, run_nums[tag]), label, color))

    ax = plt.gca()
    for data, label, color in runs:
        _plot_run(ax, data, label, color,
                  smooth_window=window_len_smooth, smooth_min_periods=min_window_len_smooth,
                  var_window=window_len_var, var_min_periods=min_window_len_var,
                  linewidth=linewidth_smooth, line_alpha=alpha_smooth, band_alpha=alpha_var)

    # Only the curves carry labels (the bands have none), so the default
    # handles are exactly one legend entry per algorithm.
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles, labels, loc=2, fontsize=20)

    ax.set_xlim([0, 2e6])
    ax.grid(color='gray', linestyle='-', linewidth=1, alpha=0.2)

    ax.set_xlabel("Timesteps", fontsize=40)
    ax.set_ylabel("Rewards", fontsize=40)
    ax.set_title(env_name, fontsize=40)

    fig = plt.gcf()
    fig.set_size_inches(fig_width, fig_height)

    print("============================================================================================")
    # Save exactly once: a second plain savefig() to the same path would
    # overwrite the tightly cropped output with the default-layout one.
    fig.savefig(fig_save_path, bbox_inches='tight', pad_inches=0)
    print("figure saved at : ", fig_save_path)
    print("============================================================================================")

    if show:
        plt.show()


# Script entry point: build and save the figure only when this file is run
# directly, not when it is imported.
if __name__ == "__main__":
    save_graph()

















