from cliff_walking import *
# Command-line interface: one optional integer flag, --setting (default 0).
# NOTE(review): `args.setting` is not read anywhere in the visible script.
parser = argparse.ArgumentParser()
parser.add_argument('--setting', default=0, type=int)
args = parser.parse_args()

# Date stamp that ends up in every output file name.
date = datetime.date.today()

# The sweep below iterates over two environment variants:
#   main_env_setting 0 -> time_in_state=False, wind_only_affect_right_action=True
#   main_env_setting 1 -> time_in_state=True,  wind_only_affect_right_action=False

# Budget passed to value_iteration / PerformMonteCarloES.
time_limit = 30
n_episode = 10000
n_seed = 1  # seeds 0 .. n_seed - 1 are run for every configuration

# Grid sizes to sweep; each entry is unpacked as [world_width, world_height].
world_size_list = [[8, 6]]

# Wind probabilities to sweep.
wind_prob_list = [0.2]

# Used for both log_interval and eval_interval in the Monte Carlo ES call.
log_interval = 100
uniform_start = True

# Create the output directory up front: ndarray.tofile() does not create
# missing directories, so without this a full training run would finish and
# then fail on the very first write, losing its results.
output_dir = '../data'
os.makedirs(output_dir, exist_ok=True)

# Sweep: world size x wind probability x environment variant x update mode x seed.
for world_size in world_size_list:
    world_width, world_height = world_size
    for wind_prob in wind_prob_list:
        for main_env_setting in (0, 1):
            # Setting 0: wind only affects the "right" action, time is not part of the state.
            # Setting 1: time is part of the state, wind is not restricted to the "right" action.
            if main_env_setting == 0:
                time_in_state = False
                wind_only_affect_right_action = True
            else:
                time_in_state = True
                wind_only_affect_right_action = False

            # First obtain the optimal Q-values for this environment; they only
            # depend on the environment, so compute them once per variant.
            _, optimal_q_val_map, _ = value_iteration(world_width, world_height, wind_prob, time_limit,
                                                      wind_only_affect_right_action, time_in_state)
            for multi_update in (True, False):
                for seed in range(n_seed):
                    # With every setting fixed, run Monte Carlo ES. It returns the Q difference
                    # to the optimal Q-values, the performance scores and the Q update difference.
                    avg_diff, score, q_update_diff = PerformMonteCarloES(
                        world_width, world_height, wind_prob, time_limit,
                        n_episode, wind_only_affect_right_action, time_in_state,
                        multi_update, uniform_start, seed, optimal_q_val_map,
                        visualize=False, log_interval=log_interval, eval_interval=log_interval, n_eval=10)

                    # Shared file-name tail that encodes the full configuration plus the run date.
                    datafile_name_suffix = '_W%d_H%d_TL%d_WP%s_TinS%s_M%s_seed%d_%s.csv' % (
                        world_width, world_height, time_limit,
                        str(wind_prob), time_in_state, multi_update, seed, date)

                    avg_diff, score, q_update_diff = np.array(avg_diff), np.array(score), np.array(q_update_diff)

                    data_to_save_list = [avg_diff, score, q_update_diff]
                    datafile_name_prefix_list = ['QOptDiff', 'Score', 'QUpDiff']

                    # One comma-separated text file per metric.
                    for prefix, data_to_save in zip(datafile_name_prefix_list, data_to_save_list):
                        save_path = os.path.join(output_dir, prefix + datafile_name_suffix)
                        data_to_save.tofile(save_path, sep=',')
