import csv
import itertools

def write_parallel_configs(output_dir="../../parallel_configs",
                           run_name="IndivQLearningSlugsTerminateOnCollision",
                           num_runs=10):
    """Write the training and evaluation parameter sweeps as two CSV files.

    Two files are created (existing files are overwritten):
    ``{output_dir}/{run_name}Train.csv`` and ``{output_dir}/{run_name}Eval.csv``.

    The sweep is the Cartesian product of map names, start randomization,
    shield settings, observation types, epsilon annealing ranges and
    unsafe-action punishment settings. Each combination (a "run type") is
    repeated ``num_runs`` times, with the repetition index used as the seed.

    * The training file gets one row per (run type, repetition).
    * The evaluation file gets one row per (run type, repetition, evaluation
      shield), i.e. every trained configuration is listed once for each
      entry of the shield list.

    ``None`` settings are written by ``csv.DictWriter`` as empty fields.

    Args:
        output_dir: Directory that receives both CSV files. It is not
            created here; ``open`` raises ``FileNotFoundError`` if it is
            missing.
        run_name: Prefix of both file names and of every ``run_name`` value.
        num_runs: Number of repetitions (seeds ``0 .. num_runs - 1``) per
            run type.
    """
    map_names = ["Pentagon", "ISR", "MIT", "SUNY"]
    randomize_starts = [True, False]
    # Each entry: (shield, shield_specification,
    #              shield_decentralized_random_agent_order).
    shields = [("slugs_centralized", "shields/general_shield", None),
               ("slugs_decentralized", "shields/general_shield", False),
               ("none", None, None)]
    grid_world_obs_types = ["FullObsDiscrete"]
    # Each entry: (learner_anneal_eps_start, learner_anneal_eps_finish).
    learner_anneal_eps = [(1.0, 0.05)]
    # Each entry: (punish_unsafe_orig_action, punish_unsafe_orig_action_modifier).
    punish_unsafe_orig_actions = [(True, -10)]

    base_params = ["run_name", "shield", "shield_specification", "punish_unsafe_orig_action",
                   "punish_unsafe_orig_action_modifier", "shield_decentralized_random_agent_order",
                   "randomize_starts", "map_type",
                   "grid_world_map_name",
                   "grid_world_obs_type", "learner_type", "learner_anneal_eps_start",
                   "learner_anneal_eps_finish", "max_total_steps", "grid_world_terminate_on_collision",
                   "seed"]
    train_fields = base_params + ["skip_evaluation"]
    eval_fields = base_params + ["skip_training", "evaluation_run_name", "evaluation_shield",
                                 "evaluation_shield_specification",
                                 "evaluation_shield_decentralized_random_agent_order",
                                 "learner_evaluation_epsilon"]

    # newline="" is required by the csv module: the writer emits its own
    # "\r\n" terminator, and without it platforms that translate "\n" on write
    # would produce "\r\r\n", i.e. a blank line after every row.
    with open(f"{output_dir}/{run_name}Train.csv", "w", newline="") as train_file, open(
            f"{output_dir}/{run_name}Eval.csv", "w", newline="") as eval_file:
        train_writer = csv.DictWriter(train_file, train_fields)
        train_writer.writeheader()

        eval_writer = csv.DictWriter(eval_file, eval_fields)
        eval_writer.writeheader()

        sweep = itertools.product(map_names, randomize_starts, shields, grid_world_obs_types,
                                  learner_anneal_eps, punish_unsafe_orig_actions)

        for run_type_idx, (
                map_name, random_start, (shield, shield_specification, rand_agent_order), obs_type,
                (eps_anneal_start, eps_anneal_finish),
                (punish_unsafe_action, unsafe_action_rew_modifier)) in enumerate(sweep):

            for run_num_of_same_type in range(num_runs):
                # Unique index over all (run type, repetition) pairs.
                global_run_idx = run_type_idx * num_runs + run_num_of_same_type

                concat_run_name = f"{run_name}/{global_run_idx}_{run_type_idx}_{run_num_of_same_type}"

                base_param_values = {
                    "run_name": concat_run_name,
                    "shield": shield,
                    "shield_specification": shield_specification,
                    "shield_decentralized_random_agent_order": rand_agent_order,
                    "punish_unsafe_orig_action": punish_unsafe_action,
                    "punish_unsafe_orig_action_modifier": unsafe_action_rew_modifier,
                    "randomize_starts": random_start,
                    "map_type": "GridWorld",
                    "grid_world_map_name": map_name,
                    "grid_world_obs_type": obs_type,
                    "learner_type": "Individual_Q",
                    "learner_anneal_eps_start": eps_anneal_start,
                    "learner_anneal_eps_finish": eps_anneal_finish,
                    "grid_world_terminate_on_collision": True,
                    "max_total_steps": int(2.5e6),
                    # The repetition index doubles as the seed, so the same
                    # seeds are reused across run types.
                    "seed": run_num_of_same_type,
                }

                train_writer.writerow({**base_param_values, "skip_evaluation": True})

                # Every trained configuration is listed for evaluation once
                # per shield setting, including ones it was not trained with.
                for eval_shield, eval_shield_specification, eval_rand_agent_order in shields:
                    eval_writer.writerow({
                        **base_param_values,
                        "skip_training": True,
                        "evaluation_run_name": concat_run_name + "_" + eval_shield,
                        "evaluation_shield": eval_shield,
                        "evaluation_shield_specification": eval_shield_specification,
                        "evaluation_shield_decentralized_random_agent_order": eval_rand_agent_order,
                        # Evaluate with the final (fully annealed) epsilon.
                        "learner_evaluation_epsilon": eps_anneal_finish,
                    })


if __name__ == '__main__':
    write_parallel_configs()
