import csv
import itertools

def main(config_dir="../../parallel_configs"):
    """Write the training and evaluation sweep CSVs for ``IndivQLearningParticle``.

    Two files are created inside *config_dir*:

    * ``IndivQLearningParticleTrain.csv`` -- one row per (configuration, repeat).
      A configuration is one element of the Cartesian product of
      ``randomize_starts`` x ``shields`` x ``agents_observe_momentums`` x
      ``learner_anneal_eps`` x ``punish_unsafe_orig_actions``; every
      configuration is repeated ``num_runs`` times and the repeat index is
      used as the ``seed`` column.
    * ``IndivQLearningParticleEval.csv`` -- for every training row, one row per
      entry in ``shields``, so each trained run is listed once per shield
      option in the ``evaluation_shield`` columns.

    Args:
        config_dir: Directory that receives the two CSV files. The default is
            the relative path the script has always written to, so it is
            resolved against the current working directory.

    Raises:
        OSError: If the files cannot be opened for writing (``open`` does not
            create a missing *config_dir*).
    """
    run_name = "IndivQLearningParticle"

    # Sweep axes; every combination of these is emitted num_runs times.
    randomize_starts = [True, False]
    shields = [("slugs_centralized", "shields/momentum_shield"), ("slugs_decentralized", "shields/momentum_shield"),
               ("none", None)]
    learner_anneal_eps = [(1.0, 0.05)]
    punish_unsafe_orig_actions = [(True, -10)]
    num_runs = 10
    agents_observe_momentums = [True, False]

    # Columns shared by both files; DictWriter emits them in this order.
    base_params = ["run_name", "shield", "shield_specification", "punish_unsafe_orig_action",
                   "punish_unsafe_orig_action_modifier", "randomize_starts", "map_type",
                   "particle_agents_observe_momentum",
                   "particle_terminate_on_collision", "particle_world_size", "particle_collision_penalty",
                   "learner_type", "learner_anneal_eps_start",
                   "learner_anneal_eps_finish", "max_total_steps",
                   "seed"]
    train_params = base_params + ["skip_evaluation"]
    eval_params = base_params + ["skip_training", "evaluation_run_name", "evaluation_shield",
                                 "evaluation_shield_specification",
                                 "learner_evaluation_epsilon"]

    # newline="" is required by the csv module: the writer emits its own
    # "\r\n" terminators, and without it text-mode newline translation would
    # turn them into "\r\r\n" (blank rows) on platforms that rewrite "\n".
    with open(f"{config_dir}/{run_name}Train.csv", "w", newline="") as train_file, open(
            f"{config_dir}/{run_name}Eval.csv", "w", newline="") as eval_file:
        train_writer = csv.DictWriter(train_file, train_params)
        train_writer.writeheader()

        eval_writer = csv.DictWriter(eval_file, eval_params)
        eval_writer.writeheader()

        for run_type_idx, (
                random_start, (shield, shield_specification), agents_observe_momentum,
                (eps_anneal_start, eps_anneal_finish),
                (punish_unsafe_action, unsafe_action_rew_modifier)) in enumerate(
            itertools.product(randomize_starts, shields, agents_observe_momentums, learner_anneal_eps,
                              punish_unsafe_orig_actions)):

            for run_num_of_same_type in range(num_runs):
                # Unique index across the whole sweep (configuration-major).
                global_run_idx = run_type_idx * num_runs + run_num_of_same_type

                concat_run_name = f"{run_name}/{global_run_idx}_{run_type_idx}_{run_num_of_same_type}"

                base_param_values = {
                    "run_name": concat_run_name,
                    "shield": shield,
                    "shield_specification": shield_specification,
                    "punish_unsafe_orig_action": punish_unsafe_action,
                    "punish_unsafe_orig_action_modifier": unsafe_action_rew_modifier,
                    "randomize_starts": random_start,
                    "map_type": "ParticleMomentum",
                    "particle_world_size": 10,
                    "particle_collision_penalty": -30,
                    "particle_terminate_on_collision": False,
                    "particle_agents_observe_momentum": agents_observe_momentum,
                    "learner_type": "Individual_Q",
                    "learner_anneal_eps_start": eps_anneal_start,
                    "learner_anneal_eps_finish": eps_anneal_finish,
                    "max_total_steps": int(2.5e6),
                    # The repeat index doubles as the seed, so the same seeds
                    # are reused for every configuration.
                    "seed": run_num_of_same_type,
                }

                train_writer.writerow({**base_param_values, "skip_evaluation": True})

                # One evaluation row per shield option for this trained run;
                # "shield" keeps the training-time value from base_param_values.
                for eval_shield, eval_shield_specification in shields:
                    eval_writer.writerow({
                        **base_param_values,
                        "skip_training": True,
                        "evaluation_run_name": f"{concat_run_name}_{eval_shield}",
                        "evaluation_shield": eval_shield,
                        "evaluation_shield_specification": eval_shield_specification,
                        # Evaluate with the final annealed epsilon.
                        "learner_evaluation_epsilon": eps_anneal_finish,
                    })


if __name__ == '__main__':
    main()
