import os
import subprocess
import sys

# ---------------------------------------------------------------------------
# Experiment configuration.
#
# Tables keyed by environment id are looked up with an entry of ``env_ids``;
# tables keyed by a property file path are looked up with an entry of
# ``property_paths``.
# ---------------------------------------------------------------------------

# Environments to sweep over.
env_ids = [
    "pacman",
]

# Property files evaluated for each environment.
property_paths = {
    "pacman": [
        "./properties/pacman/property_1.py",
    ],
}

# Passed to the training scripts as --num-frames.
num_frames = {
    "pacman": 300000,
}

# Passed as --random-action-probability.
random_action_probabilities = {
    "pacman": 0.0,
}

# Passed as --cost-coeff (modified Q-learning runs only).
cost_coeff = {
    "pacman": 10.0,
}

# Passed as --episode-length.
episode_lengths = {
    "pacman": 100,
}

# Passed as --df (plain / modified Q-learning) or --tp-df (shielded runs).
discount_factors = {
    "pacman": 0.99,
}

# Passed as --log-every.
log_every = {
    "pacman": 1000,
}

# Passed as --sat-prob (shielded runs only); keyed by property file path.
satisfaction_probabilities = {
    "./properties/pacman/property_1.py": 0.99,
}

# Passed as --num-samples (shielded runs only); keyed by property file path.
num_samples = {
    "./properties/pacman/property_1.py": 1000,
}

# Values swept for --shielding-type.
shielding_type = [
    "action_cond_safe",
]

# Values swept for --model-checking-type.
model_checking_type = [
    "exact",
    "mc",
]

# Model-checking mode -> (extra CLI flags, infix used in the log directory name).
_MC_VARIANTS = {
    "exact": (["--pretrained-backup"], "_pretrained"),
    "mc": (["--approximate-model"], "_approx"),
}


def _base_command(script, env_id, property_path, seed, logdir):
    """Return the argv prefix shared by all three training scripts.

    Args:
        script: File name of the training script to run with ``python``.
        env_id: Key into the per-environment configuration tables.
        property_path: Property file passed as ``--property``.
        seed: Value passed as ``--seed``.
        logdir: Directory passed as ``--logdir``.

    Returns:
        A list of strings; script-specific options are appended by the caller.
    """
    return [
        "python", script,
        "--property", property_path,
        "--num-frames", str(num_frames[env_id]),
        "--env", env_id,
        "--random-action-probability", str(random_action_probabilities[env_id]),
        "--episode-length", str(episode_lengths[env_id]),
        "--log-every", str(log_every[env_id]),
        "--seed", str(seed),
        "--logdir", logdir,
    ]


def _run_unless_logged(logdir, argv):
    """Run ``argv`` as a child process unless ``logdir`` already exists.

    An existing log directory is treated as "this run was already started",
    which lets an interrupted sweep be resumed by re-running this script.
    A failed run is reported on stderr and the sweep carries on, so one bad
    configuration does not block the remaining ones.

    Args:
        logdir: Directory whose existence marks the run as done.
        argv: Full command line as a list of strings (no shell involved).
    """
    if os.path.isdir(logdir):
        return
    try:
        returncode = subprocess.run(argv, check=False).returncode
    except OSError as err:
        # E.g. no ``python`` executable on PATH.
        print(f"warning: could not launch {argv[1]}: {err}", file=sys.stderr)
        return
    if returncode != 0:
        # NOTE(review): if the script created ``logdir`` before failing, a
        # re-run of this sweep will skip it -- confirm and clean up by hand.
        print(
            f"warning: {argv[1]} exited with status {returncode} "
            f"(logdir: {logdir})",
            file=sys.stderr,
        )


for env_id in env_ids:
    for i, property_path in enumerate(property_paths[env_id]):
        property_dir = f"./logdir/{env_id}/property_{i+1}"
        df = str(discount_factors[env_id])

        for seed in range(10):
            # Baseline: plain Q-learning.
            logdir = f"{property_dir}/q_learning_{seed}"
            _run_unless_logged(
                logdir,
                _base_command(
                    "train_q_learning.py", env_id, property_path, seed, logdir
                )
                + ["--df", df, "--lr", "0.05"],
            )

            # Q-learning variant that additionally takes a cost coefficient.
            logdir = f"{property_dir}/modified_q_learning_{seed}"
            _run_unless_logged(
                logdir,
                _base_command(
                    "train_modified_q_learning.py",
                    env_id, property_path, seed, logdir,
                )
                + [
                    "--cost-coeff", str(cost_coeff[env_id]),
                    "--df", df,
                    "--lr", "0.05",
                ],
            )

            # Shielded Q-learning: one run per (shielding, model-checking) pair.
            for sh_type in shielding_type:
                for mc_type in model_checking_type:
                    if mc_type not in _MC_VARIANTS:
                        # Fail loudly instead of reusing the flags left over
                        # from a previous iteration.
                        raise ValueError(
                            f"unsupported model checking type: {mc_type!r}"
                        )
                    mc_flags, mc_infix = _MC_VARIANTS[mc_type]

                    logdir = (
                        f"{property_dir}/"
                        f"{mc_type}{mc_infix}_{sh_type}_q_learning_{seed}"
                    )
                    _run_unless_logged(
                        logdir,
                        _base_command(
                            "train_shielded_q_learning.py",
                            env_id, property_path, seed, logdir,
                        )
                        + ["--model-checking-type", mc_type]
                        + mc_flags
                        + [
                            "--shielding-type", sh_type,
                            "--num-samples", str(num_samples[property_path]),
                            "--sat-prob",
                            str(satisfaction_probabilities[property_path]),
                            "--device-type", "cpu",
                            "--tp-df", df,
                            "--tp-lr", "0.05",
                            "--sp-lr", "0.05",
                            "--sp-df", "0.99",
                            "--safe-policy-mode", "exploit",
                        ],
                    )
