import os

# Property specification files. Each path doubles as the key into every
# per-property table below, so the paths are named once here.
_PROPERTY_1 = "./properties/property_1.py"
_PROPERTY_2 = "./properties/property_2.py"
_PROPERTY_3 = "./properties/property_3.py"

# Order in which the sweep visits the properties; the 1-based position is
# used in the log directory name.
property_paths = [_PROPERTY_1, _PROPERTY_2, _PROPERTY_3]

# Value passed as --steps for each property.
steps = {
    _PROPERTY_1: 100,
    _PROPERTY_2: 100,
    _PROPERTY_3: 100,
}

# Values passed as --random_action_probability; one set of runs is launched
# per listed value.
random_action_probabilities = {
    _PROPERTY_1: [0.25],
    _PROPERTY_2: [0.25],
    _PROPERTY_3: [0.1],
}

# Value passed as --sat_prob, looked up as
# [property path][shielding type][random action probability].
# NOTE: the innermost keys are floats and must match the entries of
# random_action_probabilities exactly.
satisfaction_probabilities = {
    _PROPERTY_1: {
        "action_cond_safe": {0.25: 0.95},
        "task_prod": {0.25: 0.95},
    },
    _PROPERTY_2: {
        "action_cond_safe": {0.25: 0.85},
        "task_prod": {0.25: 0.85},
    },
    _PROPERTY_3: {
        "action_cond_safe": {0.1: 0.5},
        "task_prod": {0.1: 0.5},
    },
}

# Value passed as --num_samples for each property.
num_samples = {
    _PROPERTY_1: 4096,
    _PROPERTY_2: 8192,
    _PROPERTY_3: 1024,
}

# Value passed as --device_type, looked up as
# [model checking type][property path][shielding type].
device = {
    "exact": {
        _PROPERTY_1: {"task_prod": "cpu", "action_cond_safe": "cpu"},
        _PROPERTY_2: {"task_prod": "gpu", "action_cond_safe": "gpu"},
        _PROPERTY_3: {"task_prod": "gpu", "action_cond_safe": "gpu"},
    },
    "mc": {
        _PROPERTY_1: {"task_prod": "gpu", "action_cond_safe": "gpu"},
        _PROPERTY_2: {"task_prod": "gpu", "action_cond_safe": "gpu"},
        _PROPERTY_3: {"task_prod": "gpu", "action_cond_safe": "gpu"},
    },
}

# Shielding types to launch shielded runs for (passed as --shielding_type).
shielding_type = ["task_prod"]

def _run_and_report(command):
    """Run ``command`` with ``os.system`` and report a non-zero status.

    Without this check a training command that fails is indistinguishable
    from one that succeeds.  The sweep still continues after a failure; the
    failure is only made visible.
    """
    status = os.system(command)
    if status != 0:
        print(f"WARNING: os.system returned {status} for: {command}", flush=True)


# Flags for the modified / shielded runs.  They do not depend on the loop
# variables, so they are set once.  Note that ``bool(cf)`` and ``bool(app)``
# are True for these non-empty strings, so the log directory names contain
# "cf_True" and "approx_True".
cf = "--cf"
mc_type = "mc"
app = "--approximate_model 1"
prior_type = "uninformative"

# Sweep over 10 seeds, every property, and every random-action probability
# listed for that property.  A command is launched only when its log
# directory does not exist yet (presumably the training scripts create it --
# confirm), so re-running this script skips configurations already launched.
for seed in range(10):
    for i, property_path in enumerate(property_paths):
        for rand_act_prob in random_action_probabilities[property_path]:
            # Shared prefix of every log directory for this configuration.
            run_root = f"./logdir/property_{i+1}_{rand_act_prob}"

            # first run q learning
            logdir = f"{run_root}/q_learning_{seed}"
            template = "python train_q_learning.py --property {} --steps {} --random_action_probability {} --seed {} --logdir {}"
            if not os.path.isdir(logdir):
                _run_and_report(template.format(
                    property_path,
                    steps[property_path],
                    rand_act_prob,
                    seed,
                    logdir))

            # then run modified q learning with cf
            logdir = f"{run_root}/modified_q_learning_cf_{bool(cf)}_{seed}"
            template = "python train_modified_q_learning.py --property {} --steps {} {} --random_action_probability {} --seed {} --logdir {}"
            if not os.path.isdir(logdir):
                _run_and_report(template.format(
                    property_path,
                    steps[property_path],
                    cf,
                    rand_act_prob,
                    seed,
                    logdir))

            # then run shielded q learning with mc model checking,
            # approximate model with uninformative prior
            template = "python train_shielded_q_learning.py --property {} --steps {} --model_checking_type {} {} --sat_prob {} --prior_type {} --shielding_type {} --num_samples {} --random_action_probability {} --device_type {} --seed {} --logdir {}"
            for sh_type in shielding_type:
                logdir = f"{run_root}/{mc_type}_approx_{bool(app)}_prior_{prior_type}_{sh_type}_q_learning_{seed}"
                if not os.path.isdir(logdir):
                    # Table lookups stay inside the guard so they are only
                    # evaluated for runs that are actually launched.
                    _run_and_report(template.format(
                        property_path,
                        steps[property_path],
                        mc_type,
                        app,
                        satisfaction_probabilities[property_path][sh_type][rand_act_prob],
                        prior_type,
                        sh_type,
                        num_samples[property_path],
                        rand_act_prob,
                        device[mc_type][property_path][sh_type],
                        seed,
                        logdir))
