####
#
# (c) Anonymous authors (2025)
#
# > Run experiments on benchmark tasks
#
####


import multiprocessing
import os
import subprocess
import sys

import tqdm

def run_experiment(env, algo, seed, args, silent, debug, warnings):
    """Run one training job by launching ``./main_a2c.py`` in a child interpreter.

    The assembled command has the form
    ``<python> [warnings] [-m ipdb -c continue] ./main_a2c.py env algo *args --seed seed``.
    It is echoed to stdout before launch, and ``SUCCESS`` or ``FAILED`` is
    printed afterwards depending on the exit status of the child process.

    Args:
        env: Environment identifier, passed as the first positional argument
            of the script.
        algo: Algorithm name, passed as the second positional argument.
        seed: Seed of the run; converted with ``str()`` and passed via
            ``--seed``.
        args: Iterable of additional command-line strings. It is only read,
            never modified, so one list can be shared by many runs.
        silent: If truthy, stdout and stderr of the child are discarded;
            otherwise the child inherits both streams.
        debug: If truthy, the script is started through
            ``-m ipdb -c continue``.
        warnings: Interpreter option string placed right after the
            executable (e.g. ``"-W ignore"``); skipped when empty.

    Returns:
        ``True`` if the child exited with status 0, ``False`` otherwise.
    """
    # Use the interpreter that runs this script so the child sees the same
    # environment; fall back to a PATH lookup if it is unknown.
    cmd = [sys.executable or "python"]
    if warnings:
        cmd.append(warnings)
    if debug:
        cmd.extend(["-m", "ipdb", "-c", "continue"])

    # Build the argument list freshly instead of extending ``args`` in place:
    # the caller's list must not accumulate one "--seed" pair per run.
    cmd.extend(["./main_a2c.py", env, algo, *args, "--seed", str(seed)])

    print(" ".join(cmd))
    sink = subprocess.DEVNULL if silent else None
    result = subprocess.run(cmd, stdout=sink, stderr=sink)

    succeeded = result.returncode == 0
    print("SUCCESS" if succeeded else "FAILED")
    return succeeded

def run_experiment_wrapper(args):
    """Unpack one task and forward its items to ``run_experiment``.

    ``args`` is an iterable holding the positional arguments of
    ``run_experiment`` in order; this adapter lets a task be handed over as a
    single object. The result of ``run_experiment`` is passed through.
    """
    outcome = run_experiment(*args)
    return outcome

def chunks(lst, n):
    """Lazily yield consecutive slices of ``lst`` holding ``n`` items each.

    The final slice is shorter when ``len(lst)`` is not a multiple of ``n``;
    an empty ``lst`` yields nothing.
    """
    # Every slice starts at a multiple of n and ends n items later.
    yield from (lst[start:start + n] for start in range(0, len(lst), n))

def main():
    """Run the benchmark sweep over every (environment, algorithm, seed) triple.

    Environments, algorithms, hyperparameters, seeds and the batch size are
    configured directly in the body below. The resulting tasks are processed
    in batches of ``batch_size``; a batch has to finish before the next one
    starts, and within a batch at most ``multiprocessing.cpu_count()`` runs
    execute at the same time. A progress bar advances once per finished run.
    """
    os.environ["WANDB_MODE"] = "dryrun"  # for online runs: "online"
    os.environ["WANDB_CONSOLE"] = "off"
    os.environ["WANDB_SILENT"] = "true"

    ### Optional settings
    warnings = "-W ignore"  # Set empty string if warnings should be enabled
    silent = True  # Set to False to see the output of the launched runs
    debug = False  # Set to True to start every run through ipdb

    # Report the mode that is actually in effect.
    if silent:
        print("running without standard output")
    else:
        print("running with standard output")
    if debug:
        print("running with debugging")
    ###

    # Specify which environment to run.
    envs = [
        # "POMDP-heavenhell_3-episodic-v0",
        # "POMDP-shopping_5-episodic-v1",
        # "extra-car-flag-v0",
        # "extra-cleaner-v0",
        # "../gym-gridverse/yaml/gv_memory_four_rooms.7x7.yaml",
        "../gym-gridverse/yaml/gv_memory_four_rooms.9x9.yaml",
    ]

    # Specify which learning algorithms to run
    algos = ["a2c", "asym-a2c", "asym-a2c-state", "informed-asym-a2c"]

    # Set hyperparameters for the selected environment
    args = [
        "--max-simulation-timesteps", "6250000",
        "--max-episode-timesteps", "100",
        "--simulation-num-episodes", "2",
        "--negentropy-value-from", "0.3",
        "--negentropy-value-to", "0.03",
        "--optim-lr-actor", "0.0003",
        "--optim-lr-critic", "0.001",
        "--gv-state-grid-model-type", "cnn",
        "--gv-observation-grid-model-type", "fc",
        "--gv-information-grid-model-type", "fc",
    ]

    # Set seeds for run
    seeds = list(range(20))

    # Create run configurations. Every task carries its own copy of the
    # hyperparameter list so that no two runs share a mutable object.
    tasks = [
        (env, algo, seed, list(args), silent, debug, warnings)
        for env in envs
        for algo in algos
        for seed in seeds
    ]

    # Run experiments parallel in batches of size 'batch_size'. The worker
    # pool is created once and reused for all batches.
    batch_size = 40
    with tqdm.tqdm(total=len(tasks)) as pbar:
        with multiprocessing.Pool(processes=multiprocessing.cpu_count()) as pool:
            for batch in chunks(tasks, batch_size):
                for _ in pool.imap_unordered(run_experiment_wrapper, batch):
                    pbar.update(1)

# Script entry point: start the sweep only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
