import sys
from stable_baselines3 import PPO

import pyRDDLGym
import json

from pyRDDLGym_rl.core.agent import StableBaselinesAgent
from pyRDDLGym_rl.core.env import SimplifiedActionRDDLEnv

from rddleval.scripts.rddlgym_eval import evaluate


def run(domain: str, instance: str, model_path: str, seed: int = 0) -> list:
    """Evaluate a saved PPO model on a single RDDL domain instance.

    Args:
        domain: Domain identifier passed to ``pyRDDLGym.make``.
        instance: Instance identifier passed to ``pyRDDLGym.make``.
        model_path: Location of the saved model, handed to ``PPO.load``.
        seed: Seed forwarded to ``evaluate``.

    Returns:
        A list built from the value ``evaluate`` returns for 100 episodes.
    """
    # set up the environment
    env = pyRDDLGym.make(
        domain,
        instance,
        base_class=SimplifiedActionRDDLEnv,
        enforce_action_constraints=True,
    )

    try:
        # ``load`` is a classmethod that builds the model from the saved file,
        # so there is no need to construct (and throw away) a fresh PPO first.
        model = PPO.load(model_path)

        # wrap the agent in a RDDL policy and evaluate
        ppo_agent = StableBaselinesAgent(model)
        instance_returns = evaluate(env, ppo_agent, episodes=100, seed=seed)

        # Materialize the results while the environment is still open.
        return list(instance_returns)
    finally:
        # Release the environment even when loading or evaluation fails.
        env.close()


def main():
    """Evaluate instances 1-10 of a domain and print the results as JSON.

    Command-line arguments (positional, after the script name):
        1. domain     -- domain identifier, also used in the model file names
        2. base_path  -- directory holding the ``{domain}_{i}_ppo`` models;
                         also reported as ``run_id``
        3. batch_id   -- reported verbatim in the output
        4. seed       -- integer seed, used for evaluation and reported

    Raises:
        SystemExit: if fewer than four arguments are given or the seed is
            not an integer.
    """
    args = sys.argv[1:]
    if len(args) < 4:
        raise SystemExit(
            "usage: <script> <domain> <base_path> <batch_id> <seed>"
        )

    domain, base_path, batch_id = args[0], args[1], args[2]

    # Parse the seed before the (expensive) evaluations so a malformed value
    # fails immediately instead of after all instances have been run.
    try:
        seed = int(args[3])
    except ValueError:
        raise SystemExit(
            f"seed must be an integer, got {args[3]!r}"
        ) from None

    # Pass the seed through so the evaluation actually uses the value that is
    # reported in the output below.
    instance_returns = [
        run(
            domain=domain,
            instance=str(i),
            model_path=base_path + f"/{domain}_{i}_ppo",
            seed=seed,
        )
        for i in range(1, 11)
    ]

    data = {
        "batch_id": batch_id,
        "run_id": base_path,
        "domain": domain,
        "instance_returns": instance_returns,
        "seed": seed,
    }

    print(json.dumps(data))


# Run the evaluation only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
