from abc import ABCMeta, abstractmethod
import numpy as np
import random
import gymnasium as gym
import shutil
from typing import Any, Dict, Optional
import sys
from pyRDDLGym.core.policy import BaseAgent
from pyRDDLGym.core.env import RDDLEnv
from pyRDDLGym.core.debug.exception import RDDLRandPolicyVecNotImplemented


def evaluate(
    env: RDDLEnv,
    agent: BaseAgent,
    episodes: int = 1,
    seed: int = 0,
) -> np.ndarray:
    """Roll out ``agent`` in ``env`` and collect the return of each episode.

    Every episode starts with ``env.reset(seed=seed + episode)``, so each
    episode gets its own seed while the whole run stays reproducible for a
    fixed ``seed``. An episode runs for at most ``env.horizon`` steps and
    stops early as soon as the environment reports ``terminated`` or
    ``truncated``.

    Args:
        env: Environment to simulate; must expose ``horizon``,
            ``reset(seed=...)`` and ``step(action)``.
        agent: Policy queried through ``sample_action(state)`` at every step.
        episodes: Number of independent episodes to simulate.
        seed: Base seed; episode ``i`` is reset with ``seed + i``.

    Returns:
        A 1-D array of shape ``(episodes,)`` whose ``i``-th entry is the
        undiscounted sum of rewards collected in episode ``i``. No summary
        statistics are computed here; callers can aggregate the array
        (mean, std, ...) as needed.
    """
    history = np.zeros((episodes,))
    for episode in range(episodes):
        # restart the episode with a per-episode seed
        total_reward = 0.0
        state, _ = env.reset(seed=seed + episode)

        # simulate until the horizon is reached or the episode ends early
        for _ in range(env.horizon):
            action = agent.sample_action(state)
            state, reward, terminated, truncated, _ = env.step(action)
            total_reward += reward
            if terminated or truncated:
                break

        history[episode] = total_reward

    # raw per-episode returns, one entry per simulated episode
    return history
