"""Core API for Environment"""

from __future__ import annotations

from typing import Generic, Any, SupportsFloat, TypeVar, TypedDict, Optional
from typing import Tuple, Dict

# Type variable for the observations an environment hands back
# (returned by `Env.init` and carried in `EnvStepOutput["observations"]`).
ObsType = TypeVar("ObsType")
# Type variable for the actions an environment accepts in `Env.step`.
ActType = TypeVar("ActType")


class _EnvStepOutputRequired(TypedDict):
    """Keys that every step result must contain."""

    # Observations produced by executing the action.
    observations: ObsType
    # Reward obtained for the action.
    reward: SupportsFloat
    # True once the episode has ended.
    done: bool


class EnvStepOutput(_EnvStepOutputRequired, total=False):
    """
    Result of a single `Env.step` call.

    Required keys: ``observations``, ``reward``, ``done``.
    Optional key: ``metadata``.

    TypedDict fields cannot carry default values: an assignment in the class
    body only creates a class attribute and never populates the key in the
    resulting dict. ``metadata`` is therefore declared as a non-required key
    (``total=False``) instead of being given a ``None`` default. Consumers
    should read it with ``output.get("metadata")``.
    """

    # Auxiliary diagnostic information; may be omitted entirely or set to None.
    metadata: Optional[Dict[str, Any]]


class Env(Generic[ObsType, ActType]):
    """
    The main SkyRL Gym class for implementing Reinforcement Learning agent environments.

    The main API methods that users of this class need to know are:

    - `step` - Perform an action (e.g. a tool call) in the environment.
        Returns an `EnvStepOutput` holding the observations, the reward for taking
        that action, a boolean `done` flag and optional metadata.

    - `init` - Initializes the environment to an initial state, required before calling step.
        Returns the first observations for a turn and auxiliary information, i.e. metrics, debug info.

    - `close` - Closes the environment.
        Important when external software is used, i.e. pygame for rendering, databases.

    Instances can also be used as context managers; leaving the ``with`` block
    calls `close`.
    """

    def step(self, action: ActType) -> EnvStepOutput:
        """
        Parse and run one step of action in the environment.

        The base implementation is a placeholder; subclasses must override it.

        Args:
            action (ActType): An action provided to the environment.
                For example, the action can be a [str] response generated by an LLM,
                which must be parsed and executed accordingly.

        Returns:
            EnvStepOutput: A dictionary with the following keys:

            observations (ObsType): The resulting observations after executing the action.
                For example, this could involve executing a SQL query derived from the LLM response
                and observing {'role': 'user', 'content': 'str(observations)'} output or any error
                messages from the database.

            reward (SupportsFloat): The reward obtained by taking the action.

            done (bool): Whether the episode has ended, in which case further `step` calls will
                return undefined results.

            metadata (Optional[Dict[str, Any]]): Auxiliary diagnostic information (helpful for
                debugging, learning, and logging), or None when there is nothing to report.
                This might, for instance, contain: metrics that describe the performance state,
                variables that are hidden from observations, or individual reward terms that are
                combined to produce the total reward.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError

    def init(self, *args: Any, **kwargs: Any) -> Tuple[ObsType, Dict[str, Any]]:
        """
        Initialize the environment, returning the initial observations and auxiliary info.

        The base implementation is a placeholder; subclasses must override it.

        Args:
            *args: Environment-specific positional inputs. Not interpreted by the base class.
            **kwargs: Environment-specific keyword inputs. Not interpreted by the base class.

        Returns:
            observations (ObsType): Observations of the initial state. This is analogous to the
                ``observations`` returned by `step`.
            info (Dict[str, Any]): Auxiliary information complementing ``observations``. It should be
                analogous to the ``metadata`` returned by `step`.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError

    def close(self) -> None:
        """
        After the user has finished using the environment, close contains the code necessary to
        "clean up" the environment.

        This is critical for closing rendering windows, database or HTTP connections.
        Calling ``close`` on an already closed environment has no effect and won't raise an error.

        The base implementation does nothing; subclasses holding resources should override it.
        """

    def __str__(self) -> str:
        """
        Returns a string of the environment.

        Returns:
            A string identifying the environment, in the form ``Env(<ClassName>)``.
        """
        return f"Env({type(self).__name__})"

    def __enter__(self) -> Env[ObsType, ActType]:
        """Support with-statement for the environment."""
        return self

    def __exit__(self, *args: Any):
        """Support with-statement for the environment and closes the environment."""
        self.close()
        # Returning False tells the interpreter not to suppress an exception
        # raised inside the ``with`` block, so it propagates to the caller.
        return False
