import logging
from typing import Any, List

from rllm.agents.agent import Action, Step, Trajectory
from rllm.agents.game_agents.base import BaseGameAgent, SYSTEM_PROMPT_TEMPLATE, USER_PROMPT_TEMPLATE

logger = logging.getLogger(__name__)


class FrozenLakeAgent(BaseGameAgent):
    """Prompt-building agent for the FrozenLake grid game.

    The class-level strings below are prompt fragments: ``role``,
    ``format_explanation`` and ``output_requirements`` are assembled into the
    system prompt, while ``action_space``, ``goal`` and ``rule`` are assembled
    into the introductory user prompt.
    """

    role = "You are a professional FrozenLake agent. You are given the current FrozenLake grid and must decide the next best move (up, down, left, or right) to reach the goal (G) while avoiding holes (O). Choose exactly one move per turn, planning routes that minimize steps and risk from slippery tiles."

    format_explanation = """Representation of the board:
- Symbols: '_' = Frozen, 'O' = Hole, 'G' = Goal, 'P' = Player.
- The player reaches the goal when P and G overlap (i.e., the player moves onto G).
- Moves can be attempted at any time; at a boundary the player stays in place.
- On slippery ice, the intended move may deviate to a perpendicular direction."""

    # NOTE: opened with exactly three quotes; a fourth quote here would become
    # a literal '"' at the start of the prompt text.
    action_space = """1. Move: move('up'), move('down'), move('left'), move('right')
- Each move attempts to shift the player one cell in the chosen direction.
- Due to slipperiness, actual motion may deviate perpendicular to the intended direction."""

    goal = "Reach the goal (G) in the minimum number of steps while avoiding holes (O) and accounting for slippery tiles."

    rule = """1. The grid consists of Frozen tiles ('_'), Holes ('O'), a single Goal ('G'), and a Player ('P').
2. On each action, the player attempts to move one cell in the chosen direction; on slippery ice, the move may deviate perpendicularly.
3. Moves that would leave the grid keep the player in place.
4. The game ends immediately upon reaching 'G' or falling into 'O'."""

    output_requirements = """1. Thought (Long CoT):
Provide a detailed, step-by-step reasoning process explaining your thought process in solving the task.
2. Reason (Simple CoT):
Give a concise explanation summarizing the key logic behind your action.
3. Action: 
Choose exactly one action per turn; it must be legal.

### Output Format
You must generate your thought, reason and action in the following format:
<think>
[Your thought process in solving the task.]
</think>
REASON: [Your reason for the action]
ACTION: ```
[Your action]
```
"""

    def __init__(
        self,
        max_steps: int = 30,
        use_accumulate_thinking: bool = False,
        history_window: int | None = None,
        use_multi_turn_format: bool = True,
        additional_info_path: str | None = None,
    ) -> None:
        """Store the configuration, initialise state, then call ``reset()``.

        Args:
            max_steps: Stored as ``self.max_steps``; presumably the per-episode
                step budget (consumed outside this class body - confirm).
            use_accumulate_thinking: Controls whether to accumulate the
                thinking portion of the response.
            history_window: Stored as ``self.history_window``; presumably the
                number of past turns kept in the prompt (confirm in the base
                class).
            use_multi_turn_format: Whether to use the multi-turn prompt format;
                reasoning models have good performance with the single-turn
                format.
            additional_info_path: Optional path stored as
                ``self.additional_info_path``. It is not read here; presumably
                the base class loads it into ``self.additional_info`` (confirm).
        """
        self._trajectory = Trajectory()
        self.messages = []
        self.step: int = 0
        self.use_accumulate_thinking = use_accumulate_thinking
        self.max_steps = max_steps
        self.history_window = history_window
        self.use_multi_turn_format = use_multi_turn_format
        self.additional_info_path = additional_info_path

        # Mutable per-episode state.
        self.current_observation = None
        self.additional_info = None

        # reset() is not defined in this class body; it is expected to come
        # from BaseGameAgent.
        self.reset()

    def _make_system_prompt(self) -> str:
        """Build the system prompt.

        The role text is extended with the board-format explanation and, when
        ``self.additional_info`` is truthy, with that extra text; the result
        and ``output_requirements`` are substituted into
        ``SYSTEM_PROMPT_TEMPLATE``.
        """
        role_text = self.role

        role_text += f"\n\n## Format Explanation\n{self.format_explanation}"

        if self.additional_info:
            role_text += f"\n{self.additional_info}"

        system_prompt = SYSTEM_PROMPT_TEMPLATE.format(
            role=role_text,
            output_requirements=self.output_requirements,
        )
        return system_prompt

    def _make_init_user_prompt(self, task_info: dict) -> str:
        """Build the introductory user prompt (actions, goal and rules).

        Args:
            task_info: Accepted for interface compatibility; not used here.
        """
        # INTRO_USER_PROMPT_TEMPLATE is not among this module's top-level
        # imports, so referencing it bare raises NameError. It is imported
        # locally from the module that provides the sibling templates.
        # NOTE(review): assumes the base module exports this name - confirm.
        from rllm.agents.game_agents.base import INTRO_USER_PROMPT_TEMPLATE

        environment_info_str = f"""### Available Actions
{self.action_space}

### Goal
{self.goal}

### Rules
{self.rule}
"""
        user_prompt = INTRO_USER_PROMPT_TEMPLATE.format(
            environment_info=environment_info_str,
        )
        return user_prompt

    def _make_user_prompt(self, observation: Any) -> str:
        """Build the per-turn user prompt.

        Args:
            observation: Mapping with an ``"observation"`` key, i.e. the shape
                returned by ``_process_observation``.
        """
        user_prompt = USER_PROMPT_TEMPLATE.format(
            current_observation=observation["observation"],
        )
        return user_prompt

    def _process_observation(self, observation: Any) -> dict[str, str]:
        """Wrap a raw string observation in the dict the prompt builders expect.

        Args:
            observation: Raw observation from the environment; must be a ``str``.

        Returns:
            ``{"observation": observation}``.

        Raises:
            ValueError: If ``observation`` is not a string.
        """
        if not isinstance(observation, str):
            raise ValueError(f"Invalid observation type: {type(observation)}")
        return {
            "observation": observation,
        }