import logging
from typing import Any, List

from rllm.agents.agent import Action, Step, Trajectory
from rllm.agents.game_agents.base import BaseGameAgent, SYSTEM_PROMPT_TEMPLATE, USER_PROMPT_TEMPLATE, INTRO_USER_PROMPT_TEMPLATE

logger = logging.getLogger(__name__)

# Action-space text for the single-step variant of the task: `move` takes no
# `num_moves` argument. RushHourAgent._make_init_user_prompt selects it when
# task_info["add_info"] == "only_one_move".
one_move_action_space = "\n".join(
    (
        "1. Move: ```move(vehicle_name, direction)```",
        "- Move a vehicle from one cell to another.",
        "- vehicle_name: name of the vehicle to move (e.g. 'A')",
        "- direction: direction of movement ('up', 'down', 'left', 'right')",
        "- This action is valid only if the vehicle can move one cell in the given direction.",
    )
)


class RushHourAgent(BaseGameAgent):
    """Agent that builds the prompts for the Rush Hour sliding-vehicle puzzle.

    The class-level strings are prompt fragments. The ``_make_*`` methods
    slot them into the templates imported from the base module, and
    ``_process_observation`` normalizes raw environment observations into the
    dict shape that ``_make_user_prompt`` reads.
    """

    # Persona sentence placed at the start of the system prompt.
    role = "You are a professional Rush Hour solver. You are given a Rush Hour board and you need to solve it."

    # Board legend appended to the role text in the system prompt.
    # NOTE(review): the second sentence ("Each non-'x' character ...") also
    # covers '.', which the bullets below define as an empty cell. The text is
    # left untouched because changing it changes what the model is shown.
    format_explanation = """Representation of the board:
The board is given as a rectangular grid of characters.
Each non-'x' character represents part of a vehicle.
Identical letters belong to the same vehicle and occupy contiguous cells either horizontally or vertically.
- 'x' denotes a wall (immovable and impassable).
- '.' denotes an empty cell.
- Each uppercase letter (A, B, C, ...) denotes a car.
- The special car AA is the target car that must reach the exit.
- The exit is located on the right boundary of the row containing AA.
- The puzzle is solved when AA occupies the exit cell."""

    # Default (multi-cell) action description; see `one_move_action_space`
    # for the single-cell variant.
    action_space = """1. Move: ```move(vehicle_name, direction, num_moves)```
- Move a vehicle from one cell to another.
- vehicle_name: name of the vehicle to move (e.g. 'A', 'B', 'C', ...)
- direction: direction of movement ('up', 'down', 'left', 'right')
- num_moves: number of cells to move (default: 1)"""

    goal = """Move the target vehicle to the rightmost column based on the rules."""

    rules = """- Each vehicle can move only along its orientation (horizontal or vertical).
- Vehicles may only move into empty cells ('.') and may not pass through other vehicles or walls ('x').
- A move consists of sliding a single vehicle along its allowed direction by one or more cells, as long as the path is unobstructed.
- The length and orientation of each vehicle are fixed.
- The puzzle is solved when the rightmost cell of the target car AA reaches the exit on the right boundary of the board."""

    # Response contract given to the model: a <think> block, then REASON and
    # ACTION sections.
    output_requirements = """1. Thought:
Provide a detailed, step-by-step reasoning process explaining your thought process in solving the task.
2. Reason:
Give a concise explanation summarizing the key logic behind your action.
3. Action:
Generate only one action at a time based on the reasoning process.

### Output Format
You must generate your thought, reason and action in the following format:
<think>
[Your thought process in solving the task.]
</think>
REASON: [Your reason for the action]
ACTION: ```
[Your action]
```
"""

    def __init__(
        self,
        max_steps: int = 30,
        use_accumulate_thinking: bool = False,
        history_window: int | None = None,
        use_multi_turn_format: bool = True,
        additional_info_path: str | None = None,
        board_format: str = "base",
    ):
        """Store the configuration and initialize per-episode state.

        Args:
            max_steps: Stored as ``self.max_steps``.
            use_accumulate_thinking: Whether to accumulate the thinking
                portion of the response.
            history_window: Stored as ``self.history_window``.
            use_multi_turn_format: Whether to use the multi-turn prompt
                format (reasoning models perform well with single-turn).
            additional_info_path: Stored as ``self.additional_info_path``.
            board_format: Accepted for interface compatibility; this class
                does not read or store it.
        """
        # NOTE(review): `board_format` is intentionally left unused here, as in
        # the original; confirm whether the base class expects it to be kept.
        self._trajectory = Trajectory()
        self.messages = []
        self.step: int = 0
        self.use_accumulate_thinking = use_accumulate_thinking  # controls whether to accumulate the thinking portion of the response
        self.max_steps = max_steps
        self.history_window = history_window
        self.use_multi_turn_format = use_multi_turn_format  # reasoning models have good performance with single-turn format
        self.additional_info_path = additional_info_path

        # state
        self.current_observation = None
        self.additional_info = None

        # `reset` is not defined in this class; presumably inherited from
        # BaseGameAgent.
        self.reset()

    def _make_system_prompt(self, task_info: dict) -> str:
        """Build the system prompt from the role, board legend and output contract.

        Args:
            task_info: Not used by this implementation.

        Returns:
            ``SYSTEM_PROMPT_TEMPLATE`` formatted with ``role`` and
            ``output_requirements``.
        """
        role_text = self.role + f"\n\n## Format Explanation\n{self.format_explanation}\n"

        system_prompt = SYSTEM_PROMPT_TEMPLATE.format(
            role=role_text,
            output_requirements=self.output_requirements,
        )
        return system_prompt

    def _make_init_user_prompt(self, task_info: dict) -> str:
        """Build the introductory user prompt (actions, goal and rules).

        Args:
            task_info: Task metadata. When its ``"add_info"`` entry equals
                ``"only_one_move"`` the single-cell action description is
                used. A missing entry, or an empty or ``None`` ``task_info``,
                selects the default action space.

        Returns:
            ``INTRO_USER_PROMPT_TEMPLATE`` formatted with the environment
            description.
        """
        # Tolerate a falsy task_info (None or {}) instead of failing on `.get`.
        add_info = task_info.get("add_info") if task_info else None
        if add_info == "only_one_move":
            action_space = one_move_action_space
        else:
            action_space = self.action_space
        environment_info_str = f"""### Available Actions
{action_space}

### Goal
{self.goal}

### Rules
{self.rules}
"""
        user_prompt = INTRO_USER_PROMPT_TEMPLATE.format(
            environment_info=environment_info_str,
        )
        return user_prompt

    def _make_user_prompt(self, observation: Any) -> str:
        """Build the per-turn user prompt.

        Args:
            observation: Mapping with an ``"observation"`` entry, i.e. the
                shape returned by ``_process_observation``.

        Returns:
            ``USER_PROMPT_TEMPLATE`` formatted with the observation text.
        """
        user_prompt = USER_PROMPT_TEMPLATE.format(
            current_observation=observation["observation"],
        )
        return user_prompt

    def _process_observation(self, observation: Any) -> dict[str, Any]:
        """Normalize an environment observation into a prompt-ready dict.

        Args:
            observation: Either a dict (reads ``board_string``,
                ``env_message``, ``current_turn``, ``max_turns`` and
                ``progress``) or a plain string.

        Returns:
            A dict whose ``"observation"`` entry is the text to show. For
            dict inputs it also carries ``"observation_info"`` with the turn
            and progress fields (empty string when absent).

        Raises:
            ValueError: If ``observation`` is neither a dict nor a str.
        """
        if isinstance(observation, dict):
            observation_str = observation.get("board_string", "")
            if observation_str is None:
                # A present-but-None board would make the `+=` below raise and
                # hide the environment's error message.
                observation_str = ""
            env_message = observation.get("env_message", None)
            if env_message:
                observation_str += f"\n\n- Error message from environment:\n{env_message}"
            return {
                "observation": observation_str,
                "observation_info": {
                    "current_turn": observation.get("current_turn", ""),
                    "max_turns": observation.get("max_turns", ""),
                    "progress": observation.get("progress", ""),
                }
            }
        elif isinstance(observation, str):
            # Plain strings pass through; no "observation_info" is attached.
            return {
                "observation": observation,
            }
        else:
            raise ValueError(f"Invalid observation type: {type(observation)}")