import logging
from typing import Any, List

from rllm.agents.agent import Action, Step, Trajectory
from rllm.agents.game_agents.base import BaseGameAgent, SYSTEM_PROMPT_TEMPLATE, USER_PROMPT_TEMPLATE

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)


class TwentyFortyEightAgent(BaseGameAgent):
    """Prompt-building agent for the 2048 game.

    The class-level strings below hold the 2048-specific prompt text (role,
    board format, action space, goal, rules and required output format). The
    methods splice those strings into the shared prompt templates and
    normalise raw observations into the dict shape the user prompt expects.

    ``reset()`` is called from ``__init__`` but is not defined in this class;
    presumably it is inherited from ``BaseGameAgent`` -- confirm there.
    """

    # Persona text that opens the system prompt.
    role = "You are a professional 2048 game agent. You are given the current 2048 board and must decide the next best move (up, down, left, or right) to maximize score and reach the highest possible tile. You will make one move per turn, planning strategically to maintain stability and avoid dead-ends."

    # Describes how the board is rendered; appended to the role in the system prompt.
    format_explanation = """Representation of the board:
- Each cell shows either a number (2, 4, 8, 16, 32, 64, ...) or '.' for an empty cell.
- Moves are only legal if they change the board (i.e., tiles slide or merge)."""

    # Actions the model may emit; rendered under "### Available Actions".
    action_space = """1. Move: move('up'), move('down'), move('left'), move('right')
- move 'up' means move the tiles up, 'down' means move the tiles down, 'left' means move the tiles left, 'right' means move the tiles right."""

    # Objective text; rendered under "### Goal".
    goal = "Make strategic moves to maximize your score and reach the highest possible tile while maintaining a stable, well-organized board."

    # Game rules; rendered under "### Rules".
    rule = """1. All tiles slide fully in the chosen direction (up, down, left, right).
2. Tiles with the same value merge into one tile with double the value — only once per move.
3. After each valid move, a new tile (2 or 4) appears randomly in an empty cell.
4. The game ends when there are no valid moves left."""

    # Response-format instructions passed to the system prompt template.
    output_requirements = """1. Thought (Long CoT):
Provide a detailed, step-by-step reasoning process explaining your thought process in solving the task.
2. Reason (Simple CoT):
Give a concise explanation summarizing the key logic behind your action.
3. Action: 
Choose exactly one action per turn; it must be legal.

### Output Format
You must generate your thought, reason and action in the following format:
<think>
[Your thought process in solving the task.]
</think>
REASON: [Your reason for the action]
ACTION: ```
[Your action]
```
"""

    def __init__(
        self,
        max_steps: int = 30,
        use_accumulate_thinking: bool = False,
        history_window: int | None = None,
        use_multi_turn_format: bool = True,
        additional_info_path: str | None = None,
    ):
        """Store the configuration, initialise per-episode state and reset.

        All arguments are only stored on the instance here; how they are
        consumed is decided by code outside this class (presumably
        ``BaseGameAgent`` -- confirm there).

        Args:
            max_steps: Stored as ``self.max_steps``.
            use_accumulate_thinking: Whether to accumulate the thinking
                portion of the response.
            history_window: Stored as ``self.history_window``.
            use_multi_turn_format: Whether to use the multi-turn prompt
                format; reasoning models perform well with the single-turn
                format.
            additional_info_path: Stored as ``self.additional_info_path``.
        """
        self._trajectory = Trajectory()
        self.messages = []
        self.step: int = 0
        self.use_accumulate_thinking = use_accumulate_thinking  # controls whether to accumulate the thinking portion of the response
        self.max_steps = max_steps
        self.history_window = history_window
        self.use_multi_turn_format = use_multi_turn_format  # reasoning models have good performance with single-turn format
        self.additional_info_path = additional_info_path

        # state
        self.current_observation = None
        self.additional_info = None

        # Must run last: every attribute above is already set when reset() executes.
        self.reset()

    def _make_system_prompt(self) -> str:
        """Build the system prompt from the role, board format and extras.

        The role is followed by a "Format Explanation" section and, when
        ``self.additional_info`` is truthy, by that extra text on a new line.

        Returns:
            ``SYSTEM_PROMPT_TEMPLATE`` formatted with the assembled role text
            and ``self.output_requirements``.
        """
        sections = [self.role, f"\n\n## Format Explanation\n{self.format_explanation}"]
        if self.additional_info:
            sections.append(f"\n{self.additional_info}")

        return SYSTEM_PROMPT_TEMPLATE.format(
            role="".join(sections),
            output_requirements=self.output_requirements,
        )

    def _make_init_user_prompt(self, task_info: dict) -> str:
        """Build the introductory user prompt describing the environment.

        Args:
            task_info: Accepted for interface compatibility; not read here.

        Returns:
            ``INTRO_USER_PROMPT_TEMPLATE`` formatted with the available
            actions, goal and rules.
        """
        # INTRO_USER_PROMPT_TEMPLATE is not imported at module level, so using
        # it unqualified raises NameError. Import it locally to bring it into scope.
        # NOTE(review): assumes the template is defined alongside
        # SYSTEM_PROMPT_TEMPLATE / USER_PROMPT_TEMPLATE in the base module -- confirm.
        from rllm.agents.game_agents.base import INTRO_USER_PROMPT_TEMPLATE

        environment_info_str = f"""### Available Actions
{self.action_space}

### Goal
{self.goal}

### Rules
{self.rule}
"""
        return INTRO_USER_PROMPT_TEMPLATE.format(
            environment_info=environment_info_str,
        )

    def _make_user_prompt(self, observation: Any) -> str:
        """Build the per-turn user prompt.

        Args:
            observation: Mapping whose ``"observation"`` entry is inserted
                into the template (the shape ``_process_observation`` returns).

        Returns:
            ``USER_PROMPT_TEMPLATE`` formatted with the current observation.
        """
        return USER_PROMPT_TEMPLATE.format(
            current_observation=observation["observation"],
        )

    def _process_observation(self, observation: Any) -> dict[str, str]:
        """Wrap a raw string observation in the dict used by the prompts.

        Args:
            observation: Raw observation from the environment; must be a ``str``.

        Returns:
            ``{"observation": observation}``.

        Raises:
            ValueError: If ``observation`` is not a string.
        """
        if not isinstance(observation, str):
            raise ValueError(f"Invalid observation type: {type(observation)}")
        return {"observation": observation}