import logging
from typing import Any, List

from rllm.agents.agent import Action, Step, Trajectory
from rllm.agents.game_agents.base import BaseGameAgent, SYSTEM_PROMPT_TEMPLATE, USER_PROMPT_TEMPLATE

# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)


class TetrisAgent(BaseGameAgent):
    """Game agent that assembles the prompts used to play Tetris.

    The class-level strings below (``role``, ``format_explanation``,
    ``action_space``, ``goal``, ``rule`` and ``output_requirements``) hold the
    Tetris-specific prompt text. The ``_make_*`` methods insert them into the
    shared prompt templates, and ``_process_observation`` wraps a raw
    observation string into the dict shape ``_make_user_prompt`` reads.
    """

    # Persona text; becomes the first part of the ``role`` slot of the system prompt.
    role = "You are a professional Tetris game agent. You are given the current Tetris state and must decide the next best action to clear lines, keep the stack low and flat, and avoid creating holes. You will select one action per turn, planning rotations and horizontal alignment before final placement."

    # Explains how the board and the tetrominoes are written in observations.
    format_explanation = """Representation of the board:
- The board is 10 columns × 20 rows.
- Use '.' for an empty cell.
- Non-zero values or letters (I, O, T, L, J, S, Z) indicate occupied cells.

Tetromino:
- I: line (4 in a row)
- O: 2×2 square
- T, L, J, S, Z: standard Tetris shapes with 4 rotation states (O has 1)"""

    # Action syntax the model is told it may emit.
    action_space = """1. Move: move('left', count), move('right', count)
- move 'left' means move the piece to the left, 'right' means move the piece to the right.
- count ∈ {1..10}; apply the chosen action repeatedly for this many frames/steps.
2. Rotate: rotate('clockwise', count), rotate('counterclockwise', count)
- rotate 'clockwise' means rotate the piece clockwise, 'counterclockwise' means rotate the piece counterclockwise.
- count ∈ {1..3}; rotate the active piece 90 degrees per rotation, applied repeatedly for the specified number of frames or steps.
3. Drop: drop('soft') or drop('hard')
- soft: move the active piece down by one row per turn.
- hard: instantly drop the active piece to the lowest valid position (until it collides with the bottom or another piece)."""

    # Objective stated to the model.
    goal = "Maximize line clears and maintain a low, stable stack by strategically rotating, moving, and placing pieces to avoid holes and ensure long-term survivability."

    # Game rules stated to the model.
    rule = """1. The board is 10 columns × 20 rows; a new piece spawns near the top with a default rotation.
2. Allowed actions move the active piece left/right/down or rotate it.
3. A line clears when all 10 cells in a row are filled; cleared lines disappear and above cells fall.
4. The game ends when a new piece cannot spawn due to stacked blocks reaching the top."""

    # Response format the model must follow (think / REASON / ACTION).
    output_requirements = """1. Thought (Long CoT):
Provide a detailed, step-by-step reasoning process explaining your thought process in solving the task.
2. Reason (Simple CoT):
Give a concise explanation summarizing the key logic behind your action.
3. Action: 
Choose exactly one action per turn; it must be legal.

### Output Format
You must generate your thought, reason and action in the following format:
<think>
[Your thought process in solving the task.]
</think>
REASON: [Your reason for the action]
ACTION: ```
[Your action]
```
"""

    def __init__(
        self,
        max_steps: int = 30,
        use_accumulate_thinking: bool = False,
        history_window: int | None = None,
        use_multi_turn_format: bool = True,
        additional_info_path: str | None = None,
    ) -> None:
        """Store the agent configuration and reset the agent.

        Args:
            max_steps: Stored on the instance; not read elsewhere in this class.
            use_accumulate_thinking: Controls whether to accumulate the
                thinking portion of the response.
            history_window: Stored on the instance; not read elsewhere in this
                class.
            use_multi_turn_format: Selects multi-turn vs. single-turn prompt
                format (reasoning models perform well with single-turn).
            additional_info_path: Stored on the instance; presumably the
                location of the text that ends up in ``additional_info`` —
                TODO confirm against the base class.
        """
        self._trajectory = Trajectory()
        self.messages = []
        self.step: int = 0
        self.use_accumulate_thinking = use_accumulate_thinking  # controls whether to accumulate the thinking portion of the response
        self.max_steps = max_steps
        self.history_window = history_window
        self.use_multi_turn_format = use_multi_turn_format  # reasoning models have good performance with single-turn format
        self.additional_info_path = additional_info_path

        # state
        self.current_observation = None
        self.additional_info = None

        # reset() is not defined in this class; presumably inherited from
        # BaseGameAgent. It runs last so every attribute above already exists.
        self.reset()

    def _make_system_prompt(self) -> str:
        """Return the system prompt built from the role and format text.

        The role text is followed by the format explanation and, when
        ``additional_info`` is truthy, by that extra text as well.
        """
        role_text = f"{self.role}\n\n## Format Explanation\n{self.format_explanation}"
        if self.additional_info:
            role_text += f"\n{self.additional_info}"

        return SYSTEM_PROMPT_TEMPLATE.format(
            role=role_text,
            output_requirements=self.output_requirements,
        )

    def _make_init_user_prompt(self, task_info: dict) -> str:
        """Return the introductory user prompt (actions, goal and rules).

        Args:
            task_info: Accepted as part of the method signature; not used here.
        """
        # The file-level imports do not bring INTRO_USER_PROMPT_TEMPLATE into
        # scope, so the original code raised NameError here. Import it locally.
        # NOTE(review): assumes the template lives next to SYSTEM_PROMPT_TEMPLATE
        # and USER_PROMPT_TEMPLATE in game_agents.base — confirm.
        from rllm.agents.game_agents.base import INTRO_USER_PROMPT_TEMPLATE

        environment_info_str = f"""### Available Actions
{self.action_space}

### Goal
{self.goal}

### Rules
{self.rule}
"""
        return INTRO_USER_PROMPT_TEMPLATE.format(
            environment_info=environment_info_str,
        )

    def _make_user_prompt(self, observation: Any) -> str:
        """Return the per-turn user prompt for a processed observation.

        Args:
            observation: Mapping whose ``"observation"`` entry is inserted into
                the template (the shape ``_process_observation`` returns).
        """
        return USER_PROMPT_TEMPLATE.format(
            current_observation=observation["observation"],
        )

    def _process_observation(self, observation: Any) -> dict[str, str]:
        """Wrap a raw observation string in a dict.

        Args:
            observation: Observation from the environment; must be a ``str``.

        Returns:
            ``{"observation": observation}``.

        Raises:
            ValueError: If ``observation`` is not a ``str``.
        """
        if not isinstance(observation, str):
            raise ValueError(f"Invalid observation type: {type(observation)}")
        return {
            "observation": observation,
        }