from textwrap import dedent
from rich import print as rprint

# Natural-language description of the Flappy Bird environment.
# The text is assembled by implicit concatenation of adjacent string literals,
# so every fragment except the last must end with a space; otherwise two
# sentences are glued together (e.g. "left.The only control ...").
env_description = (
    "Flappy Bird is a simple but challenging side‑scrolling arcade game in which the player controls a bird that moves continuously to the right. "
    "In the actual implementation, the player's x position is fixed while the environment (pipes and background) continuously moves to the left. "
    "The only control is to “flap” (making the bird ascend briefly) or do nothing, allowing gravity to pull it down. Vertical movement is automatic when no action is taken. "
    "The objective is to navigate the bird through gaps between vertically‑aligned pipes without colliding with them or the boundaries of the screen. "
    "Each successful pass through a pair of pipes increments the score by one. Colliding with a pipe, the ground, or ceiling ends the game."
)
# Identifier of the evaluation metric.
# NOTE(review): presumably the key under which the mean per-episode game score
# is reported -- confirm against the code that consumes this value.
eval_criteria = "mean_ep_game_score"

# Reward-function signature: one {"name", "type_annotation"} record per
# argument, in positional order. Every argument is annotated as "float", so
# the records are generated from the ordered argument names.
reward_func_args = [
    {"name": arg_name, "type_annotation": "float"}
    for arg_name in (
        # Most recently passed pipe pair.
        "last_pipe_x",
        "last_top_pipe_y",
        "last_bottom_pipe_y",
        # Upcoming pipe pair.
        "next_pipe_x",
        "next_top_pipe_y",
        "next_bottom_pipe_y",
        # Pipe pair after the upcoming one.
        "next_next_pipe_x",
        "next_next_top_pipe_y",
        "next_next_bottom_pipe_y",
        # Player state.
        "player_y",
        "player_y_velocity",
        "player_rotation",
        "player_x",
        "player_width",
        "player_height",
        # Screen dimensions.
        "screen_width",
        "screen_height",
    )
]

# Return annotation of the reward function, as a string.
reward_func_return_type = "float"
# Human-readable specification of the reward function's arguments and return
# value. Each entry below is exactly one line of the final text; the lines are
# joined with "\n". Leading spaces inside the strings are significant
# (they form the indentation of the rendered text), as is the single trailing
# space on the "This note is only for your information..." line.
reward_func_definition = "\n".join(
    (
        "Args:",
        "    - `last_pipe_x` (float): The horizontal position of the last pipe.",
        "    - `last_top_pipe_y` (float): The vertical position of the last top pipe.",
        "    - `last_bottom_pipe_y` (float): The vertical position of the last bottom pipe.",
        "    - `next_pipe_x` (float): The horizontal position of the next pipe.",
        "    - `next_top_pipe_y` (float): The vertical position of the next top pipe.",
        "    - `next_bottom_pipe_y` (float): The vertical position of the next bottom pipe.",
        "    - `next_next_pipe_x` (float): The horizontal position of the next next pipe.",
        "    - `next_next_top_pipe_y` (float): The vertical position of the next next top pipe.",
        "    - `next_next_bottom_pipe_y` (float): The vertical position of the next next bottom pipe.",
        "    - `player_y` (float): The vertical position of the player.",
        "    - `player_y_velocity` (float): The vertical velocity of the player.",
        "    - `player_rotation` (float): The rotation of the player.",
        "    - `player_x` (float): The horizontal position of the player.",
        "    - `player_width` (float): The width of the player.",
        "    - `player_height` (float): The height of the player.",
        "    - `screen_width` (float): The width of the screen.",
        "    - `screen_height` (float): The height of the screen.",
        "",
        "NOTE: All the above arguments are un-normalized. The observation that the policy network will receive is normalized.",
        "      The normalization is done as follows:",
        "        - for all x values, normalized_x = x / screen_width",
        "        - for all y values, normalized_y = y / screen_height",
        "        - `player_y_velocity` is normalized as `player_y_velocity` /= PLAYER_MAX_VEL_Y, where PLAYER_MAX_VEL_Y=10.",
        "        - `player_rotation` is normalized as `player_rotation` /= 90.",
        "      This note is only for your information. You do not need to compute normalized values as it will be done automatically. ",
        "",
        "Returns:",
        "    You need to return the reward signal for the current step.",
        # Empty final entry so the joined text ends with a newline.
        "",
    )
)

# Baseline variant of the reward-function contract: the function returns the
# scalar reward together with a dictionary of its individual components.
reward_func_return_type_baseline = "Tuple[float, Dict[str, float]]"

# Human-readable specification for the baseline variant. The argument list and
# normalization note repeat the text of `reward_func_definition`; only the
# "Returns" section differs.
#
# The "Returns" header must contain a literal backslash before "[str".
# NOTE(review): presumably this escapes the bracket for rich console markup
# (the module imports `rich.print`) -- confirm. It is written as "\\[" because
# a bare "\[" in a non-raw string literal is an invalid escape sequence
# (SyntaxWarning on Python 3.12+); the resulting text is identical.
reward_func_definition_baseline = dedent("""\
    Args:
        - `last_pipe_x` (float): The horizontal position of the last pipe.
        - `last_top_pipe_y` (float): The vertical position of the last top pipe.
        - `last_bottom_pipe_y` (float): The vertical position of the last bottom pipe.
        - `next_pipe_x` (float): The horizontal position of the next pipe.
        - `next_top_pipe_y` (float): The vertical position of the next top pipe.
        - `next_bottom_pipe_y` (float): The vertical position of the next bottom pipe.
        - `next_next_pipe_x` (float): The horizontal position of the next next pipe.
        - `next_next_top_pipe_y` (float): The vertical position of the next next top pipe.
        - `next_next_bottom_pipe_y` (float): The vertical position of the next next bottom pipe.
        - `player_y` (float): The vertical position of the player.
        - `player_y_velocity` (float): The vertical velocity of the player.
        - `player_rotation` (float): The rotation of the player.
        - `player_x` (float): The horizontal position of the player.
        - `player_width` (float): The width of the player.
        - `player_height` (float): The height of the player.
        - `screen_width` (float): The width of the screen.
        - `screen_height` (float): The height of the screen.

    NOTE: All the above arguments are un-normalized. The observation that the policy network will receive is normalized.
          The normalization is done as follows:
            - for all x values, normalized_x = x / screen_width
            - for all y values, normalized_y = y / screen_height
            - `player_y_velocity` is normalized as `player_y_velocity` /= PLAYER_MAX_VEL_Y, where PLAYER_MAX_VEL_Y=10.
            - `player_rotation` is normalized as `player_rotation` /= 90.
          This note is only for your information. You do not need to compute normalized values as it will be done automatically. 

    Returns (Tuple[float, Dict\\[str, float]]):
        1. return the reward signal for the current step.
        2. return a dictionary of each individual reward component for the current step.
""")

if __name__ == "__main__":
    # When the module is run directly as a script, print the environment
    # description to stdout using the built-in print.
    print(env_description)