from textwrap import dedent
from rich import print as rprint

# Natural-language summary of the Humanoid task, assembled one sentence per
# tuple entry and joined with single spaces.
env_description = " ".join(
    (
        "The Humanoid environment from Gymnasium's MuJoCo environments provides a 3D bipedal robot "
        "simulation designed to mimic human locomotion.",
        "The agent controls a humanoid robot with a torso (abdomen), a pair of legs and arms, "
        "and tendons connecting the hips to the knees.",
        "Each leg consists of three body parts (thigh, shin, foot), "
        "and each arm consists of two body parts (upper arm, forearm).",
        "The agent receives continuous observations representing joint positions, velocities, "
        "center of mass information, inertial data, and external forces.",
        "The action space is continuous, allowing the agent to apply torques at 17 different hinge joints.",
        "The primary objective is to prevent the humanoid from falling while moving forward as much as possible.",
    )
)

# Identifier of the evaluation criterion (presumably the mean episode score;
# the consumer of this value is not visible in this file).
eval_criteria = "mean_ep_score"

# Ordered parameter specs for the reward function: each entry pairs a parameter
# name with its type annotation written as a string. The order matches the
# `Args` list documented in `reward_func_definition`.
reward_func_args = [
    {"name": arg_name, "type_annotation": arg_type}
    for arg_name, arg_type in (
        ("score_info", "dict"),
        ("action", "np.ndarray"),
        ("prev_action", "np.ndarray"),
        ("x_coord", "float"),
        ("y_coord", "float"),
        ("prev_x_coord", "float"),
        ("prev_y_coord", "float"),
        ("distance_from_origin", "float"),
        ("prev_distance_from_origin", "float"),
        ("healthy_z_range", "Tuple"),
        ("obs", "np.ndarray"),
        ("prev_obs", "np.ndarray"),
        ("cfrc_ext", "dict"),
        ("prev_cfrc_ext", "dict"),
        ("terminated", "bool"),
        ("truncated", "bool"),
    )
]

# Standalone bullet list documenting `cfrc_ext` / `prev_cfrc_ext`. The wording
# matches the corresponding entries in `reward_func_definition`, but the
# bullets start at column 0. Every body-part line follows the same template,
# so the lines are generated from the part names (underscores become spaces in
# the human-readable part of each line). The trailing empty entry yields the
# final newline.
include_cfrc_ext = "\n".join(
    [
        "- `cfrc_ext` (dict): Dictionary containing the external contact forces on the body parts. "
        "Each body part is a vector that specifies force x,y,z and torque x,y,z.",
        *(
            f"  · `{part}` (np.ndarray): External contact forces on the {part.replace('_', ' ')}."
            for part in (
                "torso",
                "lwaist",
                "pelvis",
                "right_thigh",
                "right_shin",
                "right_foot",
                "left_thigh",
                "left_shin",
                "left_foot",
                "right_upper_arm",
                "right_lower_arm",
                "left_upper_arm",
                "left_lower_arm",
            )
        ),
        "- `prev_cfrc_ext` (dict): Dictionary containing the external contact forces on the body parts "
        "on the previous step.",
        "",
    ]
)

# Return annotation (as a string) of the single-value reward function.
reward_func_return_type = "float"

# Docstring-style specification of the reward function: objective, one entry
# per argument (including the index-by-index layout of `action` and `obs`),
# and the return contract. The text is stored already left-aligned, so no
# dedent pass is needed. The trailing space after "Actions correspond to:" is
# part of the text.
reward_func_definition = """\
Objective: Obtain the highest score possible, where the exact score composition is defined in the `score_info` dictionary.

Args:
  - `score_info` (dict): Dictionary containing the score components:
    · `healthy_reward` (float): A reward is given if the Humanoid is alive (Humanoid is alive if the z-coordinate of the torso (the height) is in the closed interval given by the healthy_z_range).
    · `forward_reward` (float): A reward for moving forward, this reward would be positive if the Humanoid moves forward (in the positive x direction / in the right direction).
    · `ctrl_cost` (float): A negative reward to penalize the Humanoid for taking actions that are too large.
    · `contact_cost` (float): A negative reward to penalize the Humanoid if the external contact forces are too large.
  - `action` (np.ndarray): Action vector of shape (17,) containing torques applied to each joint with values constrained to [-0.4, 0.4]. Actions correspond to: 
    0: Torque applied on the hinge in the y-coordinate of the abdomen (N m)
    1: Torque applied on the hinge in the z-coordinate of the abdomen (N m)
    2: Torque applied on the hinge in the x-coordinate of the abdomen (N m)
    3: Torque applied on the rotor between torso/abdomen and the right hip (x-coordinate) (N m)
    4: Torque applied on the rotor between torso/abdomen and the right hip (z-coordinate) (N m)
    5: Torque applied on the rotor between torso/abdomen and the right hip (y-coordinate) (N m)
    6: Torque applied on the rotor between the right hip/thigh and the right shin (N m)
    7: Torque applied on the rotor between torso/abdomen and the left hip (x-coordinate) (N m)
    8: Torque applied on the rotor between torso/abdomen and the left hip (z-coordinate) (N m)
    9: Torque applied on the rotor between torso/abdomen and the left hip (y-coordinate) (N m)
    10: Torque applied on the rotor between the left hip/thigh and the left shin (N m)
    11: Torque applied on the rotor between the torso and right upper arm (coordinate -1) (N m)
    12: Torque applied on the rotor between the torso and right upper arm (coordinate -2) (N m)
    13: Torque applied on the rotor between the right upper arm and right lower arm (N m)
    14: Torque applied on the rotor between the torso and left upper arm (coordinate -1) (N m)
    15: Torque applied on the rotor between the torso and left upper arm (coordinate -2) (N m)
    16: Torque applied on the rotor between the left upper arm and left lower arm (N m)
  - `prev_action` (np.ndarray): Action vector of shape (17,) containing actions on the previous step.
  - `x_coord` (float): The x-coordinate of the torso.
  - `y_coord` (float): The y-coordinate of the torso.
  - `prev_x_coord` (float): The x-coordinate of the torso on the previous step.
  - `prev_y_coord` (float): The y-coordinate of the torso on the previous step.
  - `distance_from_origin` (float): The distance from the origin
  - `prev_distance_from_origin` (float): The distance from the origin on the previous step.
  - `healthy_z_range` (tuple of 2 floats): The closed interval of the height that the Humanoid is considered alive.
  - `obs` (np.ndarray): Observation vector of shape (45, ), containing position and velocity information:
    0: z-coordinate of the torso (center) (m)
    1: w-orientation of the torso (center) (rad)
    2: x-orientation of the torso (center) (rad)
    3: y-orientation of the torso (center) (rad)
    4: z-orientation of the torso (center) (rad)
    5: z-angle of the abdomen (in lower_waist) (rad)
    6: y-angle of the abdomen (in lower_waist) (rad)
    7: x-angle of the abdomen (in pelvis) (rad)
    8: x-coordinate of angle between pelvis and right hip (in right_thigh) (rad)
    9: z-coordinate of angle between pelvis and right hip (in right_thigh) (rad)
    10: y-coordinate of angle between pelvis and right hip (in right_thigh) (rad)
    11: angle between right hip and the right shin (in right_knee) (rad)
    12: x-coordinate of angle between pelvis and left hip (in left_thigh) (rad)
    13: z-coordinate of angle between pelvis and left hip (in left_thigh) (rad)
    14: y-coordinate of angle between pelvis and left hip (in left_thigh) (rad)
    15: angle between left hip and the left shin (in left_knee) (rad)
    16: coordinate-1 (multi-axis) angle between torso and right arm (in right_upper_arm) (rad)
    17: coordinate-2 (multi-axis) angle between torso and right arm (in right_upper_arm) (rad)
    18: angle between right upper arm and right_lower_arm (rad)
    19: coordinate-1 (multi-axis) angle between torso and left arm (in left_upper_arm) (rad)
    20: coordinate-2 (multi-axis) angle between torso and left arm (in left_upper_arm) (rad)
    21: angle between left upper arm and left_lower_arm (rad)
    22: x-coordinate velocity of the torso (centre) (m/s)
    23: y-coordinate velocity of the torso (centre) (m/s)
    24: z-coordinate velocity of the torso (centre) (m/s)
    25: x-coordinate angular velocity of the torso (centre) (rad/s)
    26: y-coordinate angular velocity of the torso (centre) (rad/s)
    27: z-coordinate angular velocity of the torso (centre) (rad/s)
    28: z-coordinate of angular velocity of the abdomen (in lower_waist) (rad/s)
    29: y-coordinate of angular velocity of the abdomen (in lower_waist) (rad/s)
    30: x-coordinate of angular velocity of the abdomen (in pelvis) (rad/s)
    31: x-coordinate of the angular velocity of the angle between pelvis and right hip (in right_thigh) (rad/s)
    32: z-coordinate of the angular velocity of the angle between pelvis and right hip (in right_thigh) (rad/s)
    33: y-coordinate of the angular velocity of the angle between pelvis and right hip (in right_thigh) (rad/s)
    34: angular velocity of the angle between right hip and the right shin (in right_knee) (rad/s)
    35: x-coordinate of the angular velocity of the angle between pelvis and left hip (in left_thigh) (rad/s)
    36: z-coordinate of the angular velocity of the angle between pelvis and left hip (in left_thigh) (rad/s)
    37: y-coordinate of the angular velocity of the angle between pelvis and left hip (in left_thigh) (rad/s)
    38: angular velocity of the angle between left hip and the left shin (in left_knee) (rad/s)
    39: coordinate-1 (multi-axis) of the angular velocity of the angle between torso and right arm (in right_upper_arm) (rad/s)
    40: coordinate-2 (multi-axis) of the angular velocity of the angle between torso and right arm (in right_upper_arm) (rad/s)
    41: angular velocity of the angle between right upper arm and right_lower_arm (rad/s)
    42: coordinate-1 (multi-axis) of the angular velocity of the angle between torso and left arm (in left_upper_arm) (rad/s)
    43: coordinate-2 (multi-axis) of the angular velocity of the angle between torso and left arm (in left_upper_arm) (rad/s)
    44: angular velocity of the angle between left upper arm and left_lower_arm (rad/s)
  - `prev_obs` (np.ndarray): Observation vector of shape (45, ), containing position and velocity information on the previous step.
  - `cfrc_ext` (dict): Dictionary containing the external contact forces on the body parts. Each body part is a vector that specifies force x,y,z and torque x,y,z.
    · `torso` (np.ndarray): External contact forces on the torso.
    · `lwaist` (np.ndarray): External contact forces on the lwaist.
    · `pelvis` (np.ndarray): External contact forces on the pelvis.
    · `right_thigh` (np.ndarray): External contact forces on the right thigh.
    · `right_shin` (np.ndarray): External contact forces on the right shin.
    · `right_foot` (np.ndarray): External contact forces on the right foot.
    · `left_thigh` (np.ndarray): External contact forces on the left thigh.
    · `left_shin` (np.ndarray): External contact forces on the left shin.
    · `left_foot` (np.ndarray): External contact forces on the left foot.
    · `right_upper_arm` (np.ndarray): External contact forces on the right upper arm.
    · `right_lower_arm` (np.ndarray): External contact forces on the right lower arm.
    · `left_upper_arm` (np.ndarray): External contact forces on the left upper arm.
    · `left_lower_arm` (np.ndarray): External contact forces on the left lower arm.
  - `prev_cfrc_ext` (dict): Dictionary containing the external contact forces on the body parts on the previous step.
  - `terminated` (bool): Whether the episode has terminated due to the humanoid falling.
  - `truncated` (bool): Whether the episode was truncated due to reaching the maximum timestep limit.

Returns:
  A float representing the custom reward signal for the current step.
"""


# Return annotation (as a string) of the "baseline" reward function, which
# returns the reward together with a per-component breakdown.
reward_func_return_type_baseline = "Tuple[float, Dict[str, float]]"

# Specification text for the baseline reward function. The objective and
# argument documentation match `reward_func_definition`; only the `Returns`
# section differs (a reward plus a dictionary of individual components).
#
# NOTE: the backslash before `[` in the `Returns` header is part of the
# runtime text (presumably a rich-markup escape -- confirm with the consumer).
# It is spelled `\\[` because a bare `\[` is not a valid Python escape
# sequence and triggers a DeprecationWarning/SyntaxWarning at compile time.
# The resulting string is unchanged.
reward_func_definition_baseline = dedent("""\
    Objective: Obtain the highest score possible, where the exact score composition is defined in the `score_info` dictionary.

    Args:
      - `score_info` (dict): Dictionary containing the score components:
        · `healthy_reward` (float): A reward is given if the Humanoid is alive (Humanoid is alive if the z-coordinate of the torso (the height) is in the closed interval given by the healthy_z_range).
        · `forward_reward` (float): A reward for moving forward, this reward would be positive if the Humanoid moves forward (in the positive x direction / in the right direction).
        · `ctrl_cost` (float): A negative reward to penalize the Humanoid for taking actions that are too large.
        · `contact_cost` (float): A negative reward to penalize the Humanoid if the external contact forces are too large.
      - `action` (np.ndarray): Action vector of shape (17,) containing torques applied to each joint with values constrained to [-0.4, 0.4]. Actions correspond to: 
        0: Torque applied on the hinge in the y-coordinate of the abdomen (N m)
        1: Torque applied on the hinge in the z-coordinate of the abdomen (N m)
        2: Torque applied on the hinge in the x-coordinate of the abdomen (N m)
        3: Torque applied on the rotor between torso/abdomen and the right hip (x-coordinate) (N m)
        4: Torque applied on the rotor between torso/abdomen and the right hip (z-coordinate) (N m)
        5: Torque applied on the rotor between torso/abdomen and the right hip (y-coordinate) (N m)
        6: Torque applied on the rotor between the right hip/thigh and the right shin (N m)
        7: Torque applied on the rotor between torso/abdomen and the left hip (x-coordinate) (N m)
        8: Torque applied on the rotor between torso/abdomen and the left hip (z-coordinate) (N m)
        9: Torque applied on the rotor between torso/abdomen and the left hip (y-coordinate) (N m)
        10: Torque applied on the rotor between the left hip/thigh and the left shin (N m)
        11: Torque applied on the rotor between the torso and right upper arm (coordinate -1) (N m)
        12: Torque applied on the rotor between the torso and right upper arm (coordinate -2) (N m)
        13: Torque applied on the rotor between the right upper arm and right lower arm (N m)
        14: Torque applied on the rotor between the torso and left upper arm (coordinate -1) (N m)
        15: Torque applied on the rotor between the torso and left upper arm (coordinate -2) (N m)
        16: Torque applied on the rotor between the left upper arm and left lower arm (N m)
      - `prev_action` (np.ndarray): Action vector of shape (17,) containing actions on the previous step.
      - `x_coord` (float): The x-coordinate of the torso.
      - `y_coord` (float): The y-coordinate of the torso.
      - `prev_x_coord` (float): The x-coordinate of the torso on the previous step.
      - `prev_y_coord` (float): The y-coordinate of the torso on the previous step.
      - `distance_from_origin` (float): The distance from the origin
      - `prev_distance_from_origin` (float): The distance from the origin on the previous step.
      - `healthy_z_range` (tuple of 2 floats): The closed interval of the height that the Humanoid is considered alive.
      - `obs` (np.ndarray): Observation vector of shape (45, ), containing position and velocity information:
        0: z-coordinate of the torso (center) (m)
        1: w-orientation of the torso (center) (rad)
        2: x-orientation of the torso (center) (rad)
        3: y-orientation of the torso (center) (rad)
        4: z-orientation of the torso (center) (rad)
        5: z-angle of the abdomen (in lower_waist) (rad)
        6: y-angle of the abdomen (in lower_waist) (rad)
        7: x-angle of the abdomen (in pelvis) (rad)
        8: x-coordinate of angle between pelvis and right hip (in right_thigh) (rad)
        9: z-coordinate of angle between pelvis and right hip (in right_thigh) (rad)
        10: y-coordinate of angle between pelvis and right hip (in right_thigh) (rad)
        11: angle between right hip and the right shin (in right_knee) (rad)
        12: x-coordinate of angle between pelvis and left hip (in left_thigh) (rad)
        13: z-coordinate of angle between pelvis and left hip (in left_thigh) (rad)
        14: y-coordinate of angle between pelvis and left hip (in left_thigh) (rad)
        15: angle between left hip and the left shin (in left_knee) (rad)
        16: coordinate-1 (multi-axis) angle between torso and right arm (in right_upper_arm) (rad)
        17: coordinate-2 (multi-axis) angle between torso and right arm (in right_upper_arm) (rad)
        18: angle between right upper arm and right_lower_arm (rad)
        19: coordinate-1 (multi-axis) angle between torso and left arm (in left_upper_arm) (rad)
        20: coordinate-2 (multi-axis) angle between torso and left arm (in left_upper_arm) (rad)
        21: angle between left upper arm and left_lower_arm (rad)
        22: x-coordinate velocity of the torso (centre) (m/s)
        23: y-coordinate velocity of the torso (centre) (m/s)
        24: z-coordinate velocity of the torso (centre) (m/s)
        25: x-coordinate angular velocity of the torso (centre) (rad/s)
        26: y-coordinate angular velocity of the torso (centre) (rad/s)
        27: z-coordinate angular velocity of the torso (centre) (rad/s)
        28: z-coordinate of angular velocity of the abdomen (in lower_waist) (rad/s)
        29: y-coordinate of angular velocity of the abdomen (in lower_waist) (rad/s)
        30: x-coordinate of angular velocity of the abdomen (in pelvis) (rad/s)
        31: x-coordinate of the angular velocity of the angle between pelvis and right hip (in right_thigh) (rad/s)
        32: z-coordinate of the angular velocity of the angle between pelvis and right hip (in right_thigh) (rad/s)
        33: y-coordinate of the angular velocity of the angle between pelvis and right hip (in right_thigh) (rad/s)
        34: angular velocity of the angle between right hip and the right shin (in right_knee) (rad/s)
        35: x-coordinate of the angular velocity of the angle between pelvis and left hip (in left_thigh) (rad/s)
        36: z-coordinate of the angular velocity of the angle between pelvis and left hip (in left_thigh) (rad/s)
        37: y-coordinate of the angular velocity of the angle between pelvis and left hip (in left_thigh) (rad/s)
        38: angular velocity of the angle between left hip and the left shin (in left_knee) (rad/s)
        39: coordinate-1 (multi-axis) of the angular velocity of the angle between torso and right arm (in right_upper_arm) (rad/s)
        40: coordinate-2 (multi-axis) of the angular velocity of the angle between torso and right arm (in right_upper_arm) (rad/s)
        41: angular velocity of the angle between right upper arm and right_lower_arm (rad/s)
        42: coordinate-1 (multi-axis) of the angular velocity of the angle between torso and left arm (in left_upper_arm) (rad/s)
        43: coordinate-2 (multi-axis) of the angular velocity of the angle between torso and left arm (in left_upper_arm) (rad/s)
        44: angular velocity of the angle between left upper arm and left_lower_arm (rad/s)
      - `prev_obs` (np.ndarray): Observation vector of shape (45, ), containing position and velocity information on the previous step.
      - `cfrc_ext` (dict): Dictionary containing the external contact forces on the body parts. Each body part is a vector that specifies force x,y,z and torque x,y,z.
        · `torso` (np.ndarray): External contact forces on the torso.
        · `lwaist` (np.ndarray): External contact forces on the lwaist.
        · `pelvis` (np.ndarray): External contact forces on the pelvis.
        · `right_thigh` (np.ndarray): External contact forces on the right thigh.
        · `right_shin` (np.ndarray): External contact forces on the right shin.
        · `right_foot` (np.ndarray): External contact forces on the right foot.
        · `left_thigh` (np.ndarray): External contact forces on the left thigh.
        · `left_shin` (np.ndarray): External contact forces on the left shin.
        · `left_foot` (np.ndarray): External contact forces on the left foot.
        · `right_upper_arm` (np.ndarray): External contact forces on the right upper arm.
        · `right_lower_arm` (np.ndarray): External contact forces on the right lower arm.
        · `left_upper_arm` (np.ndarray): External contact forces on the left upper arm.
        · `left_lower_arm` (np.ndarray): External contact forces on the left lower arm.
      - `prev_cfrc_ext` (dict): Dictionary containing the external contact forces on the body parts on the previous step.
      - `terminated` (bool): Whether the episode has terminated due to the humanoid falling.
      - `truncated` (bool): Whether the episode was truncated due to reaching the maximum timestep limit.

    Returns (Tuple[float, Dict\\[str, float]]):
      1. return the reward signal for the current step.
      2. return a dictionary of each individual reward component for the current step.
""")


# When the module is executed directly (rather than imported), print the
# environment description with the built-in `print` as a quick manual check.
if __name__ == "__main__":
    print(env_description)