import os
import sys

# Append the parent of this file's directory to the module search path.
# NOTE(review): presumably this is what lets the `reward_design` import below
# resolve when the file is run directly — confirm against the repository layout.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import reward_design.prompts.common_prompt as common_prompt

# Text fragments shared by several entries of `task_description_all`.
# They are defined once so that entries which are meant to read identically
# cannot drift apart through copy/paste edits. Every fragment that continues a
# previous sentence starts with a single space.

# Body/goal description common to every "antmaze-*" entry.
_ANT_INTRO = (
    "The ant is a 3D quadruped robot consisting of a torso (free rotational body) with four legs attached to it, where each leg has two body parts. The goal is to coordinate the four legs to move in the forward (right) direction by applying torque to the eight hinges connecting the two body parts of each leg and the torso (nine body parts and eight hinges)."
)
# Reward guidance variant with an explicit priority ordering of objectives.
_ANT_WEIGHTED_OBJECTIVES = (
    " Specifically, the reward design should primarily encourage the ants to move forward in the x-coordinate and y-coordinate, which is the most critical objective and should therefore be assigned the highest weight. Secondly, the ants should be encouraged to move toward the target position with a lower weight. Finally, a small weight should be assigned to posture-related rewards or penalties, promoting healthy posture or discouraging unhealthy ones."
)
# Shorter reward guidance variant without explicit weights.
_ANT_GOAL_HEADING = (
    " Specifically, the ant should move forward in the x-coordinate and y-coordinate, while heading toward the goal position."
)
# Termination rules common to every "antmaze-*" entry.
_ANT_TERMINATION = (
    " The environment terminates when the ant is unhealthy. The ant is unhealthy if any of the following happens:"
    " (1) Any of the state space values is no longer finite."
    " (2) The z-coordinate of the torso (the height) is not in the closed interval [0.2, 1.0]."
)
_ANT_WEIGHTED_DESCRIPTION = _ANT_INTRO + _ANT_WEIGHTED_OBJECTIVES + _ANT_TERMINATION
_ANT_GOAL_HEADING_DESCRIPTION = _ANT_INTRO + _ANT_GOAL_HEADING + _ANT_TERMINATION

# Platform sentence shared by the Adroit tasks that use the 28-DoF setup
# ("relocate" uses a 30-DoF setup and therefore spells out its own intro).
_ADROIT_28DOF_INTRO = (
    "The environment is based on the Adroit manipulation platform, a 28 degree of freedom system which consists of a 24 degrees of freedom ShadowHand and a 4 degree of freedom arm."
)
# Milestone-bonus hint shared by "door", "pen" and "relocate".
# "hammer" deliberately keeps its own wording ("additional bonuses", no "large").
_LARGE_THRESHOLD_BONUS_HINT = (
    " To further encourage goal achievement, additional large bonuses can be introduced when progress surpasses certain predefined thresholds. Since the final goal may be difficult to reach, setting multiple thresholds can help sustain motivation throughout the process."
)

# Natural-language task descriptions keyed by environment/task name.
# Each value is one plain string: what the agent is, what the goal is and,
# where applicable, the termination/success conditions and reward hints.
# NOTE(review): these look like prompt fragments for reward design; how they
# are consumed is not visible in this part of the file.
task_description_all = {
    "halfcheetah": (
        "The HalfCheetah is a 2-dimensional robot consisting of 9 body parts and 8 joints connecting them (including two paws). The goal is to apply torque to the joints to make the cheetah run forward (right) as fast as possible, with a positive reward based on the distance moved forward and a negative reward for moving backward. The cheetah's torso and head are fixed, and torque can only be applied to the other 6 joints over the front and back thighs (which connect to the torso), the shins (which connect to the thighs), and the feet (which connect to the shins)."
    ),
    "hopper": (
        "The hopper is a two-dimensional one-legged figure consisting of four main body parts - the torso at the top, the thigh in the middle, the leg at the bottom, and a single foot on which the entire body rests. The goal is to make hops that move in the forward (right) direction by applying torque to the three hinges that connect the four body parts."
        " The main component of the reward function is based on movement: moving forward yields positive rewards, while moving backward results in negative rewards. In addition, the hopper can be slightly encouraged to maintain a healthy posture and slightly penalized for unhealthy posture. The environment terminates when the hopper is unhealthy. The hopper is unhealthy if any of the following happens:"
        " (1) An element of `observation[1:]` is no longer contained in the closed interval [-100, 100]."
        " (2) The height of the hopper (`observation[0]`) is no longer contained in the closed interval [0.7, +∞] (usually meaning that it has fallen)."
        " (3) The angle of the torso (`observation[1]`) is no longer contained in the closed interval [-0.2, 0.2]."
    ),
    "walker2d": (
        "The walker is a two-dimensional bipedal robot consisting of seven main body parts - a single torso at the top (with the two legs splitting after the torso), two thighs in the middle below the torso, two legs below the thighs, and two feet attached to the legs on which the entire body rests. The goal is to walk in the forward (right) direction by applying torque to the six hinges connecting the seven body parts."
        " The main component of the reward function is based on movement: moving forward yields positive rewards, while moving backward results in negative rewards. In addition, the walker can be slightly encouraged to maintain a healthy posture and slightly penalized for unhealthy posture. The environment terminates when the walker is unhealthy. The walker is unhealthy if any of the following happens:"
        " (1) Any of the state space values is no longer finite."
        # The upper bound is 2.0, matching the Walker2d default healthy height
        # range of (0.8, 2.0); an upper bound of 1.0 would describe ordinary
        # upright walking as a terminating state.
        " (2) The z-coordinate of the torso (the height) is not in the closed interval [0.8, 2.0]."
        " (3) The absolute value of the angle (`observation[1]`) is not in the closed interval [-1, 1]."
    ),
    "antmaze-large-diverse-v0": _ANT_WEIGHTED_DESCRIPTION,
    "antmaze-large-play-v0": _ANT_WEIGHTED_DESCRIPTION,
    "antmaze-medium-diverse-v0": (
        _ANT_INTRO
        + " Moving forward in the x-coordinate and y-coordinate yields positive rewards, while moving backward results in negative rewards. A relatively high weighting coefficient should be assigned to these directional rewards to encourage forward movement strongly. In addition, the ants should be guided to move toward the target position through appropriate incentives. Finally, posture-related rewards or penalties should be assigned a smaller weight, to promote healthy posture or discourage unhealthy ones without interfering with the primary movement objectives."
        + _ANT_TERMINATION
    ),
    "antmaze-medium-play-v0": _ANT_GOAL_HEADING_DESCRIPTION,
    "antmaze-umaze-diverse-v0": _ANT_GOAL_HEADING_DESCRIPTION,
    "antmaze-umaze-v0": _ANT_WEIGHTED_DESCRIPTION,
    "door": (
        _ADROIT_28DOF_INTRO
        + " The task to be completed consists on undoing the latch and swing the door open. The latch has significant dry friction and a bias torque that forces the door to stay closed. Agent leverages environmental interaction to develop the understanding of the latch as no information about the latch is explicitly provided. The position of the door is randomized. Task is considered complete when the door touches the door stopper at the other end."
        " Specifically, when the Boolean value indicating the door open state is true, the task is considered successful. When the door is locked, the angular position of the door latch and the angular position of the door hinge are 0. When the door is fully open, the angular position of the door latch reaches 1.82, and the angular position of the door hinge reaches 1.57."
        + _LARGE_THRESHOLD_BONUS_HINT
    ),
    # Disabled prompt fragments for "door", kept for reference (not part of the string above):
    # , using exponential or quadratic functions to emphasize the importance of reaching the final stages of the task.
    # To unlock the door, the angular position of the door latch needs to increase rather than decrease.
    # An increase in the angular position of the door latch signifies its rotation toward the unlatched (open) direction.
    "hammer": (
        _ADROIT_28DOF_INTRO
        + " The task to be completed consists on picking up a hammer with and drive a nail into a board. The nail position is randomized and has dry friction capable of absorbing up to 15N force. Task is successful when the entire length of the nail is inside the board."
        " Specifically, the palm should move toward and grasp the hammer, and the hammer should be brought near the nail to strike it. To generate sufficient force, the hammer may need to be lifted slightly along the z direction. The greater the insertion displacement of nail, the deeper it is driven into the board. When the nail is fully embedded in the board, the insertion displacement of nail reaches 0.091."
        " To further encourage goal achievement, additional bonuses can be introduced when progress surpasses certain predefined thresholds. Since the final goal may be difficult to reach, setting multiple thresholds can help sustain motivation throughout the process."
    ),
    # Disabled prompt fragment for "hammer", kept for reference:
    # " It may be helpful to guide the palm to hold the hammer, guide the hammer to move closer to the nail, and appropriately guide the hammer to move upward along the z direction to accumulate force."
    "pen": (
        _ADROIT_28DOF_INTRO
        + " The task to be completed consists on repositioning the blue pen to match the orientation of the green target. The base of the hand is fixed. The target is also randomized to cover all configurations. The task will be considered successful when the orientations match within tolerance."
        " Specifically, the task is considered successful when the Euclidean distance between the pen and the target is less than 0.075, and the dot product of their directions is greater than 0.95. The task fails immediately if the pen drops from the palm, which is defined as the height of the pen becoming less than 0.075."
        + _LARGE_THRESHOLD_BONUS_HINT
    ),
    "relocate": (
        "The environment is based on the Adroit manipulation platform, a 30 degree of freedom system which consists of a 24 degrees of freedom ShadowHand and a 6 degree of freedom arm. The task to be completed consists on moving the blue ball to the green target. The positions of the ball and target are randomized over the entire workspace. The task will be considered successful when the object is within epsilon-ball of the target."
        " Specifically, the task is considered successful when the Euclidean distance between the ball and the target is less than 0.1 meters. It may be helpful to appropriately guide the ball to move upward along the z direction."
        + _LARGE_THRESHOLD_BONUS_HINT
    ),
    # The kitchen entry ends with a markdown table inside a triple-quoted
    # string; its rows must stay at column 0, and the whitespace before the
    # closing quotes is part of the string.
    "kitchen": (
        "The environment is based on the 9 degrees of freedom Franka robot. The Franka robot is placed in a kitchen environment containing several common household items: a microwave, a kettle, an overhead light, cabinets, and an oven. The environment is a multitask goal in which the robot has to interact with the previously mentioned items in order to reach a desired goal configuration."
        """ Specifically, the desired goal is to complete 4 subtasks: "microwave", "kettle", "light switch", and "slide cabinet"."""
        " The following is a table with all the subtasks and their respective joint goal values. The tasks are considered completed when their joint configuration is within a norm threshold of 0.3 with respect to the goal configuration specified."
        """
| Task            | Description                                 | Joint Type | Goal                                   |
|-----------------|---------------------------------------------|------------|----------------------------------------|
| "light switch"  | Turn on the light switch                          | slide      | [-0.69, -0.05]                         |
| "slide cabinet" | Open the slide cabinet                            | slide      | 0.37                                   |
| "microwave"     | Open the microwave door                           | hinge      | -0.75                                   |
| "kettle"        | Move the kettle to the top left burner            | free       | [-0.23, 0.75, 1.62, 0.99, 0., 0., -0.06] |
        """
    ),
}


observation_description_all = {
    "halfcheetah": """
The observation space is a `Box(-Inf, Inf, (17,), float64)` where the elements are as follows:
| Num      | Observation                               | Min   | Max  | Type (Unit)              |
|----------|-------------------------------------------|-------|------|--------------------------|
| 0        | z-coordinate of the front tip             | -Inf  | Inf  | position (m)             |
| 1        | angle of the front tip                    | -Inf  | Inf  | angle (rad)              |
| 2        | angle of the back thigh                   | -Inf  | Inf  | angle (rad)              |
| 3        | angle of the back shin                    | -Inf  | Inf  | angle (rad)              |
| 4        | angle of the back foot                    | -Inf  | Inf  | angle (rad)              |
| 5        | angle of the front thigh                  | -Inf  | Inf  | angle (rad)              |
| 6        | angle of the front shin                   | -Inf  | Inf  | angle (rad)              |
| 7        | angle of the front foot                   | -Inf  | Inf  | angle (rad)              |
| 8        | velocity of the x-coordinate of front tip | -Inf  | Inf  | velocity (m/s)           |
| 9        | velocity of the z-coordinate of front tip | -Inf  | Inf  | velocity (m/s)           |
| 10       | angular velocity of the front tip         | -Inf  | Inf  | angular velocity (rad/s) |
| 11       | angular velocity of the back thigh        | -Inf  | Inf  | angular velocity (rad/s) |
| 12       | angular velocity of the back shin         | -Inf  | Inf  | angular velocity (rad/s) |
| 13       | angular velocity of the back foot         | -Inf  | Inf  | angular velocity (rad/s) |
| 14       | angular velocity of the front thigh       | -Inf  | Inf  | angular velocity (rad/s) |
| 15       | angular velocity of the front shin        | -Inf  | Inf  | angular velocity (rad/s) |
| 16       | angular velocity of the front foot        | -Inf  | Inf  | angular velocity (rad/s) |
""",
    "hopper": """
The observation space is a `Box(-Inf, Inf, (11,), float64)` where the elements are as follows:
| Num      | Observation                                      | Min   | Max   | Type (Unit)              |
|----------|--------------------------------------------------|-------|-------|---------------------------|
| 0        | z-coordinate of the torso (height of hopper)     | -Inf  | Inf   | position (m)             |
| 1        | angle of the torso                               | -Inf  | Inf   | angle (rad)              |
| 2        | angle of the thigh joint                         | -Inf  | Inf   | angle (rad)              |
| 3        | angle of the leg joint                           | -Inf  | Inf   | angle (rad)              |
| 4        | angle of the foot joint                          | -Inf  | Inf   | angle (rad)              |
| 5        | velocity of the x-coordinate of the torso        | -Inf  | Inf   | velocity (m/s)           |
| 6        | velocity of the z-coordinate (height) of torso   | -Inf  | Inf   | velocity (m/s)           |
| 7        | angular velocity of the angle of the torso       | -Inf  | Inf   | angular velocity (rad/s) |
| 8        | angular velocity of the thigh hinge              | -Inf  | Inf   | angular velocity (rad/s) |
| 9        | angular velocity of the leg hinge                | -Inf  | Inf   | angular velocity (rad/s) |
| 10       | angular velocity of the foot hinge               | -Inf  | Inf   | angular velocity (rad/s) |
""",
    "walker2d": """
The observation space is a `Box(-Inf, Inf, (17,), float64)` where the elements are as follows:
| Num      | Observation                                       | Min   | Max   | Type (Unit)              |
|----------|---------------------------------------------------|-------|-------|---------------------------|
| 0        | z-coordinate of the torso (height of Walker2d)   | -Inf  | Inf   | position (m)             |
| 1        | angle of the torso                               | -Inf  | Inf   | angle (rad)              |
| 2        | angle of the thigh joint                         | -Inf  | Inf   | angle (rad)              |
| 3        | angle of the leg joint                           | -Inf  | Inf   | angle (rad)              |
| 4        | angle of the foot joint                          | -Inf  | Inf   | angle (rad)              |
| 5        | angle of the left thigh joint                    | -Inf  | Inf   | angle (rad)              |
| 6        | angle of the left leg joint                      | -Inf  | Inf   | angle (rad)              |
| 7        | angle of the left foot joint                     | -Inf  | Inf   | angle (rad)              |
| 8        | velocity of the x-coordinate of the torso        | -Inf  | Inf   | velocity (m/s)           |
| 9        | velocity of the z-coordinate (height) of torso   | -Inf  | Inf   | velocity (m/s)           |
| 10       | angular velocity of the angle of the torso       | -Inf  | Inf   | angular velocity (rad/s) |
| 11       | angular velocity of the thigh hinge              | -Inf  | Inf   | angular velocity (rad/s) |
| 12       | angular velocity of the leg hinge                | -Inf  | Inf   | angular velocity (rad/s) |
| 13       | angular velocity of the foot hinge               | -Inf  | Inf   | angular velocity (rad/s) |
| 14       | angular velocity of the thigh hinge              | -Inf  | Inf   | angular velocity (rad/s) |
| 15       | angular velocity of the leg hinge                | -Inf  | Inf   | angular velocity (rad/s) |
| 16       | angular velocity of the foot hinge               | -Inf  | Inf   | angular velocity (rad/s) |
""",
    "antmaze": """
The observation space is a `Box(-Inf, Inf, (31,), float64)`, as follows:
| Num       | Observation                                             | Min   | Max   | Type (Unit)            |
|-----------|---------------------------------------------------------|-------|-------|------------------------|
| 0         | x-coordinate of the torso (centre)                     | -Inf  | Inf   | position (m)            |
| 1         | y-coordinate of the torso (centre)                     | -Inf  | Inf   | position (m)            |
| 2         | z-coordinate of the torso (centre)                     | -Inf  | Inf   | position (m)           |
| 3         | w-orientation of the torso (centre)                    | -Inf  | Inf   | angle (rad)            |
| 4         | x-orientation of the torso (centre)                    | -Inf  | Inf   | angle (rad)            |
| 5         | y-orientation of the torso (centre)                    | -Inf  | Inf   | angle (rad)            |
| 6         | z-orientation of the torso (centre)                    | -Inf  | Inf   | angle (rad)            |
| 7         | angle between torso and first link on front left       | -Inf  | Inf   | angle (rad)            |
| 8         | angle between the two links on the front left          | -Inf  | Inf   | angle (rad)            |
| 9         | angle between torso and first link on front right      | -Inf  | Inf   | angle (rad)            |
| 10         | angle between the two links on the front right         | -Inf  | Inf   | angle (rad)            |
| 11         | angle between torso and first link on back left        | -Inf  | Inf   | angle (rad)            |
| 12        | angle between the two links on the back left           | -Inf  | Inf   | angle (rad)            |
| 13        | angle between torso and first link on back right       | -Inf  | Inf   | angle (rad)            |
| 14        | angle between the two links on the back right          | -Inf  | Inf   | angle (rad)            |
| 15        | x-coordinate velocity of the torso                     | -Inf  | Inf   | velocity (m/s)         |
| 16        | y-coordinate velocity of the torso                     | -Inf  | Inf   | velocity (m/s)         |
| 17        | z-coordinate velocity of the torso                     | -Inf  | Inf   | velocity (m/s)         |
| 18        | x-coordinate angular velocity of the torso             | -Inf  | Inf   | angular velocity (rad/s) |
| 19        | y-coordinate angular velocity of the torso             | -Inf  | Inf   | angular velocity (rad/s) |
| 20        | z-coordinate angular velocity of the torso             | -Inf  | Inf   | angular velocity (rad/s) |
| 21        | angular velocity of angle between torso and front left link | -Inf | Inf | angle (rad)            |
| 22        | angular velocity of the angle between front left links | -Inf  | Inf   | angle (rad)            |
| 23        | angular velocity of angle between torso and front right link | -Inf | Inf | angle (rad)            |
| 24        | angular velocity of the angle between front right links| -Inf  | Inf   | angle (rad)            |
| 25        | angular velocity of angle between torso and back left link | -Inf | Inf | angle (rad)            |
| 26        | angular velocity of the angle between back left links  | -Inf  | Inf   | angle (rad)            |
| 27        | angular velocity of angle between torso and back right link | -Inf | Inf | angle (rad)            |
| 28        | angular velocity of the angle between back right links | -Inf  | Inf   | angle (rad)            |
| 29        | x-coordinate of the goal position                      | -Inf  | Inf     | position (m)            |
| 30        | y-coordinate of the goal position                      | -Inf  | Inf     | position (m)            |
""",
    "door": """
The observation space is of the type `Box(-inf, inf, (39,), float64)`. It contains information about the angular position of the finger joints, the pose of the palm of the hand, as well as state of the latch and door.
| Num | Observation                                                        | Min  | Max  | Unit                     |
|-----|--------------------------------------------------------------------|------|------|--------------------------|
| 0   | Angular position of the vertical arm joint                         | -Inf | Inf  | angle (rad)             |
| 1   | Angular position of the horizontal arm joint                       | -Inf | Inf  | angle (rad)             |
| 2   | Roll angular value of the arm                                      | -Inf | Inf  | angle (rad)             |
| 3   | Angular position of the horizontal wrist joint                     | -Inf | Inf  | angle (rad)             |
| 4   | Angular position of the vertical wrist joint                       | -Inf | Inf  | angle (rad)             |
| 5   | Horizontal angular position of the MCP joint of the forefinger    | -Inf | Inf  | angle (rad)             |
| 6   | Vertical angular position of the MCP joint of the forefinge       | -Inf | Inf  | angle (rad)             |
| 7   | Angular position of the PIP joint of the forefinger               | -Inf | Inf  | angle (rad)             |
| 8   | Angular position of the DIP joint of the forefinger               | -Inf | Inf  | angle (rad)             |
| 9   | Horizontal angular position of the MCP joint of the middle finger | -Inf | Inf  | angle (rad)             |
| 10  | Vertical angular position of the MCP joint of the middle finger   | -Inf | Inf  | angle (rad)             |
| 11  | Angular position of the PIP joint of the middle finger            | -Inf | Inf  | angle (rad)             |
| 12  | Angular position of the DIP joint of the middle finger            | -Inf | Inf  | angle (rad)             |
| 13  | Horizontal angular position of the MCP joint of the ring finger   | -Inf | Inf  | angle (rad)             |
| 14  | Vertical angular position of the MCP joint of the ring finger     | -Inf | Inf  | angle (rad)             |
| 15  | Angular position of the PIP joint of the ring finger              | -Inf | Inf  | angle (rad)             |
| 16  | Angular position of the DIP joint of the ring finger              | -Inf | Inf  | angle (rad)             |
| 17  | Angular position of the CMC joint of the little finger            | -Inf | Inf  | angle (rad)             |
| 18  | Horizontal angular position of the MCP joint of the little finger | -Inf | Inf  | angle (rad)             |
| 19  | Vertical angular position of the MCP joint of the little finger   | -Inf | Inf  | angle (rad)             |
| 20  | Angular position of the PIP joint of the little finger            | -Inf | Inf  | angle (rad)             |
| 21  | Angular position of the DIP joint of the little finger            | -Inf | Inf  | angle (rad)             |
| 22  | Horizontal angular position of the CMC joint of the thumb finger  | -Inf | Inf  | angle (rad)             |
| 23  | Vertical Angular position of the CMC joint of the thumb finger    | -Inf | Inf  | angle (rad)             |
| 24  | Horizontal angular position of the MCP joint of the thumb finger  | -Inf | Inf  | angle (rad)             |
| 25  | Vertical angular position of the MCP joint of the thumb finger    | -Inf | Inf  | angle (rad)             |
| 26  | Angular position of the IP joint of the thumb finger              | -Inf | Inf  | angle (rad)             |
| 27  | Angular position of the door latch                                 | -Inf | Inf  | angle (rad)             |
| 28  | Angular position of the door hinge                                 | -Inf | Inf  | angular velocity (rad/s)|
| 29  | Position of the center of the palm in the x direction             | -Inf | Inf  | position (m)            |
| 30  | Position of the center of the palm in the y direction             | -Inf | Inf  | position (m)            |
| 31  | Position of the center of the palm in the z direction             | -Inf | Inf  | position (m)            |
| 32  | x position of the handle of the door                              | -Inf | Inf  | position (m)            |
| 33  | y position of the handle of the door                              | -Inf | Inf  | position (m)            |
| 34  | z position of the handle of the door                              | -Inf | Inf  | position (m)            |
| 35  | x positional difference from palm to door handle                  | -Inf | Inf  | position (m)            |
| 36  | y positional difference from palm to door handle                  | -Inf | Inf  | position (m)            |
| 37  | z positional difference from palm to door handle                  | -Inf | Inf  | position (m)            |
| 38  | 1 if the door is open, otherwise -1                               | -1   | 1    | bool                    |
""",
    "hammer": """
The observation space is of the type `Box(-inf, inf, (46,), float64)`. It contains information about the angular position of the finger joints, the pose of the palm of the hand, the pose of the hammer and nail, and external forces on the nail.
| Num | Observation                                            | Min  | Max  | Unit               |
|-----|--------------------------------------------------------|------|------|--------------------|
| 0   | Angular position of the vertical arm joint             | -Inf | Inf  | angle (rad)        |
| 1   | Angular position of the horizontal arm joint           | -Inf | Inf  | angle (rad)        |
| 2   | Angular position of the horizontal wrist joint         | -Inf | Inf  | angle (rad)        |
| 3   | Angular position of the vertical wrist joint            | -Inf | Inf  | angle (rad)        |
| 4   | Horizontal angular position of the MCP joint of forefinger | -Inf | Inf  | angle (rad)        |
| 5   | Vertical angular position of the MCP joint of forefinger | -Inf | Inf  | angle (rad)        |
| 6   | Angular position of the PIP joint of the forefinger     | -Inf | Inf  | angle (rad)        |
| 7   | Angular position of the DIP joint of the forefinger     | -Inf | Inf  | angle (rad)        |
| 8   | Horizontal angular position of the MCP joint of middle finger | -Inf | Inf  | angle (rad)        |
| 9   | Vertical angular position of the MCP joint of middle finger | -Inf | Inf  | angle (rad)        |
| 10  | Angular position of the PIP joint of middle finger      | -Inf | Inf  | angle (rad)        |
| 11  | Angular position of the DIP joint of middle finger      | -Inf | Inf  | angle (rad)        |
| 12  | Horizontal angular position of the MCP joint of ring finger | -Inf | Inf  | angle (rad)        |
| 13  | Vertical angular position of the MCP joint of ring finger | -Inf | Inf  | angle (rad)        |
| 14  | Angular position of the PIP joint of ring finger        | -Inf | Inf  | angle (rad)        |
| 15  | Angular position of the DIP joint of ring finger        | -Inf | Inf  | angle (rad)        |
| 16  | Angular position of the CMC joint of the little finger  | -Inf | Inf  | angle (rad)        |
| 17  | Horizontal angular position of the MCP joint of little finger | -Inf | Inf  | angle (rad)        |
| 18  | Vertical angular position of the MCP joint of little finger | -Inf | Inf  | angle (rad)        |
| 19  | Angular position of the PIP joint of the little finger  | -Inf | Inf  | angle (rad)        |
| 20  | Angular position of the DIP joint of the little finger  | -Inf | Inf  | angle (rad)        |
| 21  | Horizontal angular position of the CMC joint of thumb   | -Inf | Inf  | angle (rad)        |
| 22  | Vertical Angular position of the CMC joint of thumb     | -Inf | Inf  | angle (rad)        |
| 23  | Horizontal angular position of the MCP joint of thumb   | -Inf | Inf  | angle (rad)        |
| 24  | Vertical angular position of the MCP joint of thumb     | -Inf | Inf  | angle (rad)        |
| 25  | Angular position of the IP joint of thumb                | -Inf | Inf  | angle (rad)        |
| 26  | Insertion displacement of nail                           | -Inf | Inf  | position (m)       |
| 27  | Linear velocity of the hammer in the x direction        | -1   | 1    | velocity (m/s)     |
| 28  | Linear velocity of the hammer in the y direction        | -1   | 1    | velocity (m/s)     |
| 29  | Linear velocity of the hammer in the z direction        | -1   | 1    | velocity (m/s)     |
| 30  | Angular velocity of the hammer around x axis            | -1   | 1    | angular velocity (rad/s) |
| 31  | Angular velocity of the hammer around y axis            | -1   | 1    | angular velocity (rad/s) |
| 32  | Angular velocity of the hammer around z axis            | -1   | 1    | angular velocity (rad/s) |
| 33  | Position of the center of the palm in the x direction   | -Inf | Inf  | position (m)       |
| 34  | Position of the center of the palm in the y direction   | -Inf | Inf  | position (m)       |
| 35  | Position of the center of the palm in the z direction   | -Inf | Inf  | position (m)       |
| 36  | Position of the hammer's center of mass in the x direction | -Inf | Inf  | position (m)       |
| 37  | Position of the hammer's center of mass in the y direction | -Inf | Inf  | position (m)       |
| 38  | Position of the hammer's center of mass in the z direction | -Inf | Inf  | position (m)       |
| 39  | Relative rotation of the hammer's center of mass with respect to x axis | -Inf | Inf  | angle (rad)        |
| 40  | Relative rotation of the hammer's center of mass with respect to y axis | -Inf | Inf  | angle (rad)        |
| 41  | Relative rotation of the hammer's center of mass with respect to z axis | -Inf | Inf  | angle (rad)        |
| 42  | Position of the nail in the x direction                  | -Inf | Inf  | position (m)       |
| 43  | Position of the nail in the y direction                  | -Inf | Inf  | position (m)       |
| 44  | Position of the nail in the z direction                  | -Inf | Inf  | position (m)       |
| 45  | Linear force exerted on the head of the nail             | -1   | 1    | Newton (N)         |
""",
    "pen": """
The observation space is of the type `Box(-inf, inf, (45,), float64)`. It contains information about the angular position of the finger joints, the pose of the palm of the hand, as well as the pose of the real pen and target goal.
| Num | Observation                                               | Min  | Max  | Unit               |
|-----|-----------------------------------------------------------|------|------|--------------------|
| 0   | Angular position of the horizontal wrist joint            | -Inf | Inf  | angle (rad)        |
| 1   | Angular position of the vertical wrist joint              | -Inf | Inf  | angle (rad)        |
| 2   | Horizontal angular position of the MCP joint of the forefinger | -Inf | Inf  | angle (rad)        |
| 3   | Vertical angular position of the MCP joint of the forefinger | -Inf | Inf  | angle (rad)        |
| 4   | Angular position of the PIP joint of the forefinger        | -Inf | Inf  | angle (rad)        |
| 5   | Angular position of the DIP joint of the forefinger        | -Inf | Inf  | angle (rad)        |
| 6   | Horizontal angular position of the MCP joint of the middle finger | -Inf | Inf  | angle (rad)        |
| 7   | Vertical angular position of the MCP joint of the middle finger  | -Inf | Inf  | angle (rad)        |
| 8   | Angular position of the PIP joint of the middle finger     | -Inf | Inf  | angle (rad)        |
| 9   | Angular position of the DIP joint of the middle finger     | -Inf | Inf  | angle (rad)        |
| 10  | Horizontal angular position of the MCP joint of the ring finger | -Inf | Inf  | angle (rad)        |
| 11  | Vertical angular position of the MCP joint of the ring finger  | -Inf | Inf  | angle (rad)        |
| 12  | Angular position of the PIP joint of the ring finger       | -Inf | Inf  | angle (rad)        |
| 13  | Angular position of the DIP joint of the ring finger       | -Inf | Inf  | angle (rad)        |
| 14  | Angular position of the CMC joint of the little finger     | -Inf | Inf  | angle (rad)        |
| 15  | Horizontal angular position of the MCP joint of the little finger | -Inf | Inf  | angle (rad)        |
| 16  | Vertical angular position of the MCP joint of the little finger  | -Inf | Inf  | angle (rad)        |
| 17  | Angular position of the PIP joint of the little finger     | -Inf | Inf  | angle (rad)        |
| 18  | Angular position of the DIP joint of the little finger     | -Inf | Inf  | angle (rad)        |
| 19  | Horizontal angular position of the CMC joint of the thumb finger | -Inf | Inf  | angle (rad)        |
| 20  | Vertical Angular position of the CMC joint of the thumb finger  | -Inf | Inf  | angle (rad)        |
| 21  | Horizontal angular position of the MCP joint of the thumb finger | -Inf | Inf  | angle (rad)        |
| 22  | Vertical angular position of the MCP joint of the thumb finger  | -Inf | Inf  | angle (rad)        |
| 23  | Angular position of the IP joint of the thumb finger       | -Inf | Inf  | angle (rad)        |
| 24  | Position of the pen's center of mass in the x direction    | -Inf | Inf  | position (m)       |
| 25  | Position of the pen's center of mass in the y direction    | -Inf | Inf  | position (m)       |
| 26  | Position of the pen's center of mass in the z direction    | -Inf | Inf  | position (m)       |
| 27  | Linear velocity of the pen in the x direction              | -Inf | Inf  | velocity (m/s)     |
| 28  | Linear velocity of the pen in the y direction              | -Inf | Inf  | velocity (m/s)     |
| 29  | Linear velocity of the pen in the z direction              | -Inf | Inf  | velocity (m/s)     |
| 30  | Angular velocity of the pen around x axis                   | -Inf | Inf  | angular velocity (rad/s) |
| 31  | Angular velocity of the pen around y axis                   | -Inf | Inf  | angular velocity (rad/s) |
| 32  | Angular velocity of the pen around z axis                   | -Inf | Inf  | angular velocity (rad/s) |
| 33  | Relative rotation of the pen's center of mass with respect to the x axis | -Inf | Inf  | angle (rad)        |
| 34  | Relative rotation of the pen's center of mass with respect to the y axis | -Inf | Inf  | angle (rad)        |
| 35  | Relative rotation of the pen's center of mass with respect to the z axis | -Inf | Inf  | angle (rad)        |
| 36  | Relative rotation of the target's center of mass with respect to the x axis | -Inf | Inf  | angle (rad)        |
| 37  | Relative rotation of the target's center of mass with respect to the y axis | -Inf | Inf  | angle (rad)        |
| 38  | Relative rotation of the target's center of mass with respect to the z axis | -Inf | Inf  | angle (rad)        |
| 39  | x linear distance from pen to target goal                    | -Inf | Inf  | position (m)       |
| 40  | y linear distance from pen to target goal                    | -Inf | Inf  | position (m)       |
| 41  | z linear distance from pen to target goal                    | -Inf | Inf  | position (m)       |
| 42  | Rotational distance from pen to target goal with respect to the x axis | -Inf | Inf  | angle (rad)        |
| 43  | Rotational distance from pen to target goal with respect to the y axis | -Inf | Inf  | angle (rad)        |
| 44  | Rotational distance from pen to target goal with respect to the z axis | -Inf | Inf  | angle (rad)        |
""",
    "relocate": """
The observation space is of the type `Box(-inf, inf, (39,), float64)`. It contains information about the angular position of the finger joints, the pose of the palm of the hand, as well as kinematic information about the ball and target.
| Num | Observation                                                     | Min   | Max   | Unit          |
|-----|------------------------------------------------------------------|-------|-------|---------------|
| 0   | Translation of the arm in the x direction                       | -Inf  | Inf   | position (m)  |
| 1   | Translation of the arm in the y direction                       | -Inf  | Inf   | position (m)  |
| 2   | Translation of the arm in the z direction                       | -Inf  | Inf   | position (m)  |
| 3   | Angular position of the vertical arm joint                      | -Inf  | Inf   | angle (rad)   |
| 4   | Angular position of the horizontal arm joint                    | -Inf  | Inf   | angle (rad)   |
| 5   | Roll angular value of the arm                                   | -Inf  | Inf   | angle (rad)   |
| 6   | Angular position of the horizontal wrist joint                  | -Inf  | Inf   | angle (rad)   |
| 7   | Angular position of the vertical wrist joint                    | -Inf  | Inf   | angle (rad)   |
| 8   | Horizontal angular position of the MCP joint of the forefinger | -Inf  | Inf   | angle (rad)   |
| 9   | Vertical angular position of the MCP joint of the forefinger   | -Inf  | Inf   | angle (rad)   |
| 10  | Angular position of the PIP joint of the forefinger             | -Inf  | Inf   | angle (rad)   |
| 11  | Angular position of the DIP joint of the forefinger             | -Inf  | Inf   | angle (rad)   |
| 12  | Horizontal angular position of the MCP joint of the middle finger | -Inf  | Inf | angle (rad)   |
| 13  | Vertical angular position of the MCP joint of the middle finger | -Inf  | Inf  | angle (rad)   |
| 14  | Angular position of the PIP joint of the middle finger          | -Inf  | Inf   | angle (rad)   |
| 15  | Angular position of the DIP joint of the middle finger          | -Inf  | Inf   | angle (rad)   |
| 16  | Horizontal angular position of the MCP joint of the ring finger | -Inf  | Inf  | angle (rad)   |
| 17  | Vertical angular position of the MCP joint of the ring finger   | -Inf  | Inf  | angle (rad)   |
| 18  | Angular position of the PIP joint of the ring finger            | -Inf  | Inf   | angle (rad)   |
| 19  | Angular position of the DIP joint of the ring finger            | -Inf  | Inf   | angle (rad)   |
| 20  | Angular position of the CMC joint of the little finger          | -Inf  | Inf   | angle (rad)   |
| 21  | Horizontal angular position of the MCP joint of the little finger | -Inf | Inf  | angle (rad)   |
| 22  | Vertical angular position of the MCP joint of the little finger | -Inf  | Inf  | angle (rad)   |
| 23  | Angular position of the PIP joint of the little finger          | -Inf  | Inf   | angle (rad)   |
| 24  | Angular position of the DIP joint of the little finger          | -Inf  | Inf   | angle (rad)   |
| 25  | Horizontal angular position of the CMC joint of the thumb finger | -Inf | Inf  | angle (rad)   |
| 26  | Vertical Angular position of the CMC joint of the thumb finger  | -Inf  | Inf  | angle (rad)   |
| 27  | Horizontal angular position of the MCP joint of the thumb finger | -Inf | Inf  | angle (rad)   |
| 28  | Vertical angular position of the MCP joint of the thumb finger  | -Inf  | Inf  | angle (rad)   |
| 29  | Angular position of the IP joint of the thumb finger            | -Inf  | Inf   | angle (rad)   |
| 30  | x positional difference from the palm of the hand to the ball   | -Inf  | Inf   | position (m)  |
| 31  | y positional difference from the palm of the hand to the ball   | -Inf  | Inf   | position (m)  |
| 32  | z positional difference from the palm of the hand to the ball   | -Inf  | Inf   | position (m)  |
| 33  | x positional difference from the palm of the hand to the target | -Inf  | Inf   | position (m)  |
| 34  | y positional difference from the palm of the hand to the target | -Inf  | Inf   | position (m)  |
| 35  | z positional difference from the palm of the hand to the target | -Inf  | Inf   | position (m)  |
| 36  | x positional difference from the ball to the target             | -Inf  | Inf   | position (m)  |
| 37  | y positional difference from the ball to the target             | -Inf  | Inf   | position (m)  |
| 38  | z positional difference from the ball to the target             | -Inf  | Inf   | position (m)  |
""",
    "kitchen": """
In the observation space, the array indices corresponding to the current values of each task goal are as follows:
OBS_ELEMENT_INDICES = {
    'light switch': np.array([17, 18]),
    'slide cabinet': np.array([19]),
    'microwave': np.array([22]),
    'kettle': np.array([23, 24, 25, 26, 27, 28, 29]),
    }
""",

}

action_description_all = {
    "halfcheetah": """
The action space is a `Box(-1, 1, (6,), float32)`. An action represents the torques applied at the hinge joints.
| Num | Action                                  | Control Min | Control Max | Type (Unit)     |
|-----|------------------------------------------|--------------|--------------|------------------|
| 0   | Torque applied on the back thigh rotor  | -1          | 1            | torque (N m)     |
| 1   | Torque applied on the back shin rotor   | -1          | 1            | torque (N m)     |
| 2   | Torque applied on the back foot rotor   | -1          | 1            | torque (N m)     |
| 3   | Torque applied on the front thigh rotor | -1          | 1            | torque (N m)     |
| 4   | Torque applied on the front shin rotor  | -1          | 1            | torque (N m)     |
| 5   | Torque applied on the front foot rotor  | -1          | 1            | torque (N m)     |
""",
    "hopper": """
The action space is a `Box(-1, 1, (3,), float32)`. An action represents the torques applied at the hinge joints.
| Num | Action                            | Control Min | Control Max | Type (Unit)   |
|-----|-----------------------------------|-------------|-------------|---------------|
| 0   | Torque applied on the thigh rotor | -1          | 1           | torque (N m)  |
| 1   | Torque applied on the leg rotor   | -1          | 1           | torque (N m)  |
| 2   | Torque applied on the foot rotor  | -1          | 1           | torque (N m)  |
""",
    "walker2d": """
The action space is a `Box(-1, 1, (6,), float32)`. An action represents the torques applied at the hinge joints.
| Num | Action                                | Control Min | Control Max | Type (Unit)     |
|-----|---------------------------------------|-------------|-------------|-----------------|
| 0   | Torque applied on the thigh rotor     | -1          | 1           | torque (N m)    |
| 1   | Torque applied on the leg rotor       | -1          | 1           | torque (N m)    |
| 2   | Torque applied on the foot rotor      | -1          | 1           | torque (N m)    |
| 3   | Torque applied on the left thigh rotor| -1          | 1           | torque (N m)    |
| 4   | Torque applied on the left leg rotor  | -1          | 1           | torque (N m)    |
| 5   | Torque applied on the left foot rotor | -1          | 1           | torque (N m)    |
""",
    "antmaze": """
The action space is a `Box(-1, 1, (8,), float32)`. An action represents the torques applied at the hinge joints.
| Num | Action                                                           | Control Min | Control Max | Type (Unit)    |
|-----|------------------------------------------------------------------|-------------|-------------|----------------|
| 0   | Torque applied on the rotor between the torso and back right hip | -1          | 1           | torque (N m)   |
| 1   | Torque applied on the rotor between the back right two links     | -1          | 1           | torque (N m)   |
| 2   | Torque applied on the rotor between the torso and front left hip | -1          | 1           | torque (N m)   |
| 3   | Torque applied on the rotor between the front left two links     | -1          | 1           | torque (N m)   |
| 4   | Torque applied on the rotor between the torso and front right hip| -1          | 1           | torque (N m)   |
| 5   | Torque applied on the rotor between the front right two links    | -1          | 1           | torque (N m)   |
| 6   | Torque applied on the rotor between the torso and back left hip  | -1          | 1           | torque (N m)   |
| 7   | Torque applied on the rotor between the back left two links      | -1          | 1           | torque (N m)   |
""",
    "door": """
The action space is a `Box(-1.0, 1.0, (28,), float32)`. The control actions are absolute angular positions of the Adroit hand joints. The input of the control actions is set to a range between -1 and 1 by scaling the real actuator angle ranges in radians. The elements of the action array are the following:
| Num | Action                                                                 | Control Min | Control Max | Unit           |
|-----|------------------------------------------------------------------------|-------------|-------------|----------------|
| 0   | Linear translation of the full arm towards the door                   | -1          | 1           | position (m)   |
| 1   | Angular up and down movement of the full arm                          | -1          | 1           | angle (rad)    |
| 2   | Angular left and right and down movement of the full arm             | -1          | 1           | angle (rad)    |
| 3   | Roll angular movement of the full arm                                 | -1          | 1           | angle (rad)    |
| 4   | Angular position of the horizontal wrist joint (radial/ulnar deviation) | -1          | 1           | angle (rad)    |
| 5   | Angular position of the vertical wrist joint (flexion/extension)     | -1          | 1           | angle (rad)    |
| 6   | Horizontal angular position of the MCP joint of the forefinger       | -1          | 1           | angle (rad)    |
| 7   | Vertical angular position of the MCP joint of the forefinger         | -1          | 1           | angle (rad)    |
| 8   | Angular position of the PIP joint of the forefinger                  | -1          | 1           | angle (rad)    |
| 9   | Angular position of the DIP joint of the forefinger                  | -1          | 1           | angle (rad)    |
| 10  | Horizontal angular position of the MCP joint of the middle finger     | -1          | 1           | angle (rad)    |
| 11  | Vertical angular position of the MCP joint of the middle finger       | -1          | 1           | angle (rad)    |
| 12  | Angular position of the PIP joint of the middle finger               | -1          | 1           | angle (rad)    |
| 13  | Angular position of the DIP joint of the middle finger               | -1          | 1           | angle (rad)    |
| 14  | Horizontal angular position of the MCP joint of the ring finger       | -1          | 1           | angle (rad)    |
| 15  | Vertical angular position of the MCP joint of the ring finger         | -1          | 1           | angle (rad)    |
| 16  | Angular position of the PIP joint of the ring finger                 | -1          | 1           | angle (rad)    |
| 17  | Angular position of the DIP joint of the ring finger                 | -1          | 1           | angle (rad)    |
| 18  | Angular position of the CMC joint of the little finger               | -1          | 1           | angle (rad)    |
| 19  | Horizontal angular position of the MCP joint of the little finger     | -1          | 1           | angle (rad)    |
| 20  | Vertical angular position of the MCP joint of the little finger       | -1          | 1           | angle (rad)    |
| 21  | Angular position of the PIP joint of the little finger               | -1          | 1           | angle (rad)    |
| 22  | Angular position of the DIP joint of the little finger               | -1          | 1           | angle (rad)    |
| 23  | Horizontal angular position of the CMC joint of the thumb finger      | -1          | 1           | angle (rad)    |
| 24  | Vertical Angular position of the CMC joint of the thumb finger        | -1          | 1           | angle (rad)    |
| 25  | Horizontal angular position of the MCP joint of the thumb finger      | -1          | 1           | angle (rad)    |
| 26  | Vertical angular position of the MCP joint of the thumb finger        | -1          | 1           | angle (rad)    |
| 27  | Angular position of the IP joint of the thumb finger                 | -1          | 1           | angle (rad)    |
""",
    "hammer": """
The action space is a `Box(-1.0, 1.0, (26,), float32)`. The control actions are absolute angular positions of the Adroit hand joints. The input of the control actions is set to a range between -1 and 1 by scaling the real actuator angle ranges in radians. The elements of the action array are the following:
| Num | Action                                                                                       | Control Min | Control Max | Unit        |
|-----|----------------------------------------------------------------------------------------------|-------------|-------------|-------------|
| 0   | Angular up and down movement of the full arm                                                 | -1          | 1           | angle (rad) |
| 1   | Angular left and right and down movement of the full arm                                     | -1          | 1           | angle (rad) |
| 2   | Angular position of the horizontal wrist joint (radial/ulnar deviation)                      | -1          | 1           | angle (rad) |
| 3   | Angular position of the vertical wrist joint (flexion/extension)                             | -1          | 1           | angle (rad) |
| 4   | Horizontal angular position of the MCP joint of the forefinger (adduction/abduction)         | -1          | 1           | angle (rad) |
| 5   | Vertical angular position of the MCP joint of the forefinger (flexion/extension)             | -1          | 1           | angle (rad) |
| 6   | Angular position of the PIP joint of the forefinger (flexion/extension)                      | -1          | 1           | angle (rad) |
| 7   | Angular position of the DIP joint of the forefinger                                          | -1          | 1           | angle (rad) |
| 8   | Horizontal angular position of the MCP joint of the middle finger (adduction/abduction)      | -1          | 1           | angle (rad) |
| 9   | Vertical angular position of the MCP joint of the middle finger (flexion/extension)          | -1          | 1           | angle (rad) |
| 10  | Angular position of the PIP joint of the middle finger (flexion/extension)                   | -1          | 1           | angle (rad) |
| 11  | Angular position of the DIP joint of the middle finger                                       | -1          | 1           | angle (rad) |
| 12  | Horizontal angular position of the MCP joint of the ring finger (adduction/abduction)        | -1          | 1           | angle (rad) |
| 13  | Vertical angular position of the MCP joint of the ring finger (flexion/extension)            | -1          | 1           | angle (rad) |
| 14  | Angular position of the PIP joint of the ring finger                                         | -1          | 1           | angle (rad) |
| 15  | Angular position of the DIP joint of the ring finger                                         | -1          | 1           | angle (rad) |
| 16  | Angular position of the CMC joint of the little finger                                       | -1          | 1           | angle (rad) |
| 17  | Horizontal angular position of the MCP joint of the little finger (adduction/abduction)      | -1          | 1           | angle (rad) |
| 18  | Vertical angular position of the MCP joint of the little finger (flexion/extension)          | -1          | 1           | angle (rad) |
| 19  | Angular position of the PIP joint of the little finger (flexion/extension)                   | -1          | 1           | angle (rad) |
| 20  | Angular position of the DIP joint of the little finger                                       | -1          | 1           | angle (rad) |
| 21  | Horizontal angular position of the CMC joint of the thumb finger                             | -1          | 1           | angle (rad) |
| 22  | Vertical Angular position of the CMC joint of the thumb finger                               | -1          | 1           | angle (rad) |
| 23  | Horizontal angular position of the MCP joint of the thumb finger (adduction/abduction)       | -1          | 1           | angle (rad) |
| 24  | Vertical angular position of the MCP joint of the thumb finger (flexion/extension)           | -1          | 1           | angle (rad) |
| 25  | Angular position of the IP joint of the thumb finger (flexion/extension)                     | -1          | 1           | angle (rad) |
""",
    "pen": """
The action space is a `Box(-1.0, 1.0, (24,), float32)`. The control actions are absolute angular positions of the Adroit hand joints. The input of the control actions is set to a range between -1 and 1 by scaling the real actuator angle ranges in radians. The elements of the action array are the following:
| Num | Action                                                                                   | Control Min | Control Max | Unit         |
|-----|------------------------------------------------------------------------------------------|-------------|-------------|--------------|
| 0   | Angular position of the horizontal wrist joint (radial/ulnar deviation)                  | -1          | 1           | angle (rad)  |
| 1   | Angular position of the vertical wrist joint (flexion/extension)                        | -1          | 1           | angle (rad)  |
| 2   | Horizontal angular position of the MCP joint of the forefinger (adduction/abduction)    | -1          | 1           | angle (rad)  |
| 3   | Vertical angular position of the MCP joint of the forefinger (flexion/extension)        | -1          | 1           | angle (rad)  |
| 4   | Angular position of the PIP joint of the forefinger (flexion/extension)                 | -1          | 1           | angle (rad)  |
| 5   | Angular position of the DIP joint of the forefinger                                      | -1          | 1           | angle (rad)  |
| 6   | Horizontal angular position of the MCP joint of the middle finger (adduction/abduction) | -1          | 1           | angle (rad)  |
| 7   | Vertical angular position of the MCP joint of the middle finger (flexion/extension)     | -1          | 1           | angle (rad)  |
| 8   | Angular position of the PIP joint of the middle finger (flexion/extension)              | -1          | 1           | angle (rad)  |
| 9   | Angular position of the DIP joint of the middle finger                                  | -1          | 1           | angle (rad)  |
| 10  | Horizontal angular position of the MCP joint of the ring finger (adduction/abduction)   | -1          | 1           | angle (rad)  |
| 11  | Vertical angular position of the MCP joint of the ring finger (flexion/extension)       | -1          | 1           | angle (rad)  |
| 12  | Angular position of the PIP joint of the ring finger                                    | -1          | 1           | angle (rad)  |
| 13  | Angular position of the DIP joint of the ring finger                                    | -1          | 1           | angle (rad)  |
| 14  | Angular position of the CMC joint of the little finger                                  | -1          | 1           | angle (rad)  |
| 15  | Horizontal angular position of the MCP joint of the little finger (adduction/abduction) | -1          | 1           | angle (rad)  |
| 16  | Vertical angular position of the MCP joint of the little finger (flexion/extension)     | -1          | 1           | angle (rad)  |
| 17  | Angular position of the PIP joint of the little finger (flexion/extension)              | -1          | 1           | angle (rad)  |
| 18  | Angular position of the DIP joint of the little finger                                  | -1          | 1           | angle (rad)  |
| 19  | Horizontal angular position of the CMC joint of the thumb finger                        | -1          | 1           | angle (rad)  |
| 20  | Vertical Angular position of the CMC joint of the thumb finger                          | -1          | 1           | angle (rad)  |
| 21  | Horizontal angular position of the MCP joint of the thumb finger (adduction/abduction) | -1          | 1           | angle (rad)  |
| 22  | Vertical angular position of the MCP joint of the thumb finger (flexion/extension)     | -1          | 1           | angle (rad)  |
| 23  | Angular position of the IP joint of the thumb finger (flexion/extension)                | -1          | 1           | angle (rad)  |
""",
    "relocate": """
The action space is a `Box(-1.0, 1.0, (30,), float32)`. The control actions are absolute angular positions of the Adroit hand joints. The input of the control actions is set to a range between -1 and 1 by scaling the real actuator angle ranges in radians. The elements of the action array are the following:
| Num | Action                                                                                           | Control Min | Control Max | Unit         |
|-----|--------------------------------------------------------------------------------------------------|-----|-----|--------------|
| 0   | Linear translation of the full arm in x direction                                                | -1  | 1   | position (m) |
| 1   | Linear translation of the full arm in y direction                                                | -1  | 1   | position (m) |
| 2   | Linear translation of the full arm in z direction                                                | -1  | 1   | position (m) |
| 3   | Angular up and down movement of the full arm                                                     | -1  | 1   | angle (rad)  |
| 4   | Angular left and right and down movement of the full arm                                         | -1  | 1   | angle (rad)  |
| 5   | Roll angular movement of the full arm                                                            | -1  | 1   | angle (rad)  |
| 6   | Angular position of the horizontal wrist joint (radial/ulnar deviation)                          | -1  | 1   | angle (rad)  |
| 7   | Angular position of the horizontal wrist joint (flexion/extension)                               | -1  | 1   | angle (rad)  |
| 8   | Horizontal angular position of the MCP joint of the forefinger (adduction/abduction)             | -1  | 1   | angle (rad)  |
| 9   | Vertical angular position of the MCP joint of the forefinger (flexion/extension)                 | -1  | 1   | angle (rad)  |
| 10  | Angular position of the PIP joint of the forefinger (flexion/extension)                          | -1  | 1   | angle (rad)  |
| 11  | Angular position of the DIP joint of the forefinger                                              | -1  | 1   | angle (rad)  |
| 12  | Horizontal angular position of the MCP joint of the middle finger (adduction/abduction)          | -1  | 1   | angle (rad)  |
| 13  | Vertical angular position of the MCP joint of the middle finger (flexion/extension)              | -1  | 1   | angle (rad)  |
| 14  | Angular position of the PIP joint of the middle finger (flexion/extension)                       | -1  | 1   | angle (rad)  |
| 15  | Angular position of the DIP joint of the middle finger                                           | -1  | 1   | angle (rad)  |
| 16  | Horizontal angular position of the MCP joint of the ring finger (adduction/abduction)            | -1  | 1   | angle (rad)  |
| 17  | Vertical angular position of the MCP joint of the ring finger (flexion/extension)                | -1  | 1   | angle (rad)  |
| 18  | Angular position of the PIP joint of the ring finger                                             | -1  | 1   | angle (rad)  |
| 19  | Angular position of the DIP joint of the ring finger                                             | -1  | 1   | angle (rad)  |
| 20  | Angular position of the CMC joint of the little finger                                           | -1  | 1   | angle (rad)  |
| 21  | Horizontal angular position of the MCP joint of the little finger (adduction/abduction)          | -1  | 1   | angle (rad)  |
| 22  | Vertical angular position of the MCP joint of the little finger (flexion/extension)              | -1  | 1   | angle (rad)  |
| 23  | Angular position of the PIP joint of the little finger (flexion/extension)                       | -1  | 1   | angle (rad)  |
| 24  | Angular position of the DIP joint of the little finger                                           | -1  | 1   | angle (rad)  |
| 25  | Horizontal angular position of the CMC joint of the thumb finger                                 | -1  | 1   | angle (rad)  |
| 26  | Vertical Angular position of the CMC joint of the thumb finger                                   | -1  | 1   | angle (rad)  |
| 27  | Horizontal angular position of the MCP joint of the thumb finger (adduction/abduction)           | -1  | 1   | angle (rad)  |
| 28  | Vertical angular position of the MCP joint of the thumb finger (flexion/extension)               | -1  | 1   | angle (rad)  |
| 29  | Angular position of the IP joint of the thumb finger (flexion/extension)                         | -1  | 1   | angle (rad)  |
""",
    "kitchen": """
The default joint actuators in the Franka MuJoCo model are position controlled. However, the action space of the environment are joint velocities clipped between -1 and 1 rad/s. The space is a `Box(-1.0, 1.0, (9,), float32)`. The desired joint position control input is estimated in each time step with the current joint position values and the desired velocity action:
| Num | Action                                                       | Action Min | Action Max | Joint | Unit  |
|-----|--------------------------------------------------------------|------------|------------|--------|--------|
| 0   | `robot:panda0_joint1` angular velocity                       | -1         | 1          | hinge  | rad/s  |
| 1   | `robot:panda0_joint2` angular velocity                       | -1         | 1          | hinge  | rad/s  |
| 2   | `robot:panda0_joint3` angular velocity                       | -1         | 1          | hinge  | rad/s  |
| 3   | `robot:panda0_joint4` angular velocity                       | -1         | 1          | hinge  | rad/s  |
| 4   | `robot:panda0_joint5` angular velocity                       | -1         | 1          | hinge  | rad/s  |
| 5   | `robot:panda0_joint6` angular velocity                       | -1         | 1          | hinge  | rad/s  |
| 6   | `robot:panda0_joint7` angular velocity                       | -1         | 1          | hinge  | rad/s  |
| 7   | `robot:r_gripper_finger_joint` linear velocity               | -1         | 1          | slide  | m/s    |
| 8   | `robot:l_gripper_finger_joint` linear velocity               | -1         | 1          | slide  | m/s    |
""",
}

# delta_t_description_all = {
#     "halfcheetah": """
# The `frame_skip` parameter is 5, where the frametime is 0.01, making the *dt* = 5 * 0.01 = 0.05.
# """,
#     "antmaze": """
# The `frame_skip` parameter is 5, where the frametime is 0.01, making the *dt* = 5 * 0.01 = 0.05.
# """,
# }

def get_d4rl_prompt(full_env_name: str, design_mode: str) -> str:
    """Assemble the reward-design prompt for a D4RL environment.

    Args:
        full_env_name: Full environment identifier. The text before the
            first "-" is used as the key for the observation and action
            description tables. When the name contains "antmaze", the full
            name (not the prefix) is the key for the task description.
        design_mode: Key used to select the reward template and common tips
            from ``common_prompt``. The action description section is
            appended only when this string contains the character "a".

    Returns:
        The prompt: role instructions, task description and observation
        description, optionally followed by the action description.

    Raises:
        KeyError: If a required key is missing from one of the description
            tables (the action table is consulted even when its text ends
            up unused).
    """
    base_name = full_env_name.split("-")[0]

    # Antmaze task descriptions are keyed by the full name; every other
    # environment is keyed by its prefix.
    task_key = full_env_name if "antmaze" in full_env_name else base_name
    task_text = task_description_all[task_key]
    observation_text = observation_description_all[base_name]
    action_text = action_description_all[base_name]

    sections = [
        common_prompt.role_instructor.format(
            reward_template=common_prompt.reward_template_dict[design_mode],
            common_tips_template=common_prompt.common_tips_dict[design_mode],
        ),
        common_prompt.task_description_template.format(
            task_description=task_text
        ),
        common_prompt.observation_description_template.format(
            observation_description=observation_text
        ),
    ]
    if "a" in design_mode:
        sections.append(
            common_prompt.action_description_template.format(
                action_description=action_text
            )
        )
    # The delta-t description section is currently disabled.
    return "".join(sections)
