from abc import abstractmethod
from typing import final

import gymnasium as gym
import stable_baselines3
from stable_baselines3 import PPO
import numpy as np
import torch
import torch.nn as nn
from stable_baselines3 import A2C, PPO

from config import *
from tqdm import tqdm
from together import Together
from Testers.Test import LMFeedbackVerifier
from abc import ABC, abstractmethod
import json, requests


def robomimic_obs_extractor(data, condition_list):
    """Render one robomimic observation as text for a language-model prompt.

    Args:
        data: Mapping with a "state" entry that maps robomimic observation
            names to array-likes (anything ``np.asarray`` accepts).
        condition_list: Collection of condition flags. If it contains
            "egocentric", the object location is described in words;
            otherwise the raw values are listed line by line.

    Returns:
        A multi-line string; every line is terminated by a newline.

    Raises:
        KeyError: If "state", or an observation the selected branch reads,
            is missing.
    """
    used_keys = ('robot0_gripper_qpos', 'robot0_eef_quat', 'object', 'robot0_eef_pos')
    state = data["state"]
    # Convert only the whitelisted entries that are present, so unrelated
    # state entries (which may not be array-convertible) cannot break the call.
    picked_dict = {key: np.asarray(state[key]) for key in used_keys if key in state}

    # Layout of "object" as labelled by this function's own output:
    # [0:3] object position, [7:] object position relative to the gripper.
    # (An earlier version also labelled [3:7] as the object quaternion.)
    if "egocentric" in condition_list:
        # NOTE(review): the sentences below describe offsets from the gripper,
        # yet they are computed from object[0:3], which the other branch labels
        # as the plain object position. Confirm whether object[7:] was meant.
        # NOTE(review): no gripper status is reported in this branch, although
        # the egocentric base prompt announces a "Gripper finger state" line.
        object_position = picked_dict["object"][0:3]
        x_offset = object_position[0]
        y_offset = object_position[1]
        z_offset = object_position[2]

        lr = "left" if y_offset < 0 else "right"
        fb = "in front of" if x_offset > 0 else "behind"
        up = "above" if z_offset > 0 else "below"

        # "in front of"/"behind"/"above"/"below" are already prepositions, so
        # they must not be wrapped in "to the ... of" (which used to yield
        # "to the in front of of your gripper").
        sentences = [
            f"The object is {abs(y_offset)} to the {lr} of your gripper.",
            f"The object is {abs(x_offset)} {fb} your gripper.",
            f"The object is {abs(z_offset)} {up} your gripper.",
        ]
        return "\n".join(sentences) + "\n"

    lines = [
        "Gripper position: " + str(picked_dict["robot0_eef_pos"]),
        "Object position: " + str(picked_dict["object"][0:3]),
        "Object relative position to gripper: " + str(picked_dict["object"][7:]),
    ]
    finger_qpos = picked_dict["robot0_gripper_qpos"]
    lines.append("Gripper finger position: " + str(finger_qpos))
    finger_gap = finger_qpos[0] - finger_qpos[1]
    # This threshold is based on the object size, i.e. the smallest possible
    # distance between the gripper fingers; a wider gap is reported as "+1".
    gripper_status = "+1" if finger_gap > 0.054 else "-1"
    lines.append("Gripper status: " + gripper_status)
    return "\n".join(lines) + "\n"


def robomimic_text_rendering_object_position(object_position, threshold=0.28):
    """Describe in words which side of the object the end effector is on.

    Args:
        object_position: Indexable with at least three numeric components
            (x, y, z). The generated text calls it the object position
            relative to the end effector.
        threshold: Magnitude a component must exceed before its axis is
            mentioned. Defaults to 0.28, the value that used to be
            hard-coded here.

    Returns:
        A fixed explanation of how each move changes the relative position,
        followed by one summary sentence. When no component exceeds the
        threshold the summary says so, instead of the malformed
        "on the  side of the object." produced previously.
    """
    # Built line by line so the trailing spaces of the prompt text stay visible.
    explanation_str = "\n".join((
        "Here is how will your action affect the object relative position to the end effector:",
        "Move backward: decrease the x position ",
        "Move forward: increase the x position ",
        "Move left: decrease the y position ",
        "Move right: increase the y position ",
        "Move up: increase the z position ",
        "Move down: decrease the z position",
    ))

    # (label when component > threshold, label when component < -threshold),
    # listed in x, y, z order.
    axis_labels = (("front", "back"), ("right", "left"), ("upper", "lower"))
    moving_direction_list = []
    for axis, (positive_label, negative_label) in enumerate(axis_labels):
        component = object_position[axis]
        if component > threshold:
            moving_direction_list.append(positive_label)
        elif component < -threshold:
            moving_direction_list.append(negative_label)

    final_string = "Based on the object relative position to end effector, the end effector is "
    if moving_direction_list:
        final_string += "on the " + ", ".join(moving_direction_list) + " side of the object."
    else:
        # No axis is far enough away to name a side.
        final_string += f"within {threshold} of the object along every axis."
    return explanation_str + "\n" + final_string

def robomimic_image_representation_extractor(data, condition_list):
    """Return the entry stored under ``data["image"]`` unchanged.

    ``condition_list`` is accepted for signature parity with the text
    extractor but is not consulted.
    """
    image = data["image"]
    return image

class RobomimicVerifier(LMFeedbackVerifier):
    """Prompt templates and prompt post-processing for the robomimic lift task.

    ``__init__`` stores the observation extractors and every prompt template
    as instance attributes; ``domain_specific_prompt_process`` fills in the
    placeholders this class owns (``DELTA``, ``WRONGACTION``) and appends the
    optional text rendering and history sections.

    The other placeholders in the templates (``OBSREPRESENTATION``,
    ``ACTION``, ``ACTION1``, ``ACTION2``) are not substituted here —
    presumably the base class does that; confirm against LMFeedbackVerifier.
    """

    # Positions in data["delta_actions"] kept per action-space condition:
    # the first six entries for translation-only, plus 12 and 13 when the
    # gripper is controllable as well.
    _TRANSLATION_DELTA_INDICES = frozenset(range(6))
    _GRIPPER_DELTA_INDICES = frozenset({12, 13})

    def __init__(self, env, feedback_type, data_path, condition_list, **kwargs):
        """Forward all arguments to the base class, then set up the templates.

        ``self.condition_list`` is read below; it is assumed to be set by the
        base-class ``__init__`` from ``condition_list`` — TODO confirm.
        Later ``if`` blocks deliberately override earlier ones, so when both
        "XYZ_only" and "XYZ_gripper" are present the "XYZ_gripper" variant wins.
        """
        super().__init__(env, feedback_type, data_path, condition_list, **kwargs)
        self.use_action_map_dict = False
        self.obs_representation_extractor = robomimic_obs_extractor
        self.continuous_action = True
        self.continuous_action_threshold = 0.28
        self.image_representation_extractor = robomimic_image_representation_extractor

        # An earlier OLAF-derived base prompt (which also described the
        # roll/pitch/yaw components of the OSC POSE action) used to live here
        # as commented-out text; the prompt below replaced it.
        #PLACEHOLDER_FOR_ANOYNOMITY's prompt
        self.base_prompt = """
You are a robot engineer. You need to control a robot arm to lift an object. You need to observe the robot's current state and choose which action to do next in order to lift the object. You can move the robot in the X, Y, or Z direction, as well as open/close its gripper. The robot's state is given in the following format:
    Gripper position: [x y z ]
    Object position: [x y z]
    Object relative distance to gripper: [dx dy dz]
    Gripper finger state: open or closed
You can choose actions that move the gripper in the X, Y, Z direction, with additional control of the gripper. The format of the action is:
    [ resulting x, resulting y, resulting z, ignore, ignore, ignore, resulting gripper state]
Note on the gripper: Set grip to 1 to open the gripper and -1 to close the gripper. The robot’s gripper should be closed if it is beginning to grasp the object, or when it is holding the object. When it is approaching the object, the gripper is open. If the robot gripper needs to be closed, you should continue to close the gripper, even if it is closed. Similarly, if the robot gripper needs to be open, you should continue to open the gripper, even if it is already open.
        """
        if "XYZ_only" in self.condition_list:
            self.base_prompt = """
            You are a robot engineer. You need to control a robot arm to lift an object. You need to observe the robot's current state and choose which action to do next in order to lift the object. You can move the robot in the X, Y, or Z direction, as well as open/close its gripper. The robot's state is given in the following format:
                Gripper position: [x y z ]
                Object position: [x y z]
                Object relative distance to gripper: [dx dy dz]
                Gripper finger state: open or closed
            You can choose actions that move the gripper in the X, Y, Z direction, with additional control of the gripper. The format of the action is:
                [ resulting x, resulting y, resulting z]
            Note on the gripper: Set grip to 1 to open the gripper and -1 to close the gripper. The robot’s gripper should be closed if it is beginning to grasp the object, or when it is holding the object. When it is approaching the object, the gripper is open. If the robot gripper needs to be closed, you should continue to close the gripper, even if it is closed. Similarly, if the robot gripper needs to be open, you should continue to open the gripper, even if it is already open.
                    """
        if "XYZ_gripper" in self.condition_list:
            self.base_prompt = """
            You are a robot engineer. You need to control a robot arm to lift an object. You need to observe the robot's current state and choose which action to do next in order to lift the object. You can move the robot in the X, Y, or Z direction, as well as open/close its gripper. The robot's state is given in the following format:
                Gripper position: [x y z ]
                Object position: [x y z]
                Object relative distance to gripper: [dx dy dz]
                Gripper finger state: open or closed
            You can choose actions that move the gripper in the X, Y, Z direction, with additional control of the gripper. The format of the action is:
                [ resulting x, resulting y, resulting z, resulting gripper state]
            Note on the gripper: Set grip to 1 to open the gripper and -1 to close the gripper. The robot’s gripper should be closed if it is beginning to grasp the object, or when it is holding the object. When it is approaching the object, the gripper is open. If the robot gripper needs to be closed, you should continue to close the gripper, even if it is closed. Similarly, if the robot gripper needs to be open, you should continue to open the gripper, even if it is already open.
                    """

        self.base_prompt_egocentric = """
You are a robot arm trying to lift an object. You can move your gripper in the X, Y, or Z direction, as well as open/close your gripper. Each step, you see:
    Relative distance from your gripper to object in x direction
    Relative distance from your gripper to object in y direction
    Relative distance from your gripper in z direction
    Gripper finger state: open or closed
You can choose actions that move the gripper in the X, Y, Z direction, with additional control of the gripper.
Note on the gripper: You should close it when beginning to grasp the object, or when you are holding the object. When you are approaching the object, the gripper should be open. If the gripper needs to be closed, you should continue to close the gripper, even if it is closed. Similarly, if the gripper needs to be open, you should continue to open the gripper, even if it is already open.
            """

        # Binary-feedback template. A previous variant that also asked about
        # end-effector rotation was kept here as a comment and has been removed.
        self.if_optimal_prompt_cot = """
You current observation is:
OBSREPRESENTATION
Is action ACTION the best action you can take? Please think step by step.
You should consider the position of the robot gripper and object, and how they are related to each other. For example, if the robot gripper is on the left of the object, you should consider moving the robot gripper to the right.
Only give the answer in a new line in JSON format:
{"reasoning": <REASONING>, "feedback": <FEEDBACK>}
Where <FEEDBACK> is one of "YES" or "NO", <REASONING> is a string of your thinking steps.
                        """

        # Action-advising template; the action format named in the last line
        # follows the active action-space condition.
        self.action_advising_base_prompt_cot = """
You current observation is:
OBSREPRESENTATION 
Which action do you choose? Please think step by step.
You should consider the position of the robot gripper and object, and how they are related to each other. For example, if the robot gripper is on the left of the object, you should consider moving the robot gripper to the right.
Only give the answer in a new line in JSON format:
{"reasoning": <REASONING>, "action": <ACTION>}
Where <ACTION> is the optimal action following the format of (dx, dy, dz, droll, dpitch, dyaw, gripper), <REASONING> is a string of your thinking steps.
                """

        if "XYZ_only" in self.condition_list:
            self.action_advising_base_prompt_cot = """
You current observation is:
OBSREPRESENTATION 
Which action do you choose? Please think step by step.
You should consider the position of the robot gripper and object, and how they are related to each other. For example, if the robot gripper is on the left of the object, you should consider moving the robot gripper to the right.
Only give the answer in a new line in JSON format:
{"reasoning": <REASONING>, "action": <ACTION>}
Where <ACTION> is the optimal action following the format of (dx, dy, dz), <REASONING> is a string of your thinking steps.
                """

        if "XYZ_gripper" in self.condition_list:
            self.action_advising_base_prompt_cot = """
You current observation is:
OBSREPRESENTATION 
Which action do you choose? Please think step by step.
You should consider the position of the robot gripper and object, and how they are related to each other. For example, if the robot gripper is on the left of the object, you should consider moving the robot gripper to the right.
Only give the answer in a new line in JSON format:
{"reasoning": <REASONING>, "action": <ACTION>}
Where <ACTION> is the optimal action following the format of (dx, dy, dz, gripper), <REASONING> is a string of your thinking steps.
                            """

        self.preference_base_prompt_cot = """
You current observation is:
OBSREPRESENTATION 
Given ACTION1 or ACTION2, which action is better? Please think step by step.
You should consider the position of the robot gripper and object, and how they are related to each other. For example, if the robot gripper is on the left of the object, you should consider moving the robot gripper to the right.
Only give the answer in a new line in JSON format:
{"reasoning": <REASONING>, "preference": <PREFERENCE>}
Where <PREFERENCE> is one of "FIRST" or "SECOND", <REASONING> is a string of your thinking steps.
        """

        # Default delta-action template: DELTA is replaced with the candidate
        # list in domain_specific_prompt_process.
        self.delta_action_prompt_cot = """
You current observation is:
OBSREPRESENTATION 
Given the following actions, which action is better? Please think step by step.
DELTA
You should consider the position of the robot gripper and object, and how they are related to each other. For example, if the robot gripper is on the left of the object, you should consider moving the robot gripper to the right.
Only give the answer in a new line in JSON format:
{"reasoning": <REASONING>, "index": <INDEX>}
Where <INDEX> is one of action choice index, <REASONING> is a string of your thinking steps.
        """

        # OLAF-style templates list a fixed menu of corrective actions and use
        # WRONGACTION instead of DELTA.
        if "OLAF_style_delta_action" in self.condition_list:
            if "XYZ_only" not in self.condition_list:
                self.delta_action_prompt_cot = """
You current observation is:
OBSREPRESENTATION 
Given the following action:
WRONGACTION
Here is a list of corrective actions that you can take. Each action will modify the current gripper state by the action amount. Which action do you choose? Please think step by step.
0. [+0.28, 0, 0, 0, 0, 0, 0]
1. [-0.28, 0, 0, 0, 0, 0, 0]
2. [0, +0.28, 0, 0, 0, 0, 0]
3. [0, -0.28, 0, 0, 0, 0, 0]
4. [0, 0, +0.28, 0, 0, 0, 0]
5. [0, 0, -0.28, 0, 0, 0, 0]
6. [0, 0, 0, +0.28, 0, 0, 0]
7. [0, 0, 0, -0.28, 0, 0, 0]
8. [0, 0, 0, 0, +0.28, 0, 0]
9. [0, 0, 0, 0, -0.28, 0, 0]   
10. [0, 0, 0, 0, 0, +0.28, 0]
11. [0, 0, 0, 0, 0, -0.28, 0]
12. [0, 0, 0, 0, 0, 0, 1]
13. [0, 0, 0, 0, 0, 0, -1]
You should consider the position of the robot gripper and object, and how they are related to each other. For example, if the robot gripper is on the left of the object, you should consider moving the robot gripper to the right.
Only give the answer in a new line in JSON format:
{"reasoning": <REASONING>, "index": <INDEX>}
Where <INDEX> is one of action choice index, <REASONING> is a string of your thinking steps.
                    """.strip()
            else:
                self.delta_action_prompt_cot = """
You current observation is:
OBSREPRESENTATION 
Given the following action:
WRONGACTION
Here is a list of corrective actions that you can take. Each action will modify the current gripper state by the action amount. Which action do you choose? Please think step by step.
0. [+0.28, 0, 0, 0, 0, 0, 0]
1. [-0.28, 0, 0, 0, 0, 0, 0]
2. [0, +0.28, 0, 0, 0, 0, 0]
3. [0, -0.28, 0, 0, 0, 0, 0]
4. [0, 0, +0.28, 0, 0, 0, 0]
5. [0, 0, -0.28, 0, 0, 0, 0]
You should consider the position of the robot gripper and object, and how they are related to each other. For example, if the robot gripper is on the left of the object, you should consider moving the robot gripper to the right.
Only give the answer in a new line in JSON format:
{"reasoning": <REASONING>, "index": <INDEX>}
Where <INDEX> is one of action choice index, <REASONING> is a string of your thinking steps.
                    """.strip()

            # Checked last so that it overrides either template chosen above.
            if "XYZ_gripper" in self.condition_list:
                self.delta_action_prompt_cot = """
You current observation is:
OBSREPRESENTATION 
Given the following action:
WRONGACTION
Here is a list of corrective actions that you can take. Each action will modify the current gripper state by the action amount. Which action do you choose? Please think step by step.
    Action 0: Move gripper forward by 0.28
    Action 1: Move gripper backward by 0.28
    Action 2: Move gripper right by 0.28
    Action 3: Move gripper left by 0.28
    Action 4: Move gripper vertically upward by 0.28
    Action 5: Move gripper vertically downward by 0.28
    Action 6: Open gripper
    Action 7: Close gripper
You should consider the position of the robot gripper and object, and how they are related to each other. For example, if the robot gripper is on the left of the object, you should consider moving the robot gripper to the right.
Only give the answer in a new line in JSON format:
{"reasoning": <REASONING>, "index": <INDEX>}
Where <INDEX> is one of action choice index, <REASONING> is a string of your thinking steps.
                                """.strip()

    def _format_delta_choices(self, delta_actions):
        """Render the candidate delta actions as "Index i: action" lines.

        Under "XYZ_only" only positions 0-5 are listed; under "XYZ_gripper"
        positions 0-5, 12 and 13; otherwise every candidate. The original
        position is always used as the printed index.
        """
        if "XYZ_only" in self.condition_list:
            allowed = self._TRANSLATION_DELTA_INDICES
        elif "XYZ_gripper" in self.condition_list:
            allowed = self._TRANSLATION_DELTA_INDICES | self._GRIPPER_DELTA_INDICES
        else:
            allowed = None  # no filtering
        return "".join(
            f"Index {index}: {delta_action}\n"
            for index, delta_action in enumerate(delta_actions)
            if allowed is None or index in allowed
        )

    @staticmethod
    def _render_image_history(history):
        """Render past steps with each step's image embedded as a data URL."""
        if len(history) == 0:
            return ""
        # Imported once per call (only when there is history to render)
        # instead of once per loop iteration.
        from Testers.Test import image_to_base64_data_url

        parts = ["\nHere is the history of your actions:\n"]
        for step, entry in enumerate(history, start=1):
            image_str = "<PATH>{content}</PATH>".format(
                content=image_to_base64_data_url(entry["image"])
            )
            parts.append(
                "Step {}: You see: \n{},You took action {}.\n".format(
                    step, image_str, entry["action"]
                )
            )
        return "".join(parts)

    @staticmethod
    def _render_text_history(history, condition_list):
        """Render past steps with each step's state as extracted text."""
        if len(history) == 0:
            return ""
        parts = ["\nHere is the history of your actions:\n"]
        for step, entry in enumerate(history, start=1):
            # Each history entry is passed whole: the extractor reads its
            # "state" entry itself.
            state_str = robomimic_obs_extractor(data=entry, condition_list=condition_list)
            parts.append(
                "Step {}: You see: \n{},You took action {}.\n".format(
                    step, state_str, entry["action"]
                )
            )
        return "".join(parts)

    def domain_specific_prompt_process(self, data, prompt):
        """Fill the robomimic-specific placeholders of ``prompt``.

        Args:
            data: Mapping for the current sample. Depending on the active
                conditions this reads "delta_actions", "wrong_action",
                "state" (with an "object" entry) and "history" (a sequence
                of entries with "action" plus "image" or "state").
            prompt: Template text to process.

        Returns:
            The processed prompt. With no matching feedback type or
            condition, ``prompt`` is returned unchanged.
        """
        final_string = prompt
        if self.feedback_type == "delta_action":
            if "OLAF_style_delta_action" in self.condition_list:
                final_string = prompt.replace("WRONGACTION", str(data["wrong_action"]))
            else:
                final_string = prompt.replace(
                    "DELTA", self._format_delta_choices(data["delta_actions"])
                )

        if "text_rendering" in self.condition_list:
            # NOTE(review): object[0:3] is labelled "Object position" by the
            # observation extractor, while the rendered text speaks of the
            # position relative to the end effector — confirm the slice.
            # NOTE(review): the text is appended with no separator, so it
            # directly follows the last character of templates that were
            # .strip()-ed.
            final_string += robomimic_text_rendering_object_position(data["state"]["object"][0:3])

        if "history_context" in self.condition_list:
            if "image_observation" in self.condition_list:
                history_str = self._render_image_history(data["history"])
            else:
                history_str = self._render_text_history(
                    data["history"], condition_list=self.condition_list
                )
            final_string = final_string + history_str

        return final_string
