from Testers.MiniGridTest import rotate_to_egocentric
from config import *
from Testers.Test import LMFeedbackVerifier

def cliff_walking_egocentric_str_generate(position):
    """Describe the four tiles adjacent to ``position`` on the cliff-walking grid.

    Args:
        position: ``(row, column)`` pair; only indices 0 and 1 are read.

    Returns:
        One sentence stating what lies to the left, to the right, above and
        below the position. Each neighbour is reported as "a hole",
        "the goal", "boundary" or "a blank tile".
    """
    row, col = position[0], position[1]

    def describe(tile_row, tile_col):
        # Row 3 is special: columns 1..10 are holes and column 11 is the goal.
        if tile_row == 3:
            if 1 <= tile_col <= 10:
                return "a hole"
            if tile_col == 11:
                return "the goal"
        # Everything else is either off the 4x12 grid or an ordinary tile.
        inside_grid = 0 <= tile_row <= 3 and 0 <= tile_col <= 11
        return "a blank tile" if inside_grid else "boundary"

    # Placeholders are substituted in this fixed order: LEFT, RIGHT, UP, DOWN.
    neighbours = (
        ("LEFT", (row, col - 1)),
        ("RIGHT", (row, col + 1)),
        ("UP", (row - 1, col)),
        ("DOWN", (row + 1, col)),
    )
    sentence = "You see LEFT on your left, RIGHT on your right, UP above you, and DOWN below you"
    for placeholder, (tile_row, tile_col) in neighbours:
        sentence = sentence.replace(placeholder, describe(tile_row, tile_col))
    return sentence


def cliff_walking_state_to_coordinate(state):
    """Convert a flat state index into a ``(row, column)`` tuple for a 12-column grid."""
    # divmod yields (state // 12, state % 12) in a single call.
    return divmod(state, 12)

def cliff_walking_obs_extractor(data, condition_list):
    """Build the textual observation for the flat state stored in ``data["state"]``.

    Args:
        data: mapping with a ``"state"`` entry holding the flat state index.
        condition_list: collection of condition names; only "egocentric" is
            checked here.

    Returns:
        The ``(row, column)`` tuple rendered as a string. When "egocentric" is
        enabled, a "." and the description of the four neighbouring tiles are
        appended.
    """
    row, col = divmod(data["state"], 12)
    observation = str((row, col))
    if "egocentric" not in condition_list:
        return observation
    return observation + "." + cliff_walking_egocentric_str_generate((row, col))

def action_to_direction(action):
    """Translate an action id (0-3) into its direction name.

    Returns "UP", "RIGHT", "DOWN" or "LEFT" for 0, 1, 2 and 3 respectively,
    and ``None`` for any other value.
    """
    # The position in the tuple is the action id; comparison stays ``==``.
    for code, name in enumerate(("UP", "RIGHT", "DOWN", "LEFT")):
        if action == code:
            return name
    return None


class CliffWalkingVerifier(LMFeedbackVerifier):
    """Cliff-walking verifier: prompt templates and answer-parsing tables.

    Observations are rendered by ``cliff_walking_obs_extractor`` and the four
    actions are named UP / RIGHT / DOWN / LEFT.
    """

    def __init__(self, env, feedback_type, data_path, condition_list, **kwargs):
        """Forward every argument to the base class, then install the domain settings."""
        super().__init__(env, feedback_type, data_path, condition_list, **kwargs)

        action_names = ("UP", "RIGHT", "DOWN", "LEFT")

        self.use_action_map_dict = True
        # Action id -> action name; the id is the position in ``action_names``.
        self.number_to_action_dict = dict(enumerate(action_names))
        self.obs_representation_extractor = cliff_walking_obs_extractor

        # The reverse (name -> id) table is only installed for action advising.
        if self.feedback_type == "action_advising":
            self.feedback_type_to_translation_dict = {
                name: number for number, name in enumerate(action_names)
            }

        # For each feedback type, the JSON key(s) named in the matching prompt.
        self.feedback_type_to_verify_key_dict = dict(
            binary_feedback="feedback",
            preference="preference",
            action_advising="direction",
            goal_advising=["row", "column"],
        )

        # Rules prompt that spells out the effect of every action.
        known_dynamics_prompt = """
You are an agent trying to move to position (3, 11).
If you are in position (r, c):
- going UP will take you to (r-1, c)
- going DOWN will take you to (r+1, c)
- going LEFT will take you to (r, c-1)
- going RIGHT will take you to (r, c+1)
The grid world is 4x12 with zero-indexed positions.
There are holes in the last row, with positions (3, 1) ... (3, 10).
Do not exceed the boundaries of the world, and do not fall into the holes.
        """

        # Variant that only lists the action names (used for "unknown_dynamics").
        unknown_dynamics_prompt = """
You are an agent trying to move to position (3, 11).
You can take 4 possible actions:
- going UP 
- going DOWN
- going LEFT
- going RIGHT 
The grid world is 4x12 with zero-indexed positions.
There are holes in the last row, with positions (3, 1) ... (3, 10).
Do not exceed the boundaries of the world, and do not fall into the holes.
        """.strip()

        if "unknown_dynamics" in condition_list:
            self.base_prompt = unknown_dynamics_prompt
        else:
            self.base_prompt = known_dynamics_prompt

        self.base_prompt_egocentric = """
You are an agent trying to move to goal position (3, 11).
If you are in position (r, c):
- going UP will take you to (r-1, c)
- going DOWN will take you to (r+1, c)
- going LEFT will take you to (r, c-1)
- going RIGHT will take you to (r, c+1)
The grid world is 4x12 with zero-indexed positions.
There are holes in the last row, with positions (3, 1) ... (3, 10).
While you are trying to move to the goal, do not exceed the boundaries of the world, and do not fall into the holes. You can safely move to or stay on a blank tile.
        """

        # Question templates, one per feedback type.
        self.if_optimal_prompt_cot = """
You are in position OBSREPRESENTATION. Is action ACTION the best action you can take? Please think step by step.
Only give the answer in a new line in JSON format:
{"reasoning": <REASONING>, "feedback": <FEEDBACK>}
Where <FEEDBACK> is one of "YES" or "NO", <REASONING> is a string of your thinking steps.
        """.strip()

        self.action_advising_base_prompt_cot = """
You are in position OBSREPRESENTATION. Which direction do you choose? Please think step by step.
Only give the answer in a new line in JSON format:
{"reasoning": <REASONING>, "direction": <DIRECTION>}
Where <DIRECTION> is one of "UP", "DOWN", "LEFT", or "RIGHT", <REASONING> is a string of your thinking steps.
        """.strip()

        self.preference_base_prompt_cot = """
You are in position OBSREPRESENTATION. Given ACTION1 or ACTION2, which action is better? Please think step by step.
Only give the answer in a new line in JSON format:
{"reasoning": <REASONING>,"preference": <PREFERENCE>}, where <PREFERENCE> is one of "FIRST", "SECOND", <REASONING> is a string of your thinking steps.
        """.strip()

        self.goal_advising_base_prompt_cot = """
You are in position OBSREPRESENTATION. What coordinates should you move to in the next step? Please think step by step.
Only give the answer in a new line in JSON format:
{"reasoning": <REASONING>, "row": <ROW>, "column": <COLUMN>}
Where <ROW> is the number of the row you should move, and <COLUMN> is the number of column you should move, <REASONING> is a string of your thinking steps.
        """

        self.explicit_thinking_guides = """
You should think about these questions.
What is your current goal?
Based on the rules you know, which coordinates do you need to reach the goal?
Based on what you see and what you known, can you reach any of the coordinates now?
If no, comparing the possible actions you can take, which one leads you closer to the goal? 
        """

    def domain_specific_prompt_process(self, data, prompt):
        """Append the recorded action history to ``prompt`` when "history_context" is enabled.

        Args:
            data: mapping whose ``"history"`` entry is a sequence of records
                with "state", "action", "next_state" and "extra" keys.
            prompt: the prompt text built so far.

        Returns:
            ``prompt`` unchanged when the condition is off; otherwise ``prompt``
            followed by one "Step k: ..." line per history record (nothing is
            added for an empty history).
        """
        if "history_context" not in self.condition_list:
            return prompt

        history = data["history"]
        pieces = []
        if len(history) != 0:
            pieces.append("\nHere is the history of your actions:\n")
            for step_number, record in enumerate(history, start=1):
                came_from = cliff_walking_state_to_coordinate(record["state"])
                direction = action_to_direction(record["action"])
                arrived_at = cliff_walking_state_to_coordinate(record["next_state"])
                pieces.append(
                    "Step {}: You were at position {}, and you took action {}, then you reached {}. {}\n".format(
                        step_number, came_from, direction, arrived_at, record["extra"]
                    )
                )
        return prompt + "".join(pieces)



def generate_ascii_state_cliff_walking(row, col, direction, return_grid=False):
    """
    Render the 4x12 cliff-walking board as ASCII.

    '.' is an empty tile, 'X' a cliff tile (last row, columns 1-10) and 'G'
    the goal (last row, column 11). The player is drawn as '^', 'v', '>' or
    '<' depending on ``direction`` ('?' for any other value). The player is
    not drawn when the position is off the board or on a cliff tile.

    Parameters:
    - row (int): Current row of the player (0-indexed).
    - col (int): Current column of the player (0-indexed).
    - direction (str): Facing direction: 'UP', 'DOWN', 'RIGHT' or 'LEFT'.
    - return_grid (bool): When true, return the grid itself instead of text.

    Returns:
    - list[list[str]] when ``return_grid`` is true, otherwise a str with the
      cells of a row separated by spaces and the rows separated by newlines.
    """
    # Three plain rows on top, then the start / cliffs / goal row.
    board = [['.'] * 12 for _ in range(3)]
    board.append(['.'] + ['X'] * 10 + ['G'])

    on_board = 0 <= row < 4 and 0 <= col < 12
    if on_board and board[row][col] != 'X':
        facing_symbols = {'UP': '^', 'DOWN': 'v', 'RIGHT': '>', 'LEFT': '<'}
        board[row][col] = facing_symbols.get(direction, '?')

    if return_grid:
        return board
    return '\n'.join(' '.join(cells) for cells in board)


def cliff_walking_obs_extractor_directionality(data, condition_list):
    """Render ``data["state"]`` as ASCII art for the directional cliff-walking task.

    ``data["state"]`` is indexed at 0, 1 and 2, which are passed on as the
    row, column and facing direction of the player.

    With "egocentric" in ``condition_list`` the grid is first passed through
    ``rotate_to_egocentric`` and then joined into text; otherwise the plain
    board text is returned.
    """
    # Slot for an optional position sentence; it is currently left empty.
    suffix = ""

    use_egocentric = "egocentric" in condition_list
    state = data["state"]
    agent_row, agent_col, facing = state[0], state[1], state[2]

    if not use_egocentric:
        return generate_ascii_state_cliff_walking(agent_row, agent_col, facing) + suffix

    board = generate_ascii_state_cliff_walking(agent_row, agent_col, facing, return_grid=True)
    rotated = rotate_to_egocentric(board)
    return '\n'.join(' '.join(cells) for cells in rotated) + suffix

class CliffWalkingDirectionalityVerifier(LMFeedbackVerifier):
    """Cliff-walking verifier for the turn-and-move (directional) action set.

    Observations are rendered by ``cliff_walking_obs_extractor_directionality``
    and the three actions are TURN LEFT / TURN RIGHT / MOVE FORWARD.
    """

    def __init__(self, env, feedback_type, data_path, condition_list, **kwargs):
        """Forward every argument to the base class, then install the domain settings."""
        super().__init__(env, feedback_type, data_path, condition_list, **kwargs)

        action_names = ("TURN LEFT", "TURN RIGHT", "MOVE FORWARD")

        self.use_action_map_dict = True
        # Action id -> action name; the id is the position in ``action_names``.
        self.number_to_action_dict = dict(enumerate(action_names))
        self.obs_representation_extractor = cliff_walking_obs_extractor_directionality

        # The reverse (name -> id) table is only installed for action advising.
        if self.feedback_type == "action_advising":
            self.feedback_type_to_translation_dict = {
                name: number for number, name in enumerate(action_names)
            }

        # For each feedback type, the JSON key named in the matching prompt.
        self.feedback_type_to_verify_key_dict = dict(
            binary_feedback="feedback",
            preference="preference",
            action_advising="action",
        )

        # NOTE: no "unknown_dynamics" prompt variant is defined for this verifier.
        self.base_prompt = """
You are an agent trying to move to position (3, 11).
The grid world is 4x12 with zero-indexed positions.
There are holes in the last row, with positions (3, 1) ... (3, 10).
Do not exceed the boundaries of the world, and do not fall into the holes.
The state is described with an ASCII art. In the ASCII art, “G” means goal. “.” means empty tile. "X" means the hole. “^” mean the agent who is facing upwards. “v” mean the agent who is facing downwards. “<” means the agent who is facing leftwards. “>” means the agent who is facing rightwards.
The possible actions are:
1. TURN LEFT, changing the direction to left, for example, the agent “^” will become “<”.
2. TURN RIGHT, changing the direction to right, for example, the agent “^” will become “>”.
3. MOVE FORWARD, moving forwards for one tile
        """

        # Egocentric variant: no coordinates are given, and the agent is described only as "^".
        self.base_prompt_egocentric = """
You are an agent trying to move to goal.
The grid world is of size 4x12.
There are holes in the grid.
Do not exceed the boundaries of the world, and do not fall into the holes.
The state is described with an ASCII art. In the ASCII art, “G” means goal. “.” means empty tile. “^” means the agent. "X" means the hole.
The possible actions are:
1. TURN LEFT, changing the direction to left
2. TURN RIGHT, changing the direction to right
3. MOVE FORWARD, moving forwards for one tile
        """

        # Question templates, one per feedback type.
        self.if_optimal_prompt_cot = """
You state is:
OBSREPRESENTATION 
Is action ACTION the best action you can take? Please think step by step.
Only give the answer in a new line in JSON format:
{"reasoning": <REASONING>, "feedback": <FEEDBACK>}
Where <FEEDBACK> is one of "YES" or "NO", <REASONING> is a string of your thinking steps.
        """.strip()

        self.action_advising_base_prompt_cot = """
You state is:
OBSREPRESENTATION 
Which direction do you choose? Please think step by step.
Only give the answer in a new line in JSON format:
{"reasoning": <REASONING>, "action": <ACTION>}
Where <ACTION> is one of "TURN LEFT", "TURN RIGHT" or "MOVE FORWARD", <REASONING> is a string of your thinking steps.
        """.strip()

        self.preference_base_prompt_cot = """
You state is:
OBSREPRESENTATION 
Given ACTION1 or ACTION2, which action is better? Please think step by step.
Only give the answer in a new line in JSON format:
{"reasoning": <REASONING>,"preference": <PREFERENCE>}, where <PREFERENCE> is one of "FIRST", "SECOND", <REASONING> is a string of your thinking steps.
        """.strip()

    def domain_specific_prompt_process(self, data, prompt):
        """Return ``prompt`` untouched; this verifier adds no extra context."""
        return prompt


if __name__ == "__main__":
    # Script entry point: run the directional cliff-walking verifier once per
    # feedback type against the Together-hosted Llama 3.1 8B model.
    import os

    # NOTE(security): the Together API key used to be hard-coded in this file.
    # It is now read from the environment so that no secret lives in source
    # control. The previously committed key should be treated as leaked and
    # rotated.
    together_api_key = os.environ.get("TOGETHER_API_KEY")
    if not together_api_key:
        raise SystemExit("Set the TOGETHER_API_KEY environment variable before running this script.")

    # The non-directional CliffWalkingVerifier can be driven the same way with
    # the files PERSISTENT_DATA_PATH + "/CliffWalking/cliffwalking_<feedback_type>_traverse.npy"
    # (feedback types: goal_advising, binary_feedback, action_advising, preference), e.g.
    #   verifier = CliffWalkingVerifier("CliffWalking-v0", "goal_advising", data_path, ["egocentric"])
    #   verifier.verify(5, "ollama", model="llama3.1:8b-instruct-fp16", url="http://localhost:11434/api/chat", max_item_num=-1)

    # (feedback type, first positional argument handed to verify())
    directionality_runs = (
        ("action_advising", 5),
        ("binary_feedback", 5),
        ("preference", 1),
    )
    for feedback_type, verify_arg in directionality_runs:
        data_path = PERSISTENT_DATA_PATH + "/CliffWalkingDirectionality/cliffwalking_directionality_{}_traverse.npy".format(feedback_type)
        verifier = CliffWalkingDirectionalityVerifier("CliffWalking-v0", feedback_type, data_path, ["egocentric"])
        verifier.verify(
            verify_arg,
            "together",
            model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
            url="http://localhost:11434/api/chat",
            max_item_num=-1,
            api_key=together_api_key,
        )



