import time
from abc import abstractmethod
from enum import verify
from typing import final

import gymnasium as gym
import numpy as np
import torch
import torch.nn as nn
from stable_baselines3 import A2C, PPO
from openai import OpenAI
import os
from groq import Groq

import minigrid
from minigrid.wrappers import ImgObsWrapper, FullyObsWrapper, PositionBonus, ActionBonus
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
from torch.ao.quantization.backend_config.onednn import observation_type

import config
from Testers.Test import LMFeedbackVerifier
from config import *
from tqdm import tqdm
from together import Together




def rotate_to_egocentric(m):
    """Return a copy of grid ``m`` rotated so that the agent faces upwards.

    The first cell (in row-major order) holding one of the agent symbols
    ``"V"``, ``"v"``, ``">"``, ``"^"`` or ``"<"`` is taken as the agent. That
    cell is rewritten to ``"^"`` and the whole grid is rotated in
    counter-clockwise quarter turns until the original heading points up.

    The input is never modified: earlier versions wrote ``"^"`` into the
    caller's grid, so a second call on the same grid found ``"^"`` and
    returned it unrotated.

    Args:
        m: 2-D grid of single-character cells (nested lists or a NumPy array).

    Returns:
        ``m`` itself when no agent symbol is present; otherwise a NumPy array
        holding the rotated copy.
    """
    # Number of counter-clockwise quarter turns (np.rot90 convention) that
    # bring each heading to "up".
    rotation_number_dict = {
        "V": 2,
        "v": 2,
        ">": 1,
        "^": 0,
        "<": 3,
    }
    agent_symbols = ("V", "v", "^", "<", ">")

    agent_pos = None
    for index, cell in np.ndenumerate(m):
        if cell in agent_symbols:
            agent_pos = index
            break

    if agent_pos is None:
        return m

    # np.array copies its input, so the caller's grid keeps the original
    # agent symbol.
    grid = np.array(m)
    rotation_number = rotation_number_dict[str(grid[agent_pos])]
    grid[agent_pos] = "^"
    return np.rot90(grid, k=rotation_number)





class MinigridFeaturesExtractor(BaseFeaturesExtractor):
    """Small convolutional feature extractor for image observations.

    Three 2x2 convolutions (16, 32 and 64 output channels, each followed by a
    ReLU) are flattened and projected to ``features_dim`` by a linear layer
    with a ReLU. The first axis of ``observation_space.shape`` is used as the
    number of input channels.
    """

    def __init__(self, observation_space: gym.Space, features_dim: int = 512,
                 normalized_image: bool = False) -> None:
        """Build the convolutional stack and the linear projection head.

        Args:
            observation_space: Space whose ``shape[0]`` gives the input
                channel count and whose samples size the linear layer.
            features_dim: Width of the produced feature vector.
            normalized_image: Accepted but not read anywhere in this class.
        """
        super().__init__(observation_space, features_dim)
        in_channels = observation_space.shape[0]
        conv_layers = [
            nn.Conv2d(in_channels, 16, (2, 2)),
            nn.ReLU(),
            nn.Conv2d(16, 32, (2, 2)),
            nn.ReLU(),
            nn.Conv2d(32, 64, (2, 2)),
            nn.ReLU(),
            nn.Flatten(),
        ]
        self.cnn = nn.Sequential(*conv_layers)

        # Push one sampled observation (with a leading batch axis) through the
        # conv stack to learn the flattened width.
        with torch.no_grad():
            probe = torch.as_tensor(observation_space.sample()[None]).float()
            flat_width = self.cnn(probe).shape[1]

        projection = nn.Linear(flat_width, features_dim)
        self.linear = nn.Sequential(projection, nn.ReLU())

    def forward(self, observations: torch.Tensor) -> torch.Tensor:
        """Run ``observations`` through the conv stack, then the linear head."""
        conv_features = self.cnn(observations)
        return self.linear(conv_features)






def replace_prompt_string(prompt, placeholder, replace_string):
    """Return ``prompt`` with every occurrence of ``placeholder`` replaced by ``replace_string``."""
    filled_prompt = prompt.replace(placeholder, replace_string)
    return filled_prompt


def array_to_string(arr):
    """Render a grid as text: cells of a row are concatenated, rows are separated by newlines."""
    rendered_rows = []
    for row in arr:
        rendered_rows.append("".join(row))
    return "\n".join(rendered_rows)


def doorkey_obs_representation_extractor(data, condition_list):
    """Return the ASCII-art text for the observation stored in ``data``.

    Reads ``data["state"]["ObsString"]``; when ``"egocentric"`` is listed in
    ``condition_list`` the grid is first passed through
    ``rotate_to_egocentric``. The grid is then flattened to text with
    ``array_to_string``.
    """
    grid = data["state"]["ObsString"]
    wants_egocentric = "egocentric" in condition_list
    rows = rotate_to_egocentric(grid) if wants_egocentric else grid
    return array_to_string(rows)


def doorkey_image_representation_extractor(data, condition_list):
    """Return ``data["state"]["image"]`` unchanged; ``condition_list`` is not consulted."""
    state = data["state"]
    return state["image"]

class DoorKeyVerifier(LMFeedbackVerifier):
    """Language-model feedback verifier configured for MiniGrid DoorKey.

    The constructor installs the observation extractors, the action and
    response vocabularies, and the prompt templates for the three feedback
    types handled here ("binary_feedback", "preference" and
    "action_advising"). ``domain_specific_prompt_process`` fills the door/key
    status placeholders of a prompt and can append the action history.
    """

    def __init__(self, env, feedback_type, data_path, condition_list, **kwargs):
        """Configure vocabularies and prompt templates.

        Args:
            env, feedback_type, data_path, condition_list, **kwargs: forwarded
                unchanged to ``LMFeedbackVerifier.__init__``.
            feedback_type: also selects which response vocabulary becomes
                ``feedback_type_to_translation_dict``.
            condition_list: when it contains ``"unknown_dynamics"`` the base
                prompts are replaced by variants that list the actions
                without describing their effects.
        """
        super().__init__(env, feedback_type, data_path, condition_list, **kwargs)
        self.obs_representation_extractor = doorkey_obs_representation_extractor
        self.image_representation_extractor = doorkey_image_representation_extractor
        # NOTE(review): there is no entry for 4, while the "online" mapping
        # below sends UNLOCK THE DOOR to 4 — confirm which indices can appear
        # in recorded histories.
        self.number_to_action_dict = {
            0: "TURN LEFT",
            1: "TURN RIGHT",
            2: "MOVE FORWARD",
            3: "PICK UP THE KEY",
            5: "UNLOCK THE DOOR"
        }
        # no_check_correct / no_load_data are presumably set by the base
        # class constructor — verify against LMFeedbackVerifier.
        if self.no_check_correct and self.no_load_data:
            # Online case
            self.action_to_number_dict = {
                "TURN LEFT": 0,
                "TURN RIGHT": 1,
                "MOVE FORWARD": 2,
                "PICK UP THE KEY": 3,
                "UNLOCK THE DOOR": 4
            }
        else:
            self.action_to_number_dict = {
                "TURN LEFT": 0,
                "TURN RIGHT": 1,
                "MOVE FORWARD": 2,
                "PICK UP THE KEY": 3,
                "UNLOCK THE DOOR": 5
            }
        self.preference_to_number_dict = {
            "FIRST": 1,
            "SECOND": -1
        }
        self.response_to_numeric_dict = {
            "YES": 1,
            "NO": -1
        }
        # Any other feedback type leaves the attribute untouched, as before.
        if feedback_type == "binary_feedback":
            self.feedback_type_to_translation_dict = self.response_to_numeric_dict
        elif feedback_type == "preference":
            self.feedback_type_to_translation_dict = self.preference_to_number_dict
        elif feedback_type == "action_advising":
            self.feedback_type_to_translation_dict = self.action_to_number_dict

        self.base_prompt = """
You need to solve Minigrid Doorkey.
Minigrid Doorkey involves crossing a gridworld, where you need to pick up the key, unlock the door and reach the goal. Do not hit the wall.
The state is described with an ASCII art. In the ASCII art, “#” means wall. “K” means key. “D” mean door. “G” means goal. “.” means empty tile. “^” mean the agent who is facing upwards. “V” mean the agent who is facing downwards. “<” means the agent who is facing leftwards. “>” means the agent who is facing rightwards.
The possible actions are:
1. TURN LEFT, changing the direction to left, for example, the agent “^” will become “<”.
2. TURN RIGHT, changing the direction to right, for example, the agent “^” will become “>”.
3. MOVE FORWARD, moving forwards for one tile
4. PICK UP THE KEY, picking up the key, this is only possible when you are facing the tile with key on it
5. UNLOCK THE DOOR, this action is only possible when you have the key.
        """
        # TODO: remove the "change direction" wording
        self.base_prompt_egocentric = """
You need to solve Minigrid Doorkey.
Minigrid Doorkey involves crossing a gridworld, where you need to pick up the key, unlock the door and reach the goal. Do not hit the wall.
The state is described with an ASCII art in first-person perspective. In the ASCII art, “#” means wall. “K” means key. “D” mean door. “G” means goal. “.” means empty tile. 
"^" is where you are. You are always facing forward.
The possible actions are:
1. TURN LEFT.
2. TURN RIGHT.
3. MOVE FORWARD, moving forwards for one tile
4. PICK UP THE KEY, picking up the key, this is only possible when you are facing the tile with key on it.
5. UNLOCK THE DOOR, this action is only possible when you have the key.
        """

        # Variants that name the actions without explaining what they do.
        if "unknown_dynamics" in condition_list:
            self.base_prompt = """
            You need to solve Minigrid Doorkey.
            Minigrid Doorkey involves crossing a gridworld, where you need to pick up the key, unlock the door and reach the goal. Do not hit the wall.
            The state is described with an ASCII art. In the ASCII art, “#” means wall. “K” means key. “D” mean door. “G” means goal. “.” means empty tile. “^” mean the agent who is facing upwards. “V” mean the agent who is facing downwards. “<” means the agent who is facing leftwards. “>” means the agent who is facing rightwards.
            The possible actions are:
            1. TURN LEFT
            2. TURN RIGHT
            3. MOVE FORWARD
            4. PICK UP THE KEY
            5. UNLOCK THE DOOR
                    """
            # TODO: remove the "change direction" wording
            self.base_prompt_egocentric = """
            You need to solve Minigrid Doorkey.
            Minigrid Doorkey involves crossing a gridworld, where you need to pick up the key, unlock the door and reach the goal. Do not hit the wall.
            The state is described with an ASCII art in first-person perspective. In the ASCII art, “#” means wall. “K” means key. “D” mean door. “G” means goal. “.” means empty tile. 
            "^" is where you are. You are always facing forward.
            The possible actions are:
            1. TURN LEFT
            2. TURN RIGHT
            3. MOVE FORWARD
            4. PICK UP THE KEY
            5. UNLOCK THE DOOR
                    """

        self.if_optimal_prompt_cot = """
You are in the following state represented in ascii art:
OBSREPRESENTATION
The door is DOORSTATUS. You KEYSTATUS have the key. 
Is action ACTION the best action you can take? Please think step by step.
Only give the answer in a new line in JSON format:
{"reasoning": <REASONING>, "feedback": <FEEDBACK>}
Where <FEEDBACK> is one of "YES" or "NO", <REASONING> is a string of your thinking steps.
        """

        self.if_optimal_prompt_explicit_action_by_action_cot = """ 
You are in the following state represented in ascii art:
OBSREPRESENTATION
The door is DOORSTATUS. You KEYSTATUS have the key. 
Is action ACTION the best action you can take? Please think step by step and consider all possible actions, which are TURN LEFT, TURN RIGHT, MOVE FORWARD, PICK UP THE KEY, UNLOCK THE DOOR.
Only give the answer in a new line in JSON format:
{"reasoning": <REASONING>, "feedback": <FEEDBACK>}
Where <FEEDBACK> is one of "YES" or "NO", <REASONING> is a string of your thinking steps.
        """

        self.action_advising_base_prompt_cot = """
You are in the following state represented in ascii art:
OBSREPRESENTATION
The door is DOORSTATUS. You KEYSTATUS have the key. 
Which action do you choose? Please think step by step.
Only give the answer in a new line in JSON format:
{"reasoning": <REASONING>, "action": <ACTION>}
Where <ACTION> is one of "TURN LEFT", "TURN RIGHT", "MOVE FORWARD", "PICK UP THE KEY" or "UNLOCK THE DOOR", <REASONING> is a string of your thinking steps.
        """

        self.action_advising_base_prompt_explicit_action_by_action_cot = """
You are in the following state represented in ascii art:
OBSREPRESENTATION
The door is DOORSTATUS. You KEYSTATUS have the key. 
Which action do you choose? Please think step by step and consider all possible actions, which are TURN LEFT, TURN RIGHT, MOVE FORWARD, PICK UP THE KEY, UNLOCK THE DOOR.
Only give the answer in a new line in JSON format:
{"reasoning": <REASONING>, "action": <ACTION>}
Where <ACTION> is one of "TURN LEFT", "TURN RIGHT", "MOVE FORWARD", "PICK UP THE KEY" or "UNLOCK THE DOOR", <REASONING> is a string of your thinking steps.
        """

        self.preference_base_prompt_cot = """
You are in the following state represented in ascii art:
OBSREPRESENTATION
The door is DOORSTATUS. You KEYSTATUS have the key. 
Given ACTION1 or ACTION2, which action is better? Please think step by step.
Only give the answer in a new line in JSON format:
{"reasoning": <REASONING>, "preference": <PREFERENCE>}
Where <PREFERENCE> is one of "FIRST" or "SECOND", <REASONING> is a string of your thinking steps.
        """

        self.explicit_thinking_guides = """
You should think about these questions.
What is your current goal?
Based on the rules you know, what subgoals do you need to achieve?
Based on what you see and what you have, what subgoals have you achieved?
If you have not achieved all of the subgoals, based on the rules and doable actions, can you achieve any subgoals now?
If yes, which subgoal do you want to achieve next?
If no, what do you need or where should you go next?
            """.strip()

    def domain_specific_prompt_process(self, data, prompt):
        """Fill the DoorKey status placeholders and optionally append history.

        ``DOORSTATUS`` becomes "unlocked"/"locked" from
        ``data["state"]["DoorUnlocked"]`` and ``KEYSTATUS`` becomes
        ""/"do not" from ``data["state"]["HoldingKey"]``. The substitution is
        the same for every feedback type, so it is applied unconditionally
        (previously an unrecognised feedback type left the result unbound).
        When ``"history_context"`` is in ``self.condition_list`` a textual
        history built from ``data["state"]["history"]`` is appended.

        Args:
            data: entry whose ``"state"`` dict carries the keys named above.
            prompt: prompt text containing the placeholders.

        Returns:
            The processed prompt string.
        """

        def matrix_to_string(matrix):
            # rotate_to_egocentric may write into the grid it receives, so
            # hand it a copy to keep the stored history intact.
            if "egocentric" in self.condition_list:
                ascii_art = rotate_to_egocentric(np.array(matrix))
            else:
                ascii_art = matrix
            return array_to_string(ascii_art)

        def generate_history(data):
            history = data["state"]["history"]
            if len(history) == 0:
                return ""

            use_images = "image_observation" in self.condition_list
            if use_images:
                # Imported lazily: only needed for image observations.
                from Testers.Test import image_to_base64_data_url
                image_template_str = "<PATH>{content}</PATH>"

            parts = ["\nHere is the history of your actions:\n"]
            for i, step in enumerate(history):
                action_name = self.number_to_action_dict[step["action"]]
                if use_images:
                    current_state_str = image_template_str.format(
                        content=image_to_base64_data_url(step["image"]))
                    parts.append("Step {}: You see: \n{},You took action {}.\n".format(
                        i + 1, current_state_str, action_name))
                else:
                    # NOTE(review): with `i > 1` the second step (i == 1)
                    # also gets the default status instead of
                    # history[0]["extra"]; this looks like an off-by-one
                    # (`i > 0`) but is kept as-is — confirm intent.
                    if i > 1:
                        status = history[i - 1]["extra"]
                    else:
                        status = "You don't have the key and the door is locked."
                    parts.append("Step {}: You see: \n{}\n, {} You took action {}.\n".format(
                        i + 1, matrix_to_string(step["state"]), status, action_name))
            return "".join(parts)

        state = data["state"]
        door_status = "unlocked" if state["DoorUnlocked"] else "locked"
        key_status = "" if state["HoldingKey"] else "do not"
        final_string = prompt.replace("DOORSTATUS", door_status).replace("KEYSTATUS", key_status)

        if "history_context" in self.condition_list:
            final_string = final_string + generate_history(data)

        return final_string

    # def generate_prompt(self, data):
    #
    #     base_prompt = self.base_prompt
    #
    #     if "egocentric" in self.condition_list:
    #         base_prompt = self.base_prompt_egocentric
    #
    #     action_dict = self.action_to_number_dict
    #     if_optimal_prompt_cot = self.if_optimal_prompt_cot
    #     action_advising_base_prompt_cot = self.action_advising_base_prompt_cot
    #     preference_base_prompt_cot = self.preference_base_prompt_cot
    #
    #     ascii_art = data["state"]["ObsString"]
    #     if "egocentric" in self.condition_list:
    #         ascii_art = rotate_to_egocentric(ascii_art)
    #     ascii_art = array_to_string(ascii_art)
    #
    #     if self.feedback_type == "binary_feedback":
    #         final_string = (base_prompt +
    #                         if_optimal_prompt_cot.replace("ASCIIART", ascii_art)
    #                         .replace("DOORSTATUS", "unlocked" if data["state"]["DoorUnlocked"] else "locked").replace(
    #                             "KEYSTATUS", "" if data["state"]["HoldingKey"] else "do not")
    #                         .replace("ACTION", action_dict[data["action"]]))
    #     if self.feedback_type == "action_advising":
    #         final_string = (base_prompt +
    #                         action_advising_base_prompt_cot.replace("ASCIIART", ascii_art)
    #                         .replace("DOORSTATUS", "unlocked" if data["state"]["DoorUnlocked"] else "locked").replace(
    #                             "KEYSTATUS", "" if data["state"]["HoldingKey"] else "do not"))
    #     if self.feedback_type == "preference":
    #         final_string = (base_prompt +
    #                         preference_base_prompt_cot.replace("ASCIIART", ascii_art)
    #                         .replace("DOORSTATUS", "unlocked" if data["state"]["DoorUnlocked"] else "locked").replace(
    #                             "KEYSTATUS", "" if data["state"]["HoldingKey"] else "do not")
    #                         .replace("ACTION1", action_dict[data["action1"]]).replace("ACTION2", action_dict[data["action2"]]))
    #     return final_string


    # # TODO code reuse among these three functions?
    # def verify_binary_feedback(self, response, data):
    #     response_to_numeric_dict = {
    #         "YES": 1,
    #         "NO": -1
    #     }
    #     ret_dict = {"State": data, "Response": response}
    #
    #     expert_feedback = data["feedback"]
    #     try:
    #         response = response.split("{")
    #         response = response[-1]
    #         response = "{" + response
    #         llm = json.loads(response)
    #     except Exception as e:
    #         print(e)
    #         ret_dict["Correct"] = False
    #         ret_dict["JSONCorrect"] = False
    #         return ret_dict
    #     llm_feedback = response_to_numeric_dict[llm["feedback"].upper()]
    #     ret_dict["JSONCorrect"] = True
    #     if llm_feedback == expert_feedback:
    #         ret_dict["Correct"] = True
    #     else:
    #         ret_dict["Correct"] = False
    #
    #     return ret_dict
    #
    # def verify_action_advising(self, response, data):
    #     action_to_number_dict = {
    #         "TURN LEFT": 0,
    #         "TURN RIGHT": 1,
    #         "MOVE FORWARD": 2,
    #         "PICK UP THE KEY": 3,
    #         "UNLOCK THE DOOR": 5
    #     }
    #     ret_dict = {"State": data, "Response": response}
    #
    #     expert_feedback = data["feedback"]
    #     try:
    #         response = response.split("{")
    #         response = response[-1]
    #         response = "{" + response
    #         llm = json.loads(response)
    #     except Exception as e:
    #         print(e)
    #         ret_dict["Correct"] = False
    #         ret_dict["JSONCorrect"] = False
    #         return ret_dict
    #     llm_feedback = action_to_number_dict[llm["action"].upper()]
    #     ret_dict["JSONCorrect"] = True
    #     # print(data)
    #     # print(llm_feedback, expert_feedback)
    #     if llm_feedback in expert_feedback:
    #         ret_dict["Correct"] = True
    #     else:
    #         ret_dict["Correct"] = False
    #
    #     return ret_dict
    #
    #
    # def verify_preference(self, response, data):
    #
    #     preference_to_number_dict = {
    #         "FIRST": 1,
    #         "SECOND": -1
    #     }
    #     ret_dict = {"State": data, "Response": response}
    #
    #     expert_feedback = data["feedback"]
    #     try:
    #         response = response.split("{")
    #         response = response[-1]
    #         response = "{" + response
    #         llm = json.loads(response)
    #     except Exception as e:
    #         print(e)
    #         ret_dict["Correct"] = False
    #         ret_dict["JSONCorrect"] = False
    #         return ret_dict
    #     llm_feedback = preference_to_number_dict[llm["preference"].upper()]
    #     ret_dict["JSONCorrect"] = True
    #     if llm_feedback == expert_feedback or expert_feedback == 0:
    #         ret_dict["Correct"] = True
    #     else:
    #         ret_dict["Correct"] = False
    #
    #     return ret_dict



class FourRoomsVerifier(LMFeedbackVerifier):
    """Language-model feedback verifier configured for MiniGrid FourRooms.

    The constructor installs the observation extractor, the three-action
    vocabulary, the response vocabularies and the prompt templates.
    ``domain_specific_prompt_process`` returns the prompt unchanged.
    """

    def __init__(self, env, feedback_type, data_path, condition_list, **kwargs):
        """Configure vocabularies and prompt templates.

        Args:
            env, feedback_type, data_path, condition_list, **kwargs: forwarded
                unchanged to ``LMFeedbackVerifier.__init__``.
            feedback_type: also selects which response vocabulary becomes
                ``feedback_type_to_translation_dict``.
            condition_list: when it contains ``"unknown_dynamics"`` the base
                prompts are replaced by variants that list the actions
                without describing their effects.
        """
        super().__init__(env, feedback_type, data_path, condition_list, **kwargs)
        # The DoorKey ASCII extractor is reused here.
        self.obs_representation_extractor = doorkey_obs_representation_extractor
        self.number_to_action_dict = {
            0: "TURN LEFT",
            1: "TURN RIGHT",
            2: "MOVE FORWARD",
        }
        self.action_to_number_dict = {
            "TURN LEFT": 0,
            "TURN RIGHT": 1,
            "MOVE FORWARD": 2,
        }
        self.preference_to_number_dict = {
            "FIRST": 1,
            "SECOND": -1
        }
        self.response_to_numeric_dict = {
            "YES": 1,
            "NO": -1
        }
        # Any other feedback type leaves the attribute untouched, as before.
        if feedback_type == "binary_feedback":
            self.feedback_type_to_translation_dict = self.response_to_numeric_dict
        elif feedback_type == "preference":
            self.feedback_type_to_translation_dict = self.preference_to_number_dict
        elif feedback_type == "action_advising":
            self.feedback_type_to_translation_dict = self.action_to_number_dict
        self.base_prompt = """
You need to solve Minigrid FourRooms.
Minigrid FourRooms involves crossing a gridworld, where you need reach the goal. Do not hit the wall.
The state is described with an ASCII art. In the ASCII art, “#” means wall. “G” means goal. “.” means empty tile. “^” mean the agent who is facing upwards. “V” mean the agent who is facing downwards. “<” means the agent who is facing leftwards. “>” means the agent who is facing rightwards.
The possible actions are:
1. TURN LEFT, changing the direction to left, for example, the agent “^” will become “<”.
2. TURN RIGHT, changing the direction to right, for example, the agent “^” will become “>”.
3. MOVE FORWARD, moving forwards for one tile
                """
        self.base_prompt_egocentric = """
You need to solve Minigrid FourRooms.
Minigrid FourRooms involves crossing a gridworld, where you need reach the goal. Do not hit the wall.
The state is described with an ASCII art. In the ASCII art, “#” means wall. “G” means goal. “.” means empty tile. 
"^" is where you are. You are receiving egocentric observations, therefore you will always see yourself facing upwards.
1. TURN LEFT, changing the direction to left, for example, the agent “^” will become “<”.
2. TURN RIGHT, changing the direction to right, for example, the agent “^” will become “>”.
3. MOVE FORWARD, moving forwards for one tile 
                """

        # Variants that name the actions without explaining what they do.
        if "unknown_dynamics" in condition_list:
            self.base_prompt = """
            You need to solve Minigrid FourRooms.
            Minigrid FourRooms involves crossing a gridworld, where you need reach the goal. Do not hit the wall.
            The state is described with an ASCII art. In the ASCII art, “#” means wall. “G” means goal. “.” means empty tile. “^” mean the agent who is facing upwards. “V” mean the agent who is facing downwards. “<” means the agent who is facing leftwards. “>” means the agent who is facing rightwards.
            The possible actions are:
            1. TURN LEFT
            2. TURN RIGHT
            3. MOVE FORWARD
            """
            self.base_prompt_egocentric = """
            You need to solve Minigrid FourRooms.
            Minigrid FourRooms involves crossing a gridworld, where you need reach the goal. Do not hit the wall.
            The state is described with an ASCII art. In the ASCII art, “#” means wall. “G” means goal. “.” means empty tile. 
            "^" is where you are. You are receiving egocentric observations, therefore you will always see yourself facing upwards.
            1. TURN LEFT
            2. TURN RIGHT
            3. MOVE FORWARD
            """

        self.explicit_thinking_guides = """
You should think about these questions.
What is your current goal?
Based on the rules you know, what subgoals do you need to achieve?
Based on what you see and what you have, what subgoals have you achieved?
If you have not achieved all of the subgoals, based on the rules and doable actions, can you achieve any subgoals now?
If yes, which subgoal do you want to achieve next?
If no, what do you need or where should you go next?
            """.strip()

        self.if_optimal_prompt_cot = """
You are in the following state represented in ascii art:
OBSREPRESENTATION
Is action ACTION the best action you can take? Please think step by step.
Only give the answer in a new line in JSON format:
{"reasoning": <REASONING>, "feedback": <FEEDBACK>}
Where <FEEDBACK> is one of "YES" or "NO", <REASONING> is a string of your thinking steps.
                """

        self.if_optimal_prompt_explicit_action_by_action_cot = """ 
You are in the following state represented in ascii art:
OBSREPRESENTATION
Is action ACTION the best action you can take? Please think step by step and consider all possible actions, which are TURN LEFT, TURN RIGHT, MOVE FORWARD.
Only give the answer in a new line in JSON format:
{"reasoning": <REASONING>, "feedback": <FEEDBACK>}
Where <FEEDBACK> is one of "YES" or "NO", <REASONING> is a string of your thinking steps.
                """

        self.action_advising_base_prompt_cot = """
You are in the following state represented in ascii art:
OBSREPRESENTATION
Which action do you choose? Please think step by step.
Only give the answer in a new line in JSON format:
{"reasoning": <REASONING>, "action": <ACTION>}
Where <ACTION> is one of "TURN LEFT", "TURN RIGHT" or "MOVE FORWARD", <REASONING> is a string of your thinking steps.
                """

        # A stray "å" after MOVE FORWARD used to be sent to the model here.
        self.action_advising_base_prompt_explicit_action_by_action_cot = """
You are in the following state represented in ascii art:
OBSREPRESENTATION
Which action do you choose? Please think step by step and consider all possible actions, which are TURN LEFT, TURN RIGHT, MOVE FORWARD.
Only give the answer in a new line in JSON format:
{"reasoning": <REASONING>, "action": <ACTION>}
Where <ACTION> is one of "TURN LEFT", "TURN RIGHT" or "MOVE FORWARD", <REASONING> is a string of your thinking steps.
                """

        self.preference_base_prompt_cot = """
You are in the following state represented in ascii art:
OBSREPRESENTATION
Given ACTION1 or ACTION2, which action is better? Please think step by step.
Only give the answer in a new line in JSON format:
{"reasoning": <REASONING>, "preference": <PREFERENCE>}
Where <PREFERENCE> is one of "FIRST" or "SECOND", <REASONING> is a string of your thinking steps.
                """

    def domain_specific_prompt_process(self, data, prompt):
        """Return ``prompt`` unchanged; ``data`` is not used for FourRooms.

        Appending ``self.explicit_thinking_guides`` when
        "explicit_thinking_guides" is in the condition list is currently
        disabled here.
        """
        # if "explicit_thinking_guides" in self.condition_list:
        #     prompt += self.explicit_thinking_guides
        return prompt

#     def generate_prompt(self, data):
#         base_prompt = """
# You need to solve Minigrid FourRooms.
# Minigrid FourRooms involves crossing a gridworld, where you need reach the goal. Do not hit the wall.
# The state is described with an ASCII art. In the ASCII art, “#” means wall. “G” means goal. “.” means empty tile. “^” mean the agent who is facing upwards. “V” mean the agent who is facing downwards. “<” means the agent who is facing leftwards. “>” means the agent who is facing rightwards.
# The possible actions are:
# 1. TURN LEFT, changing the direction to left, for example, the agent “^” will become “<”.
# 2. TURN RIGHT, changing the direction to right, for example, the agent “^” will become “>”.
# 3. MOVE FORWARD, moving forwards for one tile
#         """
#
#         if "egocentric" in self.condition_list:
#             base_prompt = """
# You need to solve Minigrid FourRooms.
# Minigrid FourRooms involves crossing a gridworld, where you need reach the goal. Do not hit the wall.
# The state is described with an ASCII art. In the ASCII art, “#” means wall. “G” means goal. “.” means empty tile.
# "^" is where you are. You are receiving egocentric observations, therefore you will always see yourself facing upwards.
# 1. TURN LEFT, changing the direction to left, for example, the agent “^” will become “<”.
# 2. TURN RIGHT, changing the direction to right, for example, the agent “^” will become “>”.
# 3. MOVE FORWARD, moving forwards for one tile
#             """
#
#
#         action_dict = self.number_to_action_dict
#
#         if_optimal_prompt_cot = """
# You are in the following state represented in ascii art:
# ASCIIART
# Is action ACTION the best action you can take? Please think step by step.
# Only give the answer in a new line in JSON format:
# {"reasoning": <REASONING>, "feedback": <FEEDBACK>}
# Where <FEEDBACK> is one of "YES" or "NO", <REASONING> is a string of your thinking steps.
#         """
#
#         action_advising_base_prompt_cot = """
# You are in the following state represented in ascii art:
# ASCIIART
# Which action do you choose? Please think step by step.
# Only give the answer in a new line in JSON format:
# {"reasoning": <REASONING>, "action": <ACTION>}
# Where <ACTION> is one of "TURN LEFT", "TURN RIGHT", "MOVE FORWARD", "PICK UP THE KEY" or "UNLOCK THE DOOR", <REASONING> is a string of your thinking steps.
#         """
#
#         preference_base_prompt_cot = """
# You are in the following state represented in ascii art:
# ASCIIART
# Given ACTION1 or ACTION2, which action is better? Please think step by step.
# Only give the answer in a new line in JSON format:
# {"reasoning": <REASONING>, "preference": <PREFERENCE>}
# Where <PREFERENCE> is one of "FIRST" or "SECOND", <REASONING> is a string of your thinking steps.
# """
#
#         ascii_art = data["state"]["ObsString"]
#         if "egocentric" in self.condition_list:
#             ascii_art = rotate_to_egocentric(ascii_art)
#         ascii_art = array_to_string(ascii_art)
#
#         if self.feedback_type == "binary_feedback":
#             final_string = (base_prompt +
#                             if_optimal_prompt_cot.replace("ASCIIART", ascii_art)
#                             .replace("ACTION", action_dict[data["action"]]))
#         if self.feedback_type == "action_advising":
#             final_string = (base_prompt +
#                             action_advising_base_prompt_cot.replace("ASCIIART", ascii_art))
#         if self.feedback_type == "preference":
#             final_string = (base_prompt +
#                             preference_base_prompt_cot.replace("ASCIIART", ascii_art)
#                             .replace("ACTION1", action_dict[data["action1"]]).replace("ACTION2", action_dict[data["action2"]]))
#         return final_string


    # # TODO code reuse among these three functions?
    # def verify_binary_feedback(self, response, data):
    #     response_to_numeric_dict = {
    #         "YES": 1,
    #         "NO": -1
    #     }
    #     ret_dict = {"State": data, "Response": response}
    #
    #     expert_feedback = data["feedback"]
    #     try:
    #         response = response.split("{")
    #         response = response[-1]
    #         response = "{" + response
    #         llm = json.loads(response)
    #     except Exception as e:
    #         print(e)
    #         ret_dict["Correct"] = False
    #         ret_dict["JSONCorrect"] = False
    #         return ret_dict
    #     llm_feedback = response_to_numeric_dict[llm["feedback"].upper()]
    #     ret_dict["JSONCorrect"] = True
    #     if llm_feedback == expert_feedback:
    #         ret_dict["Correct"] = True
    #     else:
    #         ret_dict["Correct"] = False
    #
    #     return ret_dict
    #
    # def verify_action_advising(self, response, data):
    #     action_to_number_dict = {
    #         "TURN LEFT": 0,
    #         "TURN RIGHT": 1,
    #         "MOVE FORWARD": 2,
    #         "PICK UP THE KEY": 3,
    #         "UNLOCK THE DOOR": 5
    #     }
    #     ret_dict = {"State": data, "Response": response}
    #
    #     expert_feedback = data["feedback"]
    #     try:
    #         response = response.split("{")
    #         response = response[-1]
    #         response = "{" + response
    #         llm = json.loads(response)
    #     except Exception as e:
    #         print(e)
    #         ret_dict["Correct"] = False
    #         ret_dict["JSONCorrect"] = False
    #         return ret_dict
    #     llm_feedback = action_to_number_dict[llm["action"].upper()]
    #     ret_dict["JSONCorrect"] = True
    #     # print(data)
    #     # print(llm_feedback, expert_feedback)
    #     if llm_feedback in expert_feedback:
    #         ret_dict["Correct"] = True
    #     else:
    #         ret_dict["Correct"] = False
    #
    #     return ret_dict
    #
    #
    # def verify_preference(self, response, data):
    #
    #     preference_to_number_dict = {
    #         "FIRST": 1,
    #         "SECOND": -1
    #     }
    #     ret_dict = {"State": data, "Response": response}
    #
    #     expert_feedback = data["feedback"]
    #     try:
    #         response = response.split("{")
    #         response = response[-1]
    #         response = "{" + response
    #         llm = json.loads(response)
    #     except Exception as e:
    #         print(e)
    #         ret_dict["Correct"] = False
    #         ret_dict["JSONCorrect"] = False
    #         return ret_dict
    #     llm_feedback = preference_to_number_dict[llm["preference"].upper()]
    #     ret_dict["JSONCorrect"] = True
    #     # print(data["state"]["expert_paths"])
    #     if llm_feedback == expert_feedback or expert_feedback == 0:
    #         ret_dict["Correct"] = True
    #     else:
    #         ret_dict["Correct"] = False
    #
    #     return ret_dict









import sys, argparse
# from ALFTest import ALFVerifier
def parse_bool_flag(value):
    """Convert a command-line token into a real boolean.

    ``argparse`` with ``type=bool`` calls ``bool()`` on the raw string, so any
    non-empty value -- including ``"False"`` -- becomes ``True``. This converter
    interprets the usual textual spellings instead.

    Args:
        value: The raw token from the command line (or an actual ``bool``).

    Returns:
        ``True`` or ``False``. An empty string maps to ``False``, which matches
        what ``bool("")`` produced before.

    Raises:
        argparse.ArgumentTypeError: If the token is not a recognised spelling.
    """
    if isinstance(value, bool):
        return value
    normalized = str(value).strip().lower()
    if normalized in {"true", "t", "yes", "y", "1", "on"}:
        return True
    if normalized in {"false", "f", "no", "n", "0", "off", ""}:
        return False
    raise argparse.ArgumentTypeError(
        "expected a boolean value (true/false), got {!r}".format(value)
    )


def build_arg_parser():
    """Build the command-line parser for the language-model verifier script.

    Returns:
        argparse.ArgumentParser: Parser exposing ``--env``, ``--type``,
        ``--distribution``, ``--source``, ``--url``, ``--model``, ``--rounds``,
        ``--egocentric``, ``--explicit_action_by_action_cot`` and
        ``--max_item_num``. Every option is optional and has a default.
    """
    parser = argparse.ArgumentParser(description="Language Model verifier")
    parser.add_argument('--env', type=str, help="domain", default="MiniGrid-DoorKey-5x5-v0")
    parser.add_argument('--type', type=str, help="Feedback type", default="binary_feedback")
    parser.add_argument('--distribution', type=str, default="1",
                        help="Expert percentage in sampling distribution")
    parser.add_argument('--source', type=str, default="vec-inf",
                        help="Inference backend")
    parser.add_argument('--url', type=str, default="http://gpu005:8080/v1",
                        help="Inference backend url")
    parser.add_argument('--model', type=str, default="Meta-Llama-3.1-8B-Instruct",
                        help="Model")
    parser.add_argument('--rounds', type=int, default=1, help="rounds of each state/action to get responses")
    # nargs="?" + const=True accepts both the bare flag (``--egocentric``) and
    # an explicit value (``--egocentric False``); parse_bool_flag makes the
    # explicit value mean what it says.
    parser.add_argument('--egocentric', type=parse_bool_flag, nargs="?", const=True, default=False,
                        help="use egocentric observation")
    parser.add_argument('--explicit_action_by_action_cot', type=parse_bool_flag, nargs="?", const=True,
                        default=False, help="use explicit action by action cot")
    parser.add_argument('--max_item_num', type=int, default=-1, help="maximum of data entries to test, -1 means all")
    return parser


if __name__ == "__main__":
    parser = build_arg_parser()
    args = parser.parse_args()

    # Maps each supported environment id to the verifier class that handles it.
    verifier_dict = {
        "MiniGrid-DoorKey-5x5-v0": DoorKeyVerifier,
        "MiniGrid-FourRooms-v0": FourRoomsVerifier,
        # "ALFWorld": ALFVerifier
    }

    # Fail with a usage message instead of a bare KeyError further down.
    if args.env not in verifier_dict:
        parser.error(
            "unsupported --env {!r}; choose one of: {}".format(args.env, ", ".join(verifier_dict))
        )

    # Dataset file for each environment, selected by feedback type and distribution.
    path_dict = {
        "MiniGrid-DoorKey-5x5-v0": PERSISTENT_DATA_PATH + "/Doorkey/doorkey_{type}_{distribution}.npy".format(
            type=args.type, distribution=args.distribution),
        "MiniGrid-FourRooms-v0": PERSISTENT_DATA_PATH + "/FourRooms/FourRooms{type}_{distribution}.npy".format(
            type=args.type, distribution=args.distribution),
        # "ALFWorld": PERSISTENT_DATA_PATH + "/ALF/ALF{type}_{distribution}.npy".format(type=args.type, distribution=args.distribution)
    }
    print(args)

    # Collect the names of the optional conditions that were switched on; the
    # list is handed to the verifier constructor as its fourth argument.
    all_condition_list = ["egocentric", "explicit_action_by_action_cot"]
    condition_list = [condition for condition in all_condition_list if getattr(args, condition)]

    verifier = verifier_dict[args.env]
    instance_verifier = verifier(args.env, args.type, path_dict[args.env], condition_list)
    results = instance_verifier.verify(args.rounds, args.source, url=args.url, model=args.model,
                                       max_item_num=args.max_item_num)
