import base64
import json
import mimetypes
import os
import random

from openai import AzureOpenAI
import pyscreenshot as ImageGrab

def read_json_file(filename):
    """Load a JSON document from *filename*, best-effort.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The decoded JSON value, or an empty dict when the file does not
        exist or does not contain valid JSON.
    """
    try:
        # JSON is UTF-8 by specification; do not depend on the locale's
        # default encoding (which is not UTF-8 on every platform).
        with open(filename, 'r', encoding='utf-8') as file:
            return json.load(file)
    except (FileNotFoundError, json.JSONDecodeError):
        # Deliberate best-effort: callers treat a missing/broken file as
        # "no data" rather than as an error.
        return {}

class Agent:
    """Puzzle-conversation agent playing either the EXPERT or the SOLVER.

    The agent keeps a flat list of exchanged messages (``conversation``),
    builds model-specific prompts from it, and can capture screenshots of
    the puzzle window.
    """

    # Upper bound on the numbered screenshot files kept in ``save_dir``.
    MAX_SCREENSHOTS = 100

    _VALID_ROLES = ("EXPERT", "SOLVER")
    _VALID_FORMATS = ("concatenated", "structured")

    def __init__(self, role="EXPERT", conversation_format="concatenated") -> None:
        """
        Initialize the Agent.

        Args:
            role (str): Role of the agent ("EXPERT" or "SOLVER").
            conversation_format (str): Format of the conversation history.
                                       Options: "concatenated", "structured".

        Raises:
            ValueError: If ``role`` is not "EXPERT" or "SOLVER".
            KeyError: If the prompts file lacks the role's prompt or the
                "history" entry.
        """
        # Fail fast on a bad role, before any configuration file is read.
        if role not in self._VALID_ROLES:
            raise ValueError("Invalid role")

        # List of messages, in the order they were exchanged.
        self.conversation = []
        self.role = role
        self.conversation_format = conversation_format

        config = read_json_file(os.path.join("config", "keys.json"))
        self.prompts = read_json_file(os.path.join("config", "prompts.json"))

        self.prompt = self.prompts[role]
        self.history = self.prompts["history"]
        self.API_KEY = config.get("AZURE_OPENAI_API_KEY")

        # No module is selected until set_module() is called; initialised
        # here so history lookups never hit a missing attribute.
        self.module = None
        self.manual_image = None
        self.manual = self.prompts.get("SimpleWirePuzzle", "")

        # Define save directory for screenshots
        self.save_dir = os.path.join("images", "screenshots")

    def clear(self):
        """Forget every message recorded so far."""
        self.conversation = []

    def capture_screen_area(self, left, top, width, height):
        """Grab a rectangle of the screen and save it as a numbered PNG.

        The image is written to the first unused ``<i>.png`` slot in
        ``save_dir``. When every slot is taken, the last slot is
        overwritten so the returned path always holds the fresh capture.

        Args:
            left: X coordinate of the rectangle's left edge.
            top: Y coordinate of the rectangle's top edge.
            width: Width of the rectangle.
            height: Height of the rectangle.

        Returns:
            str: Path of the file the screenshot was written to.
        """
        bbox = (left, top, left + width, top + height)
        screen_image = ImageGrab.grab(bbox)

        os.makedirs(self.save_dir, exist_ok=True)

        for index in range(self.MAX_SCREENSHOTS):
            target = os.path.join(self.save_dir, f"{index}.png")
            if not os.path.exists(target):
                break
        # Either a free slot was found, or ``target`` is the final slot and
        # gets overwritten instead of handing back a stale image.
        screen_image.save(target)
        return target

    def set_module(self, module):
        """Select the active puzzle module and load its manual.

        The manual text falls back to the "SimpleWirePuzzle" prompt when
        the module has no entry; the manual image is only set when
        ``images/manuals/<module>.jpg`` exists.
        """
        self.module = module
        self.manual = self.prompts.get(
            str(self.module), self.prompts.get("SimpleWirePuzzle", ""))
        manual_path = os.path.join("images", "manuals", f"{self.module}.jpg")
        self.manual_image = manual_path if os.path.exists(manual_path) else None

    def step(self, puzzle, message):
        """Forward *message* to ``puzzle.execute_action`` and return its result."""
        return puzzle.execute_action(message)

    def get_feedback(self, cur_puzzle, history_puzzle, made_mistake):
        """Return the feedback sentence describing the last action's outcome.

        A change of puzzle takes precedence over a reported mistake.
        """
        if cur_puzzle != history_puzzle:
            return "Here comes a new puzzle. Let's start working on it."
        if made_mistake:
            return "That action seems to have been a mistake. A red light popped up on the bomb."
        return "I have performed the action."

    def respond_with_image(self, width, height, actions, message):
        """Capture the puzzle area of the screen and respond to it.

        NOTE(review): the 12 / 24 / 70 pixel offsets are presumably tuned to
        the game window's borders - confirm against the window layout.
        """
        image_path = self.capture_screen_area(12, 12, width - 24, height + 70)
        return self.respond(image_path, actions, message)

    def encode_image(self, image_path):
        """Return the contents of *image_path* as a base64 text string."""
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')

    def get_conversation_history_string(self, image_data=None, actions=None, message=None, use_history=False, model=None):
        """Build the model input from the prompt, manual and conversation.

        Side effect: a truthy ``message`` is appended to ``conversation``.

        Args:
            image_data: Path of the puzzle image, or None.
            actions: List of dicts with a "name" key, or None.
            message: Incoming message to record before building, or None.
            use_history: Whether to include the stored history prompt (and,
                in concatenated format, the selected module's past runs).
            model: Target model name; selects the multimodal layout when
                both ``image_data`` and ``actions`` are given
                (concatenated format only).

        Returns:
            Depending on format/model: a prompt string, a list of message
            parts, or a ``(manual_image_path, text)`` tuple when a manual
            image is set and no puzzle image/actions were supplied.

        Raises:
            ValueError: If ``conversation_format`` or the role is invalid.
        """
        if self.conversation_format not in self._VALID_FORMATS:
            raise ValueError("Invalid conversation_format. Choose 'concatenated' or 'structured'.")

        if self.conversation_format == "concatenated":
            return self._concatenated_input(image_data, actions, message, use_history, model)
        return self._structured_input(image_data, actions, message, use_history)

    @staticmethod
    def _format_actions(actions):
        """Render the action names as the 'available actions' text block."""
        names = "\n".join([action['name'] for action in actions])
        return "The available actions are:\n" + names + "\n\n"

    def _image_data_url(self, image_path):
        """Return a base64 data URL whose MIME type matches the file."""
        mime_type, _ = mimetypes.guess_type(image_path)
        # Screenshots are saved as PNG; only fall back to the historical
        # JPEG label when the extension gives no usable image type.
        if not mime_type or not mime_type.startswith("image/"):
            mime_type = "image/jpeg"
        return f"data:{mime_type};base64,{self.encode_image(image_path)}"

    def _concatenated_input(self, image_data, actions, message, use_history, model):
        """Build the 'concatenated' (speaker-labelled transcript) input."""
        # The textual manual is only inlined for the expert, and only when
        # no manual image is available.
        manual_text = ""
        if not self.manual_image and self.role == "EXPERT":
            manual_text = self.manual

        if use_history:
            llm_input = f"{self.prompt}\n\n{self.history}\n\n{manual_text}\n\n"
            # Past runs are stored per module; nothing to look up until a
            # module has been selected.
            if self.module is not None:
                past_runs = read_json_file("history/past_runs.json")
                module_history = past_runs.get(str(self.module), "")
                if module_history:
                    llm_input += f"{module_history}\n\n"
        elif manual_text:
            llm_input = f"{self.prompt}\n\n{manual_text}\n\n"
        else:
            llm_input = f"{self.prompt}\n\n"

        if message:
            self.conversation.append(message)

        # Messages are labelled alternately, starting with the other party.
        # NOTE(review): this assumes the first recorded message came from
        # the counterpart - confirm against how callers fill the list.
        speaker = "SOLVER" if self.role == "EXPERT" else "EXPERT"
        turns = []
        for mess in self.conversation:
            turns.append(f"{speaker}: {mess}\n\n")
            speaker = "EXPERT" if speaker == "SOLVER" else "SOLVER"
        llm_input += "".join(turns) + f"{speaker}: "

        if image_data and actions:
            action_string = self._format_actions(actions)

            if model in ("gpt4o", "gpt4v"):
                return [
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": self._image_data_url(image_data),
                        },
                    },
                    {"type": "text", "text": action_string},
                    {"type": "text", "text": llm_input},
                ]
            if model == "qwenVL":
                return [
                    {'image': image_data},
                    {'text': action_string},
                    {'text': llm_input},
                ]
            if model == "llava":
                return f"[INST] <image>\n{action_string}\n{llm_input} [/INST]"
            if model == "internVL":
                return f'<image>\n {action_string}\n {llm_input}'
            if model == "internVLX":
                return f'<ImageHere>{action_string}\n {llm_input}'
            if model == "random":
                return action_string
            # "human" and any unrecognised model get the plain text prompt.
            return f'{llm_input}\n{action_string}'

        if self.manual_image:
            return self.manual_image, llm_input
        return llm_input

    def _structured_input(self, image_data, actions, message, use_history):
        """Build the 'structured' (JSON human/Assistant) input."""
        if self.role == "EXPERT":
            counterpart = "SOLVER"
        elif self.role == "SOLVER":
            counterpart = "EXPERT"
        else:
            raise ValueError("Invalid role")

        # NOTE(review): the "human" text starts with the counterpart's role
        # name and messages are appended directly after it with no
        # separator. Kept as-is; confirm this is the intended layout.
        structured_history = {"human": counterpart, "Assistant": ""}

        if message:
            self.conversation.append(message)

        # Alternate between human and assistant
        slot = "Assistant" if self.role == "EXPERT" else "human"
        for mess in self.conversation:
            structured_history[slot] += mess + "\n\n"
            slot = "human" if slot == "Assistant" else "Assistant"

        # Prepare the final prompt
        if use_history:
            llm_input = f"{self.prompt}\n\n{self.history}\n\n"
        else:
            llm_input = self.prompt + "\n\n"

        if image_data and actions:
            action_string = self._format_actions(actions)
            if self.manual_image:
                manual_info = f"This is a picture of the puzzle manual.\n{self.manual}"
            else:
                manual_info = self.manual if self.role == "EXPERT" else ""

            # Combine all parts
            prompt = f"{llm_input}{action_string}{manual_info}\n\n"
            prompt += json.dumps(structured_history)
            return prompt

        if self.manual_image:
            return self.manual_image, json.dumps(structured_history)
        return json.dumps(structured_history)

    def respond(self, image_data, actions, message):
        """Pick a random action, record it in the conversation, return its name.

        The first action is skipped whenever other actions exist; if it is
        the only one, it is chosen instead of failing.

        NOTE(review): ``image_data`` and ``message`` are accepted for
        interface compatibility but are not used here, and both
        conversation formats currently share this random policy.

        Args:
            image_data: Image data for the puzzle (only used for solver).
            actions: List of actions which can be performed; each is a dict
                with a "name" key.
            message: None if first message by Solver, otherwise a string
                which is the question/response from solver/expert.

        Returns:
            str: Name of the chosen action.

        Raises:
            IndexError: If ``actions`` is empty.
        """
        candidates = actions[1:]
        if not candidates:
            candidates = actions
        action_name = random.choice(candidates)['name']
        self.conversation.append(action_name)
        return action_name