from openai import AzureOpenAI
from agents.agent import Agent
import json
import os


class GPT4VAgent(Agent):
    """Agent that answers by querying an Azure OpenAI GPT-4V chat deployment.

    The conversation history is kept in ``self.conversation``; every model
    reply produced by :meth:`respond` is appended to it.
    """

    # Your endpoint should look like https://YOUR_RESOURCE_NAME.openai.azure.com/
    API_BASE = "https://gpt4v-simulation.openai.azure.com/"
    API_VERSION = "2023-12-01-preview"  # this might change in the future
    DEPLOYMENT_NAME = "gpt-4v"
    # Location of the JSON file holding the API key, relative to the CWD.
    KEYS_PATH = os.path.join("config", "keys.json")

    def __init__(self, role="SOLVER") -> None:
        """Create the agent and its Azure OpenAI client.

        Args:
            role: Role name forwarded to the ``Agent`` base class.

        Raises:
            FileNotFoundError: If ``config/keys.json`` does not exist.
            KeyError: If the key file has no ``AZURE_OPENAI_GPT4V_KEY`` entry.
        """
        super().__init__(role)
        # List of strings. Each element (string) is a message alternating
        # between solver and expert.
        self.conversation = []

        # Use a context manager so the key file is closed deterministically.
        with open(self.KEYS_PATH, encoding="utf-8") as key_file:
            config = json.load(key_file)
        api_key = config["AZURE_OPENAI_GPT4V_KEY"]

        self.client = AzureOpenAI(
            api_key=api_key,
            api_version=self.API_VERSION,
            base_url=(
                f"{self.API_BASE}openai/deployments/"
                f"{self.DEPLOYMENT_NAME}/extensions"
            ),
        )

    def clear(self):
        """Forget the conversation history."""
        self.conversation = []

    # Given conversation history, respond to message.
    def respond(self, image_data, actions, message=None):
        """Query the model and return its reply.

        The prompt is produced by ``self.get_conversation_history_string``
        (not defined in this class; presumably inherited from ``Agent``).
        When that helper returns a tuple it is unpacked as
        ``(image_path, prompt_text)`` and the image is sent to the model
        together with the text; otherwise the returned value is sent as the
        plain-text user message.

        Args:
            image_data: Forwarded unchanged to the prompt builder.
            actions: Forwarded unchanged to the prompt builder.
            message: Optional message forwarded to the prompt builder.

        Returns:
            The content of the first completion choice. It is also appended
            to ``self.conversation``.
        """
        ret = self.get_conversation_history_string(
            image_data=image_data, actions=actions, message=message,
            model="gpt4v")

        if isinstance(ret, tuple):
            image_path, llm_input = ret
            base64_image = self.encode_image(image_path)
            # Multi-part user content: the image itself plus the text prompt.
            # Previously the image part was built but never sent.
            user_content = [
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{base64_image}",
                    },
                },
                {
                    "type": "text",
                    "text": "This is a picture of the puzzle manual. "
                            + llm_input,
                },
            ]
        else:
            user_content = ret

        response = self.client.chat.completions.create(
            model=self.DEPLOYMENT_NAME,
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": user_content},
            ],
            max_tokens=2000,
            temperature=0,
        )
        predicted_action = response.choices[0].message.content
        self.conversation.append(predicted_action)
        return predicted_action
