import base64
import logging
import mimetypes
import os

import requests

from ..multimodalmodel import MultiModalModel

# OpenAI API key sent as the Bearer token in gpt4v_decode's Authorization
# header; replace this placeholder with a real key before making requests.
api_key = "Supply your own key here..."
def encode_image(image_path):
    """Return the contents of the file at *image_path* as base64 text.

    The file is read in binary mode and the base64 bytes are decoded as
    UTF-8 so the result is a ``str``.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')

def gpt4v_decode(image_path, promt, max_tokens=500, timeout=60):
    """Send one image plus a text prompt to the OpenAI chat completions API.

    Args:
        image_path: Path of the image file to attach to the request.
        promt: Text prompt sent alongside the image. The parameter spelling
            is kept so existing keyword callers continue to work.
        max_tokens: Forwarded as the request's ``max_tokens`` field.
        timeout: Seconds to wait on the HTTP request before giving up.

    Returns:
        The ``content`` of the first choice's message, or ``""`` when the
        request fails or the response does not have the expected shape.

    Raises:
        OSError: If the image file cannot be opened or read; encoding happens
            before the request and is not part of the best-effort handling.
    """
    base64_image = encode_image(image_path)

    # Label the data URL with the file's own image type; fall back to the
    # previously hard-coded JPEG when the extension is unknown or not an image.
    mime_type, _ = mimetypes.guess_type(image_path)
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }

    payload = {
        "model": "gpt-4-vision-preview",
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": promt},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:{mime_type};base64,{base64_image}"},
                    },
                ],
            }
        ],
        "max_tokens": max_tokens,
    }

    log = logging.getLogger(__name__)
    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=timeout,
        )
        if not response.ok:
            # Surface the API's own error body instead of an opaque KeyError.
            log.warning(
                "GPT-4V request for %s returned HTTP %s: %s",
                image_path,
                response.status_code,
                response.text[:500],
            )
            return ""
        return response.json()['choices'][0]['message']['content']
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as e:
        # Best-effort contract: callers get "" on failure, but the cause is logged.
        log.warning("GPT-4V request for %s failed: %r", image_path, e)
        return ""

class GPT4V(MultiModalModel):
    """MultiModalModel implementation that generates text via gpt4v_decode."""

    def __init__(self):
        super().__init__()

    def generate(self, test_cases, image_dir, **generation_kwargs):
        """Run every test case through gpt4v_decode and collect the outputs.

        Args:
            test_cases: Iterable of indexable items where element 0 is an
                image file name (joined onto ``image_dir``) and element 1 is
                the text prompt.
            image_dir: Directory containing the image files.
            **generation_kwargs: ``max_new_tokens`` is forwarded as the
                ``max_tokens`` argument of gpt4v_decode; when omitted, 500 is
                used (the same value as gpt4v_decode's own default).

        Returns:
            A list with one gpt4v_decode result per test case, in input order.
        """
        # Looked up once; a missing key falls back to a default instead of
        # raising KeyError on the first test case.
        max_tokens = generation_kwargs.get('max_new_tokens', 500)
        return [
            gpt4v_decode(os.path.join(image_dir, case[0]), case[1], max_tokens=max_tokens)
            for case in test_cases
        ]

    def compute_loss(self, behavior, target, image_input):
        """Delegate unchanged to the parent class's compute_loss."""
        return super().compute_loss(behavior, target, image_input)


