from openai import OpenAI
import json
import uuid
import mimetypes
from datetime import datetime
import os
from tqdm import tqdm

# Load API credentials from "env.json" (a relative path, so it is resolved
# against the current working directory) at import time. The parsed JSON is
# kept in the module-level name `api_keys` for later lookups.
with open("env.json",'r') as f:
    api_keys = json.load(f)

def encode_image(image_path):
    """Return the base64 text of an image given a local path or an HTTP(S) URL.

    A URL is first downloaded into a local ``downloads`` directory (created
    on demand, relative to the current working directory) under a random
    UUID-based file name; the downloaded file is then read back exactly like
    a local path would be.

    Args:
        image_path: Filesystem path, or a URL beginning with ``http://`` or
            ``https://``.

    Returns:
        The file's bytes, base64-encoded and decoded to ``str`` as UTF-8.

    Raises:
        requests.HTTPError: If the download answers with an error status.
        OSError: If the local file cannot be written or read.
    """
    # Require a real scheme so that a local path that merely starts with the
    # letters "http" is not mistaken for a URL.
    if image_path.startswith(("http://", "https://")):
        user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0"

        # The target directory may not exist yet on a fresh checkout.
        download_dir = os.path.abspath("downloads")
        os.makedirs(download_dir, exist_ok=True)

        # Use the response as a context manager so the streamed connection is
        # released even if writing the file fails part-way.
        with requests.get(
            image_path,
            headers={"User-Agent": user_agent},
            stream=True,
        ) as response:
            response.raise_for_status()
            content_type = response.headers.get("content-type", "")

            # Drop parameters such as "; charset=utf-8": guess_extension only
            # recognises the bare MIME type.
            mime_type = content_type.split(";", 1)[0].strip()
            extension = mimetypes.guess_extension(mime_type) or ".download"

            fname = str(uuid.uuid4()) + extension
            download_path = os.path.join(download_dir, fname)

            with open(download_path, "wb") as fh:
                for chunk in response.iter_content(chunk_size=8192):
                    fh.write(chunk)

        image_path = download_path

    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')


# JSON request headers carrying a bearer token read from the OPENAI_API_KEY
# environment variable.
# NOTE(review): when that variable is unset, os.getenv returns None and the
# header value becomes the literal text "Bearer None". `headers` is not
# referenced in the visible part of this file — confirm it is still needed.
headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {os.getenv('OPENAI_API_KEY')}"
}

# Shared OpenAI-SDK client pointed at the yunwu.ai endpoint and authenticated
# with the 'API_DEFAULT' entry loaded from env.json.
# NOTE(review): timeout=120 is presumably in seconds — confirm against the SDK.
client = OpenAI(
    base_url="https://yunwu.ai/v1",
    api_key=api_keys['API_DEFAULT'],
    timeout=120
)

import base64
import requests
from PIL import Image
from io import BytesIO


def pil_image_to_base64(image):
    """Serialize *image* as PNG in memory and return the bytes as base64 text.

    Args:
        image: Object exposing ``save(fp, format=...)``, e.g. a PIL image.

    Returns:
        The PNG bytes, base64-encoded, as a UTF-8 decoded ``str``.
    """
    png_stream = BytesIO()
    image.save(png_stream, format='PNG')
    raw_png = png_stream.getvalue()
    return str(base64.b64encode(raw_png), "utf-8")


def resize_image(image_path):
    """Write a half-size copy of an image beside the original and return its path.

    The copy keeps the original file name with a ``resized_`` prefix and is
    placed in the same directory as the source image.

    Args:
        image_path: Path of the image to shrink.

    Returns:
        Path of the newly written, half-size image.
    """
    # Close the source file handle as soon as the resized copy exists.
    with Image.open(image_path) as img:
        width, height = img.size
        # Clamp to 1 so a 1-pixel dimension does not collapse to 0, which is
        # not a valid image size.
        resized = img.resize((max(1, width // 2), max(1, height // 2)))

    # Prefix only the file name. Prefixing the whole path would turn
    # "./dir/a.png" into "resized_./dir/a.png", i.e. a different directory.
    directory, filename = os.path.split(image_path)
    new_image_path = os.path.join(directory, f"resized_{filename}")
    resized.save(new_image_path)
    return new_image_path



def get_prompt_v3(model_family, sample, permutation,image_path=None):
    """Build the step-by-step chat prompt (no few-shot example) for one sample.

    Args:
        model_family: Family name; only "openai", "gemini", "qwen" and
            "internvl" produce a prompt.
        sample: Mapping read for 'question', 'options' and, when
            ``image_path`` is not given, 'image_path'.
        permutation: Four indices into ``sample['options']`` giving the option
            shown as A, B, C and D respectively.
        image_path: Optional image location; defaults to the sample's image
            under ``./generated_dataset/final_part2/``.

    Returns:
        A two-message list (system, user) in chat format, or ``None`` for any
        other model family.
    """
    supported_families = ("openai", "gemini", "qwen", "internvl")
    if model_family not in supported_families:
        return None

    if image_path is None:
        image_path = "./generated_dataset/final_part2/" + sample['image_path']

    system_turn = {"role": "system", "content": """
You are a helpful agent.Here is an image with a multiple choice question about the image content. You should reply the question according to the image faithfully. Please note that the question maybe confusing or the image content might be uncommon, You should thinking step by step first and you **MUST give your final choose with <answer></answer>**.
You should follow the format below STRICTLY
format:  Think step by step first, give your discussion about the question and the image BRIEFLY. Then summarize: The final answer is <answer>[A/B/C/D]</answer>.
#########
Now please answer the question following the above format STRICTLY.
            """}

    # The image is encoded before the question text is formatted, matching the
    # order in which the parts appear in the user message.
    image_part = {
        "type": "image_url",
        "image_url": {
            "url": f"data:image/jpeg;base64,{encode_image(image_path)}"},
    }
    question_part = {
        "type":"text",
        "text":f"""
Question:{sample['question']}
Choices:
A. {sample['options'][permutation[0]]}
B. {sample['options'][permutation[1]]}
C. {sample['options'][permutation[2]]}
D. {sample['options'][permutation[3]]}
Your answer:""",
    }

    user_turn = {"role": "user", "content": [image_part, question_part]}
    return [system_turn, user_turn]

def get_prompt(model_family, sample, permutation,image_path=None):
    """Build the few-shot chat prompt for one sample, per model family.

    Supported families are "openai", "gemini", "qwen", "internvl" (which share
    one layout), "minicpm", "claude", "grok" and "doubao". The "claude" and
    "grok" layouts carry the instructions inside the user message instead of a
    separate system message, and "doubao" asks for the answer as ``<X>``
    rather than ``<answer>X</answer>``.

    Every branch honours ``image_path``; when it is ``None`` the image is
    taken from ``./generated_dataset/final_part2/`` + ``sample['image_path']``.
    The path is resolved outside the f-strings because reusing double quotes
    inside a double-quoted f-string is a syntax error before Python 3.12.

    NOTE(review): the few-shot example embedded in the prompts says the handle
    is "on the side" (option B) yet answers C. The prompt text is left
    untouched here because editing it changes what the models are shown.

    Args:
        model_family: Name of the model family selecting the layout.
        sample: Mapping read for 'question', 'options' and, when
            ``image_path`` is not given, 'image_path'.
        permutation: Four indices into ``sample['options']`` giving the option
            shown as A, B, C and D respectively.
        image_path: Optional explicit image location.

    Returns:
        A list of chat messages, or ``None`` for an unsupported family.
    """
    if model_family in ("openai", "gemini", "qwen", "internvl"):
        if image_path is None:
            image_path = "./generated_dataset/final_part2/" + sample['image_path']
        return [
            {"role": "system", "content": """
You are a helpful agent.Here is an image with a multiple choice question about the image content. You should reply the question according to the image faithfully. Please note that the question maybe confusing or the image content might be uncommon, You should thinking briefly first and you **MUST give your final choose with <answer></answer>**.
You should follow the format below STRICTLY
format:  Think first, give your discussion about the question and the image BRIEFLY. Then summarize: The final answer is <answer>[A/B/C/D]</answer>.
Here is an example:
#########
[IMAGE]
Question:Does the Teapot in the picture have a handle? If so, where is it located?
Choices:
A. Not visible / Can't see.
B. Yes, on the side.
C. Yes, arched over the top.
D. The correct answer is not listed.

Your answer: 
From the image I can see the handle on the side clearly, so the answer is <answer>C</answer>.
#########
Now please answer the question following the above format STRICTLY.
            """},
            {"role": "user", "content": [
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{encode_image(image_path)}"}
                },
                {
                    "type":"text",
                    "text":f"""
Question:{sample['question']}
Choices:
A. {sample['options'][permutation[0]]}
B. {sample['options'][permutation[1]]}
C. {sample['options'][permutation[2]]}
D. {sample['options'][permutation[3]]}
Your answer:"""
                }
             ]}
        ]
    elif model_family == 'minicpm':
        if image_path is None:
            image_path = "./generated_dataset/final_part2/" + sample['image_path']
        return [
            {"role": "system", "content": """
You are a helpful agent.Here is an image with a multiple choice question about the image content. You should reply the question according to the image faithfully. Please note that the question maybe confusing or the image content might be uncommon, You should thinking step by step and you **MUST give your final choose with <answer></answer>**.
Here is an example:
#########
[IMAGE]
Question:Does the Teapot in the picture have a handle? If so, where is it located?
Choices:
A. Not visible / Can't see.
B. Yes, on the side.
C. Yes, arched over the top.
D. The correct answer is not listed.

Your answer: 
From the image I can see the handle on the side clearly, so the answer is <answer>C</answer>.
#########
"""},
            {"role": "user", "content": [
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{encode_image(image_path)}"}
                },
                {
                    "type": "text",
                    "text": f"""
    Question:{sample['question']}
    Choices:
    A. {sample['options'][permutation[0]]}
    B. {sample['options'][permutation[1]]}
    C. {sample['options'][permutation[2]]}
    D. {sample['options'][permutation[3]]}
    Your answer:"""
                }
            ]}
        ]
    elif model_family == "claude":
        # Previously this branch ignored the image_path argument.
        if image_path is None:
            image_path = "./generated_dataset/final_part2/" + sample['image_path']
        return [
            {"role": "user", "content": [
                {
                    "type": "text",
                    "text": f"""
(System Prompt)
You are a helpful agent.Here is an image with a multiple choice question about the image content. You should reply the question according to the image faithfully. Please note that the question maybe confusing or the image content might be uncommon, You should thinking briefly first and you **MUST give your final choose with <answer></answer>**.
You should follow the format below STRICTLY
format:  Think first, give your discussion about the question and the image BRIEFLY. Then summarize: The final answer is <answer>[A/B/C/D]</answer>.
Here is an example:
#########
[IMAGE]
Question:Does the Teapot in the picture have a handle? If so, where is it located?
Choices:
A. Not visible / Can't see.
B. Yes, on the side.
C. Yes, arched over the top.
D. The correct answer is not listed.

Your reply: 
From the image I can see the handle on the side clearly, so the answer is <answer>C</answer>.
#########
Now please answer the question following the above format STRICTLY."""
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{encode_image(image_path)}"}
                },
                {
                    "type": "text",
                    "text": f"""
        Question:{sample['question']}
        Choices:
        A. {sample['options'][permutation[0]]}
        B. {sample['options'][permutation[1]]}
        C. {sample['options'][permutation[2]]}
        D. {sample['options'][permutation[3]]}
        Your answer:"""
                }
            ]}
        ]
    elif model_family == 'grok':
        # Previously this branch ignored the image_path argument.
        if image_path is None:
            image_path = "./generated_dataset/final_part2/" + sample['image_path']
        return [
            {"role": "user", "content": [
                {
                    "type": "text",
                    "text": f"""
(System information)
*******************************
You are a helpful agent.Here is an image with a multiple choice question about the image content. You should reply the question according to the image faithfully. Please note that the question maybe confusing or the image content might be uncommon, You should thinking briefly first and you **MUST give your final choose with <answer></answer>**.
You should follow the format below STRICTLY
format:  Think first, give your discussion about the question and the image BRIEFLY. Then summarize: The final answer is <answer>A (or B,C,D) </answer>.
Here is an example:
#########
[IMAGE]
Question:Does the Teapot in the picture have a handle? If so, where is it located?
Choices:
A. Not visible / Can't see.
B. Yes, on the side.
C. Yes, arched over the top.
D. The correct answer is not listed.

Your answer: 
From the image I can see the handle on the side clearly, so the answer is <answer>C</answer>.
#########
********************************
Now please answer the question following the above format STRICTLY.

Question:{sample['question']}
Choices:
A. {sample['options'][permutation[0]]}
B. {sample['options'][permutation[1]]}
C. {sample['options'][permutation[2]]}
D. {sample['options'][permutation[3]]}
Your answer:"""
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{encode_image(image_path)}"}
                }
            ]}
        ]
    elif model_family == 'doubao':
        # Previously this branch ignored the image_path argument.
        if image_path is None:
            image_path = "./generated_dataset/final_part2/" + sample['image_path']
        return [
            {"role": "system", "content": """
        You are a helpful agent.Here is an image with a multiple choice question about the image content. You should reply the question according to the image faithfully. Please note that the question maybe confusing or the image content might be uncommon, You should thinking briefly first and you **MUST give your final choose with <>**.
        You should follow the format below STRICTLY
        format:  Think first, give your discussion about the question and the image BRIEFLY. Then summarize: The final answer is <A/B/C/D>.
        Here is an example:
        #########
        [IMAGE]
        Question:Does the Teapot in the picture have a handle? If so, where is it located?
        Choices:
        A. Not visible / Can't see.
        B. Yes, on the side.
        C. Yes, arched over the top.
        D. The correct answer is not listed.

        Your answer: 
        From the image I can see the handle on the side clearly, so the answer is <C>.
        #########
        Now please answer the question following the above format STRICTLY.
                    """},
            {"role": "user", "content": [
                {
                    "type": "text",
                    "text": f"""
        Question:{sample['question']}
        Choices:
        A. {sample['options'][permutation[0]]}
        B. {sample['options'][permutation[1]]}
        C. {sample['options'][permutation[2]]}
        D. {sample['options'][permutation[3]]}
        Your answer:"""
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{encode_image(image_path)}"}
                }
            ]}
        ]
    # Any other family yields no prompt, as before.
    return None

