from openai import OpenAI
import json
import uuid
import mimetypes
from datetime import datetime
import os
from tqdm import tqdm

# Load API credentials from the local env.json file. JSON text is UTF-8 by
# specification, so decode it explicitly instead of relying on the
# platform's locale-dependent default encoding.
with open("env.json", encoding="utf-8") as f:
    api_keys = json.load(f)

def encode_image(image_path):
    """Return the contents of an image as base64-encoded text.

    Args:
        image_path: Either a local file path or an ``http://`` / ``https://``
            URL. A URL is first downloaded into a local ``downloads``
            directory (created on demand) and then read back from disk.

    Returns:
        The file's bytes, base64-encoded and decoded to a ``str``.

    Raises:
        requests.HTTPError: If the download answers with an error status.
        OSError: If the local file cannot be opened or written.
    """
    # Match real URL schemes only, so a local file whose name merely starts
    # with "http" is still read from disk.
    if image_path.startswith(("http://", "https://")):
        user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0"
        request_kwargs = {
            "headers": {"User-Agent": user_agent},
            "stream": True,
            # Without a timeout a stalled server would block forever.
            "timeout": 120,
        }

        # The context manager releases the streamed connection when done.
        with requests.get(image_path, **request_kwargs) as response:
            response.raise_for_status()
            content_type = response.headers.get("content-type", "")

            # Drop parameters such as "; charset=utf-8": guess_extension only
            # recognises the bare MIME type.
            mime_type = content_type.split(";", 1)[0].strip()
            extension = mimetypes.guess_extension(mime_type)
            if extension is None:
                extension = ".download"

            download_dir = os.path.abspath("downloads")
            os.makedirs(download_dir, exist_ok=True)
            download_path = os.path.join(download_dir, str(uuid.uuid4()) + extension)

            with open(download_path, "wb") as fh:
                for chunk in response.iter_content(chunk_size=8192):
                    fh.write(chunk)

        image_path = download_path

    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')


# Raw HTTP headers carrying a bearer token read from the OPENAI_API_KEY
# environment variable. When that variable is unset the f-string renders the
# literal text "Bearer None". Nothing in the visible part of this file reads
# `headers`.
headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {os.getenv('OPENAI_API_KEY')}"
}

# OpenAI SDK client pointed at the yunwu.ai endpoint and authenticated with the
# 'API_DEFAULT' entry loaded from env.json.
# NOTE(review): timeout is presumably expressed in seconds -- confirm.
client = OpenAI(
    base_url="https://yunwu.ai/v1",
    api_key=api_keys['API_DEFAULT'],
    timeout=120
)

import base64
import requests
from PIL import Image
from io import BytesIO


def pil_image_to_base64(image):
    """Serialize ``image`` as PNG in memory and return it as base64 text.

    Args:
        image: Object exposing ``save(file_obj, format=...)``, such as a PIL
            image.

    Returns:
        The PNG bytes, base64-encoded and decoded to a ``str``.
    """
    with BytesIO() as png_buffer:
        image.save(png_buffer, format='PNG')
        png_bytes = png_buffer.getvalue()
    return base64.b64encode(png_bytes).decode("utf-8")


def resize_image(image_path):
    """Save a half-size copy of an image next to the original.

    The copy is named ``resized_<original file name>`` and is written into the
    same directory as ``image_path``. For a bare file name this is the same
    location the previous implementation used.

    Args:
        image_path: Path of the image to shrink.

    Returns:
        The path of the newly written, resized image.
    """
    # Prefix only the file name: prefixing the whole path would turn
    # "./dir/a.png" into "resized_./dir/a.png", which points at a directory
    # that does not exist.
    directory, filename = os.path.split(image_path)
    new_image_path = os.path.join(directory, f"resized_{filename}")

    # The context manager closes the source file handle once we are done.
    with Image.open(image_path) as img:
        width, height = img.size
        # Clamp to 1 so a one-pixel side is never resized to zero.
        half_size = (max(1, width // 2), max(1, height // 2))
        img.resize(half_size).save(new_image_path)
    return new_image_path

def get_prompt_v4(model_family, sample, permutation, image_path=None, glm_thinking=True):
    """Build the minimal multiple-choice prompt: one user turn, no system turn.

    The user turn carries the base64-encoded image followed by a one-line
    instruction, the question, and four options lettered A-D in the order
    given by ``permutation``.

    Args:
        model_family: Only "openai", "gemini", "qwen" and "internvl" are
            handled.
        sample: Mapping providing 'question' and 'options', plus 'image_path'
            when ``image_path`` is not supplied.
        permutation: Indexable whose first four entries select from
            ``sample['options']``.
        image_path: Image location handed to ``encode_image``; defaults to the
            sample's file under ./generated_dataset/final_part2/.
        glm_thinking: Accepted but not used by this function.

    Returns:
        A one-element list of chat messages, or None for any other family.
    """
    if model_family not in ("openai", "gemini", "qwen", "internvl"):
        return None

    if image_path is None:
        image_path = "./generated_dataset/final_part2/" + sample['image_path']

    image_b64 = encode_image(image_path)
    image_part = {
        "type": "image_url",
        "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"},
    }

    # The instruction line is followed by a run of trailing spaces in the
    # prompt; it is written out explicitly so it stays visible.
    question_text = (
        "\n"
        "Answer the following question with only the CORRECT letter.                                        \n"
        "\n"
        f"Question:{sample['question']}\n"
        "Choices:\n"
        f"A. {sample['options'][permutation[0]]}\n"
        f"B. {sample['options'][permutation[1]]}\n"
        f"C. {sample['options'][permutation[2]]}\n"
        f"D. {sample['options'][permutation[3]]}\n"
        "Your answer:"
    )
    question_part = {"type": "text", "text": question_text}

    return [{"role": "user", "content": [image_part, question_part]}]

def get_prompt_v3_icl(model_family, sample, permutation, image_path=None, glm_thinking=True):
    """Build a step-by-step multiple-choice prompt with an assistant prefill.

    The conversation is: a system instruction, a user turn holding the image
    and the lettered question, and an assistant turn that already starts with
    "Let's think step by step.".

    Args:
        model_family: Only "openai", "gemini", "qwen" and "internvl" are
            handled.
        sample: Mapping providing 'question' and 'options', plus 'image_path'
            when ``image_path`` is not supplied.
        permutation: Indexable whose first four entries select from
            ``sample['options']``.
        image_path: Image location handed to ``encode_image``; defaults to the
            sample's file under ./generated_dataset/final_part2/.
        glm_thinking: Accepted but not used by this function.

    Returns:
        A three-element list of chat messages, or None for any other family.
    """
    if model_family not in ("openai", "gemini", "qwen", "internvl"):
        return None

    if image_path is None:
        image_path = "./generated_dataset/final_part2/" + sample['image_path']

    system_message = {"role": "system", "content": """
You are a helpful agent.Here is an image with a multiple choice question about the image content. You should reply the question according to the image faithfully. Please note that the question maybe confusing or the image content might be uncommon, you should think step by step and finally answer the question with the correct choice letter.
"""}

    image_b64 = encode_image(image_path)
    image_part = {
        "type": "image_url",
        "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"},
    }
    question_text = (
        f"\nQuestion:{sample['question']}\n"
        "Choices:\n"
        f"A. {sample['options'][permutation[0]]}\n"
        f"B. {sample['options'][permutation[1]]}\n"
        f"C. {sample['options'][permutation[2]]}\n"
        f"D. {sample['options'][permutation[3]]}\n"
        "Your answer:"
    )
    user_message = {
        "role": "user",
        "content": [image_part, {"type": "text", "text": question_text}],
    }

    assistant_prefill = {
        "role": "assistant",
        "content": [{"type": "text", "text": "\nLet's think step by step."}],
    }
    return [system_message, user_message, assistant_prefill]



def get_prompt_v3(model_family, sample, permutation, image_path=None, glm_thinking=True):
    """Build a one-shot reasoning multiple-choice prompt with assistant prefill.

    The system turn contains one worked teapot example that reasons first and
    then answers inside ``<answer>`` tags. It is followed by the user turn
    (image plus lettered question) and an assistant turn that already starts
    with "Let's think step by step.".

    Args:
        model_family: Only "openai", "gemini", "qwen" and "internvl" are
            handled.
        sample: Mapping providing 'question' and 'options', plus 'image_path'
            when ``image_path`` is not supplied.
        permutation: Indexable whose first four entries select from
            ``sample['options']``.
        image_path: Image location handed to ``encode_image``; defaults to the
            sample's file under ./generated_dataset/final_part2/.
        glm_thinking: Accepted but not used by this function.

    Returns:
        A three-element list of chat messages, or None for any other family.
    """
    if model_family not in ("openai", "gemini", "qwen", "internvl"):
        return None

    if image_path is None:
        image_path = "./generated_dataset/final_part2/" + sample['image_path']

    system_prompt = (
        """
You are a helpful agent.Here is an image with a multiple choice question about the image content. You should reply the question according to the image faithfully. Please note that the question maybe confusing or the image content might be uncommon, you should think first and finally answer the question with the correct choice letter.
Here is an example:
#########
[IMAGE]
Question:Does the Teapot in the picture have a handle? If so, where is it located?
Choices:
A. Not visible / Can't see.
B. Yes, on the side.
C. Yes, arched over the top.
D. The correct answer is not listed.

Your answer:"""
        # The example's "Your answer:" line ends with one trailing space.
        " \n"
        """Let's think step by step. First, I'll look at the overall structure - I can see it's a white/ceramic teapot. Next, I need to locate the handle. Looking at the left side of the teapot, I don't see a handle there. Checking the right side... I can see what appears to be a curved handle attached to the side. Based on this systematic examination, the handle is located on the side of the teapot. <answer>B</answer>.
#########
"""
    )

    image_b64 = encode_image(image_path)
    image_part = {
        "type": "image_url",
        "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"},
    }
    question_text = (
        f"\nQuestion:{sample['question']}\n"
        "Choices:\n"
        f"A. {sample['options'][permutation[0]]}\n"
        f"B. {sample['options'][permutation[1]]}\n"
        f"C. {sample['options'][permutation[2]]}\n"
        f"D. {sample['options'][permutation[3]]}\n"
        "Your answer:"
    )

    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": [
            image_part,
            {"type": "text", "text": question_text},
        ]},
        {"role": "assistant", "content": [
            {"type": "text", "text": "\nLet's think step by step."},
        ]},
    ]


def get_prompt_v2(model_family, sample, permutation, image_path=None, glm_thinking=True):
    """Build a one-shot structured-analysis multiple-choice prompt.

    The system turn shows one worked teapot example that walks through every
    option and ends with the answer inside ``<answer>`` tags. The user turn
    carries the image and the lettered question.

    Args:
        model_family: Only "openai", "gemini", "qwen" and "internvl" are
            handled.
        sample: Mapping providing 'question' and 'options', plus 'image_path'
            when ``image_path`` is not supplied.
        permutation: Indexable whose first four entries select from
            ``sample['options']``.
        image_path: Image location handed to ``encode_image``; defaults to the
            sample's file under ./generated_dataset/final_part2/.
        glm_thinking: Accepted but not used by this function.

    Returns:
        A two-element list of chat messages, or None for any other family.
    """
    if model_family not in ("openai", "gemini", "qwen", "internvl"):
        return None

    if image_path is None:
        image_path = "./generated_dataset/final_part2/" + sample['image_path']

    system_prompt = (
        """
You are a helpful agent.Here is an image with a multiple choice question about the image content. You should reply the question according to the image faithfully. Please note that the question maybe confusing or the image content might be uncommon, you should analyse the question STEP BY STEP and then answer the question ONLY with the correct choice letter.
Here is an example:
#########
[IMAGE]
Question:Does the Teapot in the picture have a handle? If so, where is it located?
Choices:
A. Not visible / Can't see.
B. Yes, on the side.
C. Yes, arched over the top.
D. The correct answer is not listed.

Your reply:
"Image Observation: I see a white ceramic teapot with a rounded body and spout extending from one side.
Question Analysis: I need to determine if there's a handle and identify its specific location.
Step-by-step Reasoning:"""
        # This heading line ends with one trailing space in the prompt.
        " \n"
        """- Option A: Let me check if I can see a handle... I can clearly see one, so this is incorrect
- Option B: Looking at the sides of the teapot, I see a curved handle attached to the right side
- Option C: Checking the top area, I don't see any handle arching over the top
- Option D: Since I found the handle on the side, one of the listed answers is correct
Final Decision: The handle is clearly visible on the side of the teapot. The answer is <answer>B</answer>."
#########
Now please answer the question following the above format STRICTLY.
"""
        # The prompt ends with twelve spaces after the final newline.
        + " " * 12
    )

    image_b64 = encode_image(image_path)
    image_part = {
        "type": "image_url",
        "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"},
    }
    question_text = (
        f"\nQuestion:{sample['question']}\n"
        "Choices:\n"
        f"A. {sample['options'][permutation[0]]}\n"
        f"B. {sample['options'][permutation[1]]}\n"
        f"C. {sample['options'][permutation[2]]}\n"
        f"D. {sample['options'][permutation[3]]}\n"
        "Your answer:"
    )

    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": [
            image_part,
            {"type": "text", "text": question_text},
        ]},
    ]

def get_prompt_qa_cot(model_family, sample, image_path=None, glm_thinking=True):
    """Build the open-ended question prompt that asks for step-by-step analysis.

    Three layouts are produced, depending on ``model_family``:

    * "openai", "gemini", "qwen", "internvl", "internvl3.5": system
      instruction, user turn (image + question), and an assistant turn
      pre-filled with "Let's think step by step.".
    * "claude": one user turn holding the instruction text, the image and the
      question.
    * "glm": system instruction without the step-by-step wording, then the
      user turn (image + question).

    Args:
        model_family: Selects the layout above.
        sample: Mapping providing 'question', plus 'image_path' when
            ``image_path`` is not supplied.
        image_path: Image location handed to ``encode_image``; defaults to the
            sample's file under ./generated_dataset/final_part2/.
        glm_thinking: Accepted but not used by this function.

    Returns:
        A list of chat messages, or None for any other family.
    """
    openai_like = ("openai", "gemini", "qwen", "internvl", "internvl3.5")
    is_openai_like = model_family in openai_like
    if not (is_openai_like or model_family == "claude" or model_family == 'glm'):
        return None

    if image_path is None:
        image_path = "./generated_dataset/final_part2/" + sample['image_path']

    cot_instruction = "You are a helpful agent.Here is an image with a question about the image content. You should reply the question according to the image faithfully. Please note that the question maybe confusing or the image content might be uncommon, you should firstly analyse the question strongly depends on yhe image STEP BY STEP, and finally conclude your answer."
    direct_instruction = "You are a helpful agent.Here is an image with a question about the image content. You should reply the question according to the image faithfully. Please note that the question maybe confusing or the image content might be uncommon, you should answer the question strongly depends on yhe image."

    image_b64 = encode_image(image_path)
    image_part = {
        "type": "image_url",
        "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"},
    }
    question_part = {
        "type": "text",
        "text": f"\nQuestion:{sample['question']}\nYour answer:",
    }

    if is_openai_like:
        return [
            {"role": "system", "content": "\n" + cot_instruction},
            {"role": "user", "content": [image_part, question_part]},
            {"role": "assistant", "content": [
                # The prefill keeps its eight leading spaces.
                {"type": "text", "text": "\n" + " " * 8 + "Let's think step by step."},
            ]},
        ]

    if model_family == "claude":
        instruction_part = {
            "type": "text",
            "text": "\n(System Instruction) " + cot_instruction,
        }
        return [
            {"role": "user", "content": [instruction_part, image_part, question_part]},
        ]

    # Remaining family: glm.
    return [
        {"role": "system", "content": "\n" + direct_instruction},
        {"role": "user", "content": [image_part, question_part]},
    ]
def get_prompt_qa(model_family, sample, image_path=None, glm_thinking=True):
    """Build the plain open-ended question prompt (no reasoning request).

    Two layouts are produced, depending on ``model_family``:

    * "openai", "gemini", "qwen", "internvl", "internvl3.5" and "glm": a
      system instruction followed by a user turn (image + question).
    * "claude": one user turn holding the instruction text, the image and the
      question.

    Args:
        model_family: Selects the layout above.
        sample: Mapping providing 'question', plus 'image_path' when
            ``image_path`` is not supplied.
        image_path: Image location handed to ``encode_image``; defaults to the
            sample's file under ./generated_dataset/final_part2/.
        glm_thinking: Accepted but not used by this function.

    Returns:
        A list of chat messages, or None for any other family.
    """
    # glm uses exactly the same messages as the openai-like families here.
    system_style_families = ("openai", "gemini", "qwen", "internvl", "internvl3.5", "glm")
    uses_system_turn = model_family in system_style_families
    if not uses_system_turn and model_family != "claude":
        return None

    if image_path is None:
        image_path = "./generated_dataset/final_part2/" + sample['image_path']

    instruction = "You are a helpful agent.Here is an image with a question about the image content. You should reply the question according to the image faithfully. Please note that the question maybe confusing or the image content might be uncommon, you should answer the question strongly depends on yhe image."

    image_b64 = encode_image(image_path)
    image_part = {
        "type": "image_url",
        "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"},
    }
    question_part = {
        "type": "text",
        "text": f"\nQuestion:{sample['question']}\nYour answer:",
    }

    if uses_system_turn:
        return [
            {"role": "system", "content": "\n" + instruction},
            {"role": "user", "content": [image_part, question_part]},
        ]

    # Remaining family: claude, which gets the instruction inside the user turn.
    instruction_part = {
        "type": "text",
        "text": "\n(System Instruction) " + instruction,
    }
    return [
        {"role": "user", "content": [instruction_part, image_part, question_part]},
    ]

def get_prompt(model_family, sample, permutation, image_path=None, glm_thinking=True):
    """Build the answer-with-a-letter multiple-choice prompt for a model family.

    Layout per family:

    * "openai", "gemini", "qwen", "internvl", "internvl3.5", "glm": a system
      turn with instructions and one worked example, then a user turn with
      the image followed by the lettered question.
    * "claude": a single "system" turn whose content list holds the
      instructions, the image and the question.
    * "grok": a single user turn with instructions and question as one text
      part, followed by the image.
    * "kimi": a single user turn with a short instruction, the image, and the
      question (without the trailing "Your answer:").

    Args:
        model_family: Selects the layout above.
        sample: Mapping providing 'question' and 'options', plus 'image_path'
            when ``image_path`` is not supplied.
        permutation: Indexable whose first four entries select from
            ``sample['options']`` for letters A-D.
        image_path: Image location handed to ``encode_image``. It is honoured
            for every family; previously the claude, grok and kimi branches
            ignored it. When None, the sample's file under
            ./generated_dataset/final_part2/ is used (final_part4 for kimi).
        glm_thinking: Accepted but not used by this function.

    Returns:
        A list of chat messages, or None when ``model_family`` is not one of
        the families listed above.
    """
    openai_like = ("openai", "gemini", "qwen", "internvl", "internvl3.5")
    if model_family not in openai_like + ("claude", "grok", "kimi", "glm"):
        return None

    if image_path is None:
        dataset_dir = "./generated_dataset/final_part2/"
        if model_family == "kimi":
            # NOTE(review): kimi is the only family that defaults to
            # final_part4; kept as found -- confirm this is intentional.
            dataset_dir = "./generated_dataset/final_part4/"
        image_path = dataset_dir + sample['image_path']

    # Encode outside the f-string: reusing the enclosing quote character
    # inside an f-string expression is a SyntaxError before Python 3.12.
    image_b64 = encode_image(image_path)
    image_part = {
        "type": "image_url",
        "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"},
    }

    # Question and lettered options, shared by every layout.
    question_block = (
        f"Question:{sample['question']}\n"
        "Choices:\n"
        f"A. {sample['options'][permutation[0]]}\n"
        f"B. {sample['options'][permutation[1]]}\n"
        f"C. {sample['options'][permutation[2]]}\n"
        f"D. {sample['options'][permutation[3]]}\n"
    )
    answer_request = {
        "type": "text",
        "text": "\n" + question_block + "Your answer:",
    }

    if model_family == "claude":
        claude_instructions = """
You are a helpful agent.Here is an image with a multiple choice question about the image content. You should reply the question according to the image faithfully. Please note that the question maybe confusing or the image content might be uncommon, you should answer the question ONLY with the correct choice LETTER.
Here is an example:
#########
[IMAGE]
Question:Does the Teapot in the picture have a handle? If so, where is it located?
Choices:
A. Not visible / Can't see.
B. Yes, on the side.
C. Yes, arched over the top.
D. The correct answer is not listed.

Your answer:A.
#########
Now please answer the question following the above format STRICTLY."""
        return [
            {"role": "system", "content": [
                {"type": "text", "text": claude_instructions},
                image_part,
                answer_request,
            ]}
        ]

    if model_family == 'grok':
        grok_preamble = """
(System Information)
************************
You are a helpful agent.Here is an image with a multiple choice question about the image content. You should reply the question according to the image faithfully. Please note that the question maybe confusing or the image content might be uncommon, you should answer the question ONLY with the correct choice letter.
Here is an example:
#########
[IMAGE]
Question:Does the Teapot in the picture have a handle? If so, where is it located?
Choices:
A. Not visible / Can't see.
B. Yes, on the side.
C. Yes, arched over the top.
D. The correct answer is not listed.

Your answer: A.
#########
************************
Now please answer the question following the above format STRICTLY.

"""
        return [
            {"role": "user", "content": [
                {
                    "type": "text",
                    "text": grok_preamble + question_block + "Your answer:",
                },
                image_part,
            ]}
        ]

    if model_family == 'kimi':
        # The instruction ends with a newline plus twenty spaces, as before.
        kimi_instruction = (
            "\nYou are a helpful agent.Here is an image with a multiple choice question about the image content. You should reply the question according to the image faithfully. Please note that the question maybe confusing or the image content might be uncommon. After thinking, you should answer the question ONLY with the correct choice letter.\n"
            + " " * 20
        )
        return [
            {"role": "user", "content": [
                {"type": "text", "text": kimi_instruction},
                image_part,
                {"type": "text", "text": "\n" + question_block},
            ]}
        ]

    # Remaining families: the openai-like ones and glm.
    few_shot_instructions = """
You are a helpful agent.Here is an image with a multiple choice question about the image content. You should reply the question according to the image faithfully. Please note that the question maybe confusing or the image content might be uncommon, you should answer the question ONLY with the correct choice letter.
Here is an example:
#########
[IMAGE]
Question:Does the Teapot in the picture have a handle? If so, where is it located?
Choices:
A. Not visible / Can't see.
B. Yes, on the side.
C. Yes, arched over the top.
D. The correct answer is not listed.

Your answer: A
#########
Now please answer the question following the above format STRICTLY.
"""
    if model_family != 'glm':
        # The openai-like prompt carries twelve trailing spaces; glm's does not.
        few_shot_instructions += " " * 12

    return [
        {"role": "system", "content": few_shot_instructions},
        {"role": "user", "content": [image_part, answer_request]},
    ]


def get_fib_prompt(model_family, sample, image_path=None):
    """Build the fill-in-the-blank prompt with two worked examples.

    The system turn holds the instruction and two example question/answer
    pairs; the user turn holds the image followed by the question.

    Args:
        model_family: Only "openai", "gemini", "qwen" and "internvl" are
            handled.
        sample: Mapping providing 'question', plus 'image_path' when
            ``image_path`` is not supplied.
        image_path: Image location handed to ``encode_image``; defaults to the
            sample's file under ./generated_dataset/final_part2/.

    Returns:
        A two-element list of chat messages, or None for any other family.
    """
    if model_family not in ("openai", "gemini", "qwen", "internvl"):
        return None

    if image_path is None:
        image_path = "./generated_dataset/final_part2/" + sample['image_path']

    system_prompt = (
        """
You are a helpful agent.Here is an image with fill-in-the-blank question about the image content. You should reply the question BRIEFLY according to the image faithfully. Please note that the question maybe confusing or the image content might be uncommon, you should answer the question with the content of the image but not the common sense.
Here are two examples:
#########
[IMAGE]
Question:Does the Teapot in the picture have a handle? If so, where is it located?

Your answer: No, the handle of the teapot is not visible in the image, or the handle is obscured.
#########
[IMAGE]
Question:What is the shape of the kick drum (bass drum) in the drum kit shown in the picture?

Your answer:"""
        # The second example's "Your answer:" line ends with one trailing space.
        " \n"
        """A circle, but the bottom is straight. So the shape is not a perfect round that indicates the kick drum is different from a common one!
#########
"""
        # The prompt ends with twelve spaces after the final newline.
        + " " * 12
    )

    image_b64 = encode_image(image_path)
    image_part = {
        "type": "image_url",
        "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"},
    }
    question_part = {
        "type": "text",
        "text": f"\nQuestion:{sample['question']}\n\nYour answer:\n",
    }

    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": [image_part, question_part]},
    ]

def get_fib_prompt_cot(model_family, sample, image_path=None):
    """Build the fill-in-the-blank prompt with a step-by-step assistant prefill.

    The conversation is: a system turn with the instruction and two worked
    examples, a user turn with the image and the question, and an assistant
    turn that already starts with "Let's think step by step.".

    Args:
        model_family: Only "openai", "gemini", "qwen" and "internvl" are
            handled.
        sample: Mapping providing 'question', plus 'image_path' when
            ``image_path`` is not supplied.
        image_path: Image location handed to ``encode_image``; defaults to the
            sample's file under ./generated_dataset/final_part2/.

    Returns:
        A three-element list of chat messages, or None for any other family.
    """
    if model_family not in ("openai", "gemini", "qwen", "internvl"):
        return None

    if image_path is None:
        image_path = "./generated_dataset/final_part2/" + sample['image_path']

    system_prompt = (
        """
You are a helpful agent.Here is an image with fill-in-the-blank question about the image content. You should reply the question BRIEFLY according to the image faithfully. Please note that the question maybe confusing or the image content might be uncommon, you should answer the question with the content of the image but not the common sense.
Here are two examples:
#########
[IMAGE]
Question:Does the Teapot in the picture have a handle? If so, where is it located?

Your answer: No, the handle of the teapot is not visible in the image, or the handle is obscured.
#########
[IMAGE]
Question:What is the shape of the kick drum (bass drum) in the drum kit shown in the picture?

Your answer:"""
        # The second example's "Your answer:" line ends with one trailing space.
        " \n"
        """A circle, but the bottom is straight. So the shape is not a perfect round that indicates the kick drum is different from a common one!
#########
"""
        # The prompt ends with twelve spaces after the final newline.
        + " " * 12
    )

    image_b64 = encode_image(image_path)
    image_part = {
        "type": "image_url",
        "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"},
    }
    question_part = {
        "type": "text",
        "text": f"\nQuestion:{sample['question']}\n\nYour answer:\n",
    }

    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": [image_part, question_part]},
        # The prefill is its own assistant message. It used to be nested
        # inside the user message's content list, where a role/content dict
        # is not a valid content part.
        {"role": "assistant", "content": [
            {"type": "text", "text": "\n" + " " * 8 + "Let's think step by step."},
        ]},
    ]

def llm_as_judge_prompt(sample, response, image_path=None):
    """Build the messages asking a judge model to grade ``response``.

    The system turn states the grading rules and the required
    ``<judge>``/``<explanation>`` output format. The user turn holds the image
    followed by the question, the ground truth, the typical error and the
    response under evaluation.

    Args:
        sample: Mapping providing 'question', 'ground_truth' and
            'hallu_answer', plus 'image_path' when ``image_path`` is not
            supplied.
        response: The model answer to be judged; interpolated into the text.
        image_path: Image location handed to ``encode_image``; defaults to the
            sample's file under ./generated_dataset/final_part2/.

    Returns:
        A two-element list: the system message and the user message.
    """
    if image_path is None:
        image_path = "./generated_dataset/final_part2/" + sample['image_path']

    grading_rules = (
        """
You are a helpful agent.Here is an image and a question with the ground_truth. I will provide you with a response for this question. Now, you should determine whether the model's response aligns with the ground truth based on the question and the answer. Requirements are as follows:
    1. We provide one ground truth answer and one typical incorrect answer for each question. These answers, as well as the image itself, may not align with your common sense or prior knowledge. You must determine whether the model's response is correct solely based on the answers we provide (both the ground truth and the typical incorrect answer) and the model's own response. You must not use your own observation of the image or your personal common sense preferences to judge the correctness of the model's answer.
    2. The ground truth answer or the typical incorrect answer may contain content marked with the '#' symbol. This means that as long as the model's response covers the content enclosed by the '#' symbols, it should be considered as belonging to that category. If there are multiple segments marked with '#' in either the ground truth or the typical incorrect answer, the model's response will be classified under that category as long as it covers any of the content marked by these symbols.
    3. You should give you judge with the following format: First, provide your judgment within the tags <judge>correct/wrong/typical</judge>, where 'correct' indicates that the model's response is accurate, 'wrong' indicates an incorrect response, and 'typical' signifies that the model provided a typical error. Then, explain the reasoning behind your judgment within the <explanation> Your explanation here.</explanation> tags.
    4. Remember to analysis the ground truth and the typical error. If the model's response matches the typical error, you should judge with <judge>typical</judge>!"""
        # Rule 4 ends with one trailing space in the prompt.
        " \n"
        """    5. Please note that you need to comprehensively evaluate the correctness based on the question format, the model's response, the correct answer, and the incorrect answers. It is not necessarily required for the model's response to include all information from the correct answer. For example, if the correct answer contains additional information that is not required by the question, the model's response should not be considered incorrect for omitting it. On the other hand, even if the model reaches the same conclusion as the correct answer, if the model's analysis contradicts the correct answer, the response should still be considered incorrect.
    6. Your judge must align with human preferences.
NOTE: You must follow the format with <judge></judge> and <explanation></explanation>!
"""
    )
    system_message = {"role": "system", "content": grading_rules}

    image_b64 = encode_image(image_path)
    image_part = {
        "type": "image_url",
        "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"},
    }
    case_text = (
        f"\nQuestion: {sample['question']}\n"
        f"Ground truth: {sample['ground_truth']}\n"
        f"Typical error: {sample['hallu_answer']}\n"
        "\n"
        f"Model's response: {response}\n"
        "\n"
        "Now please provide your judgment with in the tags."
    )
    user_message = {
        "role": "user",
        "content": [image_part, {"type": "text", "text": case_text}],
    }

    return [system_message, user_message]