import argparse
import json
from datetime import datetime

from openai import OpenAI
import re
from multiprocessing import Pool, cpu_count

from tqdm import tqdm

# Load API credentials from a local JSON file at import time; the file must
# provide an "OPENAI_API_KEY" entry. The encoding is given explicitly so the
# read does not depend on the platform's default encoding, matching the other
# JSON reads in this module.
with open("env.json", 'r', encoding='utf-8') as f:
    api_keys = json.load(f)


# Shared OpenAI client used for the chat-completion requests below.
# NOTE(review): base_url is an empty string here -- presumably a placeholder
# for the real endpoint; confirm it is filled in before running.
client = OpenAI(
    base_url="",
    api_key=api_keys['OPENAI_API_KEY'],
    timeout=120
)
def get_request_prompt(sample):
    """Build the chat messages asking the model to design one question.

    Args:
        sample: Mapping whose ``'prompt'`` entry (the pre-modification image
            generation prompt) and ``'rule'`` entry (the modification) are
            interpolated into the instruction text.

    Returns:
        A one-element list holding a single ``"user"`` message dict with
        ``"role"`` and ``"content"`` keys.
    """
    # The instruction text is sent to the model verbatim; only the two
    # placeholders near the end vary between samples.
    instructions = f"""
I am currently designing questions for a multimodal dataset. Based on the information I provide, please design a concise, grammatically correct, and well-targeted question.
First, I use a prompt with an image generation model (such as Stable Diffusion) to generate an image. Then, based on a modified text description, I alter the content of this image. Now, please compare the object before and after the modification, and propose a question that asks about the features of this object in the final image.
Please note that you must carefully consider whether your designed question is clearly targeted. For example, if I move the kettle’s handle and spout to the same side, and you ask, “Where is the kettle’s handle in the picture?”—although this question is indeed about the modified handle’s location, the user might simply answer, “On the kettle,” without pointing to the specific modified location in the picture. In this case, both the pre-modified and post-modified images would have the answer “On the kettle,” making the question meaningless.
You must ensure that, based on the image and your question, there is only one correct answer, and it can only be correctly answered by looking at the modified image!
Please follow the format below:
<question>Your designed question</question>
<truth>The correct answer to this question</truth>
<error>The answer from the pre-modified image, or a common-sense answer</error>

For example:
##############
If I input:
"Pre-modification image generation prompt:
Ultra-realistic, bamboo with a square cross-section and emerald green color, located in the distant background, only as a corner decoration in a mist-covered Japanese courtyard at dawn, with ancient stone steps, flowing water, a calm koi pond, soft natural light, real photo, extremely high detail.
Modification: Keep the entire image unchanged, but change the bamboo stalk’s shape from cylindrical to quadrangular prism (square cross-section)."

Then your output should be:
<question>What is the cross-sectional shape of the bamboo in the image?</question>
<truth>The bamboo stalk in the image has a quadrangular prism (or square) shape, rather than the cylindrical shape found in the real world.</truth>
<error>The bamboo stalk in the image has a cylindrical shape</error>
##############

Now please answer:
Pre-modification image generation prompt:
{sample['prompt']}
Modification: {sample['rule']}
Your output:
"""
    user_message = {"role": "user", "content": instructions}
    return [user_message]

def _first_tag_body(text, tag):
    """Return the body of the first ``<tag>...</tag>`` pair in *text*, or None."""
    # re.DOTALL lets "." match newlines, so a tag body that spans several
    # lines is still captured instead of being treated as missing.
    found = re.findall(rf'<{tag}>(.*?)</{tag}>', text, re.DOTALL)
    return found[0] if found else None


def generate_question_and_judge(sample):
    """Request a question / truth / error triple from the chat model per piece.

    Args:
        sample: Iterable of dicts. Each dict is passed to
            ``get_request_prompt`` to build the request and is copied into
            the corresponding result entry.

    Returns:
        A list with one dict per successfully parsed piece: the piece's own
        entries plus ``"question"``, ``"hallu_answer"`` (body of the
        ``<error>`` tag) and ``"ground_truth"`` (body of the ``<truth>``
        tag). Pieces whose response has no text, or lacks any of the three
        tags, are skipped.
    """
    results = []
    for piece in sample:
        prompt = get_request_prompt(piece)
        response = client.chat.completions.create(
            model="gpt-5-mini-2025-08-07",
            messages=prompt,
        )
        text = response.choices[0].message.content
        if not text:
            # The message content may be None or empty; there is nothing to
            # parse, so skip the piece like any other unparseable response
            # instead of letting re.findall raise a TypeError.
            continue

        question = _first_tag_body(text, 'question')
        hallu_answer = _first_tag_body(text, 'error')
        ground_truth = _first_tag_body(text, 'truth')
        # Compare against None explicitly: an empty tag body ('') is still a
        # parsed value and is kept.
        if question is None or hallu_answer is None or ground_truth is None:
            continue

        results.append({
            **piece,
            "question": question,
            "hallu_answer": hallu_answer,
            "ground_truth": ground_truth
        })
    return results
import os
from utils import generate_image,edit_image
def generate_image_first_time_sample(sample):
    """Generate the initial image for one sample.

    The output path is
    ``./generated_dataset9/original/<name>/<name>.jpeg`` where ``<name>`` is
    ``sample['name']``; the path is printed and then passed, together with
    ``sample['prompt']``, to ``generate_image``.

    Returns:
        Always ``1``.
    """
    name = sample['name']
    target_dir = "./generated_dataset9/original/" + name
    image_path = os.path.join(target_dir, name + '.jpeg')
    print(image_path)
    generate_image(sample['prompt'], image_path)
    return 1
def generate_image_first_time(dataset_file):
    """Generate the initial image for every sample in a dataset file.

    Args:
        dataset_file: Path to a UTF-8 JSON file holding a list of lists of
            samples; the nesting is flattened by one level before
            processing.

    Each sample is handed to ``generate_image_first_time_sample`` in a
    process pool, with a progress bar over the flattened samples.
    """
    with open(dataset_file, 'r', encoding='utf-8') as f:
        dataset = json.load(f)
    # Flatten the list of lists into a single list of samples.
    dataset = [piece for group in dataset for piece in group]
    # int(cpu_count() * 0.9) is 0 on a single-CPU machine, and Pool rejects
    # a process count below 1, so clamp to at least one worker.
    worker_count = max(1, int(cpu_count() * 0.9))
    with Pool(processes=worker_count) as pool:
        # The per-sample return values are not needed; iterate only to drive
        # the pool and the progress bar to completion.
        for _ in tqdm(pool.imap(generate_image_first_time_sample, dataset), total=len(dataset)):
            pass
    print('Image generating over.')

def edit_image_sample(sample):
    """Produce the edited image for one sample and record where it was saved.

    The output path is
    ``./generated_dataset9/original/<name>/<name>_edited.jpeg`` where
    ``<name>`` is ``sample['name']``. The path is printed, then
    ``edit_image`` is called with the sample's ``'prompt'``, its ``'rule'``
    and that path.

    Returns:
        The same ``sample`` object, with ``'image_path'`` set to the edited
        image path.
    """
    name = sample['name']
    sample_dir = "./generated_dataset9/original/" + name
    edited_path = os.path.join(sample_dir, name + '_edited.jpeg')
    print(edited_path)
    edit_image(sample['prompt'], sample['rule'], edited_path)
    sample['image_path'] = edited_path
    return sample

def edit_image_for_sencond_time(dataset_file):
    """Edit the image of every sample and write the updated samples to JSON.

    Args:
        dataset_file: Path to a UTF-8 JSON file holding a list of lists of
            samples; the nesting is flattened by one level before
            processing.

    Each sample is handed to ``edit_image_sample`` in a process pool. The
    returned samples (now carrying ``'image_path'``) are written as a flat
    list to ``generated_dataset/dataset.json``.
    """
    with open(dataset_file, 'r', encoding='utf-8') as f:
        dataset = json.load(f)
    # Flatten the list of lists into a single list of samples.
    dataset = [piece for group in dataset for piece in group]
    # int(cpu_count() * 0.9) is 0 on a single-CPU machine, and Pool rejects
    # a process count below 1, so clamp to at least one worker.
    worker_count = max(1, int(cpu_count() * 0.9))
    with Pool(processes=worker_count) as pool:
        json_answer = list(tqdm(pool.imap(edit_image_sample, dataset), total=len(dataset)))
    # NOTE(review): the images are written under "./generated_dataset9/..."
    # while this file goes to "generated_dataset/" -- confirm the differing
    # directory names are intentional.
    output_path = "generated_dataset/dataset.json"
    # Create the output directory up front so the finished results are not
    # lost to a FileNotFoundError on the final write.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(json_answer,f,ensure_ascii=False, indent=4)
    print('Image editing over.')

if __name__ == "__main__":
    # Pipeline: (1) generate questions for every group in the content file
    # and write the result back to that same file, (2) generate the initial
    # images, (3) optionally edit them.
    parser = argparse.ArgumentParser()
    parser.add_argument('--ccs_content_file', type=str, default='None',
                        help="Path to the JSON content file (list of lists of samples); "
                             "it is overwritten with the generated questions.")
    parser.add_argument('--edit', type=str, default='True',
                        help="Pass the literal string 'True' to also run the image editing step.")
    args = parser.parse_args()
    if args.ccs_content_file == 'None':
        # The default is only a placeholder; fail with a clear message
        # instead of a FileNotFoundError for a file named "None".
        parser.error("--ccs_content_file is required")

    # get questions.
    with open(args.ccs_content_file, 'r', encoding='utf-8') as f:
        dataset = json.load(f)
    # Leave some CPU headroom for the system (the 0.9 factor reserves about
    # 10% of the cores). Clamp to at least one worker, because
    # int(cpu_count() * 0.9) is 0 on a single-CPU machine and Pool rejects
    # a process count below 1.
    worker_count = max(1, int(cpu_count() * 0.9))
    with Pool(processes=worker_count) as pool:
        json_answer = list(tqdm(pool.imap(generate_question_and_judge, dataset), total=len(dataset)))
    # The input file is overwritten in place; pieces for which no question
    # could be parsed are no longer present afterwards.
    with open(args.ccs_content_file, 'w', encoding='utf-8') as f:
        json.dump(json_answer,f,ensure_ascii=False, indent=4)
    # generate
    generate_image_first_time(args.ccs_content_file)
    # edit
    if args.edit == 'True':
        edit_image_for_sencond_time(args.ccs_content_file)
