# import json
# import torch
# from transformers import AutoModelForCausalLM, AutoProcessor
# from tqdm import tqdm
# from PIL import Image

# # 模型路径

# model_path = "moonshotai/Kimi-VL-A3B-Instruct"
# model = AutoModelForCausalLM.from_pretrained(
#     model_path,
#     torch_dtype="auto",
#     device_map="auto",
#     trust_remote_code=True,
# )
# processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)

# input_path = "VQA_Data.json"
# output_path = "VQA_Data_with_answer_VQA_kivi.json"

# # 加载输入 JSON 文件
# with open(input_path, "r", encoding="utf-8") as f:
#     data = json.load(f)



# for idx, item in tqdm(enumerate(data), total=len(data), desc="Processing items"):
#     try:
#         # 从原始数据项中获取 MCQID 和 prompt
#         question = item.get("Question", "").strip()
#         options = [
#             f"A. {str(item.get('Option1', '')).strip()}",
#             f"B. {str(item.get('Option2', '')).strip()}",
#             f"C. {str(item.get('Option3', '')).strip()}",
#             f"D. {str(item.get('Option4', '')).strip()}",
#         ]
#         VQA_prompt = "请基于图像回答以下与中国文化相关的问题：\n"+question + "\n" + "\n".join(options)+"\n这是一个多选题，请先返回所有可能的选项字母，再用中文解释你的选择。"
        
#         # 构建一个更详细的提示词，要求模型返回答案和解释 (模仿 Llama 代码的风格)
#         # 您可以根据需要调整这个 prompt

#         # 构造对话消息格式 (针对单条数据)
#         image_path = item['Image_path']
#         image = Image.open(image_path)
#         messages = [
#             {"role": "user", "content": [{"type": "image", "image": image_path}, {"type": "text", "text": VQA_prompt}]}
#         ]


#         text = processor.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")
#         inputs = processor(images=image, text=text, return_tensors="pt", padding=True, truncation=True).to(model.device)
#         generated_ids = model.generate(**inputs, max_new_tokens=512)
#         generated_ids_trimmed = [
#             out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
#         ]
#         response = processor.batch_decode(
#             generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
#         )[0]
#         item["GPT4o_Answer"] = response
#     # 保存结果
#     except Exception as e:
#         item["GPT4o_Answer"] = f"Error: {str(e)}"

# # 写入输出 JSON 文件
# with open(output_path, "w", encoding="utf-8") as f:
#     json.dump(data, f, indent=2, ensure_ascii=False)

# print(f"✅ 处理完成，已将带有预测结果的数据保存至 {output_path}")

import os
import json
import torch
from transformers import AutoModelForCausalLM, AutoProcessor
from tqdm import tqdm
from PIL import Image

# --- 1. Load the model and its processor ---
model_id = "moonshotai/Kimi-VL-A3B-Instruct"

print(f"正在加载模型: {model_id}...")
# Per the original author's note, Kimi-VL has to be loaded with
# trust_remote_code=True; the same flag is handed to both loaders below.
_remote_code_flag = {"trust_remote_code": True}
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype="auto", device_map="auto", **_remote_code_flag
)
processor = AutoProcessor.from_pretrained(model_id, **_remote_code_flag)
print("模型加载完成。")

# --- 2. Paths and run configuration ---
# Base directory that contains the per-country sub-folders and the image
# folders (adjust this to your actual layout).
base_data_dir = "."
# Sub-folders (country names) to process. Keep the spelling in sync with the
# keys of VQA_PROMPT_TEMPLATES and NATIVE_LANGUAGE_MAP (e.g. "Vitnamese").
folders_to_process = [
    "Indonesian",
    "Korea",
    "Mongolia",
    "Vitnamese",
    "Singapore",
]
# Directory that receives the answered JSON files.
output_dir = "Output_Json_Kimi_VQA"

# Make sure the output directory exists; an existing one is left alone.
os.makedirs(name=output_dir, exist_ok=True)

# --- 3. Dynamic VQA prompt templates ---
# (Original author's note: identical to the Qwen version of this script.)
# Layout: folder/country name -> prompt language -> template text.
# Every template opens with "based on the image" (or an equivalent phrase) and
# carries the two placeholders {question} and {options}, which the processing
# loop fills in with str.format.
def _english_prompt(culture):
    """Return the English VQA template for the given culture adjective.

    All English templates share the same wording apart from the adjective, so
    they are generated here instead of being repeated per country. The
    {question} and {options} placeholders are left in place for later
    formatting.
    """
    return (
        f"Based on the image, please answer the following question related to {culture} Culture.\n"
        "{question}\n{options}\n"
        "This is a multiple-choice question. Please first return all possible option letters, "
        "then explain your choice in English."
    )


VQA_PROMPT_TEMPLATES = {
    "China": {
        "Chinese": "请基于图像回答以下与中国文化相关的问题：\n{question}\n{options}\n这是一个多选题，请先返回所有可能的选项字母，再用中文解释你的选择。",
        "English": _english_prompt("Chinese"),
    },
    "Indonesian": {
        "Indonesian": "Berdasarkan gambar, silakan jawab pertanyaan berikut terkait Budaya Indonesia.\n{question}\n{options}\nIni adalah pertanyaan pilihan ganda. Harap kembalikan semua kemungkinan huruf opsi terlebih dahulu, lalu jelaskan pilihan Anda dalam Bahasa Indonesia.",
        "English": _english_prompt("Indonesian"),
    },
    "Korea": {
        "Korean": "이미지를 바탕으로 다음 한국 문화와 관련된 질문에 답변해 주세요.\n{question}\n{options}\n이것은 객관식 문제입니다. 먼저 가능한 모든 옵션 문자를 반환한 다음, 한국어로 당신의 선택을 설명해 주세요.",
        "English": _english_prompt("Korean"),
    },
    "Mongolia": {
        "Mongolian": "Зурагт үндэслэн Монголын соёлтой холбоотой дараах асуултад хариулна уу.\n{question}\n{options}\nЭнэ бол олон сонголттой асуулт юм. Эхлээд боломжит бүх сонголтын үсгийг буцааж, дараа нь сонголтоо монгол хэлээр тайлбарлана уу.",
        "English": _english_prompt("Mongolian"),
    },
    # NOTE(review): the Malay and Chinese templates for Singapore ask for the
    # explanation in English ("dalam Bahasa Inggeris" / "用英文") — confirm
    # that this is intended.
    "Singapore": {
        "English": _english_prompt("Singaporean"),
        "Malay": "Berdasarkan imej, sila jawab soalan berikut yang berkaitan dengan Budaya Singapura.\n{question}\n{options}\nIni adalah soalan pilihan berganda. Sila kembalikan semua huruf pilihan yang mungkin terlebih dahulu, kemudian jelaskan pilihan anda dalam Bahasa Inggeris.",
        "Chinese": "请基于图像回答以下与新加坡文化相关的问题。\n{question}\n{options}\n这是一个多选题，请先返回所有可能的选项字母，再用英文解释你的选择。",
    },
    "Vitnamese": {
        "Vietnamese": "Dựa vào hình ảnh, vui lòng trả lời câu hỏi sau đây liên quan đến Văn hóa Việt Nam.\n{question}\n{options}\nĐây là một câu hỏi trắc nghiệm. Vui lòng trả về tất cả các chữ cái tùy chọn có thể có trước, sau đó giải thích lựa chọn của bạn bằng tiếng Việt.",
        "English": _english_prompt("Vietnamese"),
    },
}

# Folder name -> native prompt language (original author's note: identical to
# the Qwen version of this script). A folder with several native languages
# lists them in one comma-separated string, which the processing loop splits
# on ",".
NATIVE_LANGUAGE_MAP = dict(
    Indonesian="Indonesian",
    Korea="Korean",
    Mongolia="Mongolian",
    Singapore="Malay,Chinese",
    Vitnamese="Vietnamese",
)


# --- 4. Walk every folder and VQA file, query the model, save the answers ---
def _select_prompt_template(folder_name, filename):
    """Choose the prompt template for one input file.

    A file whose name contains "English" uses the English template; any other
    file uses the folder's native language from NATIVE_LANGUAGE_MAP (falling
    back to "English" for unmapped folders). When the folder lists several
    native languages (comma-separated), the first one whose name occurs in
    the file name wins.

    Returns a ``(template, language)`` pair. ``template`` is ``None`` when no
    template matches — including a folder or language that is missing from
    VQA_PROMPT_TEMPLATES, which is reported to the caller instead of raising
    ``KeyError``. ``language`` is the raw language label, kept for the
    warning message.
    """
    if "English" in filename:
        language = "English"
    else:
        language = NATIVE_LANGUAGE_MAP.get(folder_name, "English")

    folder_templates = VQA_PROMPT_TEMPLATES.get(folder_name, {})
    candidates = language.split(",")
    if len(candidates) == 1:
        return folder_templates.get(language), language
    for candidate in candidates:
        if candidate in filename:
            return folder_templates.get(candidate), language
    return None, language


def _build_text_prompt(prompt_template, item):
    """Fill the template with the item's question and its options A-D."""
    # str() keeps non-string values (e.g. numeric options) from breaking
    # .strip(); the question is treated the same way as the options.
    question = str(item.get("Question", "")).strip()
    option_keys = ("Option1", "Option2", "Option3", "Option4")
    options = "\n".join(
        f"{letter}. {str(item.get(key, '')).strip()}"
        for letter, key in zip("ABCD", option_keys)
    )
    return prompt_template.format(question=question, options=options)


def _generate_answer(image, text_prompt):
    """Run one image + prompt through the model and return the decoded reply."""
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image"},  # placeholder; the pixels go to the processor below
                {"type": "text", "text": text_prompt},
            ],
        }
    ]
    text = processor.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    # Text and a single PIL image are handed to the processor together.
    inputs = processor(
        text=text,
        images=image,
        return_tensors="pt",
    ).to(model.device)

    with torch.no_grad():
        generated_ids = model.generate(**inputs, max_new_tokens=1024)

    # generate() returns a batch; take the first sequence and drop the leading
    # prompt-length tokens (assumes the output starts with the prompt tokens,
    # as the original slicing did).
    prompt_len = inputs.input_ids.shape[1]
    new_tokens = generated_ids[0][prompt_len:]
    return processor.decode(
        new_tokens,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    ).strip()


def _answer_item(item, folder_path, prompt_template):
    """Return the model's answer for one item, or an "Error: ..." string.

    Never raises: any failure (bad path, unreadable image, model error) is
    turned into an error string so one bad item cannot stop the whole run.
    """
    try:
        image_rel_path = item.get("Image_path")
        if not image_rel_path:
            return "Error: Image_path is missing or empty."

        full_image_path = os.path.join(folder_path, image_rel_path)
        if not os.path.exists(full_image_path):
            return f"Error: Image file not found at {full_image_path}"

        # Close the underlying file as soon as the RGB copy exists instead of
        # leaking one open handle per item.
        with Image.open(full_image_path) as raw_image:
            image = raw_image.convert("RGB")

        return _generate_answer(image, _build_text_prompt(prompt_template, item))
    except Exception as e:  # deliberate best-effort boundary, see docstring
        return f"Error: {str(e)}"


# Output paths already written in this run, used to detect files from
# different folders that would otherwise overwrite each other.
_written_output_paths = set()

for folder_name in folders_to_process:
    current_folder_path = os.path.join(base_data_dir, folder_name)

    if not os.path.isdir(current_folder_path):
        print(f"⚠️  警告: 文件夹 '{current_folder_path}' 不存在，已跳过。")
        continue

    print(f"\n📁 开始处理文件夹: {current_folder_path}")

    # sorted() makes the processing order (and collision handling below)
    # reproducible; os.listdir returns entries in arbitrary order.
    for filename in sorted(os.listdir(current_folder_path)):
        # Only VQA JSON files are handled here; "Text_Only" files are skipped.
        if "Text_Only" in filename or not filename.endswith(".json"):
            continue

        input_path = os.path.join(current_folder_path, filename)
        print(f"  ➡️  正在处理VQA文件: {filename}")

        try:
            with open(input_path, "r", encoding="utf-8") as f:
                data = json.load(f)
        except Exception as e:
            print(f"    ❌ 读取文件失败: {input_path}, 错误: {e}")
            continue

        # The loop below attaches an answer to each item, so the file must be
        # a list of objects; anything else is skipped rather than crashing.
        if not isinstance(data, list):
            print(f"    ⚠️ 警告: '{filename}' 的顶层JSON结构不是列表，将跳过此文件。")
            continue

        # --- Pick the language and prompt template for this file ---
        prompt_template, language = _select_prompt_template(folder_name, filename)
        if not prompt_template:
            print(f"    ⚠️ 警告: 未能在 '{filename}' 中为语言 '{language}' 找到匹配的模板，将跳过此文件。")
            continue

        # --- Process the items one by one ---
        skipped_items = 0
        for item in tqdm(data, desc=f"  Processing items in {filename}", leave=False):
            if not isinstance(item, dict):
                # Nothing to attach an answer to; left unchanged in the output.
                skipped_items += 1
                continue
            item["kimi_vl_answer"] = _answer_item(item, current_folder_path, prompt_template)

        if skipped_items:
            print(f"    ⚠️ 警告: '{filename}' 中有 {skipped_items} 条数据不是对象(dict)，已跳过。")

        # --- Save the updated data ---
        base_filename = os.path.splitext(filename)[0]
        output_path = os.path.join(output_dir, f"{base_filename}_kimi_answered.json")
        if output_path in _written_output_paths:
            # Another folder already produced a file with this name in this
            # run; prefix the folder name instead of overwriting its results.
            output_path = os.path.join(
                output_dir, f"{folder_name}_{base_filename}_kimi_answered.json"
            )
            print(f"    ⚠️ 警告: 输出文件名冲突，改为保存至: {output_path}")
        _written_output_paths.add(output_path)

        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, ensure_ascii=False)

        print(f"    ✅ VQA处理完成，结果已保存至: {output_path}")

print(f"\n🎉 所有文件夹的VQA任务处理完毕！所有输出文件已保存到 '{output_dir}' 文件夹中。")