import json

# Input file paths.
ann_path = "/root/project/datasets/iu_xray/annotation.json"
# NOTE(review): not read anywhere in this script; presumably the reference
# file whose record layout the output mirrors -- confirm before removing.
mimic_format_path = "/root/project/benchmark_data/Visual_Misinterpretation_Hallucination/open-ended/mimic_cxr_open_pairs.json"

# Output file.
output_path = "/root/project/benchmark_data/Visual_Misinterpretation_Hallucination/open-ended/xray_open_pairs.json"

# Prompt used when an annotation item has no "question" field of its own.
DEFAULT_QUESTION = (
    "Generate a medical report summarizing the key findings in the given "
    "Chest X-ray, including the main anatomical structures, relevant "
    "measurements(e.g., size, volume), and observed symptoms."
)

# Top-level keys of annotation.json that are scanned, in output order.
SPLIT_KEYS = ("train", "val", "test")


def _first_image_path(item):
    """Return the first image path of *item*, or "" when it has none.

    ``image_path`` may be a list of paths or a single path string. A bare
    string is returned whole (indexing it with ``[0]`` would yield only its
    first character), and a missing key or empty list yields "" instead of
    raising ``IndexError``.
    """
    paths = item.get("image_path")
    if isinstance(paths, str):
        return paths
    if paths:
        return paths[0]
    return ""


def _to_open_pair(item, qid):
    """Build one open-ended QA record from an annotation *item*.

    Args:
        item: One annotation entry (a dict).
        qid: Integer id stored in the record's "qid" field.

    Returns:
        A dict with the keys img_name, img_id, question, answer,
        question_type and qid, in that order.
    """
    image = _first_image_path(item)
    return {
        "img_name": image,
        "img_id": image,
        "question": item.get("question", DEFAULT_QUESTION),
        # The answer text is taken from the item's "report" field.
        "answer": item.get("report", ""),
        "question_type": "open",
        "qid": qid,
    }


def convert_annotations(ann_data):
    """Convert the test-split items of *ann_data* into open-ended QA records.

    Every list under the "train", "val" and "test" keys is scanned in that
    order (missing keys are treated as empty), and only items whose "split"
    field equals "test" are kept. Records are numbered consecutively from 0
    in the order they are kept.

    Args:
        ann_data: The parsed annotation.json content (a dict of lists).

    Returns:
        A list of record dicts as produced by ``_to_open_pair``.
    """
    result = []
    for split_key in SPLIT_KEYS:
        for item in ann_data.get(split_key, []):
            if item.get("split") != "test":
                continue
            # len(result) is the next free id, so no separate counter is needed.
            result.append(_to_open_pair(item, qid=len(result)))
    return result


def main():
    """Read ``ann_path``, convert its test samples and write ``output_path``."""
    # Load annotation.json.
    with open(ann_path, "r", encoding="utf-8") as f:
        ann_data = json.load(f)

    result = convert_annotations(ann_data)

    # Save the result; ensure_ascii=False keeps non-ASCII text readable.
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(result, f, indent=4, ensure_ascii=False)

    print(f"Done! Saved {len(result)} test samples to {output_path}")


if __name__ == "__main__":
    main()