import json

# Source file and the two destination files for the modality split.
input_file = "/root/project/benchmark_data/Visual_Misinterpretation_Hallucination/close-ended/slake_close_pairs.json"
xray_output = "/root/project/benchmark_data/Visual_Misinterpretation_Hallucination/close-ended/slake_close_pairs_xray.json"
other_output = "/root/project/benchmark_data/Visual_Misinterpretation_Hallucination/close-ended/slake_close_pairs_other.json"

# Spellings of the modality value (after trimming and lower-casing) that are
# treated as X-ray.
XRAY_ALIASES = {"x-ray", "xray", "x_ray"}

# Load the original JSON file.
# NOTE(review): the loop below assumes the top-level JSON value is a sequence
# of dict-like records — confirm against the data file.
with open(input_file, "r", encoding="utf-8") as source:
    data = json.load(source)

# Partition the records: X-ray entries in one list, everything else (including
# records without a "modality" key) in the other.
xray_data = []
other_data = []

for record in data:
    raw_modality = record.get("modality", "")
    # str() lets non-string modality values fall through to the "other" group.
    normalized = str(raw_modality).strip().lower()
    bucket = xray_data if normalized in XRAY_ALIASES else other_data
    bucket.append(record)

# Write the X-ray group first, then the remainder; ensure_ascii=False keeps
# non-ASCII text readable in the output files.
for destination, records in ((xray_output, xray_data), (other_output, other_data)):
    with open(destination, "w", encoding="utf-8") as sink:
        json.dump(records, sink, ensure_ascii=False, indent=4)

# Report how many records ended up in each file.
print(f"X-Ray questions: {len(xray_data)} saved to {xray_output}")
print(f"Other modality questions: {len(other_data)} saved to {other_output}")
