

import json


# Input/output locations used by the merge below (empty placeholders here).
file1_path = ""  # first file: complete harmful content plus GPT scores
file2_path = ""  # second file: only id, goal, target
file3_path = ""  # third file: safe question-and-answer set
output_path = ""  # name of the output file

def merge_filtered_data(file1_path, file2_path, file3_path, output_path):
    """Filter file2 by the ids found in file1, append file3, and save as JSON.

    All three inputs are read as UTF-8 JSON. Every entry of file1 and file2
    is indexed with ``['id']``, so each must be a mapping that has that key.
    The entries of file2 whose id also occurs in file1 are kept in their
    original order, and the content of file3 is concatenated after them
    unchanged.

    Args:
        file1_path: Path of the JSON file that supplies the set of ids.
        file2_path: Path of the JSON file whose entries are filtered by id.
        file3_path: Path of the JSON file appended after the filtered
            entries (presumably a list, since it is joined with ``+``).
        output_path: Path the merged JSON is written to; an existing file
            is overwritten.

    Returns:
        None. The result is written to ``output_path`` and a confirmation
        message is printed.

    Raises:
        OSError: If a file cannot be opened.
        json.JSONDecodeError: If an input file is not valid JSON.
        KeyError: If an entry of file1 or file2 has no ``'id'`` key.
    """

    def _read_json(path):
        # Single place for the shared "open as UTF-8, parse JSON" step.
        with open(path, 'r', encoding='utf-8') as handle:
            return json.load(handle)

    # Load the three inputs in the order file1, file2, file3.
    full_records = _read_json(file1_path)
    simple_records = _read_json(file2_path)
    safe_records = _read_json(file3_path)

    # Collect every id that appears in file1.
    known_ids = set()
    for record in full_records:
        known_ids.add(record['id'])

    # Keep only the file2 entries whose id is present in file1.
    kept_records = []
    for record in simple_records:
        if record['id'] in known_ids:
            kept_records.append(record)

    # Filtered file2 entries come first, followed by everything from file3.
    merged = kept_records + safe_records

    # ensure_ascii=False keeps non-ASCII text readable in the output file.
    with open(output_path, 'w', encoding='utf-8') as sink:
        json.dump(merged, sink, ensure_ascii=False, indent=4)

    print(f"合并完成，输出文件已保存为：{output_path}")

# Run the merge using the module-level paths.
merge_filtered_data(
    file1_path=file1_path,
    file2_path=file2_path,
    file3_path=file3_path,
    output_path=output_path,
)
