#%%
import json
import random
import os

def _read_jsonl(path):
    """Return the list of JSON records stored one-per-line in *path*.

    Blank or whitespace-only lines (e.g. a trailing empty line) are skipped
    instead of being passed to the JSON parser.
    """
    records = []
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            records.append(json.loads(line))
    return records


def merge_and_shuffle_jsonl(file1, file2, output_file, seed=None):
    """Merge two JSON Lines files, shuffle the records, and write them out.

    Both inputs are read completely before the output is opened for writing.
    The output contains one JSON document per line, serialized with
    ``ensure_ascii=False`` so non-ASCII text is written as-is. A summary
    line with the record counts is printed when done.

    Args:
        file1: Path of the first input JSONL file (read as UTF-8).
        file2: Path of the second input JSONL file (read as UTF-8).
        output_file: Path of the JSONL file to create or overwrite.
        seed: Optional seed for a reproducible shuffle. When ``None`` the
            module-level ``random`` generator is used in its current state.

    Raises:
        OSError: If a file cannot be opened.
        json.JSONDecodeError: If a non-blank input line is not valid JSON.
    """
    data1 = _read_jsonl(file1)
    data2 = _read_jsonl(file2)

    merged_data = data1 + data2
    # A private generator yields the same order as random.seed(seed) followed
    # by random.shuffle(), but leaves the process-wide RNG state untouched.
    rng = random if seed is None else random.Random(seed)
    rng.shuffle(merged_data)

    with open(output_file, 'w', encoding='utf-8') as f:
        f.writelines(
            json.dumps(item, ensure_ascii=False) + '\n' for item in merged_data
        )

    print(f"合并完成！共处理 {len(data1)} + {len(data2)} = {len(merged_data)} 条数据，已保存至 {output_file}")

if __name__ == "__main__":
    # Merge the two tool-call training sets into a single shuffled training
    # file; the fixed seed keeps the resulting order reproducible.
    merge_and_shuffle_jsonl(
        file1="../data/train_data/Toolcall_Orion_Zh_En.jsonl",
        file2="../data/train_data/Toolcall_Open_Report.jsonl",
        output_file="../data/train_data/Toolcall_Research_train.jsonl",
        seed=42,
    )