import json
import os

from tqdm import tqdm

# Input locations: `orig_path` holds the records carrying 'instruction',
# 'chosen' and 'rejected'; `now_path` holds the records (keyed by 'question')
# that receive those fields.
orig_path = 'path/to/data'
now_path = 'path/to/data'

# Inserted before the file extension to build the output file name.
OUTPUT_SUFFIX = '_with_original_chosen_rejected'


def load_json(path):
    """Return the parsed content of the UTF-8 encoded JSON file at *path*."""
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)


def build_lookup(records, field):
    """Map each record's 'instruction' to its *field* value.

    Records missing either 'instruction' or *field* are skipped. When an
    instruction occurs more than once, the last occurrence wins.
    """
    return {
        item['instruction']: item[field]
        for item in records
        if 'instruction' in item and field in item
    }


def merge_chosen_rejected(items, instr_to_chosen, instr_to_reject):
    """Copy 'chosen'/'rejected' onto every item whose 'question' is known.

    Items are mutated in place. An item counts as matched when its
    'question' is a key of *instr_to_chosen*; 'rejected' is copied only when
    the same question also has a rejected entry, so a source record that
    carried 'chosen' without 'rejected' no longer raises ``KeyError``.

    Returns the number of matched items.
    """
    matched = 0
    for item in items:
        question = item.get('question')
        if question not in instr_to_chosen:
            continue
        item['chosen'] = instr_to_chosen[question]
        if question in instr_to_reject:
            item['rejected'] = instr_to_reject[question]
        matched += 1
    return matched


def derive_save_path(path):
    """Return the output path for *path*: suffix inserted before the extension.

    Only the final extension is touched, so a '.json' inside a directory name
    is left alone. A path without an extension gets '.json' appended, which
    guarantees the result differs from *path* and the input file is never
    overwritten.
    """
    root, ext = os.path.splitext(path)
    return f"{root}{OUTPUT_SUFFIX}{ext or '.json'}"


def main():
    """Merge the original chosen/rejected fields into the current data file."""
    orig_data = load_json(orig_path)
    instr_to_chosen = build_lookup(orig_data, 'chosen')
    instr_to_reject = build_lookup(orig_data, 'rejected')

    now_data = load_json(now_path)

    # tqdm yields the very same dict objects, so in-place edits land in now_data.
    matched = merge_chosen_rejected(
        tqdm(now_data, desc='Merging chosen fields'),
        instr_to_chosen,
        instr_to_reject,
    )
    print(f"✅combine finished: {matched} nums")

    save_path = derive_save_path(now_path)
    with open(save_path, 'w', encoding='utf-8') as f:
        json.dump(now_data, f, ensure_ascii=False, indent=2)

    print(f"📁 save to file: {save_path}")


if __name__ == '__main__':
    main()
