import json
import torch
from tqdm import tqdm
import numpy as np

# Input JSON file. It must hold a list of records; each record is read for
# the keys 'chosen_hack_score', 'chosen_win', 'response', 'question' and
# 'rejected'.
path = 'path/to/data'
# A record is kept only when the sigmoid of its z-scored
# 'chosen_hack_score' is strictly greater than this value.
threshold = 0.85


def sigmoid_gate(raw_values):
    """Z-score ``raw_values`` and squash the result through a sigmoid.

    Args:
        raw_values: Sequence of numeric scores (one per record).

    Returns:
        A float32-based NumPy array of the same length with values in
        [0, 1]. An empty input yields an empty array.
    """
    raw_scores = np.asarray(raw_values, dtype=np.float32)
    if raw_scores.size == 0:
        # mean()/std() of an empty array only produce NaN and a RuntimeWarning.
        return raw_scores

    mean = raw_scores.mean()
    # The epsilon keeps a constant score column from dividing by zero; every
    # record then maps to sigmoid(0) == 0.5.
    std = raw_scores.std() + 1e-8
    normalized_scores = (raw_scores - mean) / std
    return 1 / (1 + np.exp(-normalized_scores))


def pick_choices(scored_items, cutoff):
    """Return each record's 'chosen_win' if its score passes, else -1.

    Args:
        scored_items: Iterable of ``(record, score)`` pairs.
        cutoff: A record passes only when ``score > cutoff`` (strict).

    Returns:
        A list with one entry per pair: ``record['chosen_win']`` for passing
        records and ``-1`` for the rest.
    """
    return [
        item['chosen_win'] if score > cutoff else -1
        for item, score in scored_items
    ]


def build_preference_pairs(chosen_items):
    """Build the output dicts for records whose choice is 0 or 1.

    Args:
        chosen_items: Iterable of ``(record, choice)`` pairs. ``choice`` is
            used as an index into ``record['response']``.

    Returns:
        A list of ``{"instruction", "chosen", "rejected"}`` dicts, in input
        order. Pairs whose choice is anything other than 0 or 1 (including
        the -1 "filtered out" marker) are skipped.
    """
    pairs = []
    for item, choice in chosen_items:
        if choice not in (0, 1):
            continue
        pairs.append({
            "instruction": item['question'],
            "chosen": item['response'][choice],
            "rejected": item['rejected'],
        })
    return pairs


def derive_save_path(source_path):
    """Return ``source_path`` with every 'old' replaced by 'new'.

    Raises:
        ValueError: If ``source_path`` contains no 'old'. The derived path
            would then equal the input path and writing the result would
            overwrite the source data.
    """
    save_path = source_path.replace('old', 'new')
    if save_path == source_path:
        raise ValueError(
            f"cannot derive an output path from {source_path!r}: it does not "
            "contain 'old', so saving would overwrite the input file"
        )
    return save_path


def main(data_path=path, cutoff=threshold):
    """Filter the records in ``data_path`` and save the surviving pairs.

    Args:
        data_path: JSON file to read; defaults to the module-level ``path``.
        cutoff: Sigmoid-score threshold; defaults to the module-level
            ``threshold``.

    Raises:
        ValueError: If no distinct output path can be derived from
            ``data_path`` (see ``derive_save_path``).
    """
    # Resolve the destination first so a bad path fails before any work is done.
    save_path = derive_save_path(data_path)

    with open(data_path, 'r', encoding='utf-8') as f:
        datas = json.load(f)

    sigmoid_scores = sigmoid_gate([item['chosen_hack_score'] for item in datas])

    chose_num = pick_choices(
        tqdm(zip(datas, sigmoid_scores), total=len(datas), desc='sigmoid'),
        cutoff,
    )
    # Explicit raise instead of `assert`, which is stripped under `python -O`.
    if len(chose_num) != len(datas):
        raise AssertionError("error!")

    chosen_responses = build_preference_pairs(
        tqdm(zip(datas, chose_num), total=len(datas), desc='building')
    )

    print(f"filter {len(chosen_responses)} nums")
    with open(save_path, 'w', encoding='utf-8') as f:
        json.dump(chosen_responses, f, ensure_ascii=False, indent=2)

    print(f"📁 save to file:{save_path}")


# Run the pipeline only when executed as a script, so the helpers above can
# be imported without touching the filesystem.
if __name__ == "__main__":
    main()
    


