# %%
import json

# Input: a JSON Lines file; every non-blank line is parsed as one JSON object
# whose 'messages' list ends with an entry carrying a string 'content'.
load_path = '<your_path>/trainset_4096_cot_refined.jsonl'
# Output: the input lines that survive the filter below, copied verbatim.
save_path = '<your_path>/trainset_4096_cot_refined_filtered.jsonl'

# Pin the encoding on both the read and the write instead of relying on the
# platform default, so the script behaves the same on every OS/locale.
with open(load_path, 'r', encoding='utf-8') as f:
    lines = f.readlines()
# %%
# A record is dropped when the content of its last message contains any of
# these phrases (matched case-insensitively, as substrings).
BANNED_PHRASES = ('ground truth', 'refine')

filtered_lines = []
for line in lines:
    # Blank lines (e.g. a trailing empty line) are not JSON records;
    # json.loads would raise on them, so skip them.
    if not line.strip():
        continue
    # Lower-case once and reuse it for every phrase check.
    content = json.loads(line)['messages'][-1]['content'].lower()
    if not any(phrase in content for phrase in BANNED_PHRASES):
        # Keep the raw line (not a re-serialized object) so the output is
        # byte-for-byte what was read.
        filtered_lines.append(line)
# Kept count, then total number of lines read (blank lines included).
print(len(filtered_lines))
print(len(lines))
# %%
with open(save_path, 'w', encoding='utf-8') as f:
    f.writelines(filtered_lines)
# %%
