import json, os




def trans(item):
    """Reshape one judge record into a pool entry with only the rejected side filled.

    Args:
        item: Mapping that must provide ``paired_data`` (containing ``id``),
            ``criteria`` (containing the key ``'0'``), ``judge``,
            ``judge_pair`` and ``answer``.

    Returns:
        A dict with keys ``id``, ``chosen_item`` (always ``None``) and
        ``rejected_item``; the latter wraps the selected fields of ``item``
        under ``traj``. Nested values are shared with ``item``, not copied.

    Raises:
        KeyError: If any of the required keys is missing from ``item``.
    """
    pair_id = item['paired_data']['id']

    # Only the entry stored under the string key '0' of `criteria` is kept.
    # NOTE(review): presumably other indices may exist -- confirm upstream.
    trajectory = {
        'criteria_list': item['criteria']['0'],
        'judge': item['judge'],
        'judge_pair': item['judge_pair'],
        'paired_data': item['paired_data'],
        'answer': item['answer'],
    }

    return {
        "id": pair_id,
        "chosen_item": None,
        "rejected_item": {"traj": trajectory},
    }

if __name__ == "__main__":
    # Script entry point: read judge records and a list of filtered ids from
    # FILTER_DATA_DIR, keep the judge records whose paired-data id appears in
    # the filtered list, reshape each with trans(), and write them out as JSONL.
    FILTER_DATA_DIR = "/data//GRM-Omni-v1/0923_omni_results/0922_filter_image_undstanding_scalar_rm"
    judge_file = os.path.join(FILTER_DATA_DIR, "judge.jsonl")
    filter_file = os.path.join(FILTER_DATA_DIR, "filted_data.jsonl")

    # Read both inputs inside `with` blocks so the file handles are closed
    # deterministically. Whitespace-only lines (e.g. a trailing blank line)
    # are skipped instead of crashing json.loads.
    with open(judge_file, encoding="utf-8") as fr:
        judge_items = [json.loads(line) for line in fr if line.strip()]
    with open(filter_file, encoding="utf-8") as fr:
        error_ids = {json.loads(line)['id'] for line in fr if line.strip()}

    # Keep only judge records whose paired-data id is listed in the filter file.
    dpo_pool_items = [
        trans(item)
        for item in judge_items
        if item['paired_data']['id'] in error_ids
    ]

    dpo_pool_file = os.path.join(FILTER_DATA_DIR, "DPO_pool.jsonl")
    # dpo_pool_file="/data//GRM-Omni-v1/tmp/dataset/dpo_pool_0922.jsonl"
    # The output is opened only after all inputs parsed successfully, so a
    # malformed input never leaves a truncated pool file behind.
    with open(dpo_pool_file, 'w', encoding="utf-8") as fw:
        for item in dpo_pool_items:
            fw.write(json.dumps(item) + '\n')
        
