import os
import json
from utils.util import get_data, load_config

# config = load_config('configs/bbox-dev/LaMP_2_movie.yaml')
# config['seed']=42
# data = get_data(config)
# id_list = []
# for d in data:
#     if d['id'] not in id_list:
#         id_list.append(d['id'])
# print(len(id_list))
# old_id_list = id_list

task = 'LaMP_2_movie'
split = 'train'


def _iter_jsonl(path):
    """Yield ``(line_number, record)`` for each non-blank line of a JSON Lines file.

    Blank lines are skipped so that stray empty lines do not abort the run
    with a ``json.JSONDecodeError``. Line numbers are 1-based and count the
    skipped blank lines too, so they match what an editor shows.
    """
    with open(path, 'r', encoding='utf-8') as f:
        for line_no, line in enumerate(f, start=1):
            if not line.strip():
                continue
            yield line_no, json.loads(line)


def _load_id_lookup(path):
    """Return a dict mapping ``source + target`` to ``id`` for every record in *path*.

    If two records share the same ``source + target`` text, the id of the
    later record wins (plain dict overwrite).
    """
    lookup = {}
    for _, record in _iter_jsonl(path):
        lookup[record['source'] + record['target']] = record['id']
    return lookup


def _attach_ids(path, lookup):
    """Read the records in *path* and set ``record['id']`` from *lookup*.

    Raises:
        KeyError: if a record's ``source + target`` text has no entry in
            *lookup*; the message names the file and line so the offending
            record can be found.
    """
    records = []
    for line_no, record in _iter_jsonl(path):
        key = record['source'] + record['target']
        if key not in lookup:
            raise KeyError(
                '{}:{}: no reference record with the same source+target'.format(
                    path, line_no
                )
            )
        record['id'] = lookup[key]
        records.append(record)
    return records


def _append_jsonl(path, records):
    """Append *records* to *path*, one JSON object per line."""
    # NOTE(review): append mode is kept from the original script, so running
    # the script twice writes every record twice. Delete the output file
    # before re-running, or switch to 'w' if a clean overwrite is intended.
    with open(path, 'a', encoding='utf-8') as f:
        f.writelines(json.dumps(record) + '\n' for record in records)


def main(reference_path=None, generation_path=None, output_path=None):
    """Copy ids from the reference split onto the generated records.

    Records are matched on the concatenation of their ``source`` and
    ``target`` fields. Nothing is written unless every generated record
    could be matched.

    Args:
        reference_path: JSON Lines file whose records carry ``source``,
            ``target`` and ``id``. Defaults to the ``*_new.json`` file of the
            module-level ``task`` / ``split``.
        generation_path: JSON Lines file whose records carry ``source`` and
            ``target``. Defaults to the task's ``42_train.jsonl``.
        output_path: JSON Lines file the id-annotated records are appended
            to. Defaults to the task's ``42_train_new.jsonl``.

    Returns:
        The number of records written.

    Raises:
        KeyError: if a generated record has no counterpart in the reference file.
    """
    if reference_path is None:
        reference_path = "data/LaMP/{}/{}/{}_new.json".format(task, split, split)
    if generation_path is None:
        generation_path = 'data/{}/generation/openai/42_train.jsonl'.format(task)
    if output_path is None:
        output_path = 'data/{}/generation/openai/42_train_new.jsonl'.format(task)

    lookup = _load_id_lookup(reference_path)
    records = _attach_ids(generation_path, lookup)
    _append_jsonl(output_path, records)
    return len(records)


if __name__ == '__main__':
    main()