import os 
import json

def load_jsonl(path: str) -> list:
    """Read a JSON Lines file and return its records as a list.

    Each non-blank line is parsed with ``json.loads``. Blank or
    whitespace-only lines (e.g. a trailing newline at end of file) are
    skipped instead of raising ``json.JSONDecodeError``.

    Args:
        path: Location of the ``.jsonl`` file to read.

    Returns:
        The parsed records, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # JSON Lines is UTF-8 by specification; do not depend on the locale default.
    with open(path, 'r', encoding='utf-8') as f:
        return [json.loads(line) for line in f if line.strip()]


def dedupe_by_function_name(items) -> list:
    """Keep only the first record seen for each ``function_name`` value.

    Args:
        items: Iterable of mappings, each with a ``'function_name'`` key.

    Returns:
        A new list holding the first record for every distinct
        ``function_name``, in their original relative order.

    Raises:
        KeyError: If a record has no ``'function_name'`` key.
    """
    seen_function = set()
    unique_items = []
    for item in items:
        function_name = item['function_name']
        # Skip records whose function name has already been emitted.
        if function_name in seen_function:
            continue
        seen_function.add(function_name)
        unique_items.append(item)
    return unique_items


def write_jsonl(path: str, items) -> None:
    """Write records to *path* as JSON Lines, one JSON document per line.

    Args:
        path: Destination file; it is created or truncated.
        items: Iterable of JSON-serializable records.

    Raises:
        OSError: If the file cannot be opened for writing.
        TypeError: If a record is not JSON-serializable.
    """
    with open(path, 'w', encoding='utf-8') as g:
        g.writelines(json.dumps(item) + "\n" for item in items)


if __name__ == "__main__":
    train_path = 'livecodebench/execution2/train.jsonl'
    trainset = load_jsonl(train_path)
    print(len(trainset))

    trainset_onlyone = dedupe_by_function_name(trainset)
    print(len(trainset_onlyone))

    write_jsonl('livecodebench/execution2/train_filtered.jsonl', trainset_onlyone)