from datasets import load_dataset
import json

def dataset_engine(
    repo_id="bigcode/bigcodebench",
    split="v0.1.3",
    splits_file="data/splits/bigcodebench_complete_splits_0.5_0.1_0.4.json",
    return_val=False,
    return_problem=False,
):
    """Load a dataset and partition its entries into train/val/test lists.

    The dataset is fetched with ``load_dataset(repo_id, split=split)`` and
    each entry is assigned to a partition according to the task ids listed
    in the JSON file ``splits_file``, which must provide the keys
    ``'train'``, ``'val'`` and ``'test'``.

    An id listed in several partitions is assigned to the first match in
    the order train, val, test. Entries whose ``task_id`` appears in none
    of the partitions are dropped. Within each partition, entries keep the
    order in which they occur in the dataset.

    Args:
        repo_id: Dataset identifier passed to ``load_dataset``.
        split: Dataset split name passed to ``load_dataset``.
        splits_file: Path to the JSON file holding the task ids of each
            partition.
        return_val: If true, also return the validation partition.
        return_problem: If true, store the original dataset entry under
            the ``'problem'`` key of each returned dict.

    Returns:
        ``(trainset, valset, testset)`` if ``return_val`` is true,
        otherwise ``(trainset, testset)``. Each element is a list of dicts
        with the keys ``task_id``, ``prompt``, ``code_prompt``,
        ``entry_point``, ``canonical_solution`` and ``test`` (plus
        ``problem`` when requested).

    Raises:
        KeyError: If the splits file lacks one of the partition keys or a
            dataset entry lacks one of the copied fields.
    """
    raw_data = load_dataset(repo_id, split=split)
    # Pin the encoding so decoding does not depend on the platform locale.
    with open(splits_file, 'r', encoding='utf-8') as f:
        splits = json.load(f)

    # Build the id sets once: membership tests against the JSON-decoded
    # lists would rescan the whole list for every dataset entry.
    train_ids = set(splits['train'])
    val_ids = set(splits['val'])
    test_ids = set(splits['test'])

    trainset = []
    valset = []
    testset = []
    for entry in raw_data:
        task_id = entry['task_id']
        entry_dict = {
            'task_id': task_id,
            # The dataset's 'complete_prompt' field is exposed as 'prompt'.
            'prompt': entry['complete_prompt'],
            'code_prompt': entry['code_prompt'],
            'entry_point': entry['entry_point'],
            'canonical_solution': entry['canonical_solution'],
            'test': entry['test'],
        }
        if return_problem:
            entry_dict['problem'] = entry

        # First match wins: train takes precedence over val, val over test.
        if task_id in train_ids:
            trainset.append(entry_dict)
        elif task_id in val_ids:
            valset.append(entry_dict)
        elif task_id in test_ids:
            testset.append(entry_dict)

    if return_val:
        return trainset, valset, testset
    return trainset, testset

