from datasets import load_dataset
import json


def dataset_engine(repo_id="KodCode/kodCode-V1", splits_file="data/splits/kodcode_complete_splits_1000.json", return_val=False, return_problem=False):
    """Load a dataset and partition its rows according to a splits file.

    The ``"train"`` split of ``repo_id`` is loaded with ``load_dataset`` and
    each row is routed to the train, validation or test list depending on
    which id collection in ``splits_file`` contains its ``question_id``.
    Rows whose id appears in none of the collections are dropped. Output
    lists keep the iteration order of the loaded dataset.

    Args:
        repo_id: Dataset identifier forwarded to ``load_dataset``.
        splits_file: Path to a JSON file holding the ``"train"``, ``"val"``
            and ``"test"`` id collections.
        return_val: When true, the validation list is included in the result.
        return_problem: When true, every returned entry also carries a nested
            ``'problem'`` dict repeating the prompt, canonical solution and
            test.

    Returns:
        ``(trainset, valset, testset)`` if ``return_val`` is true, otherwise
        ``(trainset, testset)``. Each element is a list of dicts with the
        keys ``'task_id'``, ``'prompt'``, ``'canonical_solution'`` and
        ``'test'`` (plus ``'problem'`` when requested).

    Raises:
        KeyError: If the splits file lacks one of the ``"train"``, ``"val"``
            or ``"test"`` keys, or a selected row lacks an expected column.
    """
    raw_data = load_dataset(repo_id, split="train")
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(splits_file, 'r', encoding='utf-8') as f:
        splits = json.load(f)

    trainset = []
    valset = []
    testset = []

    # Build a single hash lookup (task id -> destination list) once, instead
    # of scanning the id collections for every dataset row. The collections
    # are inserted in reverse priority so that an id listed in several splits
    # resolves to train first, then val, then test.
    # Assumes the ids in the splits file are hashable (e.g. strings).
    destination_by_id = {}
    for split_name, bucket in (('test', testset), ('val', valset), ('train', trainset)):
        for split_task_id in splits[split_name]:
            destination_by_id[split_task_id] = bucket

    for entry in raw_data:
        task_id = entry['question_id']
        bucket = destination_by_id.get(task_id)
        if bucket is None:
            # Row belongs to no split: skip it before building any output.
            continue
        entry_dict = {
            'task_id': task_id,
            'prompt': entry['question'],
            'canonical_solution': entry['solution'],
            'test': entry['test'],
        }
        if return_problem:
            entry_dict['problem'] = {
                'prompt': entry['question'],
                'canonical_solution': entry['solution'],
                'test': entry['test'],
            }
        bucket.append(entry_dict)

    if return_val:
        return trainset, valset, testset
    return trainset, testset

