from datasets import load_dataset
import os
import sys

# Directory that contains this script file.
script_dir = os.path.dirname(os.path.abspath(__file__))
# Two directory levels above the script directory; treated as the project root.
project_root = os.path.dirname(os.path.dirname(script_dir))
# Put the project root first on the import path. This must run before the
# `rllm` imports below (presumably so they resolve to this checkout -- confirm).
sys.path.insert(0, project_root)

# Relative paths used later in this script (e.g. 'rllm/data/datasets') are
# resolved against the project root rather than the caller's directory.
os.chdir(project_root)

import rllm
from rllm.data.dataset import DatasetRegistry

def prepare_sudoku_data(split="test", dataset_path=None):
    """Load a Sudoku dataset, tag every row, and register it with rllm.

    Each example is copied and given two extra fields, ``task_type`` set to
    ``"sudoku"`` and ``data_source`` set to ``"sudoku_annotation"``, before
    the dataset is registered under the name ``"sudoku_annotation"``.

    Args:
        split: Split name passed to ``load_dataset`` and used as the split
            under which the dataset is registered.
        dataset_path: Dataset location passed to ``load_dataset`` as its first
            argument. When omitted, a module-level ``YOUR_PATH`` variable is
            used if one has been defined.

    Returns:
        The value returned by ``DatasetRegistry.register_dataset``.

    Raises:
        ValueError: If no dataset location is available.
    """
    if dataset_path is None:
        # Backward compatibility: earlier versions expected the user to define
        # a module-level YOUR_PATH. Look it up dynamically so that a missing
        # definition yields a clear error instead of a NameError.
        dataset_path = globals().get("YOUR_PATH")
    if not dataset_path:
        raise ValueError(
            "No dataset location given: pass dataset_path=... "
            "(or --dataset_path on the command line)."
        )

    dataset = load_dataset(dataset_path, split=split)

    def preprocess_fn(example):
        # Keep every original column and add the routing metadata.
        return {
            **example,
            "task_type": "sudoku",
            "data_source": "sudoku_annotation",
        }

    dataset = dataset.map(preprocess_fn)

    print(f"Current working directory: {os.getcwd()}")
    print(f"Registering datasets in: {os.path.abspath('rllm/data/datasets')}")

    dataset = DatasetRegistry.register_dataset("sudoku_annotation", dataset, split)

    print("Datasets registered successfully!")
    print(f"Available datasets: {DatasetRegistry.get_dataset_names()}")
    print(f"Sudoku bench splits: {DatasetRegistry.get_dataset_splits('sudoku_annotation')}")

    return dataset


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description="Prepare and register the sudoku_annotation dataset."
    )
    parser.add_argument("--split", type=str, default="test")
    parser.add_argument(
        "--dataset_path",
        type=str,
        default=None,
        help="Dataset location passed to datasets.load_dataset.",
    )
    args = parser.parse_args()
    dataset = prepare_sudoku_data(args.split, dataset_path=args.dataset_path)
    print(dataset)
