{
    "train_data": [
        {
            "type": "hfds",
            "prefix": "dolma",
            "text_key": "text",
            "weight": 10,
            "data_dir": "/fs/XXXX-37/llm-pretraining/datasets/raw/dolma_v1_6_sample/train"
        }
    ],
    "val_data": [
        {
            "type": "hfds",
            "prefix": "splitted_cosmopedia",
            "text_key": "text",
            "weight": 10,
            "data_dir": "/fs/XXXX-37/llm-pretraining/llm-retrieval/data/splitted_cosmopedia/val_ood"
        }
    ]
}