[
    {
        "dataset_name": "redpajama_16k",
        "task_name": "redpajama_16k",
        "abs_weight": 1,
        "version": "/user/tc_agi/klara/datasets/redpajama/redpajama_16k_20240516",
        "path": "/data/checkpoints/project_data/redpajama_16k_20240516",
        "transforms": "datasets/redpajama/script.py",
        "nlines": 63407,
        "ave_tokens_per_line": 31749.0455,
        "total_tokens": 20.7,
        "allow_repeat": true
    },
    {
        "dataset_name": "dolma_16k",
        "task_name": "dolma_16k",
        "abs_weight": 0.6,
        "version": "/user/tc_agi/klara/datasets/redpajama/en_dolma_16k_20240517",
        "path": "/data/checkpoints/project_data/en_dolma_16k_20240517",
        "transforms": "datasets/dolma/script.py",
        "nlines": 63407,
        "ave_tokens_per_line": 31749.0455,
        "total_tokens": 20.7,
        "allow_repeat": true
    },
    {
        "dataset_name": "c4_16k",
        "task_name": "c4_16k",
        "abs_weight": 0.1,
        "version": "/user/tc_agi/klara/datasets/redpajama/en_c4_16k_20240517",
        "path": "/data/checkpoints/project_data/en_c4_16k_20240517",
        "transforms": "datasets/c4/script.py",
        "nlines": 63407,
        "ave_tokens_per_line": 31749.0455,
        "total_tokens": 20.7,
        "allow_repeat": true
    },
    {
        "dataset_name": "pile_16k",
        "task_name": "pile_16k",
        "abs_weight": 0.02,
        "version": "/user/tc_agi/klara/datasets/redpajama/pile_v4_16k_20240517",
        "path": "/data/checkpoints/project_data/pile_v4_16k_20240517",
        "transforms": "datasets/pile/script.py",
        "nlines": 63407,
        "ave_tokens_per_line": 31749.0455,
        "total_tokens": 20.7,
        "allow_repeat": true
    }
]
