"""
Generate synthetic datasets for pretraining.
This script generates synthetic datasets for pretraining the model and saves them to disk.
"""

from pathlib import Path

import xarray as xr
from tqdm import tqdm


def main() -> None:
    """Open every NetCDF file in the pretraining dataset directories.

    Each ``*.nc`` file under ``data/datasets_pretraining/tabpfn`` and then
    ``data/datasets_pretraining/forest`` is opened with ``xr.open_dataset``
    and closed again straight away; the opened dataset is not otherwise used.
    One progress bar is shown per directory.

    NOTE(review): presumably this is a smoke test that the stored files can
    be opened -- confirm the intended purpose.
    """
    dataset_dirs = (
        Path("data/datasets_pretraining/tabpfn"),
        Path("data/datasets_pretraining/forest"),
    )

    for path in dataset_dirs:
        # Materialise the glob so tqdm knows the total and can show progress.
        files = list(path.glob("*.nc"))

        for file in tqdm(files):
            # open_dataset keeps the underlying file handle open until the
            # dataset is closed; the context manager releases it right away
            # instead of accumulating one open handle per file.
            with xr.open_dataset(file):
                pass


# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()




