from datasets import load_dataset

# Load each dataset below with `load_dataset` and write it to a local
# directory with `save_to_disk`. Jobs run in the order listed.
#
# Each job is (positional args, keyword args, output directory). The argument
# split mirrors how each call is spelled so that `load_dataset` receives
# exactly the same positional/keyword arguments for every dataset.
_SAVE_JOBS = (
    (
        ("HuggingFaceFW/fineweb-edu",),
        # Only this job requests a specific split ("train") and passes
        # `streaming=False` explicitly; the remaining jobs pass no split.
        {"name": "sample-350BT", "split": "train", "streaming": False},
        "datasets/fineweb_edu-350b",
    ),
    (
        ("nvidia/Nemotron-CC-Math-v1", "4plus"),
        {},
        "datasets/Nemotron-CC-Math-v1-4plus",
    ),
    (
        ("nvidia/Nemotron-Pretraining-SFT-v1", "Nemotron-SFT-General"),
        {},
        "datasets/Nemotron-Pretraining-SFT-v1-General",
    ),
    (
        ("nvidia/Nemotron-Pretraining-SFT-v1", "Nemotron-SFT-MATH"),
        {},
        "datasets/Nemotron-Pretraining-SFT-v1-Math",
    ),
)

for _load_args, _load_kwargs, _out_dir in _SAVE_JOBS:
    # `ds` is rebound on every iteration, so after the loop it refers to the
    # last dataset loaded.
    ds = load_dataset(*_load_args, **_load_kwargs)
    ds.save_to_disk(_out_dir)
