"""Build a ShareGPT-style dataset from the Big-Math-RL-Verified train split.

Pipeline: load the train split, keep only rows whose ``source`` is
``"big_math"``, rename ``problem`` to ``question``, run the project helpers
(``decontaminate``, ``deepseek_r1``, ``map_to_sharegpt``) around a seeded
shuffle and subsample, then push the result to the Hugging Face Hub.
"""

from datasets import load_dataset

from reasoning_commons import decontaminate, deepseek_r1, map_to_sharegpt

# `split="train"` selects a single Dataset; the original keyword was misspelled
# as `spt`, which is not a split selector.
ds = load_dataset("SynthLabsAI/Big-Math-RL-Verified", split="train")

# Keep only the rows that originate from the "big_math" source.
ds = ds.filter(lambda x: x["source"] == "big_math")  # 47_010 (author's note; presumably the row count after filtering)

# Rename the column to "question" before handing off to the helpers below.
ds = ds.rename_column("problem", "question")

# Project helper; presumably removes benchmark-overlapping rows — confirm in reasoning_commons.
ds = decontaminate(ds)

# Fixed seed so the subsample taken below is reproducible.
ds = ds.shuffle(seed=42)
ds = ds.take(31_600)

# Project helpers; presumably generate DeepSeek-R1 responses and convert the
# rows to ShareGPT conversation format — confirm in reasoning_commons.
ds = deepseek_r1(ds)
ds = map_to_sharegpt(ds)

# NOTE(review): the repo id begins with "/" — the namespace (user/org) appears
# to be missing. Confirm the intended "<namespace>/<name>" before running.
ds.push_to_hub("/a_1_math_big-math-reformulated")