"""Print row counts for the Llama-Nemotron post-training SFT dataset.

Loads the requested SFT splits, concatenates them into one dataset, and
prints four numbers, one per line:

1. total number of rows,
2. rows whose ``generator`` equals ``GENERATOR``,
3. rows whose ``used_in_training`` equals ``"yes"``,
4. rows satisfying both conditions.
"""
import os

from datasets import concatenate_datasets, load_dataset

# Names of the SFT splits that are merged into a single dataset.
s = ["code", "math", "science", "chat", "safety"]

# Exact ``generator`` value to match (compared with ==, not as a substring).
GENERATOR = "DeepSeek-R1, Qwen-2.5-72B-Instruct"

# Worker count shared by every filter pass.
NUM_PROC = os.cpu_count()

# The keyword must be ``split``; given a list of split names, load_dataset is
# expected to return one Dataset per name, which is the list form that
# concatenate_datasets takes.
ds = load_dataset(
    "nvidia/Llama-Nemotron-Post-Training-Dataset-v1",
    "SFT",
    split=s,
)
ds = concatenate_datasets(ds)


def _count(predicate):
    """Return how many rows of ``ds`` satisfy ``predicate``."""
    return len(ds.filter(predicate, num_proc=NUM_PROC))


print(len(ds))
print(_count(lambda x: x["generator"] == GENERATOR))
print(_count(lambda x: x["used_in_training"] == "yes"))
print(
    _count(
        lambda x: x["generator"] == GENERATOR
        and x["used_in_training"] == "yes"
    )
)