"""Build the verified Stratos mix and push it to the Hugging Face Hub.

Loads the code, math, and science/puzzle Stratos-scale datasets, keeps only
the math and science/puzzle rows whose ``correct`` field is truthy,
concatenates the three datasets, and uploads the result.
"""

from datasets import concatenate_datasets, load_dataset

# NOTE(review): every repo id below starts with "-dev/", which looks like a
# missing organization prefix -- confirm the intended namespace before running.

# `split="train"` makes load_dataset return a single Dataset (not a
# DatasetDict), which is what concatenate_datasets expects further down.
code_stratos_scale = load_dataset(
    "-dev/code_stratos_scale_share_gpt", split="train"
)
math_stratos_scale = load_dataset(
    "-dev/math_stratos_scale_judged_and_annotated", split="train"
)
science_and_puzzle_stratos_scale = load_dataset(
    "-dev/science_and_puzzle_stratos_scale_annotated_with_answers",
    split="train",
)

# Filter for verification: keep only rows flagged as correct. The code
# dataset is not filtered here.
science_and_puzzle_stratos_scale = science_and_puzzle_stratos_scale.filter(
    lambda x: x["correct"]
)
math_stratos_scale = math_stratos_scale.filter(lambda x: x["correct"])

# Remove long code examples (longer than context length) -- currently disabled:
# code_stratos_scale = code_stratos_scale.filter(lambda x: len(x["deepseek_solution"]) < 16384)

stratos_scale = concatenate_datasets(
    [code_stratos_scale, math_stratos_scale, science_and_puzzle_stratos_scale]
)
stratos_scale.push_to_hub("-dev/stratos_verified_mix")