# Publish the rows of each decontamination-result dataset that carry a
# non-empty "matched_dataset" value, keeping only the text column and the
# match metadata columns.
# NOTE(review): judging by the repo names, a non-empty "matched_dataset"
# presumably marks a row flagged as overlapping a benchmark -- confirm.

from datasets import load_dataset

# Match metadata columns kept in every published dataset.
_MATCH_COLUMNS = ["matched_dataset", "matched_text", "match_score"]

# (source repo id, name of the column holding the example text).
_SOURCES = [
    ("-dev/decontaminate_stratos_numina", "problem"),
    ("-dev/decontaminate_stratos_apps", "question"),
    ("-dev/decontaminate_stratos_taco", "question"),
]


def _publish_matches(repo_id, text_column):
    """Filter one dataset down to its matched rows and push them to the Hub.

    Args:
        repo_id: Hub repo to load; the result is pushed to
            ``f"{repo_id}_filtered"``.
        text_column: Name of the text column to keep alongside the match
            metadata columns.
    """
    # `split="train"` makes load_dataset return a single Dataset, so that
    # len() below counts rows (the original passed a misspelled `spt=`).
    dataset = load_dataset(repo_id, split="train")
    dataset = dataset.filter(lambda x: x["matched_dataset"] != "")
    print(len(dataset))
    # Push only the column-selected view. The original taco section pushed
    # the full dataset to the same repo afterwards, overwriting this output.
    dataset.select_columns([text_column, *_MATCH_COLUMNS]).push_to_hub(
        f"{repo_id}_filtered"
    )


for _repo_id, _text_column in _SOURCES:
    _publish_matches(_repo_id, _text_column)