"""Build MMLU-Pro evaluation prompts and publish them to the Hugging Face Hub.

Running this file as a script:

1. loads the ``test`` split of ``TIGER-Lab/MMLU-Pro``,
2. adds a ``prompt`` column built by :func:`create_prompt`,
3. pushes the full dataset to the Hub,
4. shuffles with a fixed seed, keeps the first 500 rows, and pushes that
   subset to the Hub as well.
"""

# Labels assigned to the answer options, in order (only as many as needed
# are used for a given example).
OPTION_LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"

SOURCE_DATASET = "TIGER-Lab/MMLU-Pro"
SOURCE_SPLIT = "test"

# NOTE(review): both repo ids start with "-dev/" and the second ends in
# "500bset"; they look truncated (missing namespace prefix / "subset"?).
# Kept exactly as found -- confirm the intended Hub repo ids before running.
FULL_REPO_ID = "-dev/mmlu_pro_eval_full"
SUBSET_REPO_ID = "-dev/mmlu_pro_eval_500bset"

SUBSET_SIZE = 500
SHUFFLE_SEED = 42


def create_prompt(example: dict) -> dict:
    """Build the evaluation prompt for a single dataset row.

    Args:
        example: A row providing ``"question"`` (the problem text) and
            ``"options"`` (an ordered sequence of answer choices).

    Returns:
        A one-key dict ``{"prompt": <text>}``, suitable for ``Dataset.map``
        to add as a new ``prompt`` column.

    Raises:
        KeyError: If ``"question"`` or ``"options"`` is missing.
        IndexError: If there are more options than available letters (26).
    """
    question = example["question"]
    opts = example["options"]

    # Render the options as "A. option1, B. option2, ...".
    options_str = ", ".join(
        f"{OPTION_LETTERS[i]}. {opt}" for i, opt in enumerate(opts)
    )

    # The first two fragments are plain (non-f) strings, so braces are
    # literal there: "\\boxed{}" yields exactly "\boxed{}" in the prompt.
    prompt = (
        "Return your final response within \\boxed{} and only include the letter choice "
        "(A, B, C, D, E, F, G, H, I, or J) as your final response.\n"
        f"Problem: {question}\n"
        f"Options: {options_str}"
    )
    return {"prompt": prompt}


def main() -> None:
    """Load MMLU-Pro, add prompts, and push the full and subset datasets."""
    # Imported here so that `create_prompt` can be imported and tested
    # without the `datasets` package or any network access.
    from datasets import load_dataset

    # `split` selects a single Dataset (not a DatasetDict), which the
    # `.select(...)` call below requires.
    dataset = load_dataset(SOURCE_DATASET, split=SOURCE_SPLIT)

    # Add the new "prompt" column to the dataset.
    dataset = dataset.map(create_prompt)
    dataset.push_to_hub(FULL_REPO_ID)

    # Shuffle with a fixed seed and keep the first SUBSET_SIZE rows, i.e. a
    # reproducible uniform random sample.
    dataset = dataset.shuffle(seed=SHUFFLE_SEED).select(range(SUBSET_SIZE))
    dataset.push_to_hub(SUBSET_REPO_ID)


if __name__ == "__main__":
    main()