---
# Evaluation configuration named "math_watermark_eval".
# NOTE(review): key semantics below are inferred from the key names only;
# confirm against the code that consumes this file.
name: 'math_watermark_eval'

# Sample count; presumably how many dataset examples are evaluated - confirm.
n_samples: 1000

# Presumably the model used for perplexity scoring - confirm.
ppl_model: 'Qwen/Qwen2.5-32B-Instruct'

# Dataset selection: source path, configuration name, and split.
# NOTE(review): looks like a Hugging Face Hub dataset id - confirm the loader.
dataset_config:
  path: 'openai/gsm8k'
  name: 'main'
  split: 'test'
  # Record fields of interest; presumably the columns the consumer reads.
  data_fields:
    - 'question'
    - 'answer'