# Task identity: the task is named `svamp_cot` and carries the
# `math_word_problems` tag.
tag:
  - math_word_problems
task: svamp_cot
# Data source: the `svamp_structured` configuration of `allenai/lila`.
dataset_path: allenai/lila
dataset_name: svamp_structured
# Answers are produced by free-form generation (see generation_kwargs).
output_type: generate_until
# All three split roles point at `test`; the training/few-shot entries are
# kept only for API symmetry. With num_fewshot set to 0 below, no few-shot
# examples should be requested, so the overlap with the evaluation split is
# presumably harmless -- revisit if num_fewshot is ever raised.
# NOTE(review): this comment used to say "Calc-SVAMP ships as a single
# test-like split", but dataset_path is allenai/lila -- confirm which splits
# svamp_structured actually provides.
training_split: test
fewshot_split: test
test_split: test

# Prompt template: the document's `input` field prefixed with "Q: ", then a
# zero-shot chain-of-thought cue on its own line. The `|` block scalar keeps
# the line break between the two lines and a single trailing newline.
doc_to_text: |
  Q: {{input}}
  Let's think step by step.
# Reference answer: rendered from the document's `output_answer` field
# (described by the original author as the numeric result string).
# NOTE(review): an earlier note suggested `result_float` as an alternative;
# this config never references such a field -- confirm it exists in this
# dataset before switching.
doc_to_target: "{{output_answer}}"

# Scoring: one metric, supplied by the function referenced as
# `utils.number_em` through the `!function` tag. Scores are aggregated with
# the mean, and higher values are better.
# NOTE(review): the name suggests a numeric exact-match check -- confirm
# against the implementation in the utils module.
metric_list:
  - metric: !function utils.number_em
    aggregation: mean
    higher_is_better: true

# Decoding settings: sampling is enabled (temperature 0.5, top-p 0.95), and
# generation is cut off at any of the listed stop strings.
generation_kwargs:
  # Stop strings: the start of a new "Q:" line, or one of the two end markers.
  until: ["Q:", "</s>", "<|im_end|>"]
  do_sample: true
  temperature: 0.5
  top_p: 0.95

# Five generations are requested per document, with no few-shot examples.
repeats: 5
num_fewshot: 0

# Single filter pipeline, `take_first_five`: applies `take_first_k` with
# k: 5, which matches `repeats` above.
# NOTE(review): because k equals repeats, this presumably forwards every
# sampled response to the metric -- confirm that utils.number_em is written
# to receive a list of responses rather than a single string.
filter_list:
  - name: take_first_five
    filter:
      - function: take_first_k
        k: 5