# Task name and the tag(s) it is grouped under.
task: multiarith_cot
tag:
  - math_word_problems

# Dataset location, configuration name, and the split used for each role.
dataset_path: ChilleD/MultiArith
dataset_name: default
test_split: test
training_split: train
fewshot_split: train

# Responses are free-form generations, cut off by the stop strings listed
# under generation_kwargs.
output_type: generate_until

# Reference answer, rendered from the document's `final_ans` field.
doc_to_target: '{{final_ans}}'
# Chain-of-thought prompt built from the document's `question` field.
# The `|` block scalar keeps exactly one trailing newline, so the rendered
# prompt ends with a line break after "Let's think step by step.".
doc_to_text: |
  Q: {{question}}
  A: Let's think step by step.

# NOTE(review): both aggregations are loaded through the `!function` tag from
# a sibling `utils` module that is not visible here; confirm their semantics
# there.
metric_list:
  # Case-insensitive exact match; punctuation is kept apart from the
  # patterns listed under regexes_to_ignore.
  - metric: exact_match
    higher_is_better: true
    aggregation: !function utils.grouped_any_mean
    ignore_case: true
    ignore_punctuation: false
    # Ignored when comparing: commas, dollar signs, one trailing period,
    # and a leading "Answer:" / "The answer is" prefix.
    regexes_to_ignore:
      - ','
      - '\$'
      - '\.$'
      - '^Answer:\s*'
      - '^The answer is\s*'
  # Lower is better. This entry sets none of the normalisation options
  # above (NOTE(review): confirm that is intended if it scores predictions
  # on its own).
  - metric: exact_match_stderr
    higher_is_better: false
    aggregation: !function utils.passk_stderr_as_metric

generation_kwargs:
  # Sampled decoding rather than greedy.
  do_sample: true
  temperature: 0.5
  top_p: 0.95
  # Stop strings: the start of another "Q:" line, or either marker string
  # below (presumably model end-of-sequence markers; confirm per model).
  until:
    - 'Q:'
    - '</s>'
    - '<|im_end|>'

# Zero-shot: no few-shot examples are requested.
num_fewshot: 0
# Five repeats per request. NOTE(review): presumably these feed the grouped
# aggregations in metric_list together with sampled decoding; confirm
# against utils.
repeats: 5

# Answer-extraction pipelines. Each one applies a regex and then
# `take_first`.
# NOTE(review): `take_first` looks like it keeps a single response per
# document; with `repeats: 5`, confirm the aggregation helpers still receive
# every sample.
filter_list:
  # Strict: only a number introduced by "Answer:" or "The answer is".
  # An optional "$" between the prefix and the number is skipped, so
  # "The answer is $15." captures "15" instead of failing to match.
  - name: "strict-match"
    filter:
      - function: "regex"
        regex_pattern: '(?:Answer:|The answer is)\s*\$?(\-?[0-9][0-9,]*(?:\.\d+)?)'
      - function: "take_first"
  # Flexible: the last number-like token anywhere in the response
  # (group_select: -1 picks the final match). Every match must contain at
  # least one digit, so punctuation-only runs such as "..." or ",," at the
  # end of a response can no longer be selected as the answer. Accepted
  # forms: optional "-", optional "$", digits with "," separators and an
  # optional fractional part, or a bare fraction such as ".5".
  - name: "flexible-extract"
    filter:
      - function: "regex"
        group_select: -1
        regex_pattern: '(-?\$?(?:[0-9][0-9,]*(?:\.[0-9]+)?|\.[0-9]+))'
      - function: "take_first"