# Data source and gold-answer template for this task.
# NOTE(review): `lukaemon/bbh` looks like a dataset hub id for BIG-Bench Hard — confirm.
dataset_path: 'lukaemon/bbh'
# Free-form generation; the stop sequences live under `generation_kwargs.until`.
output_type: 'generate_until'
# Only a test split is configured here.
test_split: 'test'
# Template rendering the reference answer from each document's `target` field.
# Kept quoted so the leading `{` is not parsed as a YAML flow mapping.
doc_to_target: '{{target}}'
# Scoring: a single exact-match metric, averaged, where larger values are better.
metric_list:
  - metric: exact_match
    # Comparison flags: presumably case and punctuation are disregarded when
    # matching the extracted answer against the target — verify in the metric.
    ignore_case: true
    ignore_punctuation: true
    # How per-document scores are combined and how to read the result.
    aggregation: mean
    higher_is_better: true
# Sampling configuration for the generations that the filters in `filter_list`
# later vote over.
generation_kwargs:
  # Upper bound on generated tokens per sample.
  max_gen_toks: 1024
  # Generation stops at the first occurrence of any of these strings.
  until:
    - "</s>"
    # Stop when the model starts a new question turn. A bare "Q" would cut the
    # output at ANY capital Q (e.g. "Queen", option "(Q)"), truncating the
    # reasoning before the answer is stated.
    # NOTE(review): assumes few-shot prompts mark questions with "Q:" — confirm
    # against the task's doc_to_text.
    - "Q:"
    - "\n\n"
  # Stochastic decoding so that repeated samples differ from each other.
  do_sample: true
  temperature: 1.0
# Samples drawn per document; must be at least the largest `k` used by the
# `take_first_k` filters in `filter_list` (32).
repeats: 32
# Post-processing pipelines applied to the sampled responses of each document.
# Every pipeline extracts the text following "the answer is " and reduces the
# samples to a single prediction.
#
# Extraction regex (single-quoted so backslashes stay literal in YAML):
#   (?m)                  `$` matches at the end of every line
#   (?<=the answer is )   start right after the answer marker
#   (.*?)                 the answer text, as short as possible ...
#   (?=\.?\s*$)           ... up to an optional final period and trailing
#                         whitespace at end of line
# The earlier pattern ended in `(?=.)`, where the unescaped `.` matched ANY
# character, so a response without a final period lost its last character
# ("True" -> "Tru") and "(A)" / "(A)." were extracted as different strings,
# splitting majority votes. Inner periods such as "3.5." are preserved.
filter_list:
  # Score only the first sampled response.
  - name: "score-first"
    filter:
      - function: "regex"
        regex_pattern: '(?m)(?<=the answer is )(.*?)(?=\.?\s*$)'
      - function: "take_first"
  # Majority vote over the first 8 samples.
  - name: "maj@8"
    filter:
      - function: "take_first_k"
        k: 8
      - function: "regex"
        regex_pattern: '(?m)(?<=the answer is )(.*?)(?=\.?\s*$)'
      - function: "majority_vote"
      - function: "take_first"
  # Majority vote over the first 16 samples.
  - name: "maj@16"
    filter:
      - function: "take_first_k"
        k: 16
      - function: "regex"
        regex_pattern: '(?m)(?<=the answer is )(.*?)(?=\.?\s*$)'
      - function: "majority_vote"
      - function: "take_first"
  # Majority vote over the first 32 samples (all of them when `repeats` is 32).
  - name: "maj@32"
    filter:
      - function: "take_first_k"
        k: 32
      - function: "regex"
        regex_pattern: '(?m)(?<=the answer is )(.*?)(?=\.?\s*$)'
      - function: "majority_vote"
      - function: "take_first"
# Number of few-shot examples configured for this task.
num_fewshot: 3
metadata:
  # Revision tag of this config. Unquoted, so YAML parses it as the float 3.0.
  version: 3.0
