# Task configuration for evaluating on the TIGER-Lab/MMLU-Pro dataset.
dataset_path: TIGER-Lab/MMLU-Pro
# Split that is scored, and split that few-shot examples are drawn from.
test_split: test
fewshot_split: validation
fewshot_config:
  # presumably picks the first `num_fewshot` documents of the few-shot split — confirm against the harness
  sampler: first_n
  # Few-shot examples are rendered by utils.fewshot_to_text. The target is an
  # empty string, so presumably that helper already emits the worked answer — TODO confirm.
  doc_to_text: !function utils.fewshot_to_text
  doc_to_target: ""
output_type: generate_until
# The prompt for the document under evaluation is built by utils.doc_to_text.
doc_to_text: !function utils.doc_to_text
# presumably the name of the dataset field holding the gold answer letter — verify against the dataset schema
doc_to_target: answer
# NOTE: `filter_list` is intentionally defined only once, further down in this
# file (it contains the "custom-extract" pipeline plus the maj@k pipelines).
# Defining the key a second time here would be a duplicate mapping key: invalid
# YAML 1.2, silently overridden by the later definition in last-key-wins
# parsers and rejected outright by strict ones.
# Alternative answer-extraction pattern kept for reference (not active):
#   regex_pattern: r".*[aA]nswer:\s*([A-J])",
# Generation settings for the model under test.
generation_kwargs:
  # presumably stop sequences that terminate generation — confirm against the harness
  until:
    - "</s>"
    - "Q:"
    - "<|im_end|>"
  # Sampling is enabled (with a non-zero temperature) rather than greedy
  # decoding. Canonical lowercase boolean avoids YAML 1.1/1.2 truthy ambiguity.
  do_sample: true
  temperature: 0.6
num_fewshot: 5
# 32 equals the largest k used by the maj@k entries in `filter_list` (maj@32);
# presumably this is the number of generations per document — TODO confirm.
repeats: 32
# Metrics reported for this task: a single exact-match score, averaged.
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
    # Comparison options: case and punctuation are flagged to be ignored.
    ignore_case: true
    ignore_punctuation: true
# Post-processing pipelines applied to the generated responses. Every pipeline
# extracts a single answer letter (A-J) following the phrase "answer is",
# with optional surrounding parentheses.
filter_list:
  # Single-response extraction.
  - name: custom-extract
    filter:
      - function: regex
        regex_pattern: 'answer is \(?([ABCDEFGHIJ])\)?'
      - function: take_first

  # Majority-vote pipelines: limit to k responses, extract the letter from
  # each, vote, then keep one result. They differ only in k (8, 16, 32).
  - name: maj@8
    filter:
      - function: take_first_k
        k: 8
      - function: regex
        regex_pattern: 'answer is \(?([ABCDEFGHIJ])\)?'
      - function: majority_vote
      - function: take_first

  - name: maj@16
    filter:
      - function: take_first_k
        k: 16
      - function: regex
        regex_pattern: 'answer is \(?([ABCDEFGHIJ])\)?'
      - function: majority_vote
      - function: take_first

  - name: maj@32
    filter:
      - function: take_first_k
        k: 32
      - function: regex
        regex_pattern: 'answer is \(?([ABCDEFGHIJ])\)?'
      - function: majority_vote
      - function: take_first
# Task metadata. Note that the unquoted value below is parsed as the float 1.0.
metadata:
  version: 1.0
