# Task configuration for the SuperGPQA benchmark: the model generates free
# text and a single answer letter is extracted from it for scoring.
# NOTE(review): the key layout matches lm-evaluation-harness task configs —
# confirm that is the consumer.
task: supergpqa
# Dataset identifier — presumably a Hugging Face Hub repo id; verify.
dataset_path: m-a-p/SuperGPQA
# NOTE(review): the two commented-out keys below are inactive and look like
# leftovers from a GPQA config (presumably the one this file was adapted
# from); consider removing them.
# dataset_name: gpqa_diamond
# task: gpqa_free
# Evaluation documents are read from the split named `train`.
# NOTE(review): presumably the dataset has no separate test split — confirm.
test_split: train
# The model produces free-form text, cut off by the stop strings listed under
# `generation_kwargs`.
output_type: generate_until
# `!function` is a custom YAML tag, so this file needs a loader that
# understands it. Both entries name callables in a `utils` module that is not
# part of this file (presumably a sibling utils.py — verify).
process_docs: !function utils.process_docs
doc_to_text: !function utils.doc_to_text
# Plain string rather than a `!function`; presumably the name of the document
# field holding the gold answer letter — verify against the dataset schema.
doc_to_target: answer_letter
# Post-processing applied to the raw generation before scoring. One pipeline
# named "custom-extract" with two steps, listed in the order they are written.
filter_list:
  - name: "custom-extract"
    filter:
      - function: "regex"
        # Disabled alternative: captures a letter A-J wrapped in
        # <answer>...</answer> tags. Its two alternations are equivalent,
        # since `\s` already matches newlines.
        # regex_pattern: '<answer>\s*([ABCDEFGHIJ])\s*</answer>|<answer>[\s\n]*([ABCDEFGHIJ])[\s\n]*</answer>'
        # Active pattern: the literal text "answer is " followed by one
        # uppercase letter A-J, optionally wrapped in parentheses; the letter
        # is the only capture group. Single-quoted so the backslashes stay
        # literal in YAML.
        regex_pattern: 'answer is \(?([ABCDEFGHIJ])\)?'
      # Presumably keeps only the first extracted value per document — verify
      # against the filter implementation.
      - function: "take_first"
# Decoding settings for the generation step.
generation_kwargs:
  # Stop strings: generation is presumably cut at the first occurrence of any
  # of these — verify against the consumer.
  # NOTE(review): "Q:" is short; with up to 2048 generated tokens it could
  # match inside a long answer and truncate it before the final "answer is"
  # phrase that the extraction regex looks for. Confirm it is still wanted.
  until:
    - "</s>"
    - "Q:"
    - "<|im_end|>"
  # Upper bound on generated tokens per document.
  max_gen_toks: 2048
  # Sampling is switched off and temperature is zero, which presumably makes
  # decoding deterministic (greedy) — confirm for the model backend in use.
  do_sample: false
  temperature: 0.0
# Zero-shot: no in-context examples are requested.
num_fewshot: 0
# Presumably the number of generations per document; 1 means a single
# generation — verify against the consumer.
repeats: 1
# Scoring: a single exact-match metric, averaged over documents, where a
# larger value is better.
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
    # Comparison options for exact_match; presumably case and punctuation are
    # normalized away on both prediction and target before comparing — verify
    # against the metric implementation.
    ignore_case: true
    ignore_punctuation: true
# Free-form task metadata.
metadata:
  # Config version. NOTE(review): unquoted, so YAML loads this as the float
  # 2.1, not a string; quote it only if the consumer expects a string —
  # confirm before changing.
  version: 2.1
