# GPQA task settings: dataset location, split mapping, and how the prompt and
# target are built for each document.
dataset_path: Idavidrein/gpqa
tag: gpqa
output_type: generate_until
# `!function` is a custom YAML tag: the consuming loader has to define it
# (a plain safe loader will reject it). Here it points at utils.process_docs.
process_docs: !function utils.process_docs
training_split: train
# The Hugging Face dataset only has a train split, so validation reuses it.
validation_split: train
test_split: null
# Inline prompt template, currently disabled; the active prompt comes from
# utils.doc_to_text below.
# doc_to_text: "What is the correct answer to this question:{{Question}}\nChoices:\n(A) {{choice1}}\n(B) {{choice2}}\n(C) {{choice3}}\n(D) {{choice4}}\nLet's think step by step: "
# NOTE(review): `answer` is presumably a field added by utils.process_docs;
# confirm against that helper.
doc_to_target: answer
doc_to_text: !function utils.doc_to_text
# Answer-extraction pipelines; each entry is a named chain of filter steps.
filter_list:
  # strict-match: capture the text that follows "answer is ".
  - name: strict-match
    filter:
      - function: regex
        # The trailing lookahead `(?=.)` uses an unescaped dot (any character),
        # so the greedy group captures everything after the anchor except the
        # final character (assuming default regex flags).
        # NOTE(review): an escaped period may have been intended; confirm
        # before changing, since it alters the extracted answers.
        regex_pattern: '(?<=answer is )(.*)(?=.)'
      - function: take_first
  # custom-extract: capture whatever sits inside "answer is ( ... )".
  - name: custom-extract
    filter:
      # NOTE(review): unlike strict-match, this chain has no take_first step;
      # confirm freeform_regex yields the shape the metric expects.
      - function: freeform_regex
        regex_pattern: 'answer is \((.*?)\)'
# Decoding settings: sampling is disabled and temperature is zero; generation
# is cut at the "</s>" stop string.
generation_kwargs:
  do_sample: false
  temperature: 0.0
  until: ['</s>']
# Zero-shot: no few-shot examples are requested.
num_fewshot: 0
# NOTE(review): 5 repeats with sampling disabled will likely produce identical
# generations; confirm the extra repeats are intended.
repeats: 5
# Scoring: exact_match with case and punctuation ignored, aggregated by mean;
# a higher score is better.
metric_list:
  - metric: exact_match
    ignore_case: true
    ignore_punctuation: true
    aggregation: mean
    higher_is_better: true
# Note: the unquoted value below parses as the float 1.0, not the string "1.0".
metadata:
  version: 1.0
