# Task name and the tag it is listed under.
tag: gpqa
task: gpqa_diamond

# Data source: the `gpqa_diamond` configuration of `Idavidrein/gpqa`.
dataset_path: Idavidrein/gpqa
dataset_name: gpqa_diamond
# The Hugging Face dataset only provides a `train` split, so that split is
# named for both training and validation and no test split is configured.
training_split: train
validation_split: train
test_split: null
# Documents are passed through `utils.process_docs`; presumably this is what
# supplies the `choice1`..`choice4` and `answer` fields used below -- TODO
# confirm against utils.py.
process_docs: !function utils.process_docs

# Free-form generation task: the prompt below is rendered per document and the
# `answer` field is the reference.
output_type: generate_until
# Prompt template. `{{ ... }}` placeholders are filled from the document; the
# final line asks for the answer in the form `The answer is (<Choice>).`
doc_to_text: |-
  What is the correct answer to this question: {{Question.strip()}}
  Choices:
  (A) {{choice1}}
  (B) {{choice2}}
  (C) {{choice3}}
  (D) {{choice4}}
  Please think step by step and answer the question with "The answer is (<Choice>)."
doc_to_target: answer
# Decoding settings: sampling is enabled at temperature 0.1, with a budget of
# 1024 generated tokens and an empty list of stop sequences.
generation_kwargs:
  do_sample: true
  temperature: 0.1
  max_gen_toks: 1024
  until: []
# Zero-shot: no few-shot examples are prepended to the prompt.
num_fewshot: 0
# Presumably requests four generations per document -- TODO confirm how the
# repeated generations are combined downstream.
repeats: 4
# Post-processing of model output before scoring. A single pipeline named
# `flexible-extract` applies the `multi_choice_regex` filter with a pattern
# that matches a parenthesised capital letter such as `(A)`.
# NOTE(review): `group_select: -1` presumably picks the last match in the
# response -- confirm against the filter implementation.
# NOTE(review): no later step reduces multiple responses to one prediction
# even though the task sets `repeats: 4`; confirm the metric is meant to
# receive every extraction.
filter_list:
  - name: flexible-extract
    filter:
      - function: multi_choice_regex
        regex_pattern: '(\([A-Z]\))'
        group_select: -1
        ignore_case: true
        ignore_punctuation: true
# Scoring: exact match between the extracted answer and the target, with case
# and punctuation ignored, averaged over documents (higher is better).
metric_list:
  - metric: exact_match
    ignore_case: true
    ignore_punctuation: true
    aggregation: mean
    higher_is_better: true
# Version number recorded for this task configuration.
metadata:
  version: 1.0
