# Task definition for `mmlu_pro_both_qa`: a multiple-choice evaluation over
# the `test` split of TIGER-Lab/MMLU-Pro.

# --- Identity ---
task: mmlu_pro_both_qa
metadata:
  version: 2.1

# --- Dataset ---
dataset_path: TIGER-Lab/MMLU-Pro
dataset_kwargs:
  trust_remote_code: true
test_split: test

# --- Document handling ---
# The three `!function` entries point at callables in the `utils` module;
# their behavior is defined there, not in this file.
process_docs: !function utils.process_docs
doc_to_text: !function utils.both_qa
doc_to_choice: !function utils.get_choices
# Template that references each document's `answer_index` field.
doc_to_target: '{{answer_index}}'

# --- Scoring ---
output_type: multiple_choice
repeats: 1
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
  # Disabled metric; uncomment the three lines below to enable it.
  # - metric: acc_mutual_info
  #   aggregation: mean
  #   higher_is_better: true

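# NOTE: the commented-out keys below are inactive. They appear to be settings
# for the original MMLU dataset rather than MMLU-Pro.
# dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split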
# output_type: multiple_choice
# test_split: test
# fewshot_split: dev
# fewshot_config:
#   sampler: first_n
# doc_to_text: "Question: {{question.strip()}}\nAnswer:"
# doc_to_choice: "{{choices}}"
# doc_to_target: "{{answer}}"
# metadata:
#   version: 1.0
# dataset_kwargs:
#   trust_remote_code: true

# doc_to_text: "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:"
# # doc_to_choice: ["A", "B", "C", "D"]
# # doc_to_target: answer
# doc_to_choice: ["A. {{choices[0]}}", "B. {{choices[1]}}", "C. {{choices[2]}}", "D. {{choices[3]}}"]
# doc_to_target: "{{answer}}"
# metric_list:
