# Task configuration for `mmlu_pro_both_verify`: a two-way multiple-choice
# task whose candidate answers are the strings "True" and "False".
# Document preprocessing, prompt text and target lookup are delegated to the
# functions named under `utils` through the `!function` tag.
task: mmlu_pro_both_verify

# --- Data source ---
dataset_path: XXXX-7/MMLU-Pro-Verify
dataset_kwargs:
  trust_remote_code: true
# The split named `train` is configured as the test split.
test_split: train
process_docs: !function utils.process_docs

# --- Prompt, choices and target ---
output_type: multiple_choice
doc_to_text: !function utils.both_verify_prompt
# The choices are quoted so they load as strings rather than YAML booleans.
# Alternative lettered choices (disabled):
# doc_to_choice: ["A. True", "B. False"]
doc_to_choice:
  - "True"
  - "False"
doc_to_target: !function utils.get_answer_index
repeats: 1

# --- Metrics ---
# Both enabled metrics are averaged, and a higher value is better.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
  # Disabled metric:
  # - metric: acc_mutual_info
  #   aggregation: mean
  #   higher_is_better: true

# --- Bookkeeping ---
metadata:
  version: 2.1

# Inactive reference settings: every line below is commented out and has no effect on this task.
# dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split
# output_type: multiple_choice
# test_split: test
# fewshot_split: dev
# fewshot_config:
#   sampler: first_n
# doc_to_text: "Question: {{question.strip()}}\nAnswer:"
# doc_to_choice: "{{choices}}"
# doc_to_target: "{{answer}}"
# metadata:
#   version: 1.0
# dataset_kwargs:
#   trust_remote_code: true
