# --- Task identity and evaluation mode ---------------------------------------
task: mmlu_pro_only_q
output_type: multiple_choice
repeats: 1

# --- Data source --------------------------------------------------------------
# Scoring uses the `test` split; few-shot examples come from `validation`.
dataset_path: TIGER-Lab/MMLU-Pro
test_split: test
fewshot_split: validation
dataset_kwargs:
  # NOTE(review): presumably forwarded to the dataset loader and permits
  # repository-supplied loading code — confirm it is still required.
  trust_remote_code: true
# Documents pass through this hook, defined in `utils` (not visible here).
process_docs: !function utils.process_docs

# --- Few-shot prompting -------------------------------------------------------
num_fewshot: 5
fewshot_config:
  sampler: first_n
  doc_to_text: !function utils.fewshot_only_question
  # Few-shot examples use an empty target string.
  doc_to_target: ''

# --- Prompt, choices and gold label for each evaluated document ---------------
doc_to_text: !function utils.only_question
# Disabled alternative for the choices: !function utils.get_choices
doc_to_choice: '{{options}}'
doc_to_target: '{{answer_index}}'

# --- Scoring ------------------------------------------------------------------
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
  # Disabled metric, kept for reference:
  # - metric: acc_mutual_info
  #   aggregation: mean
  #   higher_is_better: true

metadata:
  version: 2.1

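# Disabled alternative configuration (every line below is commented out),
# pointing at the `hails/mmlu_no_train` dataset instead of MMLU-Pro:
# dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split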
# output_type: multiple_choice
# test_split: test
# fewshot_split: dev
# fewshot_config:
#   sampler: first_n
# doc_to_text: "Question: {{question.strip()}}\nAnswer:"
# doc_to_choice: "{{choices}}"
# doc_to_target: "{{answer}}"
# metadata:
#   version: 1.0
# dataset_kwargs:
#   trust_remote_code: true
