---
# Explicitly null in this file.
# NOTE(review): presumably a base directory that the consumer or a caller
# override supplies for locating evaluation data - confirm.
root_dir: null

# List of evaluation tasks. Every entry sets label, dataset_uri, num_fewshot
# (always written as a list) and icl_task_type; the remaining keys
# (delimiters, do_normalization, has_categories, ...) appear only on some
# entries.
icl_tasks:
  # gsm8k: generation_task_with_answers over a file in the
  # symbolic_problem_solving data directory.
  # NOTE(review): num_fewshot is 0 while the file name says "prepended_8shot",
  # which suggests the examples are already part of each row - confirm.
  - label: gsm8k
    dataset_uri: eval/local_data/symbolic_problem_solving/gsm8k_prepended_8shot.jsonl
    num_fewshot: [0]
    icl_task_type: generation_task_with_answers
    # The trailing space is part of the value.
    cot_delimiter: 'The answer is '
    # Double-quoted so that each \n is parsed as a newline character.
    continuation_delimiter: "\n\nA:"
    # Empty string, not null.
    question_prelimiter: ''
    do_normalization: false
    # Two strings: a double newline and the literal text 'Question:'.
    # NOTE(review): presumably generation stops at the first match - confirm.
    early_stopping_criteria: ["\n\n", 'Question:']
  # triviaqa_sm_sub: generation_task_with_answers with num_fewshot 3, from the
  # world_knowledge data directory. Unlike gsm8k and svamp it sets
  # do_normalization to true and defines no delimiters or
  # early_stopping_criteria.
  - label: triviaqa_sm_sub
    dataset_uri: eval/local_data/world_knowledge/triviaqa_sm_sub.jsonl
    num_fewshot: [3]
    icl_task_type: generation_task_with_answers
    do_normalization: true
  # svamp: generation_task_with_answers with num_fewshot 5. Uses the same
  # cot_delimiter, continuation_delimiter and early_stopping_criteria values
  # as gsm8k, but a non-empty question_prelimiter.
  - label: svamp
    dataset_uri: eval/local_data/symbolic_problem_solving/svamp.jsonl
    num_fewshot: [5]
    icl_task_type: generation_task_with_answers
    # The trailing space is part of the value.
    cot_delimiter: 'The answer is '
    # Double-quoted so that each \n is parsed as a newline character.
    continuation_delimiter: "\n\nA:"
    # The trailing space is part of the value; the same text without the
    # space ('Question:') is one of the early_stopping_criteria below.
    question_prelimiter: 'Question: '
    do_normalization: false
    # Two strings: a double newline and the literal text 'Question:'.
    # NOTE(review): presumably generation stops at the first match - confirm.
    early_stopping_criteria: ["\n\n", 'Question:']
  # jeopardy: language_modeling with num_fewshot 3, from the world_knowledge
  # data directory. The label is shorter than the file stem (jeopardy_all).
  - label: jeopardy
    dataset_uri: eval/local_data/world_knowledge/jeopardy_all.jsonl
    num_fewshot: [3]
    icl_task_type: language_modeling
    # A newline followed by "Answer: "; the trailing space is part of the
    # value.
    continuation_delimiter: "\nAnswer: "
    # NOTE(review): presumably the rows carry a category field used to break
    # results out per category - confirm against the consumer.
    has_categories: true
  # bigbench_qa_wikidata: language_modeling with num_fewshot 3, from the
  # world_knowledge data directory. No optional keys are set.
  - label: bigbench_qa_wikidata
    dataset_uri: eval/local_data/world_knowledge/bigbench_qa_wikidata.jsonl
    num_fewshot: [3]
    icl_task_type: language_modeling
  # arc_easy: multiple_choice with num_fewshot 3, from the world_knowledge
  # data directory.
  - label: arc_easy
    dataset_uri: eval/local_data/world_knowledge/arc_easy.jsonl
    num_fewshot: [3]
    icl_task_type: multiple_choice
    # A newline followed by "Answer: "; the trailing space is part of the
    # value.
    continuation_delimiter: "\nAnswer: "
  # arc_challenge: multiple_choice, from the world_knowledge data directory.
  # num_fewshot lists two values (3 and 25).
  # NOTE(review): presumably the task is evaluated once per listed value -
  # confirm.
  - label: arc_challenge
    dataset_uri: eval/local_data/world_knowledge/arc_challenge.jsonl
    num_fewshot: [3, 25]
    icl_task_type: multiple_choice
    # A newline followed by "Answer: "; the trailing space is part of the
    # value.
    continuation_delimiter: "\nAnswer: "
  # mmlu: multiple_choice with num_fewshot 5, from the world_knowledge data
  # directory. The only multiple_choice entry that sets has_categories.
  - label: mmlu
    dataset_uri: eval/local_data/world_knowledge/mmlu.jsonl
    num_fewshot: [5]
    icl_task_type: multiple_choice
    # A newline followed by "Answer: "; the trailing space is part of the
    # value.
    continuation_delimiter: "\nAnswer: "
    # NOTE(review): presumably the rows carry a category field used to break
    # results out per category - confirm against the consumer.
    has_categories: true
  # copa: multiple_choice with num_fewshot 0, from the commonsense_reasoning
  # data directory. No continuation_delimiter is set here.
  # NOTE(review): presumably the consumer's default delimiter applies -
  # confirm.
  - label: copa
    dataset_uri: eval/local_data/commonsense_reasoning/copa.jsonl
    num_fewshot: [0]
    icl_task_type: multiple_choice
  # siqa: multiple_choice with num_fewshot 3, from the commonsense_reasoning
  # data directory. No optional keys are set.
  - label: siqa
    dataset_uri: eval/local_data/commonsense_reasoning/siqa.jsonl
    num_fewshot: [3]
    icl_task_type: multiple_choice
  # commonsense_qa: multiple_choice with num_fewshot 0, from the
  # commonsense_reasoning data directory. No optional keys are set.
  - label: commonsense_qa
    dataset_uri: eval/local_data/commonsense_reasoning/commonsense_qa.jsonl
    num_fewshot: [0]
    icl_task_type: multiple_choice
  # piqa: multiple_choice with num_fewshot 0. The only entry under the
  # commonsense_reasoning data directory that sets continuation_delimiter.
  - label: piqa
    dataset_uri: eval/local_data/commonsense_reasoning/piqa.jsonl
    num_fewshot: [0]
    icl_task_type: multiple_choice
    # A newline followed by "Answer: "; the trailing space is part of the
    # value.
    continuation_delimiter: "\nAnswer: "
  # openbook_qa: multiple_choice with num_fewshot 10, from the
  # commonsense_reasoning data directory. No optional keys are set.
  - label: openbook_qa
    dataset_uri: eval/local_data/commonsense_reasoning/openbook_qa.jsonl
    num_fewshot: [10]
    icl_task_type: multiple_choice
  # bigbench_strange_stories: multiple_choice with num_fewshot 0, from the
  # commonsense_reasoning data directory. No optional keys are set.
  - label: bigbench_strange_stories
    dataset_uri: eval/local_data/commonsense_reasoning/bigbench_strange_stories.jsonl
    num_fewshot: [0]
    icl_task_type: multiple_choice
  # bigbench_strategy_qa: multiple_choice with num_fewshot 0, from the
  # commonsense_reasoning data directory. No optional keys are set.
  - label: bigbench_strategy_qa
    dataset_uri: eval/local_data/commonsense_reasoning/bigbench_strategy_qa.jsonl
    num_fewshot: [0]
    icl_task_type: multiple_choice
  # bigbench_dyck_languages: language_modeling with num_fewshot 5, from the
  # symbolic_problem_solving data directory. No optional keys are set.
  - label: bigbench_dyck_languages
    dataset_uri: eval/local_data/symbolic_problem_solving/bigbench_dyck_languages.jsonl
    num_fewshot: [5]
    icl_task_type: language_modeling
  # lambada_openai: language_modeling with num_fewshot 0, from the
  # language_understanding data directory. No optional keys are set.
  - label: lambada_openai
    dataset_uri: eval/local_data/language_understanding/lambada_openai.jsonl
    num_fewshot: [0]
    icl_task_type: language_modeling
  # hellaswag: multiple_choice, from the language_understanding data
  # directory. num_fewshot lists two values (0 and 10).
  # NOTE(review): presumably the task is evaluated once per listed value -
  # confirm.
  - label: hellaswag
    dataset_uri: eval/local_data/language_understanding/hellaswag.jsonl
    num_fewshot: [0, 10]
    icl_task_type: multiple_choice
  # winograd: one of the two entries using the `schema` task type (the other
  # is winogrande), with num_fewshot 3. The label differs from the file stem
  # (winograd_wsc).
  - label: winograd
    dataset_uri: eval/local_data/language_understanding/winograd_wsc.jsonl
    num_fewshot: [3]
    icl_task_type: schema
  # winogrande: one of the two entries using the `schema` task type (the
  # other is winograd), with num_fewshot 5, from the language_understanding
  # data directory.
  - label: winogrande
    dataset_uri: eval/local_data/language_understanding/winogrande.jsonl
    num_fewshot: [5]
    icl_task_type: schema
  # bigbench_elementary_math_qa: multiple_choice with num_fewshot 1, from the
  # symbolic_problem_solving data directory. No optional keys are set.
  - label: bigbench_elementary_math_qa
    dataset_uri: eval/local_data/symbolic_problem_solving/bigbench_elementary_math_qa.jsonl
    num_fewshot: [1]
    icl_task_type: multiple_choice
  # agi_eval_lsat_ar: multiple_choice with num_fewshot 5. Its file lives
  # under symbolic_problem_solving, whereas the agi_eval_lsat_rc and
  # agi_eval_lsat_lr files are under reading_comprehension.
  - label: agi_eval_lsat_ar
    dataset_uri: eval/local_data/symbolic_problem_solving/agi_eval_lsat_ar.jsonl
    num_fewshot: [5]
    icl_task_type: multiple_choice
  # bigbench_cs_algorithms: language_modeling with num_fewshot 10, from the
  # symbolic_problem_solving data directory. No optional keys are set.
  - label: bigbench_cs_algorithms
    dataset_uri: eval/local_data/symbolic_problem_solving/bigbench_cs_algorithms.jsonl
    num_fewshot: [10]
    icl_task_type: language_modeling
  # bigbench_operators: language_modeling with num_fewshot 3, from the
  # symbolic_problem_solving data directory. No optional keys are set.
  - label: bigbench_operators
    dataset_uri: eval/local_data/symbolic_problem_solving/bigbench_operators.jsonl
    num_fewshot: [3]
    icl_task_type: language_modeling
  # simple_arithmetic_nospaces: language_modeling with num_fewshot 5. Same
  # settings as simple_arithmetic_withspaces; only the label and data file
  # differ.
  - label: simple_arithmetic_nospaces
    dataset_uri: eval/local_data/symbolic_problem_solving/simple_arithmetic_nospaces.jsonl
    num_fewshot: [5]
    icl_task_type: language_modeling
  # simple_arithmetic_withspaces: language_modeling with num_fewshot 5. Same
  # settings as simple_arithmetic_nospaces; only the label and data file
  # differ.
  - label: simple_arithmetic_withspaces
    dataset_uri: eval/local_data/symbolic_problem_solving/simple_arithmetic_withspaces.jsonl
    num_fewshot: [5]
    icl_task_type: language_modeling
  # squad: language_modeling with num_fewshot 3, from the
  # reading_comprehension data directory. No optional keys are set.
  - label: squad
    dataset_uri: eval/local_data/reading_comprehension/squad.jsonl
    num_fewshot: [3]
    icl_task_type: language_modeling
  # agi_eval_lsat_rc: multiple_choice with num_fewshot 5, from the
  # reading_comprehension data directory. No optional keys are set.
  - label: agi_eval_lsat_rc
    dataset_uri: eval/local_data/reading_comprehension/agi_eval_lsat_rc.jsonl
    num_fewshot: [5]
    icl_task_type: multiple_choice
  # agi_eval_lsat_lr: multiple_choice with num_fewshot 5, from the
  # reading_comprehension data directory. No optional keys are set.
  - label: agi_eval_lsat_lr
    dataset_uri: eval/local_data/reading_comprehension/agi_eval_lsat_lr.jsonl
    num_fewshot: [5]
    icl_task_type: multiple_choice
  # coqa: language_modeling with num_fewshot 0, from the
  # reading_comprehension data directory. No optional keys are set.
  - label: coqa
    dataset_uri: eval/local_data/reading_comprehension/coqa.jsonl
    num_fewshot: [0]
    icl_task_type: language_modeling
  # boolq: multiple_choice with num_fewshot 0. The only entry under the
  # reading_comprehension data directory that sets continuation_delimiter.
  - label: boolq
    dataset_uri: eval/local_data/reading_comprehension/boolq.jsonl
    num_fewshot: [0]
    icl_task_type: multiple_choice
    # A newline followed by "Answer: "; the trailing space is part of the
    # value.
    continuation_delimiter: "\nAnswer: "
  # agi_eval_sat_en: multiple_choice with num_fewshot 5, from the
  # reading_comprehension data directory. No optional keys are set.
  - label: agi_eval_sat_en
    dataset_uri: eval/local_data/reading_comprehension/agi_eval_sat_en.jsonl
    num_fewshot: [5]
    icl_task_type: multiple_choice
