# List of in-context-learning evaluation tasks. Every entry carries a label,
# the JSONL file it reads (dataset_uri), a list of few-shot settings
# (num_fewshot) and an icl_task_type; some add prompt-formatting keys.
# NOTE(review): the exact meaning of each key is defined by the consuming
# evaluation harness, which is not visible in this file — confirm there.
icl_tasks:
# gsm8k: generation task reading from the symbolic_problem_solving directory.
# The dataset file name says "prepended_8shot" and num_fewshot is [0];
# presumably the shots are already baked into the data — TODO confirm.
- label: gsm8k
  dataset_uri: eval/local_data/symbolic_problem_solving/gsm8k_prepended_8shot.jsonl
  num_fewshot: [0]
  icl_task_type: generation_task_with_answers
  cot_delimiter: "The answer is "
  continuation_delimiter: "\n\nA:"
  # Explicitly empty string (not null).
  question_prelimiter: ""
  do_normalization: false
  # Two strings: a blank line and the literal "Question:".
  # Presumably generation stops when either appears — confirm with the harness.
  early_stopping_criteria:
  - "\n\n"
  - "Question:"
# triviaqa_sm_sub: generation task reading from the world_knowledge directory,
# with num_fewshot [3] and do_normalization switched on. No prompt delimiters
# are set here, so whatever defaults the harness applies are used.
- label: triviaqa_sm_sub
  dataset_uri: eval/local_data/world_knowledge/triviaqa_sm_sub.jsonl
  num_fewshot: [3]
  icl_task_type: generation_task_with_answers
  do_normalization: true
# svamp: generation task reading from the symbolic_problem_solving directory,
# with num_fewshot [5]. Sets a "Question: " prelimiter, a "\n\nA:" continuation
# delimiter and a "The answer is " chain-of-thought delimiter.
- label: svamp
  dataset_uri: eval/local_data/symbolic_problem_solving/svamp.jsonl
  num_fewshot: [5]
  icl_task_type: generation_task_with_answers
  cot_delimiter: "The answer is "
  continuation_delimiter: "\n\nA:"
  question_prelimiter: "Question: "
  do_normalization: false
  # Two strings: a blank line and the literal "Question:".
  # Presumably generation stops when either appears — confirm with the harness.
  early_stopping_criteria:
  - "\n\n"
  - "Question:"
# World-knowledge tasks of type language_modeling, both with num_fewshot [3].
# jeopardy additionally sets a "\nAnswer: " continuation delimiter and
# has_categories: true (presumably per-category reporting — TODO confirm).
- label: jeopardy
  dataset_uri: eval/local_data/world_knowledge/jeopardy_all.jsonl
  num_fewshot: [3]
  icl_task_type: language_modeling
  continuation_delimiter: "\nAnswer: "
  has_categories: true
- label: bigbench_qa_wikidata
  dataset_uri: eval/local_data/world_knowledge/bigbench_qa_wikidata.jsonl
  num_fewshot: [3]
  icl_task_type: language_modeling
# World-knowledge tasks of type multiple_choice. All three use the same
# "\nAnswer: " continuation delimiter.
- label: arc_easy
  dataset_uri: eval/local_data/world_knowledge/arc_easy.jsonl
  num_fewshot: [3]
  icl_task_type: multiple_choice
  continuation_delimiter: "\nAnswer: "
# arc_challenge lists two few-shot settings (3 and 25).
- label: arc_challenge
  dataset_uri: eval/local_data/world_knowledge/arc_challenge.jsonl
  num_fewshot: [3, 25]
  icl_task_type: multiple_choice
  continuation_delimiter: "\nAnswer: "
# mmlu sets has_categories: true (presumably per-category reporting — confirm).
- label: mmlu
  dataset_uri: eval/local_data/world_knowledge/mmlu.jsonl
  num_fewshot: [5]
  icl_task_type: multiple_choice
  continuation_delimiter: "\nAnswer: "
  has_categories: true
# Commonsense-reasoning tasks, all of type multiple_choice and all reading from
# the commonsense_reasoning directory. Only piqa sets a continuation delimiter;
# the remaining entries leave it unset.
- label: copa
  dataset_uri: eval/local_data/commonsense_reasoning/copa.jsonl
  num_fewshot: [0]
  icl_task_type: multiple_choice
- label: siqa
  dataset_uri: eval/local_data/commonsense_reasoning/siqa.jsonl
  num_fewshot: [3]
  icl_task_type: multiple_choice
- label: commonsense_qa
  dataset_uri: eval/local_data/commonsense_reasoning/commonsense_qa.jsonl
  num_fewshot: [0]
  icl_task_type: multiple_choice
- label: piqa
  dataset_uri: eval/local_data/commonsense_reasoning/piqa.jsonl
  num_fewshot: [0]
  icl_task_type: multiple_choice
  continuation_delimiter: "\nAnswer: "
- label: openbook_qa
  dataset_uri: eval/local_data/commonsense_reasoning/openbook_qa.jsonl
  num_fewshot: [10]
  icl_task_type: multiple_choice
- label: bigbench_strange_stories
  dataset_uri: eval/local_data/commonsense_reasoning/bigbench_strange_stories.jsonl
  num_fewshot: [0]
  icl_task_type: multiple_choice
- label: bigbench_strategy_qa
  dataset_uri: eval/local_data/commonsense_reasoning/bigbench_strategy_qa.jsonl
  num_fewshot: [0]
  icl_task_type: multiple_choice
# bigbench_dyck_languages: language_modeling task reading from the
# symbolic_problem_solving directory, with num_fewshot [5].
- label: bigbench_dyck_languages
  dataset_uri: eval/local_data/symbolic_problem_solving/bigbench_dyck_languages.jsonl
  num_fewshot: [5]
  icl_task_type: language_modeling
# Language-understanding tasks, all reading from the language_understanding
# directory. Three task types appear: language_modeling (lambada_openai),
# multiple_choice (hellaswag) and schema (winograd, winogrande).
- label: lambada_openai
  dataset_uri: eval/local_data/language_understanding/lambada_openai.jsonl
  num_fewshot: [0]
  icl_task_type: language_modeling
# hellaswag lists two few-shot settings (0 and 10).
- label: hellaswag
  dataset_uri: eval/local_data/language_understanding/hellaswag.jsonl
  num_fewshot: [0, 10]
  icl_task_type: multiple_choice
# Note that the winograd label maps to the file winograd_wsc.jsonl.
- label: winograd
  dataset_uri: eval/local_data/language_understanding/winograd_wsc.jsonl
  num_fewshot: [3]
  icl_task_type: schema
- label: winogrande
  dataset_uri: eval/local_data/language_understanding/winogrande.jsonl
  num_fewshot: [5]
  icl_task_type: schema
# Symbolic problem-solving tasks, all reading from the symbolic_problem_solving
# directory. The first two are multiple_choice; the remaining four are
# language_modeling. None of them sets prompt delimiters.
- label: bigbench_elementary_math_qa
  dataset_uri: eval/local_data/symbolic_problem_solving/bigbench_elementary_math_qa.jsonl
  num_fewshot: [1]
  icl_task_type: multiple_choice
- label: agi_eval_lsat_ar
  dataset_uri: eval/local_data/symbolic_problem_solving/agi_eval_lsat_ar.jsonl
  num_fewshot: [5]
  icl_task_type: multiple_choice
- label: bigbench_cs_algorithms
  dataset_uri: eval/local_data/symbolic_problem_solving/bigbench_cs_algorithms.jsonl
  num_fewshot: [10]
  icl_task_type: language_modeling
- label: bigbench_operators
  dataset_uri: eval/local_data/symbolic_problem_solving/bigbench_operators.jsonl
  num_fewshot: [3]
  icl_task_type: language_modeling
# The two simple_arithmetic variants differ only in label and data file.
- label: simple_arithmetic_nospaces
  dataset_uri: eval/local_data/symbolic_problem_solving/simple_arithmetic_nospaces.jsonl
  num_fewshot: [5]
  icl_task_type: language_modeling
- label: simple_arithmetic_withspaces
  dataset_uri: eval/local_data/symbolic_problem_solving/simple_arithmetic_withspaces.jsonl
  num_fewshot: [5]
  icl_task_type: language_modeling
# Reading-comprehension tasks, all reading from the reading_comprehension
# directory. squad and coqa are language_modeling; the rest are
# multiple_choice. Only boolq sets a continuation delimiter.
- label: squad
  dataset_uri: eval/local_data/reading_comprehension/squad.jsonl
  num_fewshot: [3]
  icl_task_type: language_modeling
- label: agi_eval_lsat_rc
  dataset_uri: eval/local_data/reading_comprehension/agi_eval_lsat_rc.jsonl
  num_fewshot: [5]
  icl_task_type: multiple_choice
- label: agi_eval_lsat_lr
  dataset_uri: eval/local_data/reading_comprehension/agi_eval_lsat_lr.jsonl
  num_fewshot: [5]
  icl_task_type: multiple_choice
- label: coqa
  dataset_uri: eval/local_data/reading_comprehension/coqa.jsonl
  num_fewshot: [0]
  icl_task_type: language_modeling
- label: boolq
  dataset_uri: eval/local_data/reading_comprehension/boolq.jsonl
  num_fewshot: [0]
  icl_task_type: multiple_choice
  continuation_delimiter: "\nAnswer: "
- label: agi_eval_sat_en
  dataset_uri: eval/local_data/reading_comprehension/agi_eval_sat_en.jsonl
  num_fewshot: [5]
  icl_task_type: multiple_choice
