# Descriptive labels for this configuration. The number-looking ones are
# quoted so they are unmistakably strings.
# NOTE(review): presumably these identify the checkpoint being evaluated
# (training budget, dataset, model size) — confirm against the consumer.
epoch: "1.25T"
dataset: bigdata
num_params: "1B"

# Maximum sequence length; tokenizer.kwargs.model_max_length in this file is
# set to the same value.
max_seq_len: 2048
seed: 1
precision: fp32

# Tokenizer settings.
tokenizer:
  # name: [Add name from memory]
  # No tokenizer path is set in this file (explicit null).
  # NOTE(review): presumably filled in by the caller before the tokenizer is
  # built — confirm.
  pretrained_model_name_or_path: null
  kwargs:
    # Same value as the top-level max_seq_len.
    model_max_length: 2048

# Model settings.
model:
  name: open_lm
  # The placeholder below is commented out, so this file sets no model path.
  # pretrained_model_name_or_path: [add name from memory]
  init_device: cpu
  pretrained: true

# Optional: set this to your Composer checkpoint path. Null means no
# checkpoint path is configured in this file.
load_path: null

# NOTE(review): presumably the per-device batch size used during evaluation —
# confirm against the consumer. The human_eval task in icl_tasks also sets its
# own batch_size.
device_eval_batch_size: 2

# FSDP config for model sharding
fsdp_config:
  # NOTE(review): presumably corresponds to PyTorch FSDP's
  # ShardingStrategy.FULL_SHARD — confirm against the consumer.
  sharding_strategy: FULL_SHARD
  # NOTE(review): FULL presumably means full precision (no reduced-precision
  # casting), in line with the top-level `precision: fp32` — confirm against
  # the Composer FSDP documentation for the version in use.
  mixed_precision: FULL


# In-context-learning evaluation tasks. Every entry has a label, a JSONL
# dataset path, a list of few-shot counts and an icl_task_type; some entries
# add prompt delimiters or task-specific options.
icl_tasks:
  - label: human_eval
    dataset_uri: local_data/programming/human_eval.jsonl
    num_fewshot: [0]
    pass_at_k: 1
    batch_size: 1
    icl_task_type: code_evaluation
  - label: gsm8k
    dataset_uri: local_data/symbolic_problem_solving/gsm8k.jsonl
    num_fewshot: [3]
    icl_task_type: generation_task_with_answers
    cot_delimiter: ' #### '
    continuation_delimiter: "\nA: Let's think step by step. "
    question_prelimiter: "Q: "
  - label: agi_eval_sat_math
    dataset_uri: local_data/symbolic_problem_solving/agi_eval_sat_math.jsonl
    num_fewshot: [3]
    icl_task_type: generation_task_with_answers
    cot_delimiter: ' #### '
    continuation_delimiter: "\nA: Let's think step by step. "
  - label: aqua
    dataset_uri: local_data/symbolic_problem_solving/aqua.jsonl
    num_fewshot: [3]
    icl_task_type: generation_task_with_answers
    cot_delimiter: ' #### '
    continuation_delimiter: "\nA: Let's think step by step. "
  - label: svamp
    dataset_uri: local_data/symbolic_problem_solving/svamp.jsonl
    num_fewshot: [3]
    icl_task_type: generation_task_with_answers
    continuation_delimiter: "\nUsing the formula below:\n"
    cot_delimiter: ' #### '
    question_prelimiter: "Q: "
  - label: bigbench_elementary_math_qa
    dataset_uri: local_data/symbolic_problem_solving/bigbench_elementary_math_qa.jsonl
    num_fewshot: [10]
    icl_task_type: multiple_choice
  - label: bigbench_dyck_languages
    dataset_uri: local_data/symbolic_problem_solving/bigbench_dyck_languages.jsonl
    num_fewshot: [10]
    icl_task_type: language_modeling
  - label: agi_eval_lsat_ar
    dataset_uri: local_data/symbolic_problem_solving/agi_eval_lsat_ar.jsonl
    num_fewshot: [3]
    icl_task_type: multiple_choice
  - label: bigbench_cs_algorithms
    dataset_uri: local_data/symbolic_problem_solving/bigbench_cs_algorithms.jsonl
    num_fewshot: [10]
    icl_task_type: language_modeling
  - label: bigbench_logical_deduction
    dataset_uri: local_data/symbolic_problem_solving/bigbench_logical_deduction.jsonl
    num_fewshot: [10]
    icl_task_type: multiple_choice
  - label: bigbench_operators
    dataset_uri: local_data/symbolic_problem_solving/bigbench_operators.jsonl
    num_fewshot: [10]
    icl_task_type: language_modeling
  - label: bigbench_repeat_copy_logic
    dataset_uri: local_data/symbolic_problem_solving/bigbench_repeat_copy_logic.jsonl
    num_fewshot: [10]
    icl_task_type: language_modeling
  - label: simple_arithmetic_nospaces
    dataset_uri: local_data/symbolic_problem_solving/simple_arithmetic_nospaces.jsonl
    num_fewshot: [10]
    icl_task_type: language_modeling
  - label: simple_arithmetic_withspaces
    dataset_uri: local_data/symbolic_problem_solving/simple_arithmetic_withspaces.jsonl
    num_fewshot: [10]
    icl_task_type: language_modeling
  - label: math_qa
    dataset_uri: local_data/symbolic_problem_solving/math_qa.jsonl
    num_fewshot: [10]
    icl_task_type: multiple_choice
  - label: logi_qa
    dataset_uri: local_data/symbolic_problem_solving/logi_qa.jsonl
    num_fewshot: [10]
    icl_task_type: multiple_choice
    # this separates questions from answers
    continuation_delimiter: "\nAnswer: "