# Run-level settings.
# NOTE(review): epoch, dataset and num_params look like descriptive labels for
# the checkpoint under evaluation — confirm against whatever consumes this file.
# The number-like labels are quoted so they stay strings under any YAML loader.
epoch: "1.25T"
dataset: bigdata
num_params: "1B"
# Same value as tokenizer.kwargs.model_max_length further down.
max_seq_len: 2048
seed: 1
precision: fp32

# Tokenizer settings.
tokenizer:
  # Placeholder kept from the template:
  # name: [Add name from memory]
  # Unset in this template (parses as null).
  # NOTE(review): presumably filled in by the user or the launching script
  # before the tokenizer is built — confirm.
  pretrained_model_name_or_path: null
  kwargs:
    # Same value as the top-level max_seq_len.
    model_max_length: 2048

# Model to evaluate.
model:
  name: open_lm
  # Placeholder kept from the template; the model name/path is not set here.
  # pretrained_model_name_or_path: [add name from memory]
  init_device: cpu
  # NOTE(review): presumably requests loading pretrained weights rather than
  # a fresh initialisation — confirm against the model builder.
  pretrained: true

# Add your (optional) Composer checkpoint path here!
# Unset in this template (parses as null).
load_path: null

# Evaluation batch size per device.
device_eval_batch_size: 2

# FSDP config for model sharding
fsdp_config:
  sharding_strategy: FULL_SHARD
  # NOTE(review): FULL presumably selects full precision throughout, which
  # would agree with the top-level `precision: fp32` — confirm in Composer docs.
  mixed_precision: FULL


# In-context-learning evaluation tasks.
# Every entry is a zero-shot (num_fewshot: [0]) code_evaluation task with
# pass_at_k 1, batch_size 1 and num_beams 5; only label and dataset_uri differ.
# ADD YOUR OWN DATASET URI: replace each dataset_uri with the location of your
# own copy of the corresponding .jsonl file.
icl_tasks:
- label: human_eval
  dataset_uri: local_data/programming/human_eval.jsonl
  num_fewshot: [0]
  pass_at_k: 1
  batch_size: 1
  icl_task_type: code_evaluation
  generation_kwargs:
    num_beams: 5
- label: human_eval_cpp
  dataset_uri: local_data/programming/processed_human_eval_cpp.jsonl
  num_fewshot: [0]
  pass_at_k: 1
  batch_size: 1
  icl_task_type: code_evaluation
  generation_kwargs:
    num_beams: 5
- label: human_eval_js
  dataset_uri: local_data/programming/processed_human_eval_js.jsonl
  num_fewshot: [0]
  pass_at_k: 1
  batch_size: 1
  icl_task_type: code_evaluation
  generation_kwargs:
    num_beams: 5
- label: human_eval_return_simple
  dataset_uri: local_data/programming/human_eval_return_simple.jsonl
  num_fewshot: [0]
  pass_at_k: 1
  batch_size: 1
  icl_task_type: code_evaluation
  generation_kwargs:
    num_beams: 5
- label: human_eval_return_complex
  dataset_uri: local_data/programming/human_eval_return_complex.jsonl
  num_fewshot: [0]
  pass_at_k: 1
  batch_size: 1
  icl_task_type: code_evaluation
  generation_kwargs:
    num_beams: 5
- label: human_eval_25
  dataset_uri: local_data/programming/human_eval-0.25.jsonl
  num_fewshot: [0]
  pass_at_k: 1
  batch_size: 1
  icl_task_type: code_evaluation
  generation_kwargs:
    num_beams: 5
- label: human_eval_50
  dataset_uri: local_data/programming/human_eval-0.5.jsonl
  num_fewshot: [0]
  pass_at_k: 1
  batch_size: 1
  icl_task_type: code_evaluation
  generation_kwargs:
    num_beams: 5
- label: human_eval_75
  dataset_uri: local_data/programming/human_eval-0.75.jsonl
  num_fewshot: [0]
  pass_at_k: 1
  batch_size: 1
  icl_task_type: code_evaluation
  generation_kwargs:
    num_beams: 5
