# Settings for loading the model.
# NOTE(review): the key names look like Hugging Face `from_pretrained`
# keyword arguments - confirm against the code that consumes this file.
model_args:
  pretrained_model_name_or_path: "allenai/OLMo-2-1124-7B"
  attn_implementation: "flash_attention_2"
  torch_dtype: "bfloat16"
  # Pins a specific revision; same value as tokenizer_args.revision.
  revision: "stage1-step928000-tokens3893B"
  # Disabled options, kept for reference:
  # load_in_8bit: true
  # device_map: "auto"
# Settings for loading the tokenizer. Both values below are identical to the
# corresponding entries under model_args.
tokenizer_args:
  pretrained_model_name_or_path: "allenai/OLMo-2-1124-7B"
  # Same revision string as model_args.revision.
  revision: "stage1-step928000-tokens3893B"

# Used in creating prompts for the dataset.
# See src/data/utils.py#preprocess_chat_instance.
template_args:
  # Canonical lowercase boolean, so every YAML loader reads it as a bool.
  apply_chat_template: false
  # NOTE(review): presumably these tags wrap the user and assistant turns when
  # no chat template is applied - confirm in preprocess_chat_instance.
  # The values must stay double-quoted: "\n" is a newline escape only in
  # double-quoted scalars, and the trailing spaces are part of the value.
  user_start_tag: "Question: "
  user_end_tag: "\n"
  asst_start_tag: "Answer: "
  asst_end_tag: "\n\n"
