# Llama 2 7B chat entry. Per the original note, these settings follow the
# TOFU setup (presumably the TOFU benchmark's model config - confirm).
llama2-7b:
  # Hugging Face model identifier to load.
  hf_key: "NousResearch/Llama-2-7b-chat-hf"
  # Text placed before the user's question (note the trailing space).
  question_start_tag: "[INST] "
  # Text placed after the user's question (note the leading space).
  question_end_tag: " [/INST]"
  # Text placed before the answer; empty means no extra tag is used.
  answer_tag: ""
  # Both switches are the quoted string "true", not a YAML boolean.
  # NOTE(review): presumably the consumer compares against the string -
  # confirm before converting these to real booleans.
  flash_attention2: "true"
  gradient_checkpointing: "true"
# Llama 3 8B Instruct entry. The tags use the header/end-of-turn marker
# style (<|start_header_id|>, <|end_header_id|>, <|eot_id|>).
llama3-8b:
  # Hugging Face model identifier to load.
  hf_key: "meta-llama/Meta-Llama-3-8B-Instruct"
  # Opens the user turn. Double quotes are required so that each \n is
  # parsed as a real newline rather than a backslash followed by "n".
  question_start_tag: "<|start_header_id|>user<|end_header_id|>\n\n"
  # Closes the user turn and opens the assistant turn.
  # NOTE(review): no begin-of-text token appears in these tags; presumably
  # the tokenizer adds it - confirm.
  question_end_tag: "<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
  # Text placed before the answer; empty means no extra tag is used.
  answer_tag: ""
  # Both switches are the quoted string "true", not a YAML boolean.
  # NOTE(review): presumably the consumer compares against the string -
  # confirm before converting these to real booleans.
  flash_attention2: "true"
  gradient_checkpointing: "true"
# Phi-1.5 entry.
# NOTE(review): the question tags below are identical to the ones used by
# the phi3-4k entry, but microsoft/phi-1_5 is a base (non-chat) model.
# Confirm that its tokenizer and any fine-tuned checkpoints really expect
# this <|user|>/<|assistant|> template before relying on it.
phi1.5:
  # Hugging Face model identifier to load.
  hf_key: "microsoft/phi-1_5"
  # Opens the user turn; \n is a real newline (double-quoted scalar).
  question_start_tag: "<|user|>\n"
  # Closes the user turn and opens the assistant turn.
  question_end_tag: "<|end|>\n<|assistant|>\n"
  # Text placed before the answer; empty means no extra tag is used.
  answer_tag: ""
  # Both switches are the quoted string "true", not a YAML boolean.
  # NOTE(review): presumably the consumer compares against the string -
  # confirm before converting these to real booleans.
  flash_attention2: "true"
  gradient_checkpointing: "true"
# Phi-3 family entry.
# NOTE(review): the key says "phi3-4k" but hf_key points at
# Phi-3.5-mini-instruct, not a Phi-3 "4k" checkpoint. Confirm which model
# is intended; the key is left unchanged because callers may look it up
# by name.
phi3-4k:
  # Hugging Face model identifier to load.
  hf_key: "microsoft/Phi-3.5-mini-instruct"
  # Opens the user turn; \n is a real newline (double-quoted scalar).
  question_start_tag: "<|user|>\n"
  # Closes the user turn and opens the assistant turn.
  question_end_tag: "<|end|>\n<|assistant|>\n"
  # Text placed before the answer; empty means no extra tag is used.
  answer_tag: ""
  # Both switches are the quoted string "true", not a YAML boolean.
  # NOTE(review): presumably the consumer compares against the string -
  # confirm before converting these to real booleans.
  flash_attention2: "true"
  gradient_checkpointing: "true"
# Phi-2 entry.
# NOTE(review): the question tags are the same <|user|>/<|assistant|>
# template used by the other phi entries in this file; confirm that it
# matches what the phi-2 tokenizer and checkpoints expect.
phi2:
  # Hugging Face model identifier to load.
  hf_key: "microsoft/phi-2"
  # Start tag for user input; \n is a real newline (double-quoted scalar).
  question_start_tag: "<|user|>\n"
  # End tag for user input and start of the assistant response.
  question_end_tag: "<|end|>\n<|assistant|>\n"
  # Tag for the assistant's response; empty means no extra tag is used.
  answer_tag: ""
  # Enable FlashAttention-2 for faster attention computation.
  # Kept as the quoted string "true", not a YAML boolean.
  flash_attention2: "true"
  # Enable gradient checkpointing to save memory.
  # Kept as the quoted string "true", not a YAML boolean.
  gradient_checkpointing: "true"