# Identifier for this configuration.
name: llama
# Model-loading options. The key names match the keyword arguments of
# Hugging Face `from_pretrained`; presumably they are forwarded to it
# as-is -- TODO confirm against the loader that reads this file.
model:
  pretrained_model_name_or_path: "meta-llama/Llama-3.1-8B"
  # Directory where checkpoint weights are saved; set this to a location
  # that suits your environment.
  cache_dir: "/HF_Cache/"
  return_dict: true
  # Both weight-quantized loading switches are turned off.
  load_in_8bit: false
  load_in_4bit: false
  # NOTE(review): the model is placed on CPU here while
  # `attn_implementation` requests flash_attention_2; presumably the model
  # is moved to an accelerator after loading -- confirm.
  device_map: cpu
  low_cpu_mem_usage: true
  torch_dtype: bfloat16
  attn_implementation: flash_attention_2
  # Rotary position embedding (RoPE) overrides.
  # NOTE(review): these look like the values published with the Llama 3.1
  # checkpoints -- verify against the checkpoint's own config.json.
  rope_theta: 500000.0
  rope_scaling:
    factor: 8.0
    low_freq_factor: 1.0
    high_freq_factor: 4.0
    original_max_position_embeddings: 8192
    rope_type: llama3


# Attention settings: which attention variant to use, its KV quantization
# parameters, and which rotations to apply.
attention:
  attention_type: resq_attention
  # KV quantization parameters (presumably for the KV cache -- confirm
  # against the code that reads `kvquant`).
  kvquant:
    # Presumably the bit width of the quantized values -- confirm.
    nbits: 2
    # NOTE(review): 124 is not a typo candidate for 128 without checking:
    # 124 = 128 * (1 - highfrac), i.e. what remains of 128 channels after
    # the `highfrac` share (4 channels) is set aside. Presumably this
    # assumes a head dimension of 128 -- confirm before changing either
    # value, since they appear to be coupled.
    q_group_size: 124
    residual_length: 128
    # 0.03125 = 1/32.
    highfrac: 0.03125
  # Plain comma-separated string; presumably split on "," by the consumer
  # into the individual rotation names -- confirm.
  apply_rotations: resq,key,value


