# Run configuration for the Zephyr model with loss_type "rmu".
# NOTE(review): the meaning of each key is inferred from its name; confirm
# against the code that loads this file.
#
# Short label for the model.
model_name: zephyr
# Looks like a Hugging Face Hub repo id (org/name) — TODO confirm the loader
# resolves hub ids and not only local paths.
model_path: HuggingFaceH4/zephyr-7b-beta
# NOTE(review): purpose of num_layer (and what 0 means) is not evident from
# this file — confirm with the consumer.
num_layer: 0
# Selects the loss; "rmu" presumably refers to Representation Misdirection
# for Unlearning — confirm.
loss_type: "rmu"
# Presumably the torch dtype name used for model weights/compute — confirm.
data_type: bfloat16
# Interpolated value: `lr` is not defined in the visible part of this file,
# so it must be supplied elsewhere (e.g. a parent config or a command-line
# override), otherwise resolution is expected to fail.
learning_rate: ${lr}
# Scheduler and optimizer settings; names resemble Hugging Face
# TrainingArguments fields — presumably passed through, confirm.
lr_scheduler_type: linear
weight_decay: 0.01
warmup_ratio: 0.1

# Loss-specific hyperparameters (presumably consumed only when loss_type is
# "rmu" — confirm).
# remember_weight: presumably the weight on the retain/"remember" loss term.
remember_weight: 100
# steering_coeff: presumably the scale applied to the steering/control vector.
steering_coeff: 6.5
# layerid: presumably the layer whose activations are targeted; note that it
# is also the last entry of layerids below.
layerid: 7
# layerids: presumably the layers whose parameters are updated.
layerids: [5,6,7]

# LoRA adapter settings. The default here is r: 0, which is intended to mean
# "no LoRA" — presumably the consumer skips adapter creation when r is 0
# (TODO confirm). The remaining keys resemble PEFT LoraConfig fields and
# presumably only matter when r > 0.
Lora:
  # Adapter rank; 0 is the "LoRA disabled" default described above.
  r: 0
  alpha: 32
  dropout: 0.05
  # Plain `none` is the string "none" in YAML, not null.
  bias: none
  task_type: CAUSAL_LM