# Hyperparameters for the "FT" editing algorithm on a local ChatGLM2-6B
# checkpoint.
alg_name: "FT"
model_name: ./hugging_cache/chatglm2-6b
# NOTE(review): presumably the accelerator (GPU) index to run on -- confirm
# against the code that consumes this file.
device: 1

# Editing hyperparameters.
layers: [27]
num_steps: 40
batch_size: 1
max_length: 40
# Written with an explicit decimal point: YAML 1.1 loaders such as PyYAML
# resolve a bare `5e-4` as the string "5e-4" rather than the float 0.0005.
lr: 5.0e-4
weight_decay: 0
kl_factor: 0
norm_constraint: false
# Selects the optimization objective. In our survey paper
# (https://arxiv.org/abs/2401.01286):
#   "prompt_last" corresponds to the results of FT-L.
#   "target_new" corresponds to the results of FT-M.
objective_optimization: "prompt_last"

# Module paths inside the model. NOTE(review): `{}` is presumably filled with
# a layer index taken from `layers` -- confirm against the loader.
rewrite_module_tmp: "transformer.encoder.layers.{}.mlp.dense_4h_to_h"
layer_module_tmp: "transformer.encoder.layers.{}"
mlp_module_tmp: "transformer.encoder.layers.{}.mlp"
attn_module_tmp: "transformer.encoder.layers.{}.self_attention"
ln_f_module: "transformer.encoder.final_layernorm"
lm_head_module: "transformer.output_layer"
model_parallel: false
