# We provide two implementations (objective_optimization):
#   1. prompt_last: the method of ROME's (https://arxiv.org/abs/2202.05262) original paper, which calculates nll loss through the last token of the input.
#   2. target_new: the standard autoregressive method, using the cross-entropy loss function

alg_name: 'FT'
model_name: './hugging_cache/Mistral-7B-v0.1'
device: 0

layers: [21]
num_steps: 25
batch_size: 1
max_length: 40
lr: 5e-4
weight_decay: 0
kl_factor: 0
norm_constraint: 5e-5
# In our survey paper(https://arxiv.org/abs/2401.01286) 
# "prompt_last" corresponds to the results of FT-L.
# "target_new" corresponds to the results of FT-M.
objective_optimization: "prompt_last"
rewrite_module_tmp: 'model.layers.{}.mlp.down_proj.weight'
layer_module_tmp: 'model.layers.{}'
mlp_module_tmp: 'model.layers.{}.mlp'
attn_module_tmp: 'model.layers.{}.self_attn'
ln_f_module: 'model.norm'
lm_head_module: 'lm_head'
model_parallel: false
