# We provide two implementations (objective_optimization):
#   1. prompt_last: the method of ROME's (https://arxiv.org/abs/2202.05262) original paper, which calculates nll loss through the last token of the input.
#   2. target_new: the standard autoregressive method, using the cross-entropy loss function

alg_name: "FT"
model_name: meta-llama/Llama-2-7b-chat-hf
device: 0
layers: [4, 5, 6, 7, 8]
num_steps: 25
batch_size: 3
max_length: 40
lr: 7e-4
weight_decay: 0
kl_factor: 0
norm_constraint: 5e-4
grad_norm_constraint: 5e-4
num_return_sequences: 1
max_new_tokens: 3
static_target: False
sample_with_context: True
target_update_interval: 1
temperature: 100.0
print_kl: True

# In our survey paper(https://arxiv.org/abs/2401.01286)
# "prompt_last" corresponds to the results of FT-L.
# "target_new" corresponds to the results of FT-M.
objective_optimization: "prompt_last"
rewrite_module_tmp: "model.layers.{}.mlp.down_proj.weight"
layer_module_tmp: "model.layers.{}"
mlp_module_tmp: "model.layers.{}.mlp"
attn_module_tmp: "model.layers.{}.self_attn"
ln_f_module: "model.norm"
lm_head_module: "lm_head"
model_parallel: False
