# Model
# model_name and tokenizer_name hold the same local checkpoint path.
model_name: '/yanjianhao/huggingface/llama2-7b'
model_class: LlamaForCausalLM
tokenizer_class: LlamaTokenizer
tokenizer_name: '/yanjianhao/huggingface/llama2-7b'
model_parallel: true
# MLP gate/up/down projection weights of layers 29, 30 and 31.
# NOTE(review): presumably the parameters the editing method operates on;
# confirm against the code that consumes this file.
inner_params:
  # layer 29
  - model.layers.29.mlp.gate_proj.weight
  - model.layers.29.mlp.up_proj.weight
  - model.layers.29.mlp.down_proj.weight
  # layer 30
  - model.layers.30.mlp.gate_proj.weight
  - model.layers.30.mlp.up_proj.weight
  - model.layers.30.mlp.down_proj.weight
  # layer 31
  - model.layers.31.mlp.gate_proj.weight
  - model.layers.31.mlp.up_proj.weight
  - model.layers.31.mlp.down_proj.weight

# No archive is configured (explicit null).
# NOTE(review): presumably a path to a previously saved checkpoint to load;
# confirm against the code that consumes this file.
archive: null

# Method
# Hyperparameters for the editing algorithm selected by `alg`.
# NOTE(review): the meaning of individual keys is inferred from their names;
# confirm against the code that consumes this file.
alg: MEND
# Rates are written with an explicit fractional mantissa (1.0e-6, not 1e-6):
# YAML 1.1 loaders such as PyYAML only resolve scientific notation as a float
# when the mantissa contains a '.', otherwise the value is loaded as a string.
lr: 1.0e-6
edit_lr: 1.0e-4
lr_lr: 1.0e-4
seed: 42
# Coefficients; presumably weights of the edit / locality / base loss terms
# (TODO confirm).
cedit: 0.1
cloc: 1.0
cbase: 1.0
dropout: 0.0
train_base: false
no_grad_layers: null
one_sided: false
n_hidden: 1
hidden_dim: null
init: id
norm: true
combine: true
x_only: false
delta_only: false
act: relu
rank: 1920
mlp_class: IDMLP
shared: true

# Train
# Training-loop settings. Booleans use canonical lowercase true/false so they
# resolve identically under YAML 1.1 and 1.2 loaders.
device: cuda:0
batch_size: 1
model_save_pt: 5000
silent: false
max_epochs: 10
# Disabled setting, kept for reference:
# max_iters: 100000
log_interval: 100
eval_log_interval: 100
final_eval: true
val_interval: 100
early_stop_patience: 100
# Alternative early-stopping key, currently disabled:
# early_stop_key: "edit/acc_val"
early_stop_key: loss/edit_val
eval_only: false
half: false
debug: false
save: false
verbose: true

val_batch_size: 5
accumulate_bs: 10
val_steps: 500  # only for debug
opt: Adam
grad_clip: 100.0

# Output
# Results directory, given relative to the current directory ('./').
results_dir: './results_llama_stereoset'