# Hyperparameters for one editing run against the checkpoint in `model_name`.
# NOTE(review): key semantics are defined by the loader that consumes this
# file, which is not visible here; confirm hedged remarks against that code.
alg_name: 'MEMIT'
# Relative paths — presumably resolved from the working directory of the
# process that loads this file; confirm against the loader.
model_name: './pythia-70m-deduped/step3000'
stats_dir: './data/stats'
device: 0

# Layer indices 0-5 are listed; `v_loss_layer` below is the highest of them.
layers: [0, 1, 2, 3, 4, 5]
clamp_norm_factor: 4
layer_selection: 'all'
fact_token: 'subject_last'

# `v_lr` and `v_weight_decay` are spelled as plain decimals on purpose.
# YAML 1.1 resolvers (PyYAML among them) only treat exponent notation as a
# float when the mantissa contains a dot, so `5e-1` / `1e-3` would load as
# strings there. 0.5 and 0.001 are the same numbers and load as floats
# under every YAML version.
v_num_grad_steps: 25
v_lr: 0.5
v_loss_layer: 5
v_weight_decay: 0.001
kl_factor: 0.0625

mom2_adjustment: true
mom2_update_weight: 15000
# Alternative module-path templates, kept disabled. The names follow a
# `model.layers.*` layout (looks like a Llama-style model — confirm before
# enabling); swap them in for the active set below when targeting such a model.
# rewrite_module_tmp: 'model.layers.{}.mlp.down_proj'
# layer_module_tmp: 'model.layers.{}'
# mlp_module_tmp: 'model.layers.{}.mlp'
# attn_module_tmp: 'model.layers.{}.self_attn'
# ln_f_module: 'model.norm'
# lm_head_module: 'lm_head'

# Active module-path templates, all rooted at `gpt_neox`. `{}` is a
# placeholder, presumably filled with a layer index by the loader.
# NOTE(review): `mlp_module_tmp` has the same value as `rewrite_module_tmp`,
# and `attn_module_tmp` names the `query_key_value` submodule, whereas the
# disabled set above points those two keys at the whole `.mlp` / `.self_attn`
# modules. Confirm this difference is intentional.
rewrite_module_tmp: 'gpt_neox.layers.{}.mlp.dense_4h_to_h'
layer_module_tmp: 'gpt_neox.layers.{}'
mlp_module_tmp: 'gpt_neox.layers.{}.mlp.dense_4h_to_h'
attn_module_tmp: 'gpt_neox.layers.{}.attention.query_key_value'
ln_f_module: 'gpt_neox.final_layer_norm'
lm_head_module: 'embed_out'
# Remaining `mom2_*` settings; `mom2_adjustment` and `mom2_update_weight`
# are set earlier in this file.
# NOTE(review): presumably these configure the statistics stored under
# `stats_dir` — confirm against the loader.
mom2_dataset: 'wikipedia'
mom2_n_samples: 1000
mom2_dtype: 'float32'
