# Run identity
alg_name: MEND
tag: mendtest
number_of_edits: 10
device: 0

# Model
# `model_name`, `model` and `tokenizer_name` all carry the same Hugging Face id.
model_name: meta-llama/Llama-2-7b-chat-hf
model: meta-llama/Llama-2-7b-chat-hf
model_class: LlamaForCausalLM
tokenizer_class: LlamaTokenizer
tokenizer_name: meta-llama/Llama-2-7b-chat-hf

# Archive path (presumably the trained MEND editor checkpoint to load;
# confirm against the loader). Earlier value kept for reference:
# archive: results/SERAC/llama-2-7b.bk
archive: ./results/models/MEND/llama-2-7b-ZsRE

# Parameter names: the gate/up/down MLP projection weights of layers 29-31
# (presumably the tensors the editor modifies; confirm against the consumer).
inner_params:
- model.layers.29.mlp.gate_proj.weight
- model.layers.29.mlp.up_proj.weight
- model.layers.29.mlp.down_proj.weight
- model.layers.30.mlp.gate_proj.weight
- model.layers.30.mlp.up_proj.weight
- model.layers.30.mlp.down_proj.weight
- model.layers.31.mlp.gate_proj.weight
- model.layers.31.mlp.up_proj.weight
- model.layers.31.mlp.down_proj.weight

# Method
alg: MEND

# Learning rates carry an explicit mantissa dot on purpose: YAML 1.1 loaders
# (e.g. PyYAML) resolve a bare `1e-6` to the string "1e-6" rather than a float,
# whereas `1.0e-6` is a float under both YAML 1.1 and 1.2.
lr: 1.0e-6
edit_lr: 1.0e-4
lr_lr: 1.0e-4
lr_scale: 1.0
seed: 42

# Coefficients (presumably weights of the edit / locality / base loss terms;
# confirm against the trainer).
cedit: 0.1
cloc: 1.0
cbase: 1.0

dropout: 0.0
train_base: false
no_grad_layers: null
one_sided: false

# Editor network settings; these mirror the values in `edit_train_config`.
n_hidden: 1
hidden_dim: null
init: id
norm: true
combine: true
x_only: false
delta_only: false
act: relu
rank: 1920
mlp_class: IDMLP
shared: true

# Train
# Batching and optimizer
batch_size: 1
val_batch_size: 5
accumulate_bs: 10
opt: Adam
grad_clip: 100.0

# Iteration budget, logging and validation intervals
# max_epochs: 1
max_iters: 100000
log_interval: 1000
eval_log_interval: 1000
val_interval: 1000
val_steps: 500  # only for debug
model_save_pt: 5000

# Early stopping
early_stop_patience: 20000
early_stop_key: 'loss/total_edit_val'

# Run-mode flags. Note that `eval_only` is true here, while the nested
# `edit_train_config` sets it to false.
eval_only: true
final_eval: true
half: false
debug: false
save: false
silent: false
verbose: true

# Output
results_dir: ./results

# Flag plus a self-contained nested configuration. The nested mapping repeats
# the top-level model and method values, with `archive: null`,
# `eval_only: false` and its own early-stopping settings (presumably used to
# train the editor before editing; confirm against the consumer).
edit_train: true
edit_train_config:
  # Model
  model_name: meta-llama/Llama-2-7b-chat-hf
  model_class: LlamaForCausalLM
  tokenizer_class: LlamaTokenizer
  tokenizer_name: meta-llama/Llama-2-7b-chat-hf
  # Gate/up/down MLP projection weights of layers 29-31.
  inner_params:
  - model.layers.29.mlp.gate_proj.weight
  - model.layers.29.mlp.up_proj.weight
  - model.layers.29.mlp.down_proj.weight
  - model.layers.30.mlp.gate_proj.weight
  - model.layers.30.mlp.up_proj.weight
  - model.layers.30.mlp.down_proj.weight
  - model.layers.31.mlp.gate_proj.weight
  - model.layers.31.mlp.up_proj.weight
  - model.layers.31.mlp.down_proj.weight

  archive: null

  # Method
  alg: MEND
  # Explicit mantissa dot: YAML 1.1 loaders (e.g. PyYAML) read a bare `1e-6`
  # as the string "1e-6"; `1.0e-6` is a float under both YAML 1.1 and 1.2.
  lr: 1.0e-6
  edit_lr: 1.0e-4
  lr_lr: 1.0e-4
  seed: 42
  cedit: 0.1
  cloc: 1.0
  cbase: 1.0
  dropout: 0.0
  train_base: false
  no_grad_layers: null
  one_sided: false
  n_hidden: 1
  hidden_dim: null
  init: id
  norm: true
  combine: true
  x_only: false
  delta_only: false
  act: relu
  rank: 1920
  mlp_class: IDMLP
  shared: true

  # Train
  # Here `device` is a string, whereas the top-level `device` is the integer 0.
  device: cuda:0
  batch_size: 1
  model_save_pt: 5000
  silent: false
  # max_epochs: 1
  max_iters: 100000
  log_interval: 1000
  eval_log_interval: 1000
  final_eval: true
  val_interval: 1000
  early_stop_patience: 30000
  early_stop_key: "edit/acc_val"
  eval_only: false
  half: false
  debug: false
  save: false
  verbose: true

  val_batch_size: 5
  accumulate_bs: 10
  val_steps: 500  # only for debug
  opt: Adam
  grad_clip: 100.0

  # Output

  results_dir: ./results


# # Compression
# nsamples: 128
# sparsity_ratio: 0.3
# sparsity_type: unstructured
# prune_method: wanda
# quant_method: gptq
# compression_dataset: c4
# percdamp: 0.01
# wbits: 8
# groupsize: -1
# sym: true
# nearest: false
# new_eval: false
# act_order: false
# true_sequential: false
# static_groups: false
# cache_dir: /scratch/sux7mp/llm_weights
# use_variant: false
# save: out/
# save_model: null
# eval_zero_shot: false


# Stage switches: editing is on, compression is off in this file.
edit: true
compress: false
method: prune  # prune or quant

# Checkpointing. `ckpt_path` is a user-specific absolute path; with
# `load_ckpt: false` it is presumably not read (confirm against the consumer).
load_ckpt: false
ckpt_path: /scratch/sux7mp/saved_models/checkpoint_20231221_113020.pth
save_ckpt: true
