# Base config(s) referenced by name; only GradDiff is listed here.
# NOTE(review): presumably a Hydra-style defaults list, so the keys in this
# file would be applied on top of GradDiff — confirm against the config loader.
defaults:
  - GradDiff

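# Per-model settings noted by the author, one model per line.
# NOTE(review): the two numeric columns are unlabeled; presumably they are
# (alpha, attention_temp), matching the order of the keys under method_args
# — confirm before reusing these values.
# llama 3B 0.35 2.0
# llama 8B 0.25 2.5

# llama 7B 0.5 2.0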
# Name of the handler this config selects.
# NOTE(review): presumably resolved to an implementation elsewhere in the
# project — confirm where ATTU_output is registered.
handler: ATTU_output
# Settings grouped for the handler above. Their exact semantics are defined by
# the handler implementation, which is not visible in this file — confirm
# before tuning.
method_args:
  alpha: 0.2
  attention_temp: 2.0 # other values noted by the author: 2.0, 0.35
  retain_loss_type: NLL
  # List of regex patterns. Judging by the key name, these select which
  # parameters are trainable (TODO confirm against the handler).
  # Exactly one pattern is active; the others are commented-out alternatives
  # kept for quick switching.
  trainable_params_regex: 
    # Alternative: everything under each layer's mlp submodule.
    # - model\.layers\.\d+\.mlp\..*
    # Active: names of the form model.layers.<digits>.self_attn.<anything>.
    - model\.layers\.\d+\.self_attn\..*
    # Alternative: only the mlp down_proj weight of each layer.
    # - model\.layers\.\d+\.mlp\.down_proj\.weight
    # Alternative: the mlp down_proj and up_proj entries of each layer.
    # - model\.layers\.\d+\.mlp\.(down_proj|up_proj)\..*
    # - .* # update all parameters (as done in https://github.com/tmlr-group/G-effect/blob/ef368eea3b2c6dba1e090b9ebb021ac9f047e0ae/dataloader.py#L271)
    # - model\.layers\.(5|6|7)\.mlp\.down_proj\.weight # If you want to update only these weights (as done in https://github.com/centerforaisafety/wmdp/blob/bc5e1ba0367ea826caeeeaa50656336a1e87acfb/rmu/unlearn.py#L26)