{
    "layers": [
     13,
     14,
     15,
     16,
     17
    ],
    "clamp_norm_factor": 0.75,
    "layer_selection": "all",
    "fact_token": "subject_last",
    "v_num_grad_steps": 20,
    "v_lr": 0.5,
    "v_loss_layer": 47,
    "v_weight_decay": 0.5,
    "kl_factor": 0.0625,
    "mom2_adjustment": true,
    "mom2_update_weight": 100000,
    "rewrite_module_tmp": "transformer.h.{}.mlp.c_proj",
    "layer_module_tmp": "transformer.h.{}",
    "mlp_module_tmp": "transformer.h.{}.mlp",
    "attn_module_tmp": "transformer.h.{}.attn",
    "ln_f_module": "transformer.ln_f",
    "lm_head_module": "transformer.wte",
    "mom2_dataset": "wikipedia",
    "mom2_n_samples": 100000,
    "mom2_dtype": "float32",
    "noise_scale": 0.001,
    "learning_rate": 1e-06,
    "total_iters": 1000,
    "prune_requests": 1,
    "beta_ratio": 1.1,
    "lr_after_hitting_boundary": 1e-09,
    "compute_initialization_at_once": true,
    "decay_factor": 0.75,
    "skip_search": false,
    "keep_original_requests": true
   }