{
    "output_dir": "./output/mamba-130m",
    "cache_dir": "./hf_cache",
    "activate_logging": true,
    "wandb_dir": "./wandb",
    "run_name_addon": "",
    "record_debug_params": false,
    "recover_step": null,
    
    "eval_mode": false,
    "mamba_arch": "vanilla",
    "model_type": "mamba-130m",
    "use_finetuned_model": false,
    "load_cp": null,
    "clip_grad": true,
    "clip_grad_max_norm": 1,
    "seed": 123,
    "lr_sched_type": "const",
    
    "save_steps": 100,
    "eval_steps": 40,
    "grad_flow_steps": 10,
    "max_step": 20000,
    "epochs": 10,

    "model_device": "cuda:0",
    "dataset": "niah_custom",
    "train_set_size": 6144,
    "eval_set_size": 20,
    "eval_samples_to_log": 10,
    "log_eval_predictions_steps": 10,
    "eval_max_len": 10,
    "max_train_input_len": 20000,
    
    "niah_train_set_size": 6144,
    "niah_context_len_train": 4096,
    "niah_needle_depths_eval": [0, 0.25, 0.5, 0.75, 1],
    "niah_context_lens_eval": [1000, 2000, 4000, 8000, 16000, 32000, 64000, 128000],
    
    "ppl_test_context_len_train": 4000,
    "ppl_test_num_windows_per_context_len_eval": 10,
    "ppl_test_context_lens_eval": [16000],
    "ppl_test_pred_len": 100,
    "deci_num_chunks": 1,

    "multidoc_num_noise_docs_train": 11,
    "multidoc_num_noise_docs_eval": [10, 20, 40, 80, 120, 160, 200],
    "multidoc_noise_injection_policy": "random_loc",

    "activate_decimation": true,
    "decimation_type": "max_p",
    "decimation_beta": 0.5,
    "decimating_layers": [12],
    "decimation_min_seq_len": 20,
    "decimation_max_p_L_base": 2000,
    "find_deci_layer": false,
    
    "lr": 1e-4,
    "weight_decay": 0.0,
    "grad_accum_steps": 32,

    "mambaextend": true
}
