{
    "model_name": "drc33",
    "model_class_name": "HookedTransformer",
    "hook_name": "features_extractor.cell_list.0.hook_h",
    "hook_eval": "NOT_IN_USE",
    "hook_layer": 0,
    "hook_head_index": null,
    "dataset_path": null,
    "dataset_trust_remote_code": true,
    "streaming": true,
    "is_dataset_tokenized": true,
    "context_size": 128,
    "use_cached_activations": true,
    "cached_activations_path": [
        "/workspace/learned_planners/interp/train_unfiltered/0_think_step/",
        "/workspace/learned_planners/interp/train_medium/0_think_step/",
        "/training/activations_dataset/hard/0_think_step/"
    ],
    "architecture": "standard",
    "d_in": 32,
    "d_sae": 512,
    "b_dec_init_method": "geometric_median",
    "expansion_factor": 16,
    "activation_fn": "topk",
    "activation_fn_kwargs": {
        "k": 8
    },
    "normalize_sae_decoder": true,
    "noise_scale": 0.0,
    "from_pretrained_path": null,
    "apply_b_dec_to_input": true,
    "decoder_orthogonal_init": false,
    "decoder_heuristic_init": false,
    "init_encoder_as_decoder_transpose": true,
    "n_batches_in_buffer": 20,
    "training_tokens": 600000000,
    "finetuning_tokens": 0,
    "store_batch_size_prompts": 32,
    "train_batch_size_tokens": 4096,
    "normalize_activations": "layer_norm",
    "device": "cuda",
    "act_store_device": "cuda",
    "seed": 350420793,
    "dtype": "float32",
    "prepend_bos": true,
    "autocast": false,
    "autocast_lm": false,
    "compile_llm": false,
    "llm_compilation_mode": null,
    "compile_sae": false,
    "sae_compilation_mode": null,
    "adam_beta1": 0.9,
    "adam_beta2": 0.999,
    "mse_loss_normalization": null,
    "l1_coefficient": 1e-20,
    "lp_norm": 1.0,
    "scale_sparsity_penalty_by_decoder_norm": false,
    "l1_warm_up_steps": 10000,
    "lr": 5e-05,
    "lr_scheduler_name": "constant",
    "lr_warm_up_steps": 0,
    "lr_end": 5e-06,
    "lr_decay_steps": 40000,
    "n_restart_cycles": 1,
    "finetuning_method": null,
    "use_ghost_grads": false,
    "feature_sampling_window": 2000,
    "dead_feature_window": 1000,
    "dead_feature_threshold": 1e-08,
    "n_eval_batches": 10,
    "eval_batch_size_prompts": null,
    "log_to_wandb": true,
    "log_activations_store_to_wandb": false,
    "log_optimizer_state_to_wandb": false,
    "wandb_project": "lp_sae",
    "wandb_id": null,
    "run_name": "512-L1-1e-20-LR-5e-05-Tokens-6.000e+08-Epochs-1-GridWise",
    "wandb_entity": null,
    "wandb_log_frequency": 20,
    "eval_every_n_wandb_logs": 100,
    "resume": false,
    "n_checkpoints": 5,
    "checkpoint_path": "/training/TrainSAEConfig/devbox/wandb/run-20240903_201625-lnrz5ad8/local-files/checkpoint",
    "verbose": true,
    "model_kwargs": {},
    "model_from_pretrained_kwargs": {},
    "sae_lens_version": "3.18.2",
    "sae_lens_training_version": "3.18.2",
    "grid_wise": true,
    "epochs": 1,
    "num_envs": 64,
    "envpool": true,
    "tokens_per_buffer": 10485760
}