---
# Training configuration. Judging by the checkpoint directory and W&B project
# names, this drives a top-k sparse autoencoder (SAE) run on GPT-2 small.
# NOTE(review): key semantics below are inferred from names unless stated
# otherwise; confirm against the code that consumes this file.

# Compute device identifier passed to the training code.
device: 'cuda'

# Model and SAE dimensions.
model_name: 'gpt2-small'
d_model: 768
# 24576 = 32 * d_model.
feature_dim: 24576
# Presumably the model layer whose activations are used (TODO confirm
# indexing convention and hook point).
layer: 6

# Run length and checkpointing.
checkpoints_dir: 'gpt2-small-topk-sae-checkpoints'
total_training_steps: 120000
# Single threshold at half of total_training_steps.
checkpoint_thresholds: [60000]
batch_size: 4096

# Optimizer schedule.
lr: 0.0001
# NOTE(review): these very large divisors look like a way to effectively
# disable warmup/decay; verify how the trainer uses them.
lr_warmup_div: 999999999999
lr_decay_div: 999999999999

# Top-k sparsity settings.
k: 32
normalize_decoder: true
dead_feature_window: 1000

# Reproducibility and logging.
seed: 42
# Quoted so the value stays the string 'none' rather than being read as null
# by a lenient consumer.
normalize_activations: 'none'
wandb: true
wandb_project: 'gpt2-small-topk-saes'
count_flops: true
