---
# Training-run configuration. Key names suggest a sparse autoencoder (SAE)
# trained on activations of the model named below.
# NOTE(review): the consumer of this file is not visible here; comments marked
# "presumably" are assumptions to confirm against the training script.

# Compute device identifier.
device: 'cuda'

# Source model and autoencoder dimensions.
model_name: 'stanford-gpt2-small-a'
d_model: 768
# 24576 = 32 * d_model.
feature_dim: 24576
# NOTE(review): presumably the index of the model layer whose activations are
# used; whether it is 0- or 1-based is not visible here -- confirm.
layer: 8

# Checkpointing and training length.
checkpoints_dir: 'stanford-gpt2-small-a-sae-checkpoints'
total_training_steps: 100000
# Every threshold listed is below total_training_steps.
# NOTE(review): presumably the step counts at which a checkpoint is written,
# and whether a final checkpoint is also saved at the last step -- confirm.
checkpoint_thresholds: [1000, 5000, 10000, 25000, 50000]
batch_size: 4096

# Learning-rate schedule.
lr: 0.0001
# NOTE(review): presumably warmup/decay phase lengths are derived as
# total_training_steps divided by these values -- confirm.
lr_warmup_div: 50
lr_decay_div: 5

# L1 penalty settings.
l1: 0.5
# NOTE(review): extremely large divisor; presumably makes the L1 warmup phase
# effectively zero steps (i.e. warmup disabled) -- confirm, and consider an
# explicit switch in the consumer instead of a sentinel value.
l1_warmup_div: 9999999999999

# Reproducibility, preprocessing and logging.
seed: 42
# Kept quoted so the value is unambiguously the string 'none', not a null.
normalize_activations: 'none'
# Canonical lowercase boolean (was `True`): same parsed value under YAML 1.1
# and the 1.2 core schema, and also accepted by strict loaders.
wandb: true
wandb_project: 'gpt2-small-a-saes'
