# Compute device identifier for the run.
# NOTE(review): presumably a PyTorch-style device string; confirm which
# values the consuming script accepts (e.g. 'cpu').
device: 'cuda'

# Source model and dictionary dimensions.
# NOTE(review): the meanings below are inferred from key names only; confirm
# against the script that loads this file.
# Identifier of the model to load (how it is resolved is not visible here).
model_name: 'stanford-gpt2-small-a'
# Presumably the width of the activations being modelled — TODO confirm.
d_model: 768
# 32768 = 2^15. Presumably the number of learned features — TODO confirm.
feature_dim: 32768
# Presumably the layer whose activations are read; whether the index is
# 0- or 1-based is not visible here — verify against the consumer.
layer: 6

# Run length and checkpointing.
# Relative path; what it is resolved against (cwd, repo root, ...) is decided
# by the consumer — verify.
checkpoints_dir: 'stanford-gpt2-small-a-sae-checkpoints'
total_training_steps: 100000
# Every threshold is below total_training_steps. Whether a final checkpoint
# is also written at the last step is not visible here — TODO confirm.
checkpoint_thresholds: [1000, 5000, 10000, 25000, 50000]
# NOTE(review): the unit (tokens, activation vectors, sequences) is not
# stated in this file; confirm before changing.
batch_size: 4096

# Learning-rate schedule.
lr: 0.0001
# NOTE(review): the *_div keys presumably express a phase length as
# total_training_steps / div (100000 / 100 = 1000 warmup steps and
# 100000 / 5 = 20000 decay steps with the values here) — confirm against
# the scheduler code.
lr_warmup_div: 100
lr_decay_div: 5

# L1 penalty settings.
# Presumably the coefficient on the L1 (sparsity) term — TODO confirm.
l1: 0.2
# Very large divisor, larger than a 32-bit integer can hold. If the warmup
# length is total_training_steps / l1_warmup_div, this rounds to zero steps,
# i.e. it would effectively disable L1 warmup — TODO confirm that this
# sentinel is intentional and that the consumer handles it.
l1_warmup_div: 9999999999

# Reproducibility, preprocessing and experiment logging.
seed: 42
# This is the string 'none', not YAML null. Presumably it selects
# "no normalization"; the accepted values are defined by the consumer.
normalize_activations: 'none'
# Canonical lowercase boolean (parses the same as the former `True`).
# Presumably toggles Weights & Biases logging — TODO confirm.
wandb: true
wandb_project: 'gpt2-small-a-saes'
