# Training configuration for a sparse autoencoder (SAE) run. The checkpoint
# directory and W&B project names below indicate a top-k SAE trained on
# pythia-70m-deduped.
# NOTE(review): per-key meanings marked "presumably" are inferred from the key
# names only; confirm them against the training script that reads this file.

# Compute device string.
device: 'cuda'

# Model and SAE dimensions.
model_name: 'pythia-70m-deduped'
d_model: 512  # presumably the model's activation width; TODO confirm
feature_dim: 4096  # 8 x d_model
layer: 3  # presumably the layer whose activations are used; TODO confirm

# Training length and checkpointing.
checkpoints_dir: 'pythia-70m-topk-sae-checkpoints'
total_training_steps: 120000
checkpoint_thresholds: [60000]  # halfway through total_training_steps
batch_size: 4096

# Optimizer schedule.
# Keep the learning rate in plain decimal form: exponent notation without a
# dot (e.g. 1e-4) is loaded as a string by YAML 1.1 parsers such as PyYAML.
lr: 0.0001
# NOTE(review): both divisors are set to the same very large sentinel value.
# Presumably this makes warmup/decay effectively inactive; verify how the
# training script uses these divisors before changing them.
lr_warmup_div: 999999999999
lr_decay_div: 999999999999

# SAE-specific options.
k: 64  # presumably the "k" of the top-k activation; TODO confirm
normalize_decoder: true
dead_feature_window: 1000  # presumably measured in steps; TODO confirm

# Reproducibility, preprocessing and logging.
seed: 42
# Quoted so the value stays the string 'none' rather than reading as a null.
normalize_activations: 'none'
wandb: true
wandb_project: 'pythia-70m-deduped-topk-saes-simple'
count_flops: true
