# Data section: four integer settings. This file does not define what each
# key means; the inline notes are reviewer assumptions to confirm against the
# code that loads this config.
data:
    truncation_length: 512  # presumably max sequence length in tokens — TODO confirm
    size: 200000  # presumably the number of samples to use — TODO confirm
    batch_size: 10
    val_scale: 10  # NOTE(review): meaning not evident from this file — verify in the consumer


# Model section: names two models (a and b), gives one layer number for each,
# and sets option flags plus a method name.
# NOTE(review): the effect of each flag is defined by the consuming code, not
# by this file — confirm there before changing values.
model:
    model_a_name: 'pythia-70m-deduped'
    model_b_name: 'pythia-160m-deduped'
    # presumably the layer index read from model a / model b — TODO confirm
    layer_a: 3
    layer_b: 4
    # Booleans use canonical lowercase true/false, which every YAML schema
    # resolves to a boolean (capitalised True/False is not portable).
    force_ortho: false
    norm_adjustment: false
    run_inverse: false
    use_bias1: true
    use_bias2: true
    method: 'separate_mat'

# SAE section: a single identifier string.
# NOTE(review): presumably the name of a pretrained SAE release — confirm in
# the consumer.
sae:
    name: 'pythia-70m-deduped-res-sm'

# Top-level numeric settings and the scheduler switch.
# NOTE(review): units and exact meaning come from the consuming code — confirm
# there.
alpha_inv: 1.0  # NOTE(review): presumably a loss weight — verify in the consumer
epochs: 2
lr: 0.0001
weight_decay: 0
# Canonical lowercase boolean so every YAML schema reads it as a boolean.
use_scheduler: true
val_every: 2500  # presumably counted in training steps — TODO confirm

# Run-time switches and the target device string.
# Booleans use canonical lowercase true/false, which every YAML schema
# resolves to a boolean (capitalised True/False is not portable).
count_flops: true
run_sae_eval: false
use_wandb: true
verbose: true
device: 'cuda'
