# Dataset / activation settings consumed by the training script.
data:
    # NOTE(review): presumably the max tokens kept per sample — confirm.
    truncation_length: 512
    # NOTE(review): unit not visible here (samples? tokens?) — confirm.
    size: 200000
    token_batch_size: 1024
    # NOTE(review): meaning not visible in this file — verify against consumer.
    val_scale: 10
    # Looks like a placeholder: presumably must be set to a real directory
    # path before running — confirm.
    activations_dir: 'fill in'


# Model pair and mapping settings.
model:
    model_a_name: 'gemma-2-2b'
    model_b_name: 'gemma-2-9b'
    # NOTE(review): presumably the layer index in model A / model B whose
    # activations are used — confirm against the consumer.
    layer_a: 20
    layer_b: 33
    # NOTE(review): presumably the activation widths of models A and B; they
    # should stay in sync with model_a_name / model_b_name — confirm.
    d_A: 2304
    d_B: 3584
    # Booleans use canonical lowercase true/false so they parse identically
    # under YAML 1.1 and 1.2 loaders.
    force_ortho: false
    norm_adjustment: false
    run_inverse: true
    use_bias1: true
    use_bias2: true
    # NOTE(review): set of accepted method names is not visible in this file.
    method: 'separate_mat'

# Training hyperparameters.
# NOTE(review): presumably a weight for the inverse-direction term enabled by
# model.run_inverse — confirm against the training code.
alpha_inv: 1.0
epochs: 2
# Keep plain decimal notation: if this file is loaded with PyYAML, a value
# written as `1e-4` (no dot) is resolved as a string, not a float.
lr: 0.0001
weight_decay: 0
use_scheduler: false
# NOTE(review): unit (steps vs. batches) is not visible in this file.
val_every: 2500

# Runtime / logging switches. Booleans use canonical lowercase true/false so
# they parse identically under YAML 1.1 and 1.2 loaders.
run_sae_eval: false
use_wandb: true
verbose: true
# Quoted so the value is always read as a string.
device: 'cuda'
count_flops: true
