# Experiment-logging settings (the `wandb_` prefix suggests Weights & Biases
# -- TODO confirm against the consumer of this file).
wandb_project: pythia-14m
# No explicit run name is set; presumably one is derived elsewhere -- verify.
wandb_run_name: null
# Empty prefix; presumably prepended to the run name -- verify.
wandb_run_name_prefix: ""

# Random seed and the model this run targets.
seed: 0
# Model identifier (the `tlens_` prefix suggests TransformerLens -- TODO confirm).
tlens_model_name: pythia-14m
# No local model path is given; presumably the model is then resolved by
# name -- verify against the loader.
tlens_model_path: null

# Training schedule and optimiser settings.
# NOTE: underscore-grouped integers (e.g. 200_000) are YAML 1.1 syntax.
# PyYAML resolves them as ints; strict YAML 1.2 loaders read them as strings.
n_samples: 200_000
# null: presumably disables periodic checkpoint saving -- TODO confirm.
save_every_n_samples: null
eval_every_n_samples: 40_000
eval_n_samples: 500
log_every_n_grad_steps: 20
collect_act_frequency_every_n_samples: 40_000
act_frequency_n_tokens: 1_000_000
batch_size: 8
# Equal to batch_size here; presumably this means no gradient accumulation
# -- verify against the training loop.
effective_batch_size: 8
# Written with an explicit decimal point on purpose: YAML 1.1 resolvers such
# as PyYAML load a bare `5e-4` as the string "5e-4" rather than a float.
lr: 5.0e-4
lr_schedule: cosine
min_lr_factor: 0.1
warmup_samples: 20_000
# null: presumably disables gradient-norm clipping -- TODO confirm.
max_grad_norm: null

# Loss settings. Of the entries below, only `sparsity` and `logits_kl` carry a
# non-zero coeff; `in_to_orig` and `out_to_orig` are null and `out_to_in` has
# coeff 0.0.
loss:
  sparsity:
    p_norm: 1.0
    coeff: 1.0
  # null entries: presumably these loss terms are switched off -- TODO confirm.
  in_to_orig: null
  out_to_orig: null
  # Configured, but with a zero coefficient.
  out_to_in:
    coeff: 0.0
  logits_kl:
    coeff: 1.0
# Training data source. `is_tokenized` is true, so the dataset is presumably
# already tokenized with `tokenizer_name` -- verify against the data loader.
train_data:
  dataset_name: ANONYMIZED/monology-pile-uncopyrighted-tokenizer-EleutherAI-gpt-neox-20b
  is_tokenized: true
  tokenizer_name: EleutherAI/gpt-neox-20B
  streaming: true
  split: train
  # Presumably the context length in tokens -- TODO confirm.
  n_ctx: 2048
# Evaluation data source.
# NOTE(review): every field is identical to `train_data`, including
# `split: train` -- confirm that evaluation samples do not overlap with the
# samples used for training.
eval_data:
  dataset_name: ANONYMIZED/monology-pile-uncopyrighted-tokenizer-EleutherAI-gpt-neox-20b
  is_tokenized: true
  tokenizer_name: EleutherAI/gpt-neox-20B
  streaming: true
  split: train
  n_ctx: 2048
# SAE settings ("SAE" presumably stands for sparse autoencoder -- TODO confirm).
saes:
  retrain_saes: false
  # No pretrained SAE paths are supplied.
  pretrained_sae_paths: null
  # A single hook position is listed.
  sae_positions:
    - blocks.3.hook_resid_pre
  # Presumably dictionary size = 60x the input width at each position
  # -- verify against the SAE construction code.
  dict_size_to_input_ratio: 60.0