# Experiment-tracking identifiers (key names suggest Weights & Biases —
# confirm against the code that loads this file).
wandb_project: tinystories-1m-2
# No explicit run name; presumably the consumer derives one — TODO confirm.
wandb_run_name: null
# Empty string, i.e. no prefix text is supplied.
wandb_run_name_prefix: ""

# Seed and base-model selection.
seed: 0
# Model identifier in org/name form; "tlens" presumably stands for
# TransformerLens — confirm against the loader.
tlens_model_name: roneneldan/TinyStories-1M
# No path is given here, so only tlens_model_name identifies the model.
# NOTE(review): presumably name and path are alternatives — verify in the schema.
tlens_model_path: null

# Training schedule and optimizer settings. Per the inline notes, a "sample"
# is a sequence of 512 tokens (matching n_ctx in the data sections).
n_samples: 400_000  # 918604 samples of 512 tokens (presumably the full train split — confirm)
save_every_n_samples: null  # null: no save interval configured
eval_every_n_samples: 40_000
eval_n_samples: 500
log_every_n_grad_steps: 20
collect_act_frequency_every_n_samples: 40_000
act_frequency_n_tokens: 500_000  # 500k tokens is ~977 samples of 512 tokens
batch_size: 20
# Equal to batch_size here.
# NOTE(review): presumably this means no gradient accumulation — confirm.
effective_batch_size: 20
# Written with an explicit decimal point so that YAML 1.1 loaders (e.g. PyYAML)
# resolve it as a float; a bare `1e-3` is resolved as a string by those loaders.
lr: 1.0e-3
lr_schedule: cosine
# NOTE(review): presumably the schedule's floor as a fraction of lr — confirm.
min_lr_factor: 0.1
warmup_samples: 20_000
max_grad_norm: 1.0

# Loss terms and their coefficients. Two terms are null and one has a zero
# coefficient, so only sparsity and logits_kl carry non-zero weight here.
loss:
  sparsity:
    # NOTE(review): presumably the p of an L_p norm on SAE activations — confirm.
    p_norm: 1.0
    coeff: 3.0
  # null: no configuration supplied for these terms (presumably disabled —
  # verify how the consumer treats null).
  in_to_orig: null
  out_to_orig: null
  # Present but with a zero coefficient.
  out_to_in:
    coeff: 0.0
  # NOTE(review): name suggests a KL divergence on output logits — confirm.
  logits_kl:
    coeff: 1.0
# Training dataset. Identical to eval_data except for `split`.
train_data:
  # Alternative dataset, currently disabled (name suggests a pre-tokenized
  # variant; is_tokenized would presumably need to change with it — confirm):
  # dataset_name: ANONYMIZED/roneneldan-TinyStories-tokenizer-gpt2
  dataset_name: roneneldan/TinyStories
  # Raw text is used, so a tokenizer is named below.
  is_tokenized: false
  tokenizer_name: gpt2
  streaming: true
  split: train
  # 512 tokens per sample, matching the "samples of 512 tokens" notes above.
  n_ctx: 512
# Evaluation dataset. Identical to train_data except for `split`.
eval_data:
  # Alternative dataset, currently disabled (name suggests a pre-tokenized
  # variant; is_tokenized would presumably need to change with it — confirm):
  # dataset_name: ANONYMIZED/roneneldan-TinyStories-tokenizer-gpt2
  dataset_name: roneneldan/TinyStories
  # Raw text is used, so a tokenizer is named below.
  is_tokenized: false
  tokenizer_name: gpt2
  streaming: true
  split: validation
  # Same context length as train_data.
  n_ctx: 512
# Sparse-autoencoder (SAE) settings — expansion of "saes" is assumed from the
# key names; confirm against the config schema.
saes:
  retrain_saes: false
  # No pretrained SAE checkpoints are supplied.
  pretrained_sae_paths: null
  # A single hook-point name; the format looks like a TransformerLens hook
  # (block 4, residual stream before the block) — TODO confirm.
  sae_positions: blocks.4.hook_resid_pre
  # NOTE(review): presumably dictionary size = 50 x the input width at the
  # hook point — verify in the SAE constructor.
  dict_size_to_input_ratio: 50.0
