---
# Flat run configuration. The key set looks like a sparse-autoencoder training
# run (SAELens-style runner options) -- TODO confirm against the loader that
# consumes this file; descriptions below are hedged where the file alone does
# not establish the meaning.

# Model and hook point.
architecture: standard
model_name: gelu-1l
hook_name: blocks.0.hook_resid_post
# NOTE(review): presumably must agree with the layer index embedded in
# hook_name (both are 0 here) -- confirm.
hook_layer: 0
context_size: 1024

# Activation source.
use_cached_activations: true
dataset_path: monology/pile-uncopyrighted
# Placeholder value: replace with a real path before running. Kept quoted so
# the substituted value is always read as a string.
cached_activations_path: '<PATH_TO_CACHED_ACTIVATIONS>'
d_in: 512

# Training budget and optimisation.
# Written without digit-group underscores: `800_000_000` is an integer only
# under YAML 1.1 resolvers; YAML 1.2 core-schema parsers read it as a string.
training_tokens: 800000000
train_batch_size_tokens: 10000
seed: 43
expansion_factor: 32
lr: 0.0003
l1_coefficient: 0.75

# Logging and evaluation.
log_to_wandb: true
wandb_log_frequency: 1
# NOTE(review): very large interval; presumably chosen so periodic evals never
# trigger during the run -- confirm this is intentional.
eval_every_n_wandb_logs: 5000000000
dead_feature_threshold: 0.0001
