---
# Flat run configuration: every setting is a top-level scalar key.
# NOTE(review): the keys look like a JumpReLU sparse-autoencoder training
# run on gemma-2-2b activations -- confirm against the consuming loader.

# Model and hook point. `hook_layer` repeats the layer index embedded in
# `hook_name` (blocks.12); keep the two in sync when changing either.
architecture: jumprelu
model_name: gemma-2-2b
hook_name: blocks.12.hook_resid_post
hook_layer: 12
context_size: 8192

# Data source. `cached_activations_path` is a placeholder and must be
# replaced with a real path before use, since `use_cached_activations`
# is enabled.
use_cached_activations: true
dataset_path: monology/pile-uncopyrighted
cached_activations_path: <PATH_TO_CACHED_ACTIVATIONS>

# NOTE(review): presumably the width of the hooked activation -- confirm
# it matches the model's hidden size.
d_in: 2304

# Training budget and optimisation settings.
# `training_tokens` is written without digit separators: `800_000_000`
# is an integer only under YAML 1.1 and loads as a string under the
# YAML 1.2 core schema.
training_tokens: 800000000
train_batch_size_tokens: 10000
seed: 43
expansion_factor: 7
lr: 0.0003
l1_coefficient: 0.75

# Logging.
# NOTE(review): `eval_every_n_wandb_logs` is set to a very large value,
# which presumably disables periodic evaluation in practice -- confirm
# this is intentional.
log_to_wandb: true
wandb_log_frequency: 1
eval_every_n_wandb_logs: 5000000000
dead_feature_threshold: 0.0001
