# Run configuration. Key meanings below are inferred from the key names only;
# confirm against the code that consumes this file.

# Run identifier / variant selector (presumably) -- TODO confirm.
run: kla

# Model shape.
# NOTE(review): depth is presumably the number of layers/blocks and
# model_dim the hidden width -- confirm against the model definition.
depth: 12
model_dim: 960
# NOTE(review): d_state looks like a per-layer state size -- confirm.
d_state: 64
# NOTE(review): presumably indices of the blocks that use the "kla" layer,
# with -1 possibly meaning "last block" -- confirm how negatives are handled.
kla_blocks: [-1]

# Optimization settings.
# NOTE(review): presumably a gradient clipping threshold; whether it is a
# norm or a per-value clip is not visible here.
grad_clip: 10.0
# NOTE(review): presumably the learning rate for the unembedding parameters.
unembedding_lr: 0.002
# NOTE(review): presumably the per-device batch size.
device_batch_size: 4