channels: [256, 256]
dropout: 0.05
attention_head_dim: 64
n_blocks: 1
num_mid_blocks: 2
num_heads: 2
act_fn: snakebeta
