# Model configuration for the `large` variant (model type `ddit`).
name: large
type: ddit
hidden_size: 1280
cond_dim: 128
length: 1024
base_n_blocks: 28
# We try to roughly match parameter count: n_blocks is not hard-coded here.
# Its value is an interpolation that calls the `adjust_n_blocks` resolver with
# no arguments.
# NOTE(review): presumably an OmegaConf/Hydra custom resolver registered in
# the project code that derives the count from base_n_blocks -- confirm.
n_blocks: ${adjust_n_blocks:}
n_heads: 20
# Booleans use the canonical lowercase spelling so every YAML loader/schema
# reads them as booleans.
scale_by_sigma: true
dropout: 0.1
tie_word_embeddings: false

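# Reference values: 36 1280 20
# NOTE(review): presumably n_blocks / hidden_size / n_heads of the reference
# model whose parameter count is being matched -- confirm.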