# Model hyperparameter configuration (flat mapping of scalar settings plus one
# nested per-device table).
# NOTE(review): the key names resemble a GPT-style transformer config; the
# loader/schema that consumes this file is not visible here, so the per-key
# notes below are assumptions to confirm against that consumer.

# Boolean flag. Presumably toggles bias terms in the model's linear layers
# -- TODO confirm.
bias: true
# Presumably the maximum sequence/context length, in tokens -- TODO confirm.
block_size: 1024
# Nested mapping from an identifier to an integer. The keys look like GPU
# model names, so this is presumably the largest micro-batch that fits on each
# device type -- verify how the consumer looks up its key, and what happens
# for a device that is not listed.
max_micro_batch_size:
  l40: 13
  rtx-2080: 2
# Presumably the embedding / hidden width -- TODO confirm.
n_embd: 512
# Presumably the number of attention heads. If the width is split evenly
# across heads, 512 / 16 gives 32 per head -- TODO confirm.
n_head: 16
# Presumably the number of transformer blocks -- TODO confirm.
n_layer: 24
# Explicit null (not an omitted key). Presumably lets the consumer fall back
# to its own default for the number of query groups -- verify which default
# applies.
n_query_groups: null
# Plain string. Presumably resolved by the consumer to a normalization layer
# class of this name -- TODO confirm the accepted values.
norm_class_name: LayerNorm
# Integer vocabulary size. NOTE(review): 50257 matches the GPT-2 BPE
# vocabulary size -- confirm it agrees with the tokenizer actually used.
vocab_size: 50257