---
# Flat key/value settings. The code that loads this file is not shown here,
# so the notes on meaning below are inferred from the key names only and
# should be confirmed against the consumer before being relied upon.

# Optimisation settings.
# NOTE(review): presumably batch size, optimizer name, learning rate and
# weight decay -- TODO confirm.
batch_size: 8
opt: 'adam'
lr: 0.0001
wd: 0.01

# Seeds.
# NOTE(review): presumably a general random seed and a separate seed for
# data shuffling -- TODO confirm.
rdm_seed: 365
shuffle_seed: 161

# Model size.
# NOTE(review): the names suggest layer count, attention-head count and
# embedding width; with these values n_embd / n_head = 16 -- TODO confirm
# whether the consumer requires n_embd to be divisible by n_head.
n_layer: 1
n_head: 4
n_embd: 64

# Dropout-style settings, all set to zero here.
# NOTE(review): presumably probabilities. They are written as integer 0, so
# a YAML parser loads them as integers rather than floats.
embd_pdrop: 0
resid_pdrop: 0
attn_pdrop: 0