# Flat mapping of numeric hyperparameters; every value is a plain scalar.
# NOTE(review): this file does not show how the keys are consumed. The key
# names suggest optimizer/model/environment settings, but confirm their exact
# meaning and units against the code that loads this config.

# Written with an explicit decimal point on purpose: YAML 1.1 resolvers
# (e.g. PyYAML) load a bare `1e-3` as the string "1e-3", not as a float.
# `1.0e-3` resolves to the float 0.001 under both YAML 1.1 and YAML 1.2.
learning_rate: 1.0e-3
horizon: 100
state_dim: 2
action_dim: 5
n_layer: 4
n_embd: 32
n_head: 4
dropout: 0.0
gamma: 0.9