# Hyperparameter settings (flat mapping of numeric values).
# NOTE(review): the code that consumes this file is not visible here; the
# per-key notes below are inferred from key names only -- confirm against
# the loader before relying on them.

# Written with an explicit decimal point on purpose: YAML 1.1 resolvers
# (e.g. PyYAML) only recognise a float when the mantissa contains ".", so a
# bare "1e-3" would load as the string "1e-3" instead of the number 0.001.
learning_rate: 1.0e-3

# Presumably the sequence/rollout length in steps -- TODO confirm.
horizon: 200

# Presumably the sizes of the state and action vectors -- TODO confirm.
state_dim: 39
action_dim: 4

# Presumably network depth, embedding width, and attention-head count.
# 256 is evenly divisible by 8 (32 per head), should the consumer need that.
n_layer: 6
n_embd: 256
n_head: 8

# 0.0 presumably means dropout is disabled -- TODO confirm.
dropout: 0.0

# Presumably a discount/decay factor -- verify against the consumer.
gamma: 0.9