# Model settings. The meaning of each key is inferred from its name only;
# confirm against the code that loads this file.
# NOTE(review): "dt" presumably selects the model variant -- TODO confirm.
model_type: dt
n_layer: 3
embed_dim: 128
# NOTE(review): if embed_dim is split evenly across heads this gives
# 128 / 4 = 32 per head -- confirm the consumer requires divisibility.
n_head: 4
activation_function: relu
dropout: 0.1
# Optimizer / learning-rate settings. Key meanings are inferred from the
# names; confirm against the code that loads this file.
learning_rate: 0.0001
lr_decay: true
lr_min: 0.0
# Written in plain decimal on purpose: YAML 1.1 loaders (e.g. PyYAML)
# resolve a dot-less exponent literal such as 1e-4 to a string, not a
# float. This form loads as a float under both YAML 1.1 and 1.2.
weight_decay: 0.0001
# Penalty and policy switches. Key meanings are inferred from the names;
# confirm against the code that loads this file.
value_penalty: false
policy_penalty: false
# Explicit null: no checkpoint file is configured here.
behavior_ckpt_file: null
stochastic_policy: true
# NOTE(review): presumably only relevant when stochastic_policy is true --
# TODO confirm with the consumer.
fixed_std: false