; NOTE(review): the meanings given below are inferred from key names only;
; confirm each one against the code that reads this file.
; NOTE(review): learning_rate and batch_size look like optimisation settings
; rather than model structure. They are left in this section because the
; reader may look keys up by section -- verify before moving them.
[architecture]
; presumably the optimizer step size; 1e-3 is 0.001
learning_rate = 1e-3
; presumably the width (unit count) of each hidden layer -- TODO confirm
hidden_size = 1024
; presumably the number of hidden layers -- TODO confirm
depth = 1
; presumably the dropout probability; 0 would mean dropout is off -- TODO confirm
dropout_p = 0
; presumably the number of samples per training batch -- TODO confirm
batch_size = 1024

; NOTE(review): the key name suggests this section selects what the model is
; conditioned on; confirm against the code that reads this file.
[conditioning]
; Boolean flag. The capitalised spelling "True" is kept as-is: whether other
; spellings (true/yes/1) are accepted depends on the parser -- verify before
; normalising.
reward_conditioning = True

; NOTE(review): the meanings given below are inferred from key names only;
; confirm each one against the code that reads this file.
[training]
; presumably the total number of training steps before stopping
max_steps = 30000
; presumably the interval, in steps, between saved checkpoints;
; with max_steps = 30000 that is 30000 / 3000 = 10 intervals
checkpoint_every_n_steps = 3000
; presumably the fraction of the data held out for validation (0.1 = 10%)
val_frac = 0.1

; NOTE(review): the meanings given below are inferred from key names only;
; confirm each one against the code that reads this file.
[analysis]
; Free-form string label; presumably used to name or group this run's outputs.
run_tag = gym-rvs-r
; Boolean flag; presumably turns on a D4RL-specific analysis step -- TODO confirm.
; The capitalised spelling "True" is kept as-is because accepted boolean
; spellings depend on the parser.
analyze_d4rl = True
