; Network size and optimisation hyperparameters for this run.
; NOTE(review): learning_rate and batch_size read as optimiser/data-loader
; settings rather than architecture; they are kept here because the consumer
; may look keys up by section. Confirm before moving them.
[architecture]
; Written in scientific notation; the consumer must parse this as a float.
learning_rate = 1e-3
; Presumably the width of each hidden layer. TODO confirm against the model code.
hidden_size = 1024
; Presumably the number of hidden layers. TODO confirm against the model code.
depth = 2
; Presumably a dropout probability, in which case 0 disables dropout. TODO confirm.
dropout_p = 0
; Presumably samples per optimisation step. TODO confirm.
batch_size = 16384

; Selects what the model is conditioned on.
[conditioning]
; Boolean flag. The capitalised spelling (True) matches analyze_d4rl in
; [analysis]; keep it as is unless the consumer is known to accept other forms.
; NOTE(review): the name and the "-r" suffix of run_tag suggest reward-based
; conditioning. Confirm the exact semantics against the training code.
reward_conditioning = True

; Training-loop length, checkpoint cadence and validation split.
[training]
epochs = 500
; Same value as epochs above. If checkpoints are written every N epochs, this
; would yield a single checkpoint at the end of training. TODO confirm.
checkpoint_every_n_epochs = 500
; Presumably the fraction of data held out for validation, in which case 0
; holds out nothing. TODO confirm how the consumer treats 0.
val_frac = 0

; Settings for labelling the run and for post-training analysis.
[analysis]
; Free-form label for this run. The value is unquoted and contains hyphens.
; Presumably used to name or tag outputs. TODO confirm.
run_tag = maze2d-rvs-r
; Boolean flag, written with the same capitalised spelling (True) as
; reward_conditioning in [conditioning].
; NOTE(review): presumably enables a D4RL-specific analysis step. Confirm.
analyze_d4rl = True