; Top-level identifiers for this configuration.
; NOTE(review): presumably these name the package, the environment, and the
; policy / recurrent wrapper to load; confirm against the code that reads
; this file.
[base]
package = gpudrive
env_name = gpudrive
policy_name = Policy
rnn_name = Recurrent

; Environment settings.
[env]
; NOTE(review): presumably the number of simulated worlds created by the
; environment; confirm against the environment constructor.
num_worlds = 512

; Training hyperparameters.
; Values use Python-style literals (digit-group underscores, capitalised
; booleans). NOTE(review): this assumes the reader evaluates values as Python
; literals rather than as plain INI strings; confirm before reformatting.
[train]
total_timesteps = 10_000_000
; All three environment/worker counts are set to 1 in this file.
num_envs = 1
num_workers = 1
env_batch_size = 1
zero_copy = False
; 262144 = 2^18, which is exactly 8 x minibatch_size (32768 = 2^15).
batch_size = 262_144
update_epochs = 5
minibatch_size = 32768
bptt_horizon = 4
anneal_lr = False
gae_lambda = 0.95
gamma = 0.99
clip_coef = 0.2
vf_coef = 0.5
vf_clip_coef = 0.2
max_grad_norm = 0.5
; Set to zero. NOTE(review): presumably this disables the entropy term;
; confirm this is intentional.
ent_coef = 0.00
learning_rate = 0.0003
; NOTE(review): units are not stated here (updates? epochs? steps?); confirm
; against the trainer.
checkpoint_interval = 1000
device = cuda

; Sweep objective: maximize the metric named below.
[sweep.metric]
goal = maximize
name = environment/goal_achieved

; Sweep search range for train.batch_size: 32768 (2^15) to 524288 (2^19).
; NOTE(review): `uniform` is applied to an integer-valued parameter. Depending
; on the sweep backend this may sample non-integer values; confirm that the
; consumer rounds them or that an integer distribution is not required.
[sweep.parameters.train.parameters.batch_size]
distribution = uniform
min = 32768
max = 524288

; Sweep search range for train.minibatch_size: 2048 (2^11) to 32768 (2^15).
; The upper bound equals the lower bound of the batch_size sweep range, so a
; sampled minibatch_size never exceeds a sampled batch_size.
; NOTE(review): nothing in this file guarantees that a sampled batch_size is
; a multiple of the sampled minibatch_size; confirm whether the trainer
; requires that.
[sweep.parameters.train.parameters.minibatch_size]
distribution = uniform
min = 2048
max = 32768


