; Top-level selection of what to run: package, environment, vectorization
; mode, and the policy / recurrent-wrapper names.
; NOTE(review): key meanings here are inferred from their names; confirm
; against the loader that reads this file.
[base]
package = ocean
env_name = puffer_grid
; presumably selects the vectorization backend — TODO confirm accepted values
vec = multiprocessing
policy_name = Policy
; presumably the recurrent wrapper used together with policy_name — verify
rnn_name = Recurrent

; Training hyperparameters.
; NOTE(review): units and exact semantics of these keys are not visible in
; this file; confirm with the trainer that consumes them.
[train]
; 100_000_000 uses "_" as a digit separator (i.e. 100000000).
; Assumes the loader accepts Python-style integer literals — TODO confirm.
total_timesteps = 100_000_000
checkpoint_interval = 1000
learning_rate = 0.001
num_envs = 2
num_workers = 2
env_batch_size = 1
update_epochs = 1
bptt_horizon = 16
; 131072 = 2^17, which is exactly 4 x minibatch_size (32768) below.
batch_size = 131072
; 32768 = 2^15.
minibatch_size = 32768
; Capitalized booleans (False); keep the spelling consistent across the file.
compile = False
anneal_lr = False
device = cuda

; Sweep objective: the metric named below, with goal "maximize".
; NOTE(review): the dotted section name presumably maps to a nested
; sweep -> metric structure in the loader — confirm.
[sweep.metric]
goal = maximize
name = environment/episode_return

; Sweep search range for batch_size: 65536 (2^16) to 524288 (2^19).
; The [train] value of 131072 lies inside this range.
; NOTE(review): "uniform" over integer bounds may yield non-integer samples
; in some sweep tools; confirm the consumer rounds/casts before use.
[sweep.parameters.train.parameters.batch_size]
distribution = uniform
min = 65536
max = 524288

; Sweep search range for minibatch_size: 8192 (2^13) to 65536 (2^16).
; The [train] value of 32768 lies inside this range.
; The upper bound here (65536) equals the lower bound of the batch_size
; range, so a sampled minibatch_size never exceeds a sampled batch_size.
; NOTE(review): nothing in this file guarantees that a sampled batch_size is
; a multiple of the sampled minibatch_size — verify whether the trainer
; requires that.
[sweep.parameters.train.parameters.minibatch_size]
distribution = uniform
min = 8192
max = 65536
