; Base selection: which package, environments, and policy classes this config applies to.
[base]
package = ocean
; Six names in one space-separated value.
; NOTE(review): presumably the loader splits this on whitespace and applies the
; config to each listed environment - confirm against the config reader.
env_name = puffer_bandit puffer_memory puffer_multiagent puffer_password puffer_spaces puffer_stochastic
; NOTE(review): presumably class names looked up inside the package above - confirm.
policy_name = Policy
rnn_name = Recurrent

; Training settings: a short CPU run (50k timesteps, 8 envs, 2 workers).
[train]
; NOTE(review): the underscore digit separator assumes the consumer parses values
; as Python-style numeric literals - confirm before reusing this file elsewhere.
total_timesteps = 50_000
learning_rate = 0.017
num_envs = 8
num_workers = 2
; Equal to num_envs above.
env_batch_size = 8
; batch_size is exactly 8 x minibatch_size (1024 / 128).
batch_size = 1024
minibatch_size = 128
; NOTE(review): presumably the truncated-backprop-through-time length used with
; the recurrent policy named by rnn_name in [base] - confirm.
bptt_horizon = 4
device = cpu

; Sweep range for train.batch_size: 512 to 2048, which brackets the fixed value
; of 1024 set in [train].
; NOTE(review): the dotted section name is presumably expanded into a nested
; sweep config by the loader - confirm.
; NOTE(review): "uniform" may yield non-integer samples for an integer-like
; setting; verify the consumer rounds or quantizes them.
[sweep.parameters.train.parameters.batch_size]
distribution = uniform
min = 512
max = 2048

; Sweep range for train.minibatch_size: 64 to 512, which brackets the fixed
; value of 128 set in [train]. The upper bound (512) equals the lower bound of
; the batch_size sweep, so a sampled minibatch_size never exceeds a sampled
; batch_size.
; NOTE(review): nothing here guarantees the sampled minibatch_size divides the
; sampled batch_size evenly, or that samples are integers - verify how the
; consumer handles that.
[sweep.parameters.train.parameters.minibatch_size]
distribution = uniform
min = 64
max = 512
