; Top-level selection of what to run: the environment package and name,
; the vectorization backend, and the policy / recurrent wrapper names.
; NOTE(review): these names are presumably resolved by the training launcher;
; confirm against the code that loads this file.
[base]
package = ocean
env_name = puffer_connect4
vec = multiprocessing
policy_name = Policy
rnn_name = Recurrent

; Environment settings.
; This num_envs is a separate key from the num_envs in [train]; presumably it
; is the number of game instances inside each environment object — TODO confirm.
[env]
num_envs = 512

; Training hyperparameters.
[train]
; Starting budget; also the lower bound of the total_timesteps sweep range below.
total_timesteps = 10_000_000
checkpoint_interval = 50
; Vectorization layout: 8 environments across 8 workers.
num_envs = 8
num_workers = 8
env_batch_size = 1
; 32768 / 8192 = 4 minibatches per batch.
batch_size = 32768
update_epochs = 3
minibatch_size = 8192
bptt_horizon = 8
max_grad_norm = 0.05481921136379242
learning_rate = 0.00859505079095484
; Entropy coefficient. This key was previously spelled "env_coef", which does
; not match the *_coef naming of the loss weights (see vf_coef), so the value
; was likely being ignored. NOTE(review): confirm the trainer reads "ent_coef".
ent_coef = 0.02805873082160289
gae_lambda = 0.2930961059311335
gamma = 0.978843792530436
vf_coef = 0.960235238467549
anneal_lr = False
device = cuda

; Sweep objective: maximize the metric named environment/score.
[sweep.metric]
goal = maximize
name = environment/score

; Sweep search space for train.total_timesteps: uniform between 10M and 100M.
[sweep.parameters.train.parameters.total_timesteps]
distribution = uniform
min = 10_000_000
max = 100_000_000
