; Top-level run selection for this configuration.
; NOTE(review): presumably these keys pick the environment package, the
; environment within it, the vectorization backend, and the recurrent wrapper;
; the loader that consumes them is not visible here - confirm before renaming.
[base]
package = ocean
env_name = puffer_snake
vec = multiprocessing
rnn_name = Recurrent

; Options for the environment itself.
[env]
; NOTE(review): presumably the size of the agent's observation window;
; units and exact meaning are defined by the environment - TODO confirm.
vision = 5

; Training hyperparameters.
; NOTE(review): the long-decimal float values in this section look
; machine-generated (e.g. the result of a hyperparameter search) - confirm
; their origin before hand-editing them.
[train]
; Underscores act as digit separators here (200 million). This relies on the
; consumer accepting Python-style numeric literals - TODO confirm.
total_timesteps = 200_000_000
; Environment / worker parallelism.
num_envs = 2
num_workers = 2
env_batch_size = 1
; batch_size is an exact multiple of minibatch_size (131072 / 32768 = 4), and
; minibatch_size is an exact multiple of bptt_horizon (32768 / 16 = 2048).
; NOTE(review): presumably the trainer requires this divisibility - keep it
; when changing any of the three values.
batch_size = 131072
update_epochs = 1
minibatch_size = 32768
bptt_horizon = 16
; Capitalized boolean; presumably parsed as a Python literal - TODO confirm.
anneal_lr = False
; Optimization coefficients.
gae_lambda = 0.9776227170639571
gamma = 0.8567482546637853
clip_coef = 0.011102333784435113
vf_coef = 0.3403069830175013
vf_clip_coef = 0.26475190539131727
max_grad_norm = 0.8660179376602173
ent_coef = 0.01376980586465873
learning_rate = 0.002064722899262613
; NOTE(review): the unit of this interval (updates, epochs, or steps) is not
; shown in this file - confirm against the trainer.
checkpoint_interval = 1000
; NOTE(review): presumably requires a CUDA-capable GPU; the set of accepted
; device values is defined by the trainer - confirm.
device = cuda

; Objective for hyperparameter sweeps: maximize the metric named below.
; NOTE(review): generic INI parsers treat the dotted section name as one flat
; name; any "sweep" -> "metric" nesting is a convention of the loader - confirm.
[sweep.metric]
goal = maximize
name = environment/reward
