; Base settings: the package, environment, policy and RNN names for this run.
; NOTE(review): how these names are resolved is decided by the code that
; loads this file, which is not visible here - confirm against the loader.
[base]
package = ocean
; Keep this value free of trailing whitespace: a parser that does not strip
; values would otherwise read the name with a trailing space.
env_name = puffer_pong
policy_name = Policy
rnn_name = Recurrent

; Environment settings.
; NOTE(review): [train] also defines a num_envs key (set to 1 there). The two
; keys live in different sections and are presumably consumed separately -
; confirm which one controls what before changing either.
[env]
num_envs = 1024

; Training settings.
; NOTE(review): exact semantics and units of each key are defined by the
; training code that reads this file, not by this file - confirm there
; before retuning.
[train]
; Default value; the section
; [sweep.parameters.train.parameters.total_timesteps] gives a sweep range of
; 10_000_000 to 30_000_000 around it. Underscores are presumably digit
; separators (20000000) - confirm the loader accepts them.
total_timesteps = 20_000_000
checkpoint_interval = 25
; Distinct from num_envs in [env], which is set to 1024.
num_envs = 1
num_workers = 1
env_batch_size = 1
; As written, batch_size is exactly 16 x minibatch_size (16 x 8192 = 131072).
batch_size = 131072
update_epochs = 3
minibatch_size = 8192
bptt_horizon = 16
ent_coef = 0.004602
gae_lambda = 0.979
gamma = 0.9879
learning_rate = 0.001494
; Boolean written in capitalized form (False); keep the spelling the loader
; expects.
anneal_lr = False
device = cuda
max_grad_norm = 3.592
vf_coef = 0.4122

; Sweep objective: the metric named below, with goal "maximize".
; NOTE(review): the metric name must match what the training run reports -
; verify against the code that logs it.
[sweep.metric]
goal = maximize
name = environment/score

; Sweep range for total_timesteps: uniform distribution between min and max.
; The default in [train] (20_000_000) lies inside this range.
; NOTE(review): the dotted section name presumably maps to a nested sweep
; configuration; whether the bounds are inclusive depends on the sweep tool -
; confirm.
[sweep.parameters.train.parameters.total_timesteps]
distribution = uniform
min = 10_000_000
max = 30_000_000
