; Names that select what this configuration applies to: a package, an
; environment, a policy and an RNN wrapper.
; NOTE(review): how these names are resolved (module lookup, registry, ...) is
; decided by the loader that reads this file, not visible here -- confirm
; before renaming any value.
[base]
package = ocean
env_name = puffer_rware
policy_name = Policy
rnn_name = Recurrent

; Environment-level settings.
; num_envs is also defined under [train] with a different value (1); the two
; keys live in separate sections and are therefore independent settings, not
; duplicates.
; NOTE(review): presumably this one is passed to the environment itself
; while [train] num_envs is used by the training loop -- verify against the
; consumer.
[env]
num_envs = 1024

; Training-run settings and optimisation hyperparameters.
; NOTE(review): units and exact semantics of each key are defined by the
; trainer that consumes this file, not by this file -- confirm there before
; changing values.
[train]
total_timesteps = 175000000
checkpoint_interval = 25
; Distinct from [env] num_envs (1024); see that section.
num_envs = 1
num_workers = 1
env_batch_size = 1
; batch_size is exactly 4 x minibatch_size (131072 / 32768).
batch_size = 131072
update_epochs = 1
minibatch_size = 32768
bptt_horizon = 8
; NOTE(review): capitalisation kept as-is; the consumer may parse values as
; Python literals, in which case lowercase "false" would not be accepted --
; confirm before normalising.
anneal_lr = False
ent_coef = 0.019885424670094166
device = cuda
; The long-decimal values below look like the output of an automated
; hyperparameter search -- presumably not hand-tuned; verify before rounding.
learning_rate = 0.0018129721882644975
gamma = 0.9543211781474217
gae_lambda = 0.8297991396183212
vf_coef = 0.3974834958825928
clip_coef = 0.1
vf_clip_coef = 0.1



