; Run selection: names the package and environment to load, the vectorization
; mode, and the policy / recurrent-wrapper names.
; NOTE(review): how each name is resolved (module lookup, class lookup, ...) is
; decided by the loader, which is not visible in this file - confirm there.
[base]
package = ocean
env_name = puffer_moba
; presumably selects the environment vectorization backend - TODO confirm
; the set of accepted values
vec = multiprocessing
policy_name = MOBA
rnn_name = Recurrent

; Training hyperparameters.
; NOTE(review): the many-digit float values look like they were copied from a
; hyperparameter search result rather than chosen by hand - confirm before
; rounding or "tidying" them.
[train]
; Underscores are digit separators; the loader must accept them (TODO confirm).
; This default lies inside the range 200_000_000..2_000_000_000 declared in
; [sweep.parameters.train.parameters.total_timesteps].
total_timesteps = 250_000_000
; NOTE(review): units are not stated here (updates? epochs?) - confirm
checkpoint_interval = 50
learning_rate = 0.0033394116514234556
; NOTE(review): a separate num_envs = 100 exists under [env]; the two keys live
; in different sections and are presumably read by different components.
num_envs = 8
num_workers = 8
; half of num_envs above
env_batch_size = 4
update_epochs = 3
; presumably the reward discount factor and the GAE lambda - verify against
; the trainer
gamma = 0.9885385317249888
gae_lambda = 0.8723856970733372
; presumably the policy and value-function clipping ranges - verify
clip_coef = 0.1
vf_clip_coef = 0.1
; presumably the loss weights of the value and entropy terms - verify
vf_coef = 0.0957932474946704
ent_coef = 0.00006591576198600687
max_grad_norm = 1.8240838050842283
; presumably the sequence length used for the recurrent policy named by
; rnn_name in [base] - TODO confirm
bptt_horizon = 16
; batch_size is exactly 4 x minibatch_size (1024000 / 256000 = 4)
batch_size = 1024000
minibatch_size = 256000
compile = False
anneal_lr = False
device = cuda

; Environment settings: four reward_* values plus an environment count.
; reward_death, reward_xp and reward_tower each have a matching
; [sweep.parameters.env.parameters.*] section in this file; reward_distance and
; num_envs do not.
[env]
; equals the upper bound of its sweep range (-5.0 .. 0)
reward_death = 0.0
; inside its sweep range (0.0 .. 0.05)
reward_xp = 0.0016926873475313188
; no sweep section for this key is visible in this file
reward_distance = 0.0
; inside its sweep range (0.0 .. 5.0)
reward_tower = 4.525112152099609
; NOTE(review): not the same key as num_envs = 8 under [train]; presumably the
; number of environments simulated per environment instance - TODO confirm
num_envs = 100

; Sweep objective: the metric named below is to be maximized.
; NOTE(review): the metric name must match what the training run actually logs;
; that logging code is not visible here - confirm the exact spelling.
[sweep.metric]
goal = maximize
name = environment/radiant_towers_alive

; Sweep range for reward_death in [env]: uniform over -5.0 .. 0.
; NOTE(review): the dotted section name presumably maps onto a nested sweep
; configuration - confirm against the loader.
[sweep.parameters.env.parameters.reward_death]
distribution = uniform
min = -5.0
; NOTE(review): written without a decimal point, unlike min above; if the
; loader infers types from the literal this may load as an integer - confirm
; that is harmless.
max = 0

; Sweep range for reward_xp in [env]: uniform over 0.0 .. 0.05.
; The [env] default (0.0016926873475313188) lies near the low end of this range.
[sweep.parameters.env.parameters.reward_xp]
distribution = uniform
min = 0.0
max = 0.05

; Sweep range for reward_tower in [env]: uniform over 0.0 .. 5.0.
; The [env] default (4.525112152099609) lies near the high end of this range.
[sweep.parameters.env.parameters.reward_tower]
distribution = uniform
min = 0.0
max = 5.0
 
; Sweep range for total_timesteps in [train]: uniform over 200 million ..
; 2 billion. The [train] default (250_000_000) lies near the low end.
; Underscores are digit separators; the loader must accept them (TODO confirm).
; NOTE(review): a uniform draw over this range is not necessarily a whole
; number - confirm the sweep or trainer converts it to an integer step count.
[sweep.parameters.train.parameters.total_timesteps]
distribution = uniform
min = 200_000_000
max = 2_000_000_000
