; Base settings: package, environment name, vectorization mode, and the
; policy / RNN names used by this configuration.
; NOTE(review): how each name is resolved depends on the loader, which is
; not shown in this file — confirm before renaming anything.
[base]
package = ocean
env_name = puffer_nmmo3
vec = multiprocessing
policy_name = NMMO3
rnn_name = NMMO3LSTM

; Training hyperparameters.
; NOTE(review): the long decimal values look machine-generated (presumably
; taken from an earlier sweep) — confirm before hand-editing them.
[train]
; NOTE(review): 107000000000 (1.07e11) lies above the max of the
; total_timesteps sweep range defined later in this file (1e10).
total_timesteps = 107000000000
checkpoint_interval = 1000
learning_rate = 0.0004573146765703167
num_envs = 2
num_workers = 2
env_batch_size = 1
update_epochs = 1
gamma = 0.7647543366891623
gae_lambda = 0.996005622445478
ent_coef = 0.01210084358004069
max_grad_norm = 0.6075578331947327
vf_coef = 0.3979089612467003
bptt_horizon = 16
; batch_size is exactly 8 x minibatch_size (262144 / 32768).
batch_size = 262144
minibatch_size = 32768
; Booleans are spelled capitalized (False) in this file; keep that spelling
; unless the loader is confirmed to accept other forms.
compile = False
anneal_lr = False

; Environment reward settings.
; reward_market has no matching [sweep.parameters.env.parameters.*] section
; in this file, so it is the one reward here with no sweep range declared.
[env]
reward_combat_level = 2.9437930583953857
reward_prof_level = 1.445250153541565
reward_item_level = 1.3669428825378418
reward_market = 0
; The only negative reward value in this section.
reward_death = -2.46451187133789

; Sweep objective: the metric name to optimize and the direction (maximize).
[sweep.metric]
goal = maximize
name = environment/min_comb_prof

; Sweep range for env reward_combat_level: uniform over [0.0, 5.0].
[sweep.parameters.env.parameters.reward_combat_level]
distribution = uniform
min = 0.0
max = 5.0

; Sweep range for env reward_prof_level: uniform over [0.0, 5.0].
[sweep.parameters.env.parameters.reward_prof_level]
distribution = uniform
min = 0.0
max = 5.0

; Sweep range for env reward_item_level: uniform over [0.0, 5.0].
[sweep.parameters.env.parameters.reward_item_level]
distribution = uniform
min = 0.0
max = 5.0

; Sweep range for env reward_death: uniform over [-5.0, 0.0].
; The last component of the section name must match a key in [env]; the
; key defined there is reward_death.
[sweep.parameters.env.parameters.reward_death]
distribution = uniform
min = -5.0
max = 0.0

; Sweep range for train total_timesteps: uniform over [1e9, 1e10].
; Bounds are written as plain digits (no "_" separators), the same form
; used for total_timesteps in [train], so any integer parser accepts them.
[sweep.parameters.train.parameters.total_timesteps]
distribution = uniform
min = 1000000000
max = 10000000000

