; Top-level defaults that select what to run (package, environment, policy, rnn).
; NOTE(review): INI parsers store `None` as plain text; presumably the loader
; converts it to a null value and these keys are overridden per environment -
; confirm.
[base]
package = None
env_name = None
vec = native
policy_name = Policy
rnn_name = None
; NOTE(review): the unit is not stated here; 3600 is presumably seconds
; (one hour) - confirm.
max_suggestion_cost = 3600

; The [env], [policy] and [rnn] sections are declared but empty in this file.
; NOTE(review): presumably placeholders whose keys are supplied by
; environment-specific configuration or the command line - confirm.
[env]

[policy]

[rnn]

; Default training-run settings.
; NOTE(review): the key names (gamma, gae_lambda, clip_coef, vf_coef, ent_coef,
; target_kl) follow common PPO naming - presumably read by a PPO-style trainer;
; confirm against the loader.
[train]
seed = 1
torch_deterministic = True
cpu_offload = False
; NOTE(review): presumably a torch device string; hosts without CUDA would need
; to override this - confirm.
device = cuda
; Underscore digit grouping is valid for Python integer parsing; other INI
; consumers may not accept it.
total_timesteps = 10_000_000
learning_rate = 2.5e-4
anneal_lr = True
gamma = 0.99
gae_lambda = 0.95
update_epochs = 4
norm_adv = True
clip_coef = 0.1
clip_vloss = True
vf_coef = 0.5
vf_clip_coef = 0.1
max_grad_norm = 0.5
ent_coef = 0.01
; NOTE(review): `None` presumably disables the KL-based limit - confirm.
target_kl = None

; Environment vectorization, data layout and checkpointing settings.
num_envs = 8
num_workers = 8
; NOTE(review): `None` presumably lets the loader choose a default - confirm.
env_batch_size = None
zero_copy = True
data_dir = experiments
; NOTE(review): the unit (updates, epochs or steps) is not stated here - confirm.
checkpoint_interval = 200
; batch_size is an exact multiple of minibatch_size (1024 = 2 x 512).
batch_size = 1024
minibatch_size = 512
bptt_horizon = 16
compile = False
; NOTE(review): `reduce-overhead` matches a torch.compile mode name; presumably
; only consulted when compile = True - confirm.
compile_mode = reduce-overhead

; Hyperparameter sweep settings.
; INI sections do not nest: the dotted [sweep.*] section names in this file are
; flat names to the parser, so any hierarchy must be rebuilt by the loader.
; NOTE(review): the method / metric / parameters layout resembles the Weights &
; Biases sweep configuration schema - confirm which sweep backend reads this.
[sweep]
method = bayes
name = sweep

; Objective for the sweep: maximize the metric named below.
; NOTE(review): the metric name must match a key the training run actually
; logs - confirm that `environment/episode_return` is emitted.
[sweep.metric]
goal = maximize
name = environment/episode_return

; Search space for learning_rate: log_uniform_values between 1e-5 and 1e-1.
; The [train] default (2.5e-4) lies inside this range.
[sweep.parameters.train.parameters.learning_rate]
distribution = log_uniform_values
min = 1e-5
max = 1e-1

; Search space for gamma: uniform between 0.0 and 1.0.
; NOTE(review): this range is far wider than the [train] default (0.99) and
; includes both endpoints; uniform sampling will rarely land near 0.99.
; Consider narrowing it - confirm intent.
[sweep.parameters.train.parameters.gamma]
distribution = uniform
min = 0.0
max = 1.0

; Search space for gae_lambda: uniform between 0.0 and 1.0.
; NOTE(review): much wider than the [train] default (0.95) - confirm that the
; full interval is intended.
[sweep.parameters.train.parameters.gae_lambda]
distribution = uniform
min = 0.0
max = 1.0

; Search space for update_epochs: int_uniform between 1 and 4.
; The [train] default (4) equals the upper bound.
; NOTE(review): whether the bounds are inclusive depends on the sweep
; backend - confirm.
[sweep.parameters.train.parameters.update_epochs]
distribution = int_uniform
min = 1
max = 4

; Search space for vf_coef: uniform between 0.0 and 1.0.
; The [train] default (0.5) is the midpoint of this range.
[sweep.parameters.train.parameters.vf_coef]
distribution = uniform
min = 0.0
max = 1.0

; Search space for max_grad_norm: uniform between 0.0 and 10.0.
; The [train] default is 0.5.
; NOTE(review): if this value is used as a gradient-clipping threshold, samples
; at or near the 0.0 lower bound would scale every gradient to roughly zero.
; Consider a small positive minimum - confirm.
[sweep.parameters.train.parameters.max_grad_norm]
distribution = uniform
min = 0.0
max = 10.0

; Search space for ent_coef: log_uniform_values between 1e-5 and 1e-1.
; The [train] default (0.01) lies inside this range.
[sweep.parameters.train.parameters.ent_coef]
distribution = log_uniform_values
min = 1e-5
max = 1e-1

; Search space for bptt_horizon: a discrete set of candidate values.
; The [train] default (16) is one of the candidates.
; INI has no native list type, so the bracketed value is a plain string to the
; parser. NOTE(review): presumably the loader evaluates it as a list literal -
; confirm.
[sweep.parameters.train.parameters.bptt_horizon]
values = [1, 2, 4, 8, 16]
