; Top-level selection: the package and environment to run, plus the names
; of the policy and recurrent-wrapper classes.
; NOTE(review): how these names are resolved depends on the consuming
; loader, which is not part of this file - confirm against it.
[base]
package = gvgai
env_name = zelda
policy_name = Policy
rnn_name = Recurrent

; Default training settings. The [sweep.parameters.train.parameters.*]
; sections in this file define search ranges for total_timesteps,
; batch_size and minibatch_size.
; Booleans are spelled with a capital letter (False) and large integers use
; underscore digit separators (1_000_000). Keep these spellings unless the
; consuming parser is confirmed to accept other forms.
[train]
total_timesteps = 1_000_000
checkpoint_interval = 1000
learning_rate = 0.00024290984560207393
; num_envs (96) is 4 x num_workers (24) and 3 x env_batch_size (32).
; NOTE(review): presumably the loader requires these to divide evenly - verify
; before changing any one of them in isolation.
num_envs = 96
num_workers = 24
env_batch_size = 32
update_epochs = 1
zero_copy = False
bptt_horizon = 16
; batch_size (4096) is 4 x minibatch_size (1024).
batch_size = 4096
minibatch_size = 1024
compile = False
anneal_lr = False
device = cuda

; Sweep objective: maximize the metric named environment/reward.
[sweep.metric]
goal = maximize
name = environment/reward

; Sweep range for train.total_timesteps, drawn with the log_uniform_values
; distribution between min and max.
; NOTE(review): min (500_000_000) is 500x the [train] default of 1_000_000 -
; confirm the default is meant to be a short run and not a typo.
[sweep.parameters.train.parameters.total_timesteps]
distribution = log_uniform_values
min = 500_000_000
max = 10_000_000_000

; Discrete sweep candidates for train.batch_size; each value doubles the
; previous one. The smallest candidate matches the [train] default (4096).
[sweep.parameters.train.parameters.batch_size]
values = [4096, 8192, 16384]

; Discrete sweep candidates for train.minibatch_size. The largest candidate
; (4096) equals the smallest batch_size candidate, so no sampled combination
; has minibatch_size greater than batch_size.
[sweep.parameters.train.parameters.minibatch_size]
values = [512, 1024, 2048, 4096]
