# Hyperparameter settings. Every top-level key maps to a mapping holding a
# single `value` entry. What each key means is decided by the consuming code,
# which is not visible here; notes marked NOTE(review) are unconfirmed.

# Step counts, buffer size and number of environments.
steps:
  value: 10000000
buffer_size:
  value: 1000000
initial_steps:
  value: 25000
num_envs:
  value: 128

# Learning-rate and Adam settings.
learning_rate:
  value: 0.00025
learning_rate_decay:
  value: true
adam_eps:
  value: 0.0001

# Update ratio and batch size.
updates_per_step:
  value: 0.03125  # numerically 1/32
batch_size:
  value: 1024

# Backup settings.
n_step:
  value: 32
backup:
  value: offdae
gamma:
  value: 0.99

# NOTE(review): a negative norm presumably disables clipping - confirm.
max_grad_norm:
  value: -1.0

# Network identifier (plain string, resolved by the consumer).
net:
  value: MinAtarCNN4X

# Target settings.
target_update_steps:
  value: 1
target_update_tau:
  value: 0.999
target_bootstrap:
  value: true

beta_kl:
  value: 3.0

# NOTE(review): zero presumably turns periodic evaluation off - confirm.
steps_per_eval:
  value: 0.0

# Model settings.
learning_rate_model:
  value: 0.00025
quantizer:
  value: exact
beta_entropy_model:
  value: 0.0001
z_dim:
  value: 16
