---
# Hyperparameter configuration: one flat mapping of scalar settings.
# The stanza headings below are a reviewer's grouping inferred from the key
# names only; the code that consumes this file is not visible here, so
# confirm them against the consumer. Key order carries no meaning.

# Run setup and evaluation cadence (presumed).
seed: 0
device: cuda
steps_per_epoch: 1000000
eval_interval: 1000
eval_trials: 10

# Function approximator (presumed).
approx: nn
activation: ReLU
depth: 2
hidden: 128

# Optimisation and target updates (presumed).
optimizer: RMSprop
lr: 0.00025
loss_fn: smooth_l1_loss
minibatch_size: 32
discount: 0.99
target_update_interval: 1000
use_double_q: false

# Exploration / action selection (presumed).
explore: eps_greedy
exploit: greedy
eps_warmup: 5000
eps_decay: 100000
eps_end: 0.1

# Buffer (presumed).
# NOTE(review): written as a float (100000.0) although eps_decay above is
# the integer 100000 -- confirm the consumer expects or casts a float here.
buffer_size: 100000.0

# Other settings; purpose not evident from the names alone -- TODO confirm.
add_interval: 10
er_coef: 0.0
kl_coef: 0.0
logp_clip: -1
max_tmp: 1.0
noise_scale: 0.0
num_samples: 4
