# Flat configuration mapping of scalar settings; keys are listed in
# alphabetical order.
# NOTE(review): key names such as buffer_size, target_update_interval,
# use_double_q and eps_* suggest a DQN-style reinforcement-learning run, but
# the code that loads this file is not visible here. Every meaning noted
# below is inferred from the key name only — confirm against the consumer.
activation: ReLU
add_interval: 100
approx: nn
buffer_size: 100000
depth: 2
device: cpu
discount: 0.99
# NOTE(review): eps_decay / eps_end / eps_warmup presumably parameterize the
# schedule used by `explore: eps_greedy`; no starting epsilon is set in this
# file, so it presumably comes from a default elsewhere — TODO confirm.
eps_decay: 5000
eps_end: 0.05
eps_warmup: 0
er_coef: 0.0  # zero here; presumably disables this term — TODO confirm
eval_interval: 100
eval_trials: 10
exploit: greedy
explore: eps_greedy
hidden: 128
kl_coef: 0.0  # zero here; presumably disables this term — TODO confirm
logp_clip: -100000000.0  # i.e. -1e8
loss_fn: mse_loss
lr: 0.001
max_tmp: 1.0
minibatch_size: 32
noise_scale: 0.0  # zero here; presumably disables added noise — TODO confirm
num_samples: 4
optimizer: Adam
seed: 0
steps_per_epoch: 100000  # same value as buffer_size in this file
target_update_interval: 100
use_double_q: false
