# Flat hyperparameter mapping for a single training run.
# Keys are grouped below by apparent purpose. The grouping and the section
# notes are inferred from key names only -- confirm against the code that
# loads this file. Every key and value is unchanged; only the order differs,
# and mapping order carries no meaning in YAML.

# --- Run identity -----------------------------------------------------------
# Dataset/environment id, algorithm tag, and device index for this run.
env_name: halfcheetah-expert-v2
offline_alg: IQL
gpu: 0

# --- Step budget and evaluation cadence -------------------------------------
# NOTE(review): end_steps (500000) is larger than max_train_steps (250000);
# presumably they govern different phases/schedules -- TODO confirm.
max_train_steps: 250000
end_steps: 500000
evaluate_freq: 2500
eval_episodes: 10
update_n: 1

# --- Batching and optimiser -------------------------------------------------
# lr_a / lr_c are presumably the actor and critic learning rates -- verify.
batch_size: 2048
mini_batch_size: 128
K_epochs: 10
is_shuffle: true
lr_a: 3.0e-05
lr_c: 0.0002
use_lr_decay: true
set_adam_eps: true
use_grad_clip: true

# --- Objective coefficients and switches ------------------------------------
# `lamda` is spelled as the consumer expects it; do not "correct" the key.
# Which loss term alpha and tau feed into is not visible from this file.
gamma: 0.99
lamda: 0.95
epsilon: 0.2
entropy_coef: 0.01
alpha: 2.0
tau: 0.005
use_adv_norm: true
is_clip_value: false
is_decay_pi: false
use_auxi: true
refer_with_optimal_pi: true

# --- Network shape and initialisation ---------------------------------------
# The v_* keys presumably size a separate value network -- TODO confirm.
# NOTE(review): std_upper_bound is 0; whether 0 means "disabled" or a literal
# bound depends on the consumer.
hidden_width: 256
depth: 3
v_hidden_width: 256
v_depth: 3
use_orthogonal_init: true
use_tanh: false
std_upper_bound: 0

# --- State / reward normalisation -------------------------------------------
# use_reward_norm is off while use_reward_scaling is on; presumably the two
# are alternatives -- verify against the loader.
use_state_norm: true
use_reward_norm: false
use_reward_scaling: true
r_scale: 1.0
scale_strategy: dynamic
