---
# Flat run configuration: 46 scalar settings in a single top-level mapping.
# NOTE(review): keys are grouped below by name pattern only; the code that
# reads this file is not visible here, so every statement about a key's
# purpose is an assumption to confirm against the consumer.

# --- Run setup -------------------------------------------------------------
env: halfcheetah-expert-v2
offline_alg: CQL
device: cuda
save_dir: ./log
load_model: ''  # explicit empty string; presumably "nothing to load" - confirm

# --- Step counts and thresholds (keys named *_steps / *threshold) -----------
warmup_steps: 1000
align_steps: 500000
ope_steps: 500000
online_steps: 100000
end_steps: 100000
end_threshold: 0.125
threshold: 0.005
update_n: 1

# --- Evaluation and logging cadence ----------------------------------------
# NOTE(review): units (steps vs. episodes) are not stated in this file.
eval_freq: 1000
log_freq: 5000
n_episodes: 10

# --- Buffers, batching and data scaling ------------------------------------
batch_size: 256
buffer_load: half  # plain string value, not a number
buffer_size: 2000000
online_buffer_size: 2000000  # same value as buffer_size
normalize: true
normalize_reward: false
reward_scale: 1.0
reward_bias: 0.0

# --- Network structure ------------------------------------------------------
actor_model: tanh
actor_dropout: null  # explicit null; presumably dropout disabled - confirm
policy_log_std_multiplier: 1.0
critic_layernorm: true
double_critic: true
orthogonal_init: true
q_n_hidden_layers: 3
v_n_hidden_layers: 3

# --- Learning rates (keys ending in _lr) -----------------------------------
policy_lr: 0.0001
qf_lr: 0.0003
vf_lr: 0.0003
alpha_lr: 0.0003

# --- Algorithm coefficients -------------------------------------------------
discount: 0.99
alpha: 0.5
alpha_multiplier: 1.0
use_automatic_entropy_tuning: true
lmbda: 1.0  # key is spelled "lmbda" in the original; keep it byte-identical
loss_tau: 0.7
sigma_KL_divergence: 0.5  # only mixed-case key in the file; keep as-is
refer_with_optimal_pi: true
soft_target_update_rate: 0.005
target_update_period: 1
