# Flat run configuration: a single top-level mapping of scalar settings,
# with keys listed in alphabetical order.
#
# NOTE(review): the code that loads this file is not visible from here. Every
# remark below about what a key *means* is inferred from its name only and
# must be confirmed against the consumer. Remarks about YAML typing (null,
# bool, string, float) describe how the values parse and are not assumptions.
actor_dropout: null  # explicit null (no value); presumably "no dropout" -- TODO confirm
actor_model: tanh  # plain string
align_steps: 1000000
# NOTE(review): use_automatic_entropy_tuning is true further down, so this may
# be only an initial value or unused -- confirm how the two keys interact.
alpha: 0.2
alpha_lr: 0.0003
alpha_multiplier: 1.0
batch_size: 256
buffer_load: half  # plain string, not a number; accepted values are defined by the consumer
buffer_size: 2000000  # same value as online_buffer_size
critic_layernorm: true
device: cuda  # plain string; presumably a compute-device name -- confirm what else is accepted
discount: 0.99
double_critic: false
end_steps: 100000
end_threshold: 2.0
env: antmaze-umaze-v2  # environment identifier string
eval_freq: 5000  # presumably counted in training steps -- TODO confirm unit
lmbda: 1.0  # NOTE(review): purpose is not evident from the name; check the consumer
load_model: ''  # empty string (not null); presumably "no checkpoint to load" -- TODO confirm
log_freq: 50000  # presumably counted in training steps -- TODO confirm unit
loss_tau: 0.7
n_episodes: 100  # presumably episodes per evaluation -- TODO confirm
# `normalize` and `normalize_reward` are separate switches with different
# values here. NOTE(review): confirm what `normalize` applies to.
normalize: false
normalize_reward: true
offline_alg: CQL  # plain string naming the offline algorithm
online_buffer_size: 2000000  # same value as buffer_size
online_steps: 200000
ope_steps: 500000
orthogonal_init: true
policy_log_std_multiplier: 1.0
policy_lr: 0.0001  # lower than the other learning rates in this file (all 0.0003)
q_n_hidden_layers: 5  # differs from v_n_hidden_layers (3)
qf_lr: 0.0003
refer_with_optimal_pi: true
# NOTE(review): the order in which bias and scale are applied to rewards, and
# how they interact with normalize_reward, is not visible here -- confirm.
reward_bias: -5.0
reward_scale: 10.0
save_dir: ./log  # relative path; what it is resolved against depends on the consumer
sigma_KL_divergence: 0.5  # only key in this file that is not all lower-case
soft_target_update_rate: 0.005
target_update_period: 1
threshold: 0.125  # separate key from end_threshold (2.0)
update_n: 1
use_automatic_entropy_tuning: true
v_n_hidden_layers: 3  # differs from q_n_hidden_layers (5)
vf_lr: 0.0003
warmup_steps: 5000
