# Hyperparameter set for the configuration named "SoftM2TD3".
#
# Floats in scientific notation are written with an explicit decimal point and
# a signed exponent (e.g. 1.0e+5, not 1e5). YAML 1.1 loaders such as PyYAML
# only resolve that form as a float; the bare form is loaded as the string
# "1e5". The numeric values themselves are unchanged.
name: SoftM2TD3

# Optimisation.
# NOTE(review): p_/q_/ho_ presumably stand for policy, Q-function and
# hat-omega learning rates -- confirm against the consuming code.
batch_size: 100
p_lr: 3.0e-4
q_lr: 3.0e-4
ho_lr: 3.0e-4

# Step counts and buffer size. These are floats, as in the original file;
# NOTE(review): the consumer presumably casts them to int -- confirm.
start_steps: 1.0e+5
max_steps: 2.0e+6
replay_size: 1.0e+6

# Policy noise settings.
policy_std_rate: 0.1
policy_noise_rate: 0.2
noise_clip_policy_rate: 0.5

# Omega noise settings.
# omega_std_rate is kept as an integer literal so its loaded type is unchanged.
omega_std_rate: 1
min_omega_std_rate: 0.1
omega_noise_rate: 0.2
noise_clip_omega_rate: 0.5

# Update rule settings.
# NOTE(review): names suggest discount factor, delayed policy-update period
# and soft target-update coefficient -- confirm against the consuming code.
gamma: 0.99
policy_freq: 2
polyak: 5.0e-3

# Hat-omega settings.
# NOTE(review): the exact semantics of these thresholds and restart flags are
# not visible from this file -- see the consuming code.
minimum_prob: 5.0e-2
hatomega_distance: 0.1
restart_distance: true
restart_probability: true
