# Configuration for `src.algorithms.SAC`.
# NOTE(review): `_target_` looks like a Hydra-style instantiation target, so
# the remaining keys are presumably forwarded to that class as keyword
# arguments -- confirm against its signature.
_target_: src.algorithms.SAC

capture_video: false

# Run length, parallelism and buffer sizing.
total_timesteps: 500000
num_envs: 8
buffer_size: 500000  # same value as total_timesteps
gamma: 0.99
tau: 0.005
batch_size: 256
learning_starts: 5000

# Learning rates are written with an explicit decimal point: YAML 1.1 loaders
# (e.g. plain PyYAML) only resolve exponent notation as a float when the
# mantissa contains a ".", otherwise the value is loaded as a string.
policy_lr: 3.0e-4
q_lr: 1.0e-3

policy_frequency: 2
target_network_frequency: 1

# NOTE(review): with autotune enabled, `alpha` is presumably only a starting
# or fallback value -- verify how the consumer uses it.
alpha: 0.2
autotune: true  # canonical lowercase boolean, consistent with capture_video
n_eval_episodes: 48