# Soft Actor-Critic (SAC) algorithm configuration.
# `_target_` is a dotted import path to the algorithm class — presumably
# resolved by a Hydra-style instantiate() call; confirm against the loader.
name: sac
_target_: benchrl.algorithms.sac.SAC

# Network bindings for SAC: one actor plus two (twin) critics.
# Each value presumably names a network config defined elsewhere (not visible
# in this file); both critics are bound to the same `mlp2` definition.
model_bindings:
  actor_network: double_headed_mlp
  critic1_network: mlp2
  critic2_network: mlp2

# Training hyperparameters (optimized for 8 parallel envs)
# NOTE(review): the units noted below (env steps vs. transitions vs. updates)
# are assumed from the key names — confirm against the SAC implementation.
total_timesteps: 1000000  # Presumably the total environment-step budget
buffer_size: 1000000  # Presumably the replay-buffer capacity
learning_starts: 5000  # Presumably steps collected before updates begin
batch_size: 256
train_freq: 1  # Train every step
gradient_steps: 1  # Presumably gradient updates per training call
target_update_interval: 1
policy_frequency: 2  # Update actor every 2 steps (TD3 style delay)

# Target-network and discounting parameters
tau: 0.005  # Presumably the soft (Polyak) target-update coefficient — confirm
gamma: 0.99  # Presumably the reward discount factor — confirm

# Learning rates
# Written with an explicit decimal point: YAML 1.1 loaders such as PyYAML
# resolve dot-less exponent forms like `3e-4` as strings, not floats.
actor_lr: 3.0e-4
critic_lr: 1.0e-3

# Entropy coefficient: the string `auto` for automatic tuning, or a float value
ent_coeff: auto
target_entropy: null  # If null, will use -dim(action_space)

# Performance options
# NOTE(review): presumably toggles model compilation (e.g. torch.compile) —
# confirm against the SAC implementation.
compile: false