# Run setup: RNG seed, compute device, environment id, and total step budget.
seed: 42
device: 'cuda'
env_id: 'OptimalTrap-v0'
num_train_steps: 200000

# Replay buffer sizing and the minibatch size used per update.
# NOTE(review): buffer_size is larger than num_train_steps; presumably the
# buffer never evicts within a single run -- confirm this is intended.
buffer_size: 1000000
batch_size: 256

# Network architecture.
# NOTE(review): presumably one entry per hidden layer, shared by actor and
# critic -- verify against the model-building code.
hidden_dims:
  - 256
  - 256

# Settings common to off-policy RL algorithms.
gamma: 0.99
tau: 0.005  # coefficient of the soft update applied to the target network

# SAC-specific optimisation settings.
# The learning rates keep a decimal point and a signed exponent so that
# YAML 1.1 loaders (e.g. PyYAML) resolve them as floats, not strings.
actor_lr: 3.0e-4
critic_lr: 3.0e-4
alpha_lr: 3.0e-4
# The entropy target is derived as: target_entropy = -scale * action_dim
target_entropy_scale: 1.0
# NOTE(review): 0.0 presumably turns gradient clipping off -- confirm in the
# training code before relying on it.
grad_clip_norm: 0.0
reward_scale: 1.0

# Training-loop schedule (all values are step counts).
# NOTE(review): start_steps / update_after presumably gate random exploration
# and the first gradient update respectively -- verify against the trainer.
start_steps: 1000
update_after: 1000
update_every: 1
eval_every: 10000
max_episode_steps: 500
save_every: 0  # a value of 0 turns checkpoint saving off
