# Configuration for SMERL (Structured Maximum Entropy Reinforcement Learning).
# NOTE(review): the target below is the DIAYN class, not a dedicated SMERL
# class; SMERL behaviour is presumably enabled through the "SMERL specific"
# options in this file — confirm against the DIAYN implementation.
_target_: src.algorithms.diayn.DIAYN

# SAC parameters
# NOTE(review): the per-key descriptions follow the usual SAC meaning of each
# name; confirm against the constructor of the class named in `_target_`.
#
# Learning rates are written with an explicit decimal point (3.0e-4, not
# 3e-4): YAML 1.1 loaders such as PyYAML only resolve exponent notation as a
# float when the mantissa contains a ".", and would otherwise return the
# string "3e-4". The numeric value is unchanged.
actor_lr: 3.0e-4  # presumably the actor (policy) learning rate
critic_lr: 3.0e-4  # presumably the critic (Q-function) learning rate
disc_lr: 3.0e-4  # presumably the skill-discriminator learning rate
alpha_lr: 3.0e-4  # presumably the entropy-temperature learning rate
alpha: "auto"  # string sentinel, not a number; presumably enables auto-tuning
gamma: 0.99  # presumably the discount factor
tau: 0.005  # presumably the soft target-update coefficient
batch_size: 256
buffer_size: 1000000
learning_starts: 5000  # presumably steps collected before updates begin
hidden_dims: [128, 128]  # two entries of 128; presumably hidden-layer widths
policy_freq: 2
target_update_freq: 1

# DIAYN specific
# NOTE(review): presumably the number of discrete skills the policy is
# conditioned on — confirm against the DIAYN implementation.
n_skills: 10

# SMERL specific
combined_rewards: true  # Use both environment and diversity rewards
beta: 10  # Weight for the diversity reward (value from the SMERL paper)
# Target environment reward; adjust per environment using SAC's max return.
smerl_threshold: 150
smerl_eps: 0.1  # Margin for threshold
# NOTE(review): presumably toggles use of a prior over skills — confirm.
use_skill_prior: true

# Training parameters
# With 16 parallel envs we can only do 1M iterations (-> 16M steps).
total_iterations: 1000000
# NOTE(review): the *_freq values are presumably counted in iterations —
# confirm. save_freq equals total_iterations.
eval_freq: 200000
n_eval_episodes: 10
save_freq: 1000000
log_freq: 1000
