# FiRL configuration: Hopper on a 12° incline.
#
# Gymnasium environment ID. Switch to `Hopper-v5` if the installed Gymnasium
# version exposes the v5 MuJoCo tasks.
env_name: 'Hopper-v4'

# Training budget
# Total step budget for the run (5M steps).
total_steps: 5000000
# NOTE(review): presumably the per-episode step cap — confirm in the trainer.
max_episode_length: 1000

# Environment modifications
# Slope, in degrees.
incline: 12
# Set to true to evaluate robustness to pushes.
disturbance: false
# Actuator mode; options: null | scale | drop.
actuator_mode: null
# Used when actuator_mode is 'scale'.
actuator_scale: 0.6
# Used when actuator_mode is 'drop'.
failure_step: 100

# FiRL / CVaR
# CVaR level, i.e. the tail fraction.
cvar_alpha: 0.1
# When true, a quantile critic is used for CVaR.
quantile_mode: false
# Term weights: energy (w_e), uphill drift (w_d) and
# lateral friction (w_f).
w_e: 1.0
w_d: 1.0
w_f: 1.0
# Scale applied to the slope-dependent penalty beta(x).
beta_coef: 50.0
# Coefficient of the lateral penalty.
lambda_lat: 1.0

# Optimization
# NOTE(review): presumably the optimizer learning rate — confirm.
# Keep the decimal point and the signed exponent: YAML 1.1 loaders such as
# PyYAML resolve spellings like `3e-4` as a string rather than a float.
lr: 3.0e-4
# NOTE(review): presumably the reward discount factor — confirm.
gamma: 0.99
# Steps collected before each update (approximately a PPO batch); 32768 = 2^15.
# NOTE(review): total_steps (5000000) is not an integer multiple of this
# value; confirm how the trainer handles the final partial batch.
update_interval: 32768
# NOTE(review): units are not shown here — presumably steps; confirm.
log_interval: 10000

# Logging / output
# NOTE(review): the "12" in the directory name appears to mirror `incline`;
# confirm whether it should be renamed when the slope changes.
log_dir: 'logs/hopper12_firl'

# Reproducibility
# Five seeds, 0 through 4.
# NOTE(review): presumably one training run per seed — confirm.
seeds: [0, 1, 2, 3, 4]
