# RAFMAC (Risk-Averse Flow-Matching Actor-Critic) Configuration
# Uses flow-matching policy with IQN distributional critics

environments:
    # Settings for the halfcheetah-medium-expert-v2 environment entry.
    halfcheetah-medium-expert-v2:
        # Learning rates carry an explicit decimal point: YAML 1.1 loaders
        # (e.g. PyYAML) resolve a bare `3e-4` as a string, not a float.
        lr_actor: 3.0e-4
        lr_critic: 3.0e-4
        eta: 1000
        tau: 0.005
        max_q_backup: false
        grad_norm: 0.6
        top_k: 0
        risk_distortion: "cvar"
        # NOTE(review): every environment in this file uses 10 flow steps, so
        # this is not an expert-specific speed-up -- confirm whether a smaller
        # value was intended here.
        flow_steps: 10
        emb_dim: 128
        alpha: 0.1
        q_agg: "mean"
        normalize_q_loss: true
        bandit: true
        # Risky environment parameters (velocity-related keys).
        max_vel: 10.0
        prob_vel_penal: 0.05
        cost_vel: -70.0

    # Settings for the halfcheetah-medium-replay-v2 environment entry.
    # NOTE(review): lr_actor, lr_critic, eta, flow_steps and q_agg hold the same
    # values as the other environments in this file. Earlier remarks labelled
    # them as replay-specific ("lower lr", "higher risk weight", "more steps",
    # "conservative"), which the values do not reflect -- confirm whether
    # replay-specific tuning was intended.
    halfcheetah-medium-replay-v2:
        # Learning rates carry an explicit decimal point: YAML 1.1 loaders
        # (e.g. PyYAML) resolve a bare `3e-4` as a string, not a float.
        lr_actor: 3.0e-4
        lr_critic: 3.0e-4
        eta: 1000
        tau: 0.005
        max_q_backup: false
        grad_norm: 0.8
        top_k: 0
        risk_distortion: "cvar"
        flow_steps: 10
        emb_dim: 128
        alpha: 0.1
        q_agg: "mean"
        normalize_q_loss: true
        bandit: true
        # Risky environment parameters (velocity-related keys).
        max_vel: 5.0
        prob_vel_penal: 0.05
        cost_vel: -60.0

    # Settings for the hopper-medium-expert-v2 environment entry.
    hopper-medium-expert-v2:
        # Learning rates carry an explicit decimal point: YAML 1.1 loaders
        # (e.g. PyYAML) resolve a bare `3e-4` as a string, not a float.
        lr_actor: 3.0e-4
        lr_critic: 3.0e-4
        eta: 1000
        tau: 0.005
        max_q_backup: false
        grad_norm: 0.6
        top_k: 0
        risk_distortion: "cvar"
        flow_steps: 10
        emb_dim: 128
        alpha: 0.1
        q_agg: "mean"
        normalize_q_loss: true
        bandit: true
        # Risky environment parameters (pose-related keys).
        prob_pose_penal: 0.1
        cost_pose: -50.0
        healthy_angle_range: [-0.1, 0.1]
        done_if_exceed_factor: 2.0

    # Settings for the hopper-medium-replay-v2 environment entry.
    hopper-medium-replay-v2:
        # Learning rates carry an explicit decimal point: YAML 1.1 loaders
        # (e.g. PyYAML) resolve a bare `3e-4` as a string, not a float.
        lr_actor: 3.0e-4
        lr_critic: 3.0e-4
        eta: 1000
        tau: 0.005
        max_q_backup: false
        grad_norm: 0.8
        top_k: 0
        risk_distortion: "cvar"
        flow_steps: 10
        emb_dim: 128
        alpha: 0.1
        q_agg: "mean"
        normalize_q_loss: true
        bandit: true
        # Risky environment parameters (pose-related keys).
        prob_pose_penal: 0.1
        cost_pose: -50.0
        healthy_angle_range: [-0.1, 0.1]
        # NOTE(review): hopper-medium-expert-v2 also sets
        # `done_if_exceed_factor: 2.0`; it is absent here, so the consumer
        # presumably falls back to its own default -- confirm this is intended.

    # Settings for the walker2d-medium-expert-v2 environment entry.
    walker2d-medium-expert-v2:
        # Learning rates carry an explicit decimal point: YAML 1.1 loaders
        # (e.g. PyYAML) resolve a bare `3e-4` as a string, not a float.
        lr_actor: 3.0e-4
        lr_critic: 3.0e-4
        eta: 1000
        tau: 0.005
        max_q_backup: false
        # NOTE(review): this line was previously tagged "! : missing"; the
        # meaning of that marker is unclear -- confirm the value.
        grad_norm: 0.6
        # NOTE(review): also previously tagged "! : missing". This is the only
        # environment in the file with a non-zero top_k -- confirm it is
        # intentional.
        top_k: 1
        risk_distortion: "cvar"
        flow_steps: 10
        emb_dim: 128
        alpha: 0.1
        q_agg: "mean"
        normalize_q_loss: true
        bandit: true
        # Risky environment parameters (pose-related keys).
        prob_pose_penal: 0.1
        cost_pose: -30.0
        healthy_angle_range: [-0.5, 0.5]
        done_if_exceed_factor: 2.0

    # Settings for the walker2d-medium-replay-v2 environment entry.
    walker2d-medium-replay-v2:
        # Learning rates carry an explicit decimal point: YAML 1.1 loaders
        # (e.g. PyYAML) resolve a bare `3e-4` as a string, not a float.
        lr_actor: 3.0e-4
        lr_critic: 3.0e-4
        eta: 1000
        tau: 0.005
        max_q_backup: false
        grad_norm: 0.8
        top_k: 0
        risk_distortion: "cvar"
        flow_steps: 10
        emb_dim: 128
        alpha: 0.1
        q_agg: "mean"
        normalize_q_loss: true
        bandit: true
        # Risky environment parameters (pose-related keys).
        prob_pose_penal: 0.1
        cost_pose: -30.0
        healthy_angle_range: [-0.5, 0.5]
        # NOTE(review): walker2d-medium-expert-v2 also sets
        # `done_if_exceed_factor: 2.0`; it is absent here, so the consumer
        # presumably falls back to its own default -- confirm this is intended.
