# RADAC (Risk-Aware Distributional Actor-Critic) Configuration
# Uses diffusion-based policy with IQN distributional critics

environments:
    # Hyperparameters for the halfcheetah-medium-expert-v2 dataset.
    # NOTE(review): unlike the other entries in this file, this one sets no
    # q_clip_range, lambda_bc or eta_warmup_steps - confirm that the
    # consumer's defaults are what is intended here.
    halfcheetah-medium-expert-v2:
        # Learning rates carry an explicit decimal point: YAML 1.1 loaders
        # (e.g. PyYAML) resolve a dot-less form such as 3e-4 to a string,
        # not a float.
        lr_actor: 3.0e-4
        lr_critic: 5.0e-5
        eta: 0.05
        tau: 0.005
        max_q_backup: false
        grad_norm: 5.0
        top_k: 0
        n_quantiles: 32
        emb_dim: 128
        lr_decay: false
        alpha: 0.1  # alpha_cvar
        # Risky environment parameters
        max_vel: 10.0
        prob_vel_penal: 0.05
        cost_vel: -70.0

    # Hyperparameters for the halfcheetah-medium-replay-v2 dataset.
    halfcheetah-medium-replay-v2:
        # Learning rates carry an explicit decimal point: YAML 1.1 loaders
        # (e.g. PyYAML) resolve a dot-less form such as 3e-4 to a string,
        # not a float.
        lr_actor: 3.0e-4
        lr_critic: 5.0e-5
        eta: 0.02
        tau: 0.005
        max_q_backup: false
        grad_norm: 5.0
        top_k: 0
        n_quantiles: 16
        emb_dim: 64
        lr_decay: false
        alpha: 0.1  # alpha_cvar
        # [lower, upper] pair; presumably bounds applied to Q-values -
        # verify against the consumer.
        q_clip_range: [-300.0, 300.0]
        lambda_bc: 1.0
        eta_warmup_steps: 200000
        # Risky environment parameters
        max_vel: 5.0
        prob_vel_penal: 0.05
        cost_vel: -60.0

    # Hyperparameters for the hopper-medium-expert-v2 dataset.
    hopper-medium-expert-v2:
        # Learning rates carry an explicit decimal point: YAML 1.1 loaders
        # (e.g. PyYAML) resolve a dot-less form such as 3e-4 to a string,
        # not a float.
        lr_actor: 3.0e-4
        lr_critic: 1.0e-5
        eta: 0.05
        tau: 0.005
        max_q_backup: false
        grad_norm: 5.0
        top_k: 0
        n_quantiles: 16
        emb_dim: 64
        lr_decay: false
        alpha: 0.1  # alpha_cvar
        # [lower, upper] pair; presumably bounds applied to Q-values -
        # verify against the consumer.
        q_clip_range: [-300.0, 300.0]
        lambda_bc: 1.0
        eta_warmup_steps: 300000
        # Risky environment parameters
        prob_pose_penal: 0.1
        cost_pose: -50.0
        healthy_angle_range: [-0.1, 0.1]
        done_if_exceed_factor: 2.0  # TODO: Kai - is this right?

    # Hyperparameters for the hopper-medium-replay-v2 dataset.
    hopper-medium-replay-v2:
        # Learning rates carry an explicit decimal point: YAML 1.1 loaders
        # (e.g. PyYAML) resolve a dot-less form such as 3e-4 to a string,
        # not a float.
        lr_actor: 3.0e-4
        lr_critic: 1.0e-5
        eta: 0.05
        tau: 0.002
        max_q_backup: false
        grad_norm: 5.0
        top_k: 0
        n_quantiles: 16
        emb_dim: 64
        lr_decay: false
        alpha: 0.1  # alpha_cvar
        # [lower, upper] pair; presumably bounds applied to Q-values -
        # verify against the consumer.
        q_clip_range: [-150.0, 150.0]
        lambda_bc: 1.5
        eta_warmup_steps: 300000
        # Risky environment parameters
        # NOTE(review): no done_if_exceed_factor is set here, although
        # hopper-medium-expert-v2 sets one - confirm this is intentional.
        prob_pose_penal: 0.1
        cost_pose: -50.0
        healthy_angle_range: [-0.1, 0.1]

    # Hyperparameters for the walker2d-medium-expert-v2 dataset.
    walker2d-medium-expert-v2:
        # Learning rates carry an explicit decimal point: YAML 1.1 loaders
        # (e.g. PyYAML) resolve a dot-less form such as 3e-4 to a string,
        # not a float.
        lr_actor: 3.0e-4
        lr_critic: 1.0e-5
        eta: 0.05
        tau: 0.005
        max_q_backup: false
        grad_norm: 5.0
        top_k: 1
        n_quantiles: 16
        emb_dim: 64
        lr_decay: false
        alpha: 0.1  # alpha_cvar
        # [lower, upper] pair; presumably bounds applied to Q-values -
        # verify against the consumer.
        q_clip_range: [-300.0, 300.0]
        lambda_bc: 1.5
        eta_warmup_steps: 300000
        # Risky environment parameters
        prob_pose_penal: 0.1
        cost_pose: -30.0
        healthy_angle_range: [-0.5, 0.5]
        done_if_exceed_factor: 2.0  # TODO: Kai - is this right?

    # Hyperparameters for the walker2d-medium-replay-v2 dataset.
    walker2d-medium-replay-v2:
        # Learning rates carry an explicit decimal point: YAML 1.1 loaders
        # (e.g. PyYAML) resolve a dot-less form such as 3e-4 to a string,
        # not a float.
        lr_actor: 3.0e-4
        lr_critic: 5.0e-6
        # NOTE(review): eta is 0.02-0.05 for the other environments in this
        # file; 1000 is several orders of magnitude larger - confirm this is
        # intentional and not a typo. Value left unchanged.
        eta: 1000
        tau: 0.002
        max_q_backup: false
        grad_norm: 3.0
        top_k: 1
        n_quantiles: 8
        emb_dim: 32
        lr_decay: false
        alpha: 0.1  # alpha_cvar
        # [lower, upper] pair; presumably bounds applied to Q-values -
        # verify against the consumer.
        q_clip_range: [-300.0, 300.0]
        lambda_bc: 1.0
        eta_warmup_steps: 300000
        # Risky environment parameters
        # NOTE(review): no done_if_exceed_factor is set here, although
        # walker2d-medium-expert-v2 sets one - confirm this is intentional.
        prob_pose_penal: 0.1
        cost_pose: -30.0
        healthy_angle_range: [-0.5, 0.5]
