# @package _global_

# defaults:
#   - reward_base

# Modified from ABLAdpSigma

rewards:
  # Name and date stamp identifying this reward preset.
  set_reward: Anonymity
  set_reward_date: 20250417  # unquoted, so it loads as an integer
  # NOTE(review): presumably clips the summed reward at zero when enabled;
  # confirm against the consumer.
  only_positive_rewards: false
  # Weight applied to each named reward term. Entries with a negative weight
  # are penalties; entries weighted 0 stay listed but carry no weight.
  reward_scales:
    teleop_contact_mask: 0.5
    teleop_max_joint_position: 1.0

    # Teleoperation tracking terms.
    teleop_body_position_extend: 1.0
    teleop_vr_3point: 1.6
    teleop_body_position_feet: 1.5
    teleop_body_rotation_extend: 0.5
    teleop_body_ang_velocity_extend: 0.5
    teleop_body_velocity_extend: 0.5
    teleop_joint_position: 1.0
    teleop_joint_velocity: 1.0

    # Regularisation penalties (the commented-out entries are inactive).
    penalty_torques: -1.0e-6
    # penalty_dof_vel: -0.0001
    # penalty_dof_acc: -2.5e-8
    penalty_action_rate: -0.5  # needs to be as strong as -0.5 for the real robot

    # Foot contact terms.
    feet_air_time: 1.0
    penalty_feet_contact_forces: -0.01
    penalty_stumble: -2.0

    # Terms noted as "better for landing"; the first two are weighted 0 here.
    penalty_feet_ori: 0
    feet_heading_alignment: 0
    penalty_slippage: -1.0

    # Limit violations and termination.
    limits_dof_pos: -10.0
    limits_dof_vel: -5.0
    limits_torque: -5.0
    termination: -200.0
    # feet_max_height_for_this_air: -100.0

    collision: -30.0
      

  # Relative weights for lower-body and upper-body position tracking; both are
  # 1.0 in this preset.
  # NOTE(review): presumably read by the teleop body-position reward; confirm.
  teleop_body_pos_lowerbody_weight: 1.0
  teleop_body_pos_upperbody_weight: 1.0

  # NOTE(review): presumably the target foot height used by the
  # feet_max_height_for_this_air term, whose scale is commented out in
  # reward_scales. Units are not stated in this file.
  desired_feet_max_height_for_this_air: 0.2

  # Sigma value for each tracking reward term.
  # NOTE(review): presumably the tolerance/width of the tracking-error kernel
  # (smaller = stricter); confirm the exact formula in the reward code.
  reward_tracking_sigma:
    teleop_max_joint_pos: 1.0

    # Position-tracking sigmas.
    teleop_upper_body_pos: 0.015
    teleop_lower_body_pos: 0.015
    teleop_vr_3point_pos: 0.015
    teleop_feet_pos: 0.01

    # Rotation, velocity and joint-space sigmas.
    teleop_body_rot: 0.1
    teleop_body_vel: 1.0
    teleop_body_ang_vel: 15.0
    teleop_joint_pos: 0.3
    teleop_joint_vel: 30.0

  # Switch and rate for adapting the tracking sigmas during training.
  # NOTE(review): alpha is presumably the adaptation/update rate; confirm.
  adaptive_tracking_sigma:
    enable: true
    # Keep the explicit mantissa dot: YAML 1.1 resolvers (e.g. plain PyYAML)
    # load a bare `1e-3` as the string "1e-3" instead of a float.
    alpha: 1.0e-3


  # NOTE(review): presumably the foot contact-force threshold (likely newtons)
  # used by penalty_feet_contact_forces; confirm.
  locomotion_max_contact_force: 400.0
  # NOTE(review): presumably the target swing duration (likely seconds) used by
  # the feet_air_time reward; confirm.
  desired_feet_air_time: 0.3

  # Curriculum over the penalty scale: enabled, starting at 0.10 and bounded
  # by the min/max values below.
  # NOTE(review): the level-down/level-up thresholds are presumably compared
  # with an average episode length, and reward_penalty_degree is presumably
  # the per-update step; confirm.
  reward_penalty_curriculum: true
  reward_initial_penalty_scale: 0.10
  reward_min_penalty_scale: 0.0
  reward_max_penalty_scale: 1.0
  reward_penalty_level_down_threshold: 40
  reward_penalty_level_up_threshold: 42
  reward_penalty_degree: 1.0e-5
  # NOTE(review): presumably the number of episodes in the averaging window.
  num_compute_average_epl: 10000

  # Soft limits used by the limits_* reward terms.
  # NOTE(review): presumably fractions of the hard joint/velocity/torque
  # limits; confirm.
  reward_limit:
    soft_dof_pos_limit: 0.9
    soft_dof_vel_limit: 0.9
    soft_torque_limit: 0.825

    # Per-limit curricula. In every group below the initial, max and min
    # values are equal, so the configured range is a single value.
    # NOTE(review): these values differ from the static soft_* limits above;
    # confirm which one the consumer uses when a curriculum is enabled.
    reward_limits_curriculum:
      # Joint position limit.
      soft_dof_pos_curriculum: true
      soft_dof_pos_initial_limit: 0.95
      soft_dof_pos_max_limit: 0.95
      soft_dof_pos_min_limit: 0.95
      soft_dof_pos_curriculum_degree: 2.5e-7
      soft_dof_pos_curriculum_level_down_threshold: 40
      soft_dof_pos_curriculum_level_up_threshold: 42

      # Joint velocity limit.
      soft_dof_vel_curriculum: true
      soft_dof_vel_initial_limit: 0.95
      soft_dof_vel_max_limit: 0.95
      soft_dof_vel_min_limit: 0.95
      soft_dof_vel_curriculum_degree: 2.5e-7
      soft_dof_vel_curriculum_level_down_threshold: 40
      soft_dof_vel_curriculum_level_up_threshold: 42

      # Torque limit.
      soft_torque_curriculum: true
      soft_torque_initial_limit: 0.85
      soft_torque_max_limit: 0.85
      soft_torque_min_limit: 0.85
      soft_torque_curriculum_degree: 2.5e-7
      soft_torque_curriculum_level_down_threshold: 40
      soft_torque_curriculum_level_up_threshold: 42

  # Names of the reward terms treated as penalties.
  # NOTE(review): presumably the terms scaled by the penalty curriculum;
  # confirm. penalty_dof_acc and penalty_dof_vel are listed here although
  # their entries in reward_scales are commented out.
  reward_penalty_reward_names:
    - penalty_torques
    - penalty_dof_acc
    - penalty_dof_vel
    - penalty_action_rate
    - limits_dof_pos
    - limits_dof_vel
    - limits_torque
    - feet_heading_alignment
    - penalty_feet_ori
    - penalty_slippage
    - collision