# SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# 
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Copyright (c) 2021 ETH Zurich, Nikita Rudin

from legged_gym.envs.base.legged_robot_config import LeggedRobotCfg, LeggedRobotCfgPPO
class task():
    motion_task = 'walk'
    train_estimator = True

class H1MimicCfg( LeggedRobotCfg ):

    class env( LeggedRobotCfg.env ):
        num_envs = 4096      #6144 8000
        episode_length_s = 50 # episode length in seconds

        n_demo_steps = 2
        # n_demo = 9 + 3 + 3 + 3 +6*3  #observe height
        # n_demo = 9 + 3 + 3 + 3 +6*3
        n_demo = 9 + 3 + 3 + 3 + 6*3 + 6*3 +8
        interval_demo_steps = 0.1

        n_scan = 0#132
        n_priv = 0
        n_priv_latent = 4 + 1 + 19*2
        # n_proprio = 3 + 2 + 2 + 19*3 + 2# one hot


        # // /// ////////////
        history_len = 16
        n_priv_latent = 0
        n_priv = 0

        if task.motion_task == 'walk':
            n_proprio = 3 + 2 + 2 + 19*3 +3 -2 + 3 + 2
            n_demo = 0
            episode_length_s = 24
        else:
            n_proprio = 3 + 2 + 2 + 19*3  

        if task.train_estimator:
            n_priv = 13  # default 3 vel     13: vel, diff
        # ////// //////////

        prop_hist_len = 15    # 4
        n_feature = prop_hist_len * n_proprio

        # num_observations = n_feature + n_proprio + n_demo + n_scan + history_len*n_proprio + n_priv_latent + n_priv
        num_observations = n_feature + n_proprio + n_demo + n_priv


        num_policy_actions = 19

    
    class motion:
        if task.motion_task == 'walk':
            motion_curriculum = False
        else:
            motion_curriculum = True

        motion_type = "yaml"
        # motion_name = "motions_autogen_all_no_run_jump.yaml"
        motion_name = 'motions_autogen.yaml'

        global_keybody = False
        global_keybody_reset_time = 2

        num_envs_as_motions = False

        no_keybody = False
        regen_pkl = False

        step_inplace_prob = 0.05
        resample_step_inplace_interval_s = 10

    class terrain( LeggedRobotCfg.terrain ):
        horizontal_scale = 0.1 # [m] influence computation time by a lot
        height = [0., 0.04]
        # height = [0., 0.]
        static_friction = 0.6
        dynamic_friction = 0.6

    
    class init_state( LeggedRobotCfg.init_state ):
        pos = [0.0, 0.0, 1.0] # x,y,z [m]
        rot = [0.0, 0.0, 0.0, 1.0] # x,y,z,w [quat]
        lin_vel = [0.0, 0.0, 0.0]  # x,y,z [m/s]
        ang_vel = [0.0, 0.0, 0.0]  # x,y,z [rad/s]

        default_joint_angles = { # = target angles [rad] when action = 0.0
           'left_hip_yaw_joint' : 0. ,   
           'left_hip_roll_joint' : 0,               
           'left_hip_pitch_joint' : -0.4,         
           'left_knee_joint' : 0.8,       
           'left_ankle_joint' : -0.4,     
           'right_hip_yaw_joint' : 0., 
           'right_hip_roll_joint' : 0, 
           'right_hip_pitch_joint' : -0.4,                                       
           'right_knee_joint' : 0.8,                                             
           'right_ankle_joint' : -0.4,                                     
           'torso_joint' : 0., 
           'left_shoulder_pitch_joint' : 0., 
           'left_shoulder_roll_joint' : 0, 
           'left_shoulder_yaw_joint' : 0.,
           'left_elbow_joint'  : 0.,
           'right_shoulder_pitch_joint' : 0.,
           'right_shoulder_roll_joint' : 0.0,
           'right_shoulder_yaw_joint' : 0.,
           'right_elbow_joint' : 0.,
        }

    class control( LeggedRobotCfg.control ):
        # PD Drive parameters:
        control_type = 'P'
        # stiffness = {'hip_yaw': 200,
        #              'hip_roll': 200,
        #              'hip_pitch': 200,
        #              'knee': 200,
        #              'ankle': 40,
        #              'torso': 300,
        #              'shoulder': 40,
        #              "elbow":40,
        #              }  # [N*m/rad]
        # damping = {  'hip_yaw': 5,
        #              'hip_roll': 5,
        #              'hip_pitch': 10,
        #              'knee': 10,
        #              'ankle': 2,
        #              'torso': 6,
        #              'shoulder': 2,
        #              "elbow":2,
        #              }  # [N*m/rad]  # [N*m*s/rad]

        # 
        # stiffness = {'hip_yaw': 200,
        #              'hip_roll': 200,
        #              'hip_pitch': 200,
        #              'knee': 200,
        #              'ankle': 80,
        #              'torso': 200,
        #              'shoulder': 30,
        #              "elbow":30,
        #              }  # [N*m/rad]
        # damping = {  'hip_yaw': 5,
        #              'hip_roll': 5,
        #              'hip_pitch': 5,
        #              'knee': 5,
        #              'ankle': 2,
        #              'torso': 5,
        #              'shoulder': 1,
        #              "elbow":1,
        #              }  # [N*m/rad]  # [N*m*s/rad]

        # Humanoid Parkour learning
        # stiffness = {'hip_yaw': 60,
        #              'hip_roll': 220,
        #              'hip_pitch': 220,
        #              'knee': 320,
        #              'ankle': 40,
        #              'torso': 200,
        #              'shoulder': 30,
        #              "elbow":20,
        #              }  # [N*m/rad]
        # damping = {  'hip_yaw': 1.5,
        #              'hip_roll': 4,
        #              'hip_pitch': 4,
        #              'knee': 4,
        #              'ankle': 2,
        #              'torso': 3,
        #              'shoulder': 1,
        #              "elbow":0.5,
        #              }  # [N*m/rad]  # [N*m*s/rad]


        # stiffness = {'hip_yaw': 200,
        #              'hip_roll': 200,
        #              'hip_pitch': 200,
        #              'knee': 300,
        #              'ankle': 40,
        #              'torso': 200,
        #              'shoulder': 30,
        #              "elbow":20,
        #              }  # [N*m/rad]
        # damping = {  'hip_yaw': 5,
        #              'hip_roll': 5,
        #              'hip_pitch': 5,
        #              'knee': 6,
        #              'ankle': 2,
        #              'torso': 3,
        #              'shoulder': 1,
        #              "elbow":0.5,
        #              }  # [N*m/rad]  # [N*m*s/rad]


        stiffness = {'hip_yaw': 200,
                    'hip_roll': 200,
                    'hip_pitch': 200,
                    'knee': 300,
                    'ankle': 40,
                    'torso': 200,
                    'shoulder': 30,
                    "elbow":30,
                    }  # [N*m/rad]
        damping = {  'hip_yaw': 5,
                    'hip_roll': 5,
                    'hip_pitch': 5,
                    'knee': 6,
                    'ankle': 2,
                    'torso': 5,
                    'shoulder': 1,
                    "elbow":1,
                    }  # [N*m/rad]  # [N*m*s/rad]
     
        action_filt = False
        action_cutfreq = 5.0    # 4.0

        action_scale = 0.25
        decimation = 10    # 4

    class normalization( LeggedRobotCfg.normalization):
        # clip_actions = 10
        clip_observations = 18.
        clip_actions = 18.

    class asset( LeggedRobotCfg.asset ):
        # file = '{LEGGED_GYM_ROOT_DIR}/resources/robots/h1/h1_blue_red_custom_collision.urdf'
        # file = '{LEGGED_GYM_ROOT_DIR}/resources/robots/h1/h1_custom_collision.urdf'
        file = '{LEGGED_GYM_ROOT_DIR}/resources/robots/h1/h1.urdf'
        # file = '{LEGGED_GYM_ROOT_DIR}/resources/robots/h1/h1_h2o.urdf'
        torso_name = "torso_link"
        foot_name = "ankle"
        knee_name = "knee"    # 10.28
        # penalize_contacts_on = ["shoulder", "elbow", "hip"]
        penalize_contacts_on = ["pelvis"]
        terminate_after_contacts_on = ["torso_link", ]#, "thigh", "calf"]
        self_collisions = 0 # 1 to disable, 0 to enable...bitwise filter


    class safety:
        # safety factors
        pos_limit = 1.0
        vel_limit = 1.0
        torque_limit = 0.85
  
    class rewards( LeggedRobotCfg.rewards ):
        class scales:
            # tracking rewards
            if task.motion_task == 'walk':
                # # For tracking commands   h1_custom.urdf
                # alive = 1
                # tracking_lin_vel_commands = 20     # 20
                # tracking_ang_vel_commands = 20     # 20
                # orientation = -2
                # lin_vel_z = -1.0
                # ang_vel_xy = -0.4
                # dof_acc = -3e-7
                # collision = -10.
                # action_rate = -0.1
                # energy = -1e-3
                # # dof_error = -0.1    # -0.1
                # feet_stumble = -2
                # feet_drag = -0.1
                # dof_pos_limits = -10.0
                # feet_air_time = 10
                # feet_height = 4           # 2 
                # feet_force = -3e-3
                # base_height = -20.0       # Is it too hard for penalty?  - 100   - 20.0
                # single_feet_contact = 6     # 1 10 8  6
                # feet_away = 6     # 0 6
                # arms_dof_error = -5
                # waist_dof_error = -2
                # hip_yaw_dof_error = -2
                # stand_still = 100   # reward only for zero command.   10


                # # For tracking commands   h1.urdf
                # alive = 1
                # tracking_lin_vel_commands = 20     # 20
                # tracking_ang_vel_commands = 20     # 20
                # orientation = -2
                # lin_vel_z = -1.0
                # ang_vel_xy = -0.4
                # dof_acc = -3e-7
                # # collision = -0.1
                # action_rate = -0.1
                # energy = -1e-3
                # # dof_error = -0.1    # -0.1
                # feet_stumble = -2
                # feet_drag = -0.1
                # dof_pos_limits = -10.0
                # feet_air_time = 10
                # feet_height = 4           # 2  4
                # feet_force = -3e-3
                # base_height = -20.0       # Is it too hard for penalty?  - 100   - 20.0
                # single_feet_contact = 8     # 1 10 8  6
                # arms_dof_error = -5
                # waist_dof_error = -2
                # hip_yaw_dof_error = -2
                # feet_away = 2     # 3 6 1 6 6 2
                # knee_away = 2     # 3 6 1 6 6 2
                # # stand_still = 100.0   # reward only for zero command.   10


                # # For standing still
                # alive = 1
                # tracking_lin_vel_commands = 20     # 20
                # tracking_ang_vel_commands = 20     # 20
                # orientation = -2
                # lin_vel_z = -1.0
                # ang_vel_xy = -0.4
                # dof_acc = -3e-7
                # # collision = -0.1
                # action_rate = -0.1
                # energy = -1e-3
                # # dof_error = -0.1    # -0.1
                # feet_stumble = -2
                # feet_drag = -0.1
                # dof_pos_limits = -10.0
                # # feet_air_time = 10
                # # feet_height = 4           # 2 
                # feet_force = -3e-3
                # base_height = -20.0       # Is it too hard for penalty?  - 100   - 20.0
                # # single_feet_contact = 8     # 1 10 8  6
                # arms_dof_error = -5
                # waist_dof_error = -2
                # hip_yaw_dof_error = -2
                # feet_away = 4     # 0 6 1 4 3 6 1 
                # knee_away = 4     # 0 6 1 4 3 6 1
                # stand_still = 20.0   # reward only for zero command.   10

                # #  reward   h1.urdf
                # alive = 4
                # tracking_lin_vel_commands = 10     # 20 50 40 20
                # tracking_ang_vel_commands = 5     # 20 50 20 10
                # orientation = -8.0    # -2.0
                # lin_vel_z = -1.0
                # ang_vel_xy = -0.4
                # dof_acc = -3e-7
                # # collision = -0.1
                # action_rate = -0.1
                # energy = -1e-3
                # # dof_error = -0.1    # -0.1
                # # feet_stumble = -2
                # feet_drag = -0.1
                # # dof_pos_limits = -10.0
                # feet_air_time = 10    # 10
                # # feet_height = 2           # 2  4
                # feet_force = -3e-3
                # base_height = -40.0       # Is it too hard for penalty?  - 100   - 20.0
                # # single_feet_contact = 4     # 1 10 8  6 8
                # arms_dof_error = -5
                # waist_dof_error = -2
                # hip_yaw_dof_error = -2
                # # hip_roll_dof_error = -2
                # feet_away = 2     # 3 6 1 6 6 2
                # knee_away = 2     # 3 6 1 6 6 2
                # # stand_still = 100.0   # reward only for zero command.   10
                # feet_contact_number = 10
                # feet_clearance = 8



                # # Humandoi parkour learning reward   h1.urdf
                # # alive = 4
                # tracking_lin_vel_commands = 20.0     # 20 50
                # tracking_ang_vel_commands = 16.0     # 20 50
                # orientation = -2
                # # lin_vel_z = -1.0
                # # ang_vel_xy = -0.4
                # dof_vel = -1e-6
                # dof_acc = -3e-7
                # # collision = -0.1
                # action_rate = -0.1
                # energy = -1e-3
                # torques = -1e-6
                # # dof_error = -0.1    # -0.1
                # # feet_stumble = -2
                # # feet_drag = -0.1
                # # dof_pos_limits = -10.0
                # # feet_air_time = 10
                # # feet_height = 2           # 2  4
                # feet_force = -3e-3
                # # base_height = -20.0       # Is it too hard for penalty?  - 100   - 20.0
                # single_feet_contact = 6     # 1 10 8  6
                # arms_dof_error = -5
                # waist_dof_error = -2
                # hip_yaw_dof_error = -2
                # feet_away = 1     # 3 6 1 6 6 2
                # knee_away = 1     # 3 6 1 6 6 2
                # # stand_still = 100.0   # reward only for zero command.   10


                # # Unitree RL
                # # alive = 0.1
                # tracking_lin_vel_commands = 4.0     # 20 50
                # tracking_ang_vel_commands = 2.0     # 20 50
                # orientation = -1.0
                # lin_vel_z = -2.0
                # ang_vel_xy = -1.0
                # dof_acc = -3.5e-8
                # # collision = -0.1
                # action_rate = -0.01
                # # energy = -1e-3
                # # dof_error = -0.1    # -0.1
                # # feet_stumble = -2
                # # feet_drag = -0.1
                # dof_pos_limits = -10.0
                # feet_air_time = 1.0
                # feet_height = 1.0           # 2  4
                # # feet_force = -3e-3
                # base_height = -100.0       # Is it too hard for penalty?  - 100   - 20.0
                # # single_feet_contact = 10     # 1 10 8  6 8
                # arms_dof_error = -0.25
                # waist_dof_error = -0.1
                # hip_yaw_dof_error = -0.1
                # # hip_pitch_dof_error = -2
                # # feet_away = 2     # 3 6 1 6 6 2
                # # knee_away = 2     # 3 6 1 6 6 2
                # # stand_still = 1.0   # reward only for zero command.   10



                # #  reward   h1.urdf
                # # alive = 1
                # tracking_lin_vel_commands = 10     # 20 50 40 20
                # tracking_ang_vel_commands = 5     # 20 50 20 10
                # orientation = -8.0    # -2.0
                # lin_vel_z = -1.0
                # # ang_vel_xy = -0.4
                # dof_acc = -3e-7
                # # collision = -0.1
                # action_rate = -0.1
                # energy = -1e-3
                # # dof_error = -0.1    # -0.1
                # # feet_stumble = -2
                # feet_drag = -0.1
                # # dof_pos_limits = -10.0
                # feet_air_time = 10    # 10
                # # feet_height = 2           # 2  4
                # feet_force = -3e-3
                # base_height = -40.0       # Is it too hard for penalty?  - 100   - 20.0
                # # single_feet_contact = 4     # 1 10 8  6 8
                # arms_dof_error = -100   # -5
                # waist_dof_error = -100  # -2
                # hip_yaw_dof_error = -2
                # # hip_roll_dof_error = -2
                # feet_away = 2     # 3 6 1 6 6 2
                # knee_away = 2     # 3 6 1 6 6 2
                # # stand_still = 100.0   # reward only for zero command.   10
                # feet_contact_number = 12
                # feet_clearance = 10
                # joint_pos = 16



                #                 #  reward   h1.urdf
                # # alive = 1
                # tracking_lin_vel_commands = 1     # 20 50 40 20
                # tracking_ang_vel_commands = 0.5     # 20 50 20 10
                # orientation = -0.5    # -2.0
                # # lin_vel_z = -1.0
                # # ang_vel_xy = -0.4
                # dof_vel = -5e-5
                # dof_acc = -1e-7
                # # collision = -0.1
                # action_rate = -0.001
                # # energy = -1e-3
                # # dof_error = -0.1    # -0.1
                # # feet_stumble = -2
                # feet_drag = -0.01
                # # dof_pos_limits = -10.0
                # feet_air_time = 1.0    # 10
                # # feet_height = 2           # 2  4
                # feet_force = -3e-4
                # base_height = -4.0       # Is it too hard for penalty?  - 100   - 20.0
                # # single_feet_contact = 4     # 1 10 8  6 8
                # arms_dof_error = -100   # -5
                # waist_dof_error = -100  # -2
                # hip_yaw_dof_error = -0.2
                # # hip_roll_dof_error = -2
                # feet_away = 0.2     # 3 6 1 6 6 2
                # knee_away = 0.2     # 3 6 1 6 6 2
                # # stand_still = 100.0   # reward only for zero command.   10
                # feet_contact_number = 1.2
                # feet_clearance = 1
                # joint_pos = 1.6

                # reference motion tracking
                joint_pos = 1.6
                feet_clearance = 1.
                feet_contact_number = 1.2

                # gaitfcont
                feet_air_time = 1.
                foot_slip = -0.05
                feet_distance = 0.2
                knee_distance = 0.2

                # contact
                feet_contact_forces = -0.01
                # vel tracking
                tracking_lin_vel = 1.0    # 1.0 10
                tracking_ang_vel = 0.5    # 0.5 5
                vel_mismatch_exp = 0.5  # lin_z; ang x,y
                low_speed = 0.2
                track_vel_hard = 0.5
                # base pos
                default_joint_pos = 0.5
                upper_joint_pos = 100.0    # 2.0  10.0 50.0 100.0
                orientation = 1.0
                base_height = 0.8    # 0.2 0.8
                base_acc = 0.2
                # energy
                action_smoothness = -0.002
                torques = -1e-5
                dof_vel = -5e-4
                dof_acc = -1e-7
                collision = -1.

                

            else:
                # For tracking target
                alive = 1
                tracking_lin_vel = 6
                tracking_demo_yaw = 1
                tracking_demo_roll_pitch = 1
                orientation = -2
                tracking_demo_dof_pos = 3
                tracking_demo_key_body = 2
                lin_vel_z = -1.0
                ang_vel_xy = -0.4
                dof_acc = -3e-7
                # collision = -10.
                action_rate = -0.1
                energy = -1e-3
                dof_error = -0.1
                feet_stumble = -2
                feet_drag = -0.1
                dof_pos_limits = -10.0
                feet_air_time = 10
                # feet_height = 0           # 2
                feet_force = -3e-3

        # humanoid gym
        # //
        # max_contact_force = 1000
        base_height_target = 0.90    # 0.90
        min_dist = 0.2
        max_dist = 0.5
        # put some settings here for LLM parameter tuning
        target_joint_pos_scale = 0.17    # rad
        target_feet_height = 0.06       # m
        cycle_time = 0.64                # sec
        # //
        
        tracking_sigma = 5.0
        only_positive_rewards = True
        clip_rewards = False  
        soft_dof_pos_limit = 0.98
        # base_height_target = 0.98
    
    class domain_rand( LeggedRobotCfg.domain_rand ):
        randomize_gravity = True
        gravity_rand_interval_s = 10
        gravity_range = [-0.1, 0.1]
        
        # 添加PD增益随机化的部分
        randomize_pd_gain = False  # 启用PD增益随机化
        kp_range = [0.75, 1.25]   # kp增益范围
        kd_range = [0.75, 1.25]   # kd增益范围
    
    class noise():
        add_noise = True
        noise_scale = 0.5 # scales other values
        class noise_scales():
            dof_pos = 0.01
            dof_vel = 0.15
            ang_vel = 0.5     #0.3
            imu = 0.2
            gravity = 0.05
            # lin_vel = 0.05   # 原本没有的

    class task():
        motion_task = task.motion_task
        train_estimator = task.train_estimator

    class commands:
        curriculum = False
        max_curriculum = 1.
        num_commands = 4 # default: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error)
        resampling_time = 8. # time before command are changed[s]  11.1  TODO default 6.
        heading_command = True # True   if true: compute ang vel command from heading error

        lin_vel_clip = 0.01     # default 0.2
        ang_vel_clip = 0.01     # default 0.4

        class ranges:
            # lin_vel_x = [-0.6, 1.0] # min max [m/s]
            # lin_vel_y = [-0.3, 0.3]#[0.15, 0.6]   # min max [m/s]   # After depolying, spread it.
            # ang_vel_yaw = [-0.3, 0.3]    # min max [rad/s]
            # heading = [-1.6, 1.6]

            # Unitree gym rl
            lin_vel_x = [-0.3, 0.6] # min max [m/s]
            lin_vel_y = [-0.3, 0.3]#[0.15, 0.6]   # min max [m/s]   # After depolying, spread it.
            ang_vel_yaw = [-0.3, 0.3]    # min max [rad/s]
            heading = [-1.6, 1.6]

            # #Standing still
            # lin_vel_x = [0.0, 0.0] # min max [m/s]
            # lin_vel_y = [0.0, 0.0]#[0.15, 0.6]   # min max [m/s]   # After depolying, spread it.
            # ang_vel_yaw = [0.0, 0.0]    # min max [rad/s]
            # heading = [-1.6, 1.6]
    

class H1MimicCfgPPO( LeggedRobotCfgPPO ):
    class runner( LeggedRobotCfgPPO.runner ):
        runner_class_name = "OnPolicyRunnerMimic"
        policy_class_name = 'ActorCriticMimic'
        algorithm_class_name = 'PPOMimic'
    
    class policy( LeggedRobotCfgPPO.policy ):
        continue_from_last_std = False

        text_feat_input_dim = H1MimicCfg.env.n_feature

        text_feat_output_dim = 16
        feat_hist_len = H1MimicCfg.env.prop_hist_len
        # actor_hidden_dims = [1024, 512]
        # critic_hidden_dims = [1024, 512]
    
    class algorithm( LeggedRobotCfgPPO.algorithm ):
        entropy_coef = 0.005
        entropy_coef = 0.001
        learning_rate = 1e-5
        num_learning_epochs = 2
        gamma = 0.994
        lam = 0.9
        num_mini_batches = 4
    
    class estimator:
        train_with_estimated_states = True    #False
        learning_rate = 1.e-4
        hidden_dims = [128, 64]

        priv_states_dim = H1MimicCfg.env.n_priv
        priv_start = H1MimicCfg.env.n_feature + H1MimicCfg.env.n_proprio + H1MimicCfg.env.n_demo + H1MimicCfg.env.n_scan
        
        prop_start = H1MimicCfg.env.n_feature
        prop_dim = H1MimicCfg.env.n_proprio

    

class H1MimicDistillCfgPPO( H1MimicCfgPPO ):
    class distill:
        num_demo = 3
        num_steps_per_env = 24
        
        num_pretrain_iter = 0

        activation = "elu"
        learning_rate = 1.e-4
        student_actor_hidden_dims = [1024, 1024, 512]
        num_mini_batches = 4
