algorithm:
  class_name: PPO

  # Training parameters, grouped by the loss term / stage they belong to.

  # -- advantage normalization
  normalize_advantage_per_mini_batch: true

  # -- value function
  use_clipped_value_loss: true
  clip_param: 0.2
  value_loss_coef: 1.0

  # -- surrogate loss
  gamma: 0.99
  lam: 0.95
  entropy_coef: 0.01
  desired_kl: 0.01
  max_grad_norm: 1.0

  # -- training
  schedule: adaptive  # options: adaptive, fixed
  learning_rate: 0.0003
  num_learning_epochs: 5
  # mini batch size = num_envs * num_steps / num_mini_batches
  num_mini_batches: 16

  # -- Random Network Distillation
  # rnd_cfg:
  #     weight: 0.0  # initial weight of the RND reward

  #     # note: This is a dictionary with a required key called "mode".
  #     #   Please check the RND module for more information.
  #     weight_schedule: null

  #     reward_normalization: false  # whether to normalize RND reward
  #     state_normalization: true  # whether to normalize RND state observations

  #     # -- Learning parameters
  #     learning_rate: 0.001  # learning rate for RND

  #     # -- Network parameters
  #     # note: if -1, then the network will use dimensions of the observation
  #     num_outputs: 1  # number of outputs of RND network
  #     predictor_hidden_dims: [-1]  # hidden dimensions of predictor network
  #     target_hidden_dims: [-1]  # hidden dimensions of target network

  # -- Symmetry Augmentation
  # symmetry_cfg:
  #   use_data_augmentation: true  # this adds symmetric trajectories to the batch
  #   use_mirror_loss: false  # this adds symmetry loss term to the loss function
  #
  #   # String containing the module and function name to import,
  #   # written as "<module path>:<function name>".
  #   # Example: "legged_gym.envs.locomotion.anymal_c.symmetry:get_symmetric_states"
  #   #
  #   # .. code-block:: python
  #   #
  #   #     @torch.no_grad()
  #   #     def get_symmetric_states(
  #   #         obs: Optional[torch.Tensor] = None, actions: Optional[torch.Tensor] = None, cfg: "BaseEnvCfg" = None, obs_type: str = "policy"
  #   #     ) -> Tuple[torch.Tensor, torch.Tensor]:
  #   #
  #   data_augmentation_func: null
  #
  #   # coefficient for symmetry loss term
  #   # if 0, then no symmetry loss is used
  #   mirror_loss_coeff: 0.0

policy:
  class_name: ActorCritic

  # Settings for the MLP policy, i.e. `ActorCritic`.
  init_noise_std: 1.0
  noise_std_type: scalar  # 'scalar' or 'log'
  activation: elu
  critic_hidden_dims: [512, 512, 512]
  actor_hidden_dims: [512, 512, 512]

  # Recurrent settings; only needed for `ActorCriticRecurrent`.
  # rnn_type: 'lstm'
  # rnn_hidden_dim: 512
  # rnn_num_layers: 1

# Runner configuration: rollout length, logging, and checkpoint loading.
runner:
  num_steps_per_env: 24  # number of steps per environment per iteration
  max_iterations: 500  # number of policy updates
  # NOTE(review): presumably toggles running normalization of observations;
  # confirm against the runner implementation.
  empirical_normalization: true

  # -- logging parameters
  save_interval: 50  # check for potential saves every `save_interval` iterations
  experiment_name: pickup_exp
  run_name: pickup_exp

  # -- logging writer
  logger: wandb  # one of: tensorboard, neptune, wandb
  # Project names for the neptune / wandb writers.
  neptune_project: maniskill
  wandb_project: maniskill

  # -- load and resuming
  load_run: -1  # -1 means load latest run
  resume_path: null  # updated from load_run and checkpoint
  checkpoint: -1  # -1 means load latest checkpoint

# -- top-level settings
seed: 1
# class name of the runner
runner_class_name: OnPolicyRunner