# @package _global_

# PPO agent definition; `_target_` follows Hydra's instantiate convention.
algo:
  _target_: phys_anim.agents.ppo.PPO
  # Nested `_target_` nodes are not instantiated recursively by Hydra; they are
  # handed to the target as plain config.
  _recursive_: False
  # Agent settings. Other keys in this file interpolate against
  # `algo.config.*`.
  config:
    # Shared sub-model slots. Both terrain models are null in this file; the
    # actor's `terrain_model` key interpolates `terrain_models.mlp`.
    # NOTE(review): presumably filled in by other config groups - confirm.
    models:
      terrain_models:
        mlp: null
        transformer: null
    # Set up the basic actor-critic structure
    # Actor model (ActorFixedSigma) wrapping a MultiHeadedMLP as `mu_model`.
    actor:
      _target_: phys_anim.agents.models.actor.ActorFixedSigma
      _recursive_: false
      config:
        # Actor-wide settings; `mu_model` below interpolates initializer,
        # activation and use_layer_norm from these three keys.
        initializer: default
        activation: relu
        use_layer_norm: false

        # Sigma / log-std settings.
        init_logstd: -2.9
        learnable_sigma: false
        sigma_schedule: null

        mu_model:
          _target_: phys_anim.agents.models.mlp.MultiHeadedMLP
          _recursive_: false
          config:
            units: [1024, 512]
            initializer: ${algo.config.actor.config.initializer}
            activation: ${algo.config.actor.config.activation}
            use_layer_norm: ${algo.config.actor.config.use_layer_norm}
            # Observation handling follows the agent-level keys.
            normalize_obs: ${algo.config.normalize_obs}
            obs_clamp_value: ${algo.config.obs_clamp_value}
            terrain_model: ${algo.config.models.terrain_models.mlp}
            extra_inputs:
              # `..` steps up out of `extra_inputs`, so this resolves to the
              # `terrain_model` key directly above.
              terrain: ${..terrain_model}

    # Critic model (CriticMLP); layer settings are given literally here rather
    # than interpolated.
    critic:
      _target_: phys_anim.agents.models.critic.CriticMLP
      _recursive_: false
      config:
        units: [1024, 512]
        initializer: default
        activation: relu
        use_layer_norm: false
        # Observation handling follows the agent-level keys.
        normalize_obs: ${algo.config.normalize_obs}
        obs_clamp_value: ${algo.config.obs_clamp_value}
        extra_inputs: null

    # Adam optimizers for the actor and the critic. Learning rates carry an
    # explicit decimal point so that YAML 1.1 loaders (e.g. plain PyYAML) also
    # resolve them as floats; without the point such loaders yield strings.
    actor_optimizer:
      _target_: torch.optim.Adam
      lr: 2.0e-5
      betas: [0.9, 0.999]

    critic_optimizer:
      _target_: torch.optim.Adam
      lr: 1.0e-4
      betas: [0.9, 0.999]

    # No learning-rate scheduler is configured for either optimizer.
    actor_lr_scheduler: null
    critic_lr_scheduler: null

    # `schedules` is left unset in this file.
    schedules: null

    # PPO parameters, grouped by apparent purpose.
    # NOTE(review): the groupings are inferred from key names - confirm against
    # the agent code.

    # Step count and discount-style factors.
    num_steps: 32
    gamma: 0.99
    tau: 0.95

    # Batching and optimization passes.
    batch_size: 16384
    num_mini_epochs: 6
    gradient_accumulation_steps: 1
    # Derived from root-level values defined outside this file: successive
    # `//` division of training_max_steps by ngpu, num_envs and the sibling
    # num_steps. Relies on an `eval` resolver registered elsewhere.
    max_epochs: ${eval:${training_max_steps}//${ngpu}//${num_envs}//${.num_steps}}

    # Loss terms and clipping.
    e_clip: 0.2
    clip_critic_loss: false
    bounds_loss_coef: 10
    task_reward_w: 1.0

    # Gradient handling.
    gradient_clip_val: 0
    fail_on_bad_grads: false
    check_grad_mag: true

    # Normalization and clamping; normalize_obs and obs_clamp_value are also
    # interpolated by the actor and critic configs in this file.
    normalize_obs: true
    obs_clamp_value: null
    normalize_values: true
    val_clamp_value: null
    normalize_advantage: true

    # Evaluation and saving.
    max_eval_steps: null
    eval_metrics_every: 500
    eval_metric_keys: []
    eval_callbacks: null
    manual_save_every: 10

    # Remaining options.
    training_early_termination: null
    num_games: null
    use_rand_action_masks: false
    extra_inputs: null
