# Config composition list (Hydra-style `defaults`): selects option `mjx_dmc`
# for the `env` group and option `default` for `experiment_overrides`.
defaults:
  - env: mjx_dmc
  - experiment_overrides: default
  # `_self_` marks where this file's own keys are merged; it is listed last,
  # so in Hydra they are applied after (and take precedence over) the groups
  # above.
  - _self_

# Training hyperparameters.
# Scalars are written defensively so every YAML loader infers the same type:
# floats carry a decimal point (and a signed exponent when in scientific
# notation), integers use no `_` separators, booleans are lowercase.
hyperparameters:
  # Was `1e-4`: without a decimal point, YAML 1.1 loaders such as PyYAML
  # resolve that form as the string "1e-4" rather than a float.
  lr: 1.0e-4
  gamma: 0.999
  lmbda: 0.97
  clip_ratio: 0.2
  value_coef: 0.5

  # Entropy-related settings.
  entropy_coef: 0.0
  ent_start: 0.002
  ent_target_mult: 3
  update_entropy_lagrangian: false

  # KL-related settings.
  use_kl_regularization: false
  actor_kl_clip_mode: "clipped"
  kl_bound: 0.1
  kl_action_rep: 1
  reduce_kl: true
  reverse_kl: false

  # Temperature-Lagrangian optimiser settings.
  temp_lagrangian_optim: "adam"
  temp_lagrangian_adam_gamma1: 0.9
  temp_lagrangian_adam_gamma2: 0.999
  use_temp_lagrangian_ema_optim: false
  use_temp_lagrangian_post_adam_ema: false
  temp_lagrangian_ema_decay: 0.99
  temperature_lr: null

  # Rollout / batch sizing.
  # 50 million; was `50_000_000`, which is an int only under YAML 1.1 rules
  # (YAML 1.2 core-schema loaders read the underscored form as a string).
  total_time_steps: 50000000
  num_steps: 64
  num_collection_step_factor: 1
  num_mini_batches: 128
  num_envs: 2048
  num_epochs: 16
  max_grad_norm: 0.5

  # Normalisation and scheduling switches.
  normalize_advantages: true
  normalize_env: true
  normalize_soft_reward: false
  anneal_lr: false

  # Evaluation / environment limits.
  num_eval: 20
  max_episode_steps: 1000
  env_action_clip_value: 1.0
  tanh_transform: false

  # Critic architecture settings (need to be increased for MJX humanoid).
  # Booleans are lowercase `true`/`false` throughout, matching the rest of
  # the file.
  critic_hidden_dim: 512
  use_critic_norm: true
  num_critic_encoder_layers: 2
  num_critic_head_layers: 2
  num_critic_pred_layers: 2
  # NOTE(review): key is spelled `simplical` (sic); it is left unchanged
  # because the consuming code presumably looks it up by this exact name.
  use_simplical_embedding: false
  use_critic_skip: false
  use_categorical_value: false
  # `vmin`/`vmax` are `${env.*}` interpolations, i.e. resolved from the
  # `env` node of the composed config rather than set here.
  vmin: ${env.vmin}
  vmax: ${env.vmax}
  num_bins: 151
  hl_gauss: true
  # Explicit `0.0` instead of the trailing-dot form `0.`.
  aux_loss_mult: 0.0
  aux_loss_alpha: 0.95

  # Diffusion settings.
  # Scientific-notation floats are written with a decimal point and a signed
  # exponent (`1.0e+4`, not `1e4`): YAML 1.1 loaders such as PyYAML resolve
  # the short form as a string instead of a float.
  diffusion:
    name: "dis"
    diff_steps: 8
    init_std: 2.5
    friction: 1.0
    use_friction_mlp: false
    friction_mlp_hidden: 64
    friction_mlp_layers: 2
    friction_num_time_hid: 32
    friction_num_time_out: 16
    friction_mlp_use_obs: true
    integrator: "EM"

    # Switches selecting which components are learned.
    learn_forward: true
    learn_backward: false
    learn_prior: false
    learn_betas: false
    learn_friction: false
    learn_mass_matrix: false

    # Step-size settings.
    dt: 0.125
    learn_dt: false
    per_step_dt: false
    per_dim_friction: true
    use_step_size_scheduler: false

    score_model:
      use_path_gradient: false
      use_target_score: false
      num_layers: 4
      num_hid: 256
      num_time_hid: 32
      num_time_out: 16
      outer_clip: 1.0e+4  # was `1e4`
      inner_clip: 1.0e+2  # was `1e2`
      weight_init: 1.0e-8  # was `1e-8`
      bias_init: 0.0
      layer_norm: false
      layer_norm_type: "LayerNorm"

    # Exactly one `dt_schedule` mapping may be active at a time: duplicate
    # keys are invalid YAML and most loaders silently keep only the last one.
    # The alternatives are therefore kept commented out.
    #
    # Alternative: cosine schedule.
    # dt_schedule:
    #   _target_: src.networks.diffusion.schedulers.get_cosine_schedule
    #   total_steps: ${hyperparameters.diffusion.diff_steps}
    #   min: 0.001
    #   s: 0.008
    #   pow: 2
    #
    # Active: linear schedule. `_target_` names the callable (presumably
    # instantiated through Hydra's `_target_` mechanism; confirm in the
    # consumer). `total_steps` is interpolated from `diff_steps` above.
    dt_schedule:
      _target_: src.networks.diffusion.schedulers.get_linear_schedule
      total_steps: ${hyperparameters.diffusion.diff_steps}
      min: 0.05
    # Alternative: constant schedule.
    # dt_schedule:
    #   _target_: src.networks.diffusion.schedulers.get_constant_schedule

# Run-level settings: experiment identity, seeding, tuning and logging.
name: "diff_ppo"
tags:
  - "ppo_baseline_retuned"
seed: 0
num_seeds: 1
tune: false
checkpoint_dir: null
trials: 1
wandb:
  # "online" activates wandb; it is currently enabled.
  mode: "online"
  entity: ""
  # Project name embeds the `${env.name}` interpolation from the `env` node.
  project: "dime_${env.name}"
  project_suffix: ""
