# run / evaluation settings
# NOTE(review): the underscore-grouped integer relies on YAML 1.1 integer
# syntax (as implemented by PyYAML); a strict YAML 1.2 loader would read it
# as a string.
total_time_steps: 50_000_000
normalize_env: true
# resolved by interpolation from the `env` config node
max_episode_steps: ${env.max_episode_steps}
# NOTE(review): the unit of eval_interval and the meaning of num_eval are not
# visible in this file -- confirm against the training/evaluation loop.
eval_interval: 2
num_eval: 25

# optimization settings (seem very stable)
# `optimizer` carries a `_target_` key naming optax.adam; the sibling key is
# presumably forwarded to it as a keyword argument (TODO confirm in the
# code that instantiates this node).
optimizer:
  _target_: optax.adam
  # Written with an explicit decimal point: YAML 1.1 loaders such as PyYAML
  # only resolve exponent notation as a float when the mantissa contains a
  # ".", so a bare `3e-4` can load as the string "3e-4".
  learning_rate: 3.0e-4
anneal_lr: false
max_grad_norm: 0.5
polyak: 1.0  # maybe ablate?

# problem discount settings (need tuning)
# NOTE(review): gamma / lmbda look like the usual discount factor and
# lambda-return mixing coefficient -- confirm against the learner.
gamma: 0.99
lmbda: 0.95
# set to the same value as lmbda in this config
lmbda_min: 0.95 # irrelevant if no exploration noise is added

# batch settings (need tuning for MJX humanoid)
# NOTE(review): if one rollout is num_envs x num_steps transitions (TODO
# confirm), these values give 1024 * 128 = 131072 samples per update, i.e.
# 1024 samples per mini-batch, each reused for num_epochs passes.
num_steps: 128
num_mini_batches: 128
num_envs: 1024
num_epochs: 4

# exploration settings (currently not touched)
# max and min are set to the same value here
exploration_noise_max: 1.0
exploration_noise_min: 1.0
# NOTE(review): how exploration_base_envs partitions the environments is not
# visible in this file -- confirm against the learner.
exploration_base_envs: 0

# critic architecture settings (need to be increased for MJX humanoid)
# hidden widths; note that the actor's width is configured in this stanza too
critic_hidden_dim: 512
actor_hidden_dim: 512
# value bounds are resolved by interpolation from the `env` config node
vmin: ${env.vmin}
vmax: ${env.vmax}
# NOTE(review): num_bins / hl_gauss suggest a categorical value head over
# [vmin, vmax] -- confirm in the network code.
num_bins: 151
hl_gauss: true
use_critic_norm: true
num_critic_encoder_layers: 2
num_critic_head_layers: 2
num_critic_pred_layers: 2
# NOTE(review): the key is spelled "simplical" (probably meant "simplicial").
# It must match the consuming code, so the spelling is left unchanged.
use_simplical_embedding: false
use_critic_skip: false

# actor architecture settings (seem stable)
use_actor_norm: true
num_actor_layers: 3
# NOTE(review): presumably a lower bound on the policy's standard deviation;
# confirm against the actor network.
actor_min_std: 0.0
use_actor_skip: false

# actor & critic loss settings (seem remarkably stable)
## kl settings
kl_start: 0.01
kl_bound: 0.1  # switched to tighter bounds for MJX
reduce_kl: true
reverse_kl: false  # previous default "false"
update_kl_lagrangian: true
# accepted values, per the original author's note:
# "full", "clipped", "kl_relu_clipped", "kl_bound_clipped", "value"
actor_kl_clip_mode: "clipped"
## entropy settings
ent_start: 0.01
ent_target_mult: 0.5
update_entropy_lagrangian: true
## auxiliary loss settings
aux_loss_mult: 1.0
# accepted values, per the original author's note:
# "score_based_gae", "score_based_q", "pathwise_q"
gradient_estimator: "pathwise_q"
scale_samples_with_action_d: false
fixed_actor_std: false

# Factory entry points for the algorithm. Each node names a callable by its
# dotted import path in `_target_`.
# NOTE(review): `_partial_: true` presumably makes instantiation return a
# partially-applied callable, with the remaining arguments supplied by the
# caller (Hydra instantiate semantics) -- confirm against the launch code.
network:
  _target_: src.algorithms.reppo.networks.make_continuous_ff_networks
  _partial_: true
init:
  _target_: src.algorithms.reppo.ff_reppo.make_init_fn
  _partial_: true
learner:
  _target_: src.algorithms.reppo.ff_reppo.make_learner_fn
  _partial_: true
policy:
  _target_: src.algorithms.reppo.ff_reppo.make_policy_fn
  _partial_: true
