# Hyperparameter group for a training run; `train_mode` below selects "WPO".
# NOTE(review): the meaning of each key is defined by the consuming code,
# which is not visible from this file. Comments here only describe what the
# file itself shows.
hyperparameters:
  num_envs: 1024
  num_steps: 128
  # NOTE(review): the inline note presumably means 8 * diff_steps = 64,
  # i.e. diff_steps = 8 — confirm against the config that sets diff_steps.
  num_mini_batches: 8  # *diff_steps = 64 -> same num of updates
  num_epochs: 8
  # Trailing values in the comments look like alternative / earlier settings.
  kl_bound: 0.12  # 0.2
  kl_start: 0.3  # 0.15
  temp_lagrangian_adam_gamma1: 0.9
  temp_lagrangian_adam_gamma2: 0.999
  num_collection_step_factor: 0.5
  vmin: -100
  vmax: 200
  num_bins: 301
  num_eval: 50
  train_mode: "WPO"
  actor_kl_clip_mode: "clipped"
  reverse_kl: false
  env_action_clip_value: 1.0
  action_clip_value: 1.0
  use_W2_kl: false
  remove_fisher_precond: false

  # Exponent-form floats are written with an explicit decimal point so that
  # YAML 1.1 loaders (e.g. PyYAML) read them as floats rather than strings.
  lr: 1.0e-3
  temperature_lr: 3.0e-4
  lagrangian_lr: 3.0e-4
  ent_target_mult: 3.5
  tanh_transform: false
  gamma: 0.999
  lmbda: 0.98

  diffusion:
    learn_friction: true
    learn_dt: true
    per_step_dt: true
    per_dim_friction: true
    friction: 0.25
    init_std: 3.0

    dt_schedule:
      # Dotted path of the callable used to build the schedule; the remaining
      # keys in this mapping are presumably passed to it as arguments.
      _target_: src.networks.diffusion.schedulers.get_linear_schedule
      # NOTE(review): `diff_steps` is not defined under `diffusion` in this
      # file. This interpolation only resolves if another config or an
      # override supplies `hyperparameters.diffusion.diff_steps` — confirm.
      total_steps: ${hyperparameters.diffusion.diff_steps}
      min: 0.05



