# Training hyperparameters. Notes marked NOTE(review) are assumptions that
# should be confirmed against the consuming code, which is not visible here.
hyperparameters:
  num_envs: 1024
  # Original note: "*diff_steps = 64 -> same num of updates".
  # NOTE(review): presumably num_mini_batches times the number of diffusion
  # steps equals 64 -- confirm against the training loop.
  num_mini_batches: 8
  num_epochs: 8
  # NOTE(review): vmin/vmax/num_bins presumably describe the value support
  # used when use_categorical_value is true. If the endpoints are inclusive,
  # (200 - (-100)) / (301 - 1) gives a spacing of 1 per bin -- confirm.
  vmin: -100
  vmax: 200
  num_bins: 301
  num_eval: 50

  # Floats are written with an explicit dot in the mantissa so that YAML 1.1
  # loaders (e.g. PyYAML) resolve them as floats instead of strings.
  lr: 1.0e-3
  temperature_lr: 3.0e-4
  ent_target_mult: 4
  update_entropy_lagrangian: true
  use_categorical_value: true
  diffusion:
    learn_friction: true
    learn_dt: true
    per_step_dt: true

    dt_schedule:
      # Dotted path to a callable. NOTE(review): the `_target_` key looks like
      # the Hydra instantiate convention -- confirm how the loader resolves it.
      _target_: src.networks.diffusion.schedulers.get_constant_schedule



