# Hyperparameter values grouped under a single top-level key.
# NOTE(review): the code that consumes this mapping is not visible from this
# file, so the per-key notes below are inferred from key names only and
# should be confirmed against the loader.
hyperparameters:
  # Presumably the number of parallel environments and the number of steps
  # collected per environment between updates — TODO confirm.
  num_envs: 1024
  num_steps: 128
  # Original author note: "*diff_steps = 64 -> same num of updates".
  # `diff_steps` is not defined in this file; verify what it refers to.
  num_mini_batches: 16
  num_epochs: 8
  # The trailing numbers are values kept from the original inline comments,
  # presumably earlier or alternative settings — confirm before relying on
  # them.
  kl_bound: 0.1  # 0.2
  kl_start: 0.3  # 0.15
  # Presumably the two Adam decay rates of an optimizer for a temperature /
  # Lagrange-multiplier parameter — TODO confirm.
  temp_lagrangian_adam_gamma1: 0.95
  temp_lagrangian_adam_gamma2: 0.9995
  num_collection_step_factor: 0.5
  # NOTE(review): looks like the support of a binned value range. If the
  # bins are evenly spaced and include both endpoints, the spacing is
  # (150 - (-50)) / (201 - 1) = 1.0 — confirm against the consumer.
  vmin: -50
  vmax: 150
  num_bins: 201
  num_eval: 100