# Task identifier handed to the consuming code; what "f" selects is defined
# there, not in this file.
task: "f"

# Top-level training settings. Key meanings noted below are inferred from the
# key names only -- confirm against the code that reads this file.
hyperparameters:
  # Number of environments; this probably needs to be much larger.
  num_envs: 32
  batchsize: 4096
  episodes: 5000
  # Spelled 10.0 (not "10.") so the float is unambiguous to readers and tools.
  value_weight: 10.0
  l2_weight: 0.0001
  lr: 0.001
  # NOTE(review): 1.0 presumably means returns are undiscounted -- confirm.
  discount: 1.0
  value_transform: "default"

  # Agent/search settings. NOTE(review): gumbel_scale, num_simulations and the
  # qtransform keys match argument names of mctx's gumbel_muzero_policy and
  # qtransform_completed_by_mix_value -- confirm that this is the consumer.
  A0:
    gumbel_scale: 1.0
    num_considered_actions: 5
    num_simulations: 50
    # Too large a replay buffer is not good.
    replay_buffer_size: 50000
    lookback: 1

    # Calibration of the qtransform is very important for learning.
    qtransform:
      value_scale: 0.005
      maxvisit_init: 30
      # Canonical lowercase boolean; parses the same under YAML 1.1 and 1.2.
      rescale_values: true

