# Settings for the tuning run itself, as opposed to the sampled
# hyperparameters listed further down.
# NOTE(review): the key names match Ray Tune's TuneConfig arguments; how the
# string values of search_alg / scheduler are resolved is up to the loader
# that reads this file -- confirm there.
tune_config:
  mode: 'max'  # optimisation direction applied to `metric`
  metric: 'reward/mean'  # name of the reported result being optimised
  search_alg: 'random'
  scheduler: 'fifo'
  num_samples: 32  # presumably the number of trials to draw -- confirm

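# Search space. Each top-level key below names one hyperparameter to sweep:
# `strategy` is expected to name a Ray Tune sampling function (for example
# tune.uniform) and `values` to list the arguments given to it. See:
# https://docs.ray.io/en/latest/tune/api_docs/search_space.html#tune-sample-docs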
# lr (presumably the learning rate), sampled with the "loguniform" strategy.
# NOTE(review): `values` looks like the [lower, upper] sampling bounds --
# confirm against the code that consumes this file.
lr:
  strategy: 'loguniform'
  # Kept in plain decimal form: exponent notation such as 1e-5 is read as a
  # string by some YAML 1.1 loaders.
  values: [0.00001, 0.01]
# init_kl_coef (presumably the initial KL-penalty coefficient -- confirm),
# sampled with the "uniform" strategy.
# NOTE(review): `values` looks like the [lower, upper] sampling bounds.
init_kl_coef:
  strategy: 'uniform'
  values: [0, 0.2]
# vf_coef (presumably the value-function loss coefficient -- confirm),
# sampled with the "uniform" strategy.
# NOTE(review): `values` looks like the [lower, upper] sampling bounds.
vf_coef:
  strategy: 'uniform'
  values: [0.5, 2]
