# Command line launched for each run of the sweep. The `${...}` entries are
# placeholders filled in by the sweep agent; they are quoted so the braces are
# never mistaken for YAML flow-mapping syntax.
command:
  # Environment launcher placeholder.
  - "${env}"
  - python3
  # Run the target as a module (`python3 -m <dotted.path>`), not as a script.
  - "-m"
  # Replaced with the value of the top-level `program` key.
  - "${program}"
  # Replaced with the hyperparameters chosen from `parameters` for this run.
  - "${args}"
# Search strategy: `grid` enumerates the combinations of the values listed
# under `parameters`.
method: grid
# Metric the sweep reports on, and the direction in which it is considered
# better. The name must match the key the training program logs.
metric:
  name: eval.eval_pop_returns
  goal: maximize
# Hyperparameter grid. Every parameter except `seed` lists exactly one value,
# so with `method: grid` the sweep expands to one run per seed (6 runs).
parameters:
  mode:
    values:
      - online
  env_name:
    values:
      - lbf
  total_timesteps:
    values:
      # Written as a plain integer rather than `1e7`: YAML 1.1 loaders such as
      # PyYAML only resolve exponent notation as a number when it contains a
      # decimal point, so `1e7` would be loaded as the string "1e7".
      - 10000000
  # Six independent seeds; this is the only axis the grid actually varies.
  seed:
    values:
      - 0
      - 1
      - 2
      - 3
      - 4
      - 5
  learnability_function:
    values:
      - variance
  lr:
    values:
      # `1.0e-3` rather than `1e-3`: the decimal point is required for YAML 1.1
      # loaders to resolve this as a float instead of a string.
      - 1.0e-3
  sfl_rollout_factor:
    values:
      - 5
  num_envs:
    values:
      - 512
  sfl_buffer_size:
    values:
      - 64
  sfl_buffer_refresh_freq:
    values:
      - 4
  sfl_batch_size:
    values:
      - 1024
  sfl_num_envs_to_sample:
    values:
      - 512
  eval_against_pop:
    values:
      # Canonical lowercase boolean; resolves identically under YAML 1.1 and 1.2.
      - true

# Dotted module path substituted for `${program}` in `command`, where it is
# executed via `python3 -m`.
program: src.jaxzsc.dpd.dpd_ippo_lbf_w_bias_rnn
# Project name the sweep is filed under.
project: JaxZSC
