---
# Command line the sweep agent launches for every run.
#   ${program} expands to the value of the top-level `program` key, so the
#              script path is defined in exactly one place.
#   ${args}    expands to the hyperparameters chosen for the run, rendered as
#              --key=value flags.
# NOTE(review): the .sbatch file is invoked directly rather than through an
# `sbatch` submit call; this presumably relies on it being executable with a
# shebang line - confirm.
command:
  - ${program}
  - ${args}
# Sweep identity and search strategy.
name: acquisition_comparison
# Grid search: every combination of the values listed under `parameters`
# is run once.
method: grid
# Metric used to rank runs (higher is better).
# NOTE(review): presumably the launched script logs `DPO/Mean` to W&B under
# exactly this name - confirm.
metric:
  name: DPO/Mean
  goal: maximize
# Hyperparameter grid. Entries with one value are held fixed; the rest are
# swept. Grid size: 2 (beta) x 5 (acquisition type) x 3 (decay base)
# x 2 (replay buffer factor) = 60 combinations.
# Keys are forwarded verbatim as flag names through ${args}
# (e.g. --enn.effective_batch_size=64).
# NOTE(review): the dotted keys presumably address nested config fields in the
# launched script - confirm against its argument parser.
parameters:
  acquisition_function.beta:
    values: [1.0, 2.0]
  acquisition_function_type:
    values: [dts, infomax, maxminlcb, drts, deltaucb]
  enn.regularization.exponential_decay_base:
    values: [0.999, 0.99, 0.9]
  enn.regularization.initial_value:
    values: [1.0]
  outer_loop_batch_size:
    values: [64]
  replay_buffer_factor:
    values: [100, 1000]
  enn.effective_batch_size:
    values: [64]
# Script registered as the sweep's program (the entry point for each run).
program: scripts/loop_train_eval.sbatch