---
# Weights & Biases sweep definition.
name: peptide-gfn-dual-exploration
# Trainer module; the custom `command` section launches this same module with
# `python -m`.
program: peptide.wandb_train
# Bayesian search over the `parameters` section, guided by `metric`.
method: bayes

# Objective for the sweep: the logged value `params/logz`, to be maximized.
# NOTE(review): presumably this is the learned log-partition estimate of the
# GFlowNet; confirm that a larger value actually indicates a better run before
# relying on the sweep's ranking.
metric:
  name: params/logz
  goal: maximize

# Launch line for every run:
#   <env> uv run python -m peptide.wandb_train <args>
# `${env}` and `${args}` are W&B macros that the sweep agent expands; they are
# quoted so the braces are never mistaken for YAML flow syntax.
command:
  - "${env}"
  - uv
  - run
  - python
  - "-m"
  - peptide.wandb_train
  - "${args}"

# Hyperband early stopping: first bracket at `min_iter`, later brackets scaled
# by `eta`.
# NOTE(review): per the W&B docs, Hyperband iterations count how many times the
# optimized metric has been logged, not the trainer's step/epoch counter. With
# `epochs: 4000` and `log_every: 50`, a trainer that logs only every
# `log_every` epochs would emit about 80 values and never reach
# `min_iter: 1500`, so no run would be stopped early. Confirm how often
# `params/logz` is logged and adjust `min_iter` if needed.
early_terminate:
  type: hyperband
  min_iter: 1500
  eta: 2

# Search space handed to the trainer as command-line arguments.
# Convention: settings that are held fixed use `value:`; settings that are
# actually swept use `values:` (discrete choices) or a `distribution`.
parameters:
  # --- Environment / Reward ---
  cut_off:
    value: 0.94
  batch_size:
    value: 4096
  # Sampled uniformly from [0.05, 0.5].
  threshold:
    distribution: uniform
    min: 0.05
    max: 0.5
  eps:
    values: [0.0, 0.1, 0.2, 0.3]
  seed:
    value: 10

  # --- Policy architecture ---
  emb_dim:
    values: [32, 64, 128]
  window:
    values: [4, 6, 8]
  pos_dim:
    values: [4, 8, 16, 32]  # must be even
  hidden:
    values: [64, 128, 256]
  force_stop_on_full:
    value: true

  # --- Optimization (main) ---
  lr_pf:
    value: 0.05
  lr_logz:
    value: 0.1

  # --- Optimization (divergent) ---
  div_lr_pf:
    values: [0.5, 0.05, 0.005]
  div_lr_logz:
    values: [1.0, 0.1, 0.01]

  # --- Training / Logging / Checkpoints ---
  epochs:
    value: 4000
  log_every:
    value: 50
  save_every:
    value: 1000
  save_on_best:
    value: false

  # To make ALL runs version the SAME artifact (this mixes runs), uncomment:
  # artifact_name:
  #   value: peptide_gfn_global