---
# W&B sweep definition: display name, entry point, search strategy and the
# metric the search optimizes.
name: gflownet-diffusion-grid-dual-exploration

# `program` holds the dotted Python module name here (not a script path).
program: diffusionGrid.wandb_train

# Bayesian search that minimizes the metric named below.
method: bayes
metric:
  goal: minimize
  name: metrics/l1_total_variation

# Command line used to start each trial:
#   <env> uv run python -m diffusionGrid.wandb_train <sweep arguments>
# Entries of the form ${...} are sweep macros; they are quoted so they read as
# literal strings in the YAML.
command:
  - '${env}'
  - uv
  - run
  - python
  - '-m'
  # Literal module name: plain text, deliberately not a ${...} macro.
  - diffusionGrid.wandb_train
  - '${args}'

# Hyperband early stopping for trials that lag behind.
early_terminate:
  type: hyperband
  # Bracket multiplier: each bracket is `eta` times the previous one.
  eta: 2
  # Iteration of the first bracket, so brackets fall at 500, 1000, 2000, ...
  # NOTE(review): iterations are presumably counted per metric-logging step,
  # not per epoch - confirm against the training loop's logging cadence.
  min_iter: 500

# Search space. Each entry is either a constant (`value`), a discrete choice
# (`values`) or a continuous range (`distribution` with `min`/`max`).
#
# Scientific-notation bounds are written with a decimal point (1.0e-4, not
# 1e-4): YAML 1.1 loaders such as PyYAML resolve `1e-4` as a string, which
# would leave `min`/`max` non-numeric.
parameters:
  # --- Environment settings ---
  reward_kind:
    value: "rings"
  size:
    value: 15
  batch_size:
    values: [128, 256, 512, 1024]
  threshold:
    distribution: uniform
    min: 0.05
    max: 0.3
  eps:
    values: [0.0, 0.1, 0.2]
  # Fixed seed: expressed as a constant like the other single-valued entries.
  seed:
    value: 42

  # --- Main forward network (P_F) hyperparameters ---
  fnet_hidden_dim:
    values: [128, 256, 512]
  fnet_num_layers:
    values: [2, 3]
  fnet_n_freq:
    values: [8, 16]
  lr_pf:
    distribution: log_uniform_values
    min: 1.0e-4
    max: 1.0e-2

  # --- Main backward network (P_B) hyperparameters ---
  # P_B can often be simpler than P_F.
  bnet_hidden_dim:
    values: [32, 64, 128]
  bnet_num_layers:
    values: [1, 2]
  bnet_n_freq:
    values: [4, 8]
  lr_pb:
    distribution: log_uniform_values
    min: 1.0e-4
    max: 1.0e-2

  # --- Forward divergence network (exploration) hyperparameters ---
  div_fnet_hidden_dim:
    values: [64, 128]
  div_fnet_num_layers:
    values: [1, 2]
  div_fnet_n_freq:
    values: [8, 16]
  div_lr_pf:
    distribution: log_uniform_values
    min: 1.0e-4
    max: 1.0e-2

  # --- Backward divergence network hyperparameters ---
  div_bnet_hidden_dim:
    values: [32, 64, 128]
  div_bnet_num_layers:
    values: [1, 2]
  div_bnet_n_freq:
    values: [4, 8]
  div_lr_pb:
    distribution: log_uniform_values
    min: 1.0e-4
    max: 1.0e-2

  # --- log Z (partition function) parameters ---
  # Learning log Z is critical for flow consistency.
  lr_logz:
    distribution: log_uniform_values
    min: 1.0e-3
    max: 1.0e-1
  div_lr_logz:
    distribution: log_uniform_values
    min: 1.0e-3
    max: 1.0e-1

  # Training length, held constant across trials.
  epochs:
    value: 4000