# Budgeted Multi-Objective Optimization Configuration
# Compares MoltenFlow against BO baselines on ZINC under a fixed oracle budget,
# maximizing both QED and -SA.

# Experiment identity: a short name plus a one-line human-readable summary.
experiment:
  name: "budgeted_optimization"
  description: "Compare optimization methods under fixed oracle budget"

# Data configuration
# IMPORTANT: Point this at the same data source that was used to train the
# VAE/flow models so the vocabulary stays consistent. The parquet file should
# match the training config.
data:
  # Use the parquet built with the same max_molecules as training to get a matching vocab
  parquet_path: "data/processed/zinc250k_qed_sas.parquet"
  csv_path: null  # Alternative source, e.g. "data/raw/zinc250k.csv"
  # NOTE(review): presumably the name of the column holding the molecule strings.
  # It is "smiles" while representation below is "selfies" - confirm the loader converts.
  smiles_col: "smiles"
  # NOTE(review): presumably a maximum sequence length; the unit (tokens vs.
  # characters) is not shown here - confirm.
  max_len: 128
  max_molecules: null  # Must match training config to get same vocab
  # Optional: path to a saved vocab file (takes precedence over building from data).
  # This path sits in the same experiment run directory as vae.checkpoint_path.
  vocab_path: "experiments/exp_5321a65d/20260126_212415/finetune/vocab.json"
  # Molecular representation: "smiles" or "selfies"
  representation: "selfies"

# Budget and initialization
# NOTE(review): whether the n_init initial evaluations count toward `budget`
# is not stated here - confirm against the optimization loop.
optimization:
  budget: 100  # Total oracle calls
  n_init: 10   # Initial dataset size
  batch_size: 1  # Candidates per iteration

# Initialization of the starting dataset.
# method is either "random" or "near_pareto".
init:
  method: "random"
  # The keys below are for near_pareto:
  pool_size: null  # null = auto: 5 * n_init
  k_neighbors: 5

# Reference point for hypervolume, given as (QED, -SA)
# QED in [0,1], SA in [1,10], so -SA in [-10,-1]
# ref_point is therefore the lower bound of both objective ranges.
hypervolume:
  ref_point: [0.0, -10.0]
  # Both objectives are marked "max".
  # NOTE(review): presumably one entry per objective, in the same order as ref_point - confirm.
  sense: ["max", "max"]

# VAE configuration
# NOTE(review): the architecture values below presumably have to match the ones
# the checkpoint was trained with - confirm before editing them.
vae:
  checkpoint_path: "experiments/exp_5321a65d/20260126_212415/finetune/vae_best.pt" # Template: "experiments/your_experiment/vae_best.pt"
  d_model: 128
  nhead: 8
  enc_layers: 6
  dec_layers: 6
  dim_ff: 1024
  dropout: 0.1
  # NOTE(review): presumably the K in the "K*d dims" note under
  # bo.latent_aggregation - confirm.
  K: 8
  latent_dim: 128

# Flow configuration
# The checkpoint comes from the same experiment run directory as the VAE
# checkpoint (flow/ subdirectory instead of finetune/).
# NOTE(review): the architecture values below presumably have to match the ones
# the checkpoint was trained with - confirm before editing them.
flow:
  checkpoint_path: "experiments/exp_5321a65d/20260126_212415/flow/flow_best.pt" # Template: "experiments/your_experiment/flow_best.pt"
  d_model: 256
  nhead: 8
  layers: 10
  dim_ff: 512
  dropout: 0.1
  time_dim: 128

# Surrogate configuration
surrogate:
  out_dim: 2  # Two outputs: QED and SA (SA is negated in the oracle to get -SA)
  hidden_dim: 1024
  aggregation: "mean"
  dropout: 0.1
  cond_dim: 0
  # Output bounds must match training config.
  # The sas bounds are for raw SA ([1, 10]), not the negated -SA objective.
  output_bounds:
    qed: [0.0, 1.0]
    sas: [1.0, 10.0]

# MoltenFlow proposer configuration
moltenflow:
  gamma: 30.0  # Guidance strength
  sigma: 0.5  # Noise level
  steps: 20   # Integration/optimization steps
  t_start: 0.9  # Starting time (local optimization)
  # Seed selection: "uniform", "round_robin", or "diversity_weighted".
  # diversity_weighted favors Pareto molecules but penalizes similarity to
  # recent proposals.
  seed_selection: "diversity_weighted"
  clip_norm: null
  normalize_gradient: true
  use_flow: true # If false, use pure gradient ascent (no flow)
  step_size: null  # null = default of gamma/steps (30.0 / 20 = 1.5 with the values above)
  # Diversity-weighted selection parameters (only used when seed_selection: "diversity_weighted")
  diversity_threshold: 0.7   # Tanimoto similarity threshold above which to penalize
  diversity_penalty: 2.0     # Strength of penalty for similar molecules
  diversity_window: 10       # Number of recent proposals to consider
  pareto_weight: 2.0         # Log-probability bonus for Pareto molecules

# Gradient ascent configuration (no flow, pure gradient optimization)
# NOTE(review): moltenflow.use_flow: false is also described as pure gradient
# ascent; how that mode relates to this section is not shown here - confirm.
gradient_ascent:
  step_size: 0.1  # Smaller step size - gradient ascent is more sensitive than flow+guidance
  sigma: 0.5  # Noise level
  steps: 20   # Optimization steps
  seed_selection: "diversity_weighted"
  clip_norm: null
  normalize_gradient: true
  # Diversity-weighted selection parameters (same values as in the moltenflow section)
  diversity_threshold: 0.7
  diversity_penalty: 2.0
  diversity_window: 10
  pareto_weight: 2.0

# BO proposer configuration (requires optional 'bo' dependencies)
bo:
  num_restarts: 10
  raw_samples: 512
  # Optional search bounds, left commented out here:
  # bounds: null  # Auto from data, or specify as [[low], [high]]
  # Latent aggregation: "flatten" (K*d dims) or "mean" (d dims)
  # flatten is default - works better with SELFIES representations
  latent_aggregation: "flatten"

# Output configuration
output:
  # The directory name embeds n_init (10) and budget (100); keep it in sync
  # with the optimization section when those values change.
  dir: "experiments/budgeted_optimization_n_init_10_budget_100_latest"
  pareto_snapshot_interval: 25  # Steps between Pareto snapshots

# Reproducibility
# NOTE(review): which random number generators this seed is applied to is not
# shown here - confirm against the run script.
seed: 42
