# ZINC250K Multi-Objective Optimization - Colab Config
# Top-level random seed. evaluation.bootstrap.seed is a separate key that
# currently holds the same value (42).
# NOTE(review): whether the two must stay equal depends on the consumer - confirm.
seed: 42

# Dataset source, column names and molecule encoding.
data:
  dataset: zinc250k
  # Absolute path; the /content prefix matches the Colab target named in the
  # file header. Change it when running anywhere else.
  parquet_path: /content/moltenflow/data/processed/zinc250k_qed_sas.parquet
  smiles_col: smiles
  # NOTE(review): properties and property_directions are presumably paired by
  # position (qed -> max, sas -> min); keep both lists the same length and order.
  properties: [qed, sas]
  property_directions: [max, min]
  # NOTE(review): unit of max_len (tokens vs. characters) is not visible here - confirm.
  max_len: 128
  max_molecules: null  # Set to e.g. 50000 for faster testing
  # The input column is named "smiles" while the representation is "selfies";
  # presumably the loader converts between them - TODO confirm.
  representation: selfies

# VAE pre-training stage: architecture sizes plus training schedule.
vae_pretrain:
  # d_model and latent_dim are both 128 in this config.
  d_model: 128
  # 128 / 8 = 16 channels per head, assuming standard multi-head attention
  # where d_model must be divisible by nhead - TODO confirm.
  nhead: 8
  enc_layers: 6
  dec_layers: 6
  dim_ff: 1024
  dropout: 0.1
  # NOTE(review): the meaning of K cannot be determined from this file;
  # document it at the consumer.
  K: 8
  latent_dim: 128
  epochs: 150
  batch_size: 256
  # Written with a decimal point and a signed exponent so that YAML 1.1 loaders
  # (e.g. PyYAML) resolve it as a float; a bare "1e-4" would load as a string there.
  lr: 1.0e-4
  # NOTE(review): presumably the KL-term weight and the fraction of training
  # over which it is ramped up - confirm against the training loop.
  beta: 0.10
  beta_warmup_frac: 0.35

# VAE fine-tuning stage. Only training settings are listed here; no architecture
# keys are repeated.
# NOTE(review): presumably the vae_pretrain architecture is reused - confirm.
vae_finetune:
  epochs: 150
  # 4x the pre-training batch size (256).
  batch_size: 1024
  # 10x the pre-training learning rate (1.0e-4). Keep the "x.ye-z" spelling so
  # YAML 1.1 loaders read it as a float.
  lr: 1.0e-3
  property_weight: 1.0
  freeze_decoder: false

# Surrogate model settings.
surrogate:
  hidden_dim: 1024
  aggregation: mean
  dropout: 0.1
  # One entry per name in data.properties (qed, sas); add a matching entry here
  # when a property is added there.
  # NOTE(review): each pair is presumably [lower, upper] - confirm how the
  # consumer applies it (clamp vs. squashing).
  output_bounds:
    qed: [0.0, 1.0]
    sas: [1.0, 10.0]

# Flow model: architecture sizes plus training schedule.
flow:
  # Twice the VAE d_model (128); 256 / 8 = 32 channels per head, assuming
  # standard multi-head attention - TODO confirm.
  d_model: 256
  nhead: 8
  layers: 10
  # 2x d_model here, whereas vae_pretrain uses 8x (1024 vs. 128).
  dim_ff: 512
  dropout: 0.1
  # Equal to vae_pretrain.latent_dim (128) in this config.
  # NOTE(review): whether that is required is not visible here.
  time_dim: 128
  epochs: 300
  batch_size: 1024
  # Signed-exponent float spelling keeps this a float under YAML 1.1 loaders.
  lr: 2.0e-4

# Pareto-related settings.
pareto:
  # Same values, in the same order, as data.property_directions ([max, min]).
  # NOTE(review): presumably the two must be kept in sync by hand - confirm.
  sense: [max, min]
  k_neighbors: 100
  normalize_distance: true

# Settings for the optimization stage.
optimization:
  n_candidates: 1000
  # NOTE(review): presumably a noise / perturbation scale - confirm at the consumer.
  sigma: 0.1
  steps: 20
  batch_size: 128
  # NOTE(review): presumably a start time on a [0, 1] flow-time axis - confirm.
  t_start: 0.9

# Guidance settings.
guidance:
  # NOTE(review): both gamma and scale are present; how they combine is not
  # visible in this file - document at the consumer.
  gamma: 10.0
  # null leaves clip_norm unset.
  # NOTE(review): presumably this disables clipping - confirm.
  clip_norm: null
  normalize: true
  use_directional: true
  scale: 1.0

# Sampling settings for the generation stage.
generation:
  # The same sample count (10000) is configured for both the *_uncond and
  # *_cond keys.
  n_samples_uncond: 10000
  n_samples_cond: 10000
  integration_steps: 80
  batch_size: 256

# Evaluation settings: hypervolume and bootstrap.
evaluation:
  hypervolume:
    # NOTE(review): presumably a margin applied when placing the hypervolume
    # reference point; relative vs. absolute is not visible here - confirm.
    ref_point_margin: 0.1
  bootstrap:
    n_samples: 1000
    confidence: 0.95
    # Separate key from the top-level seed; both are 42 in this file.
    seed: 42

# UMAP projection parameters and the contour overlay drawn on top of it.
umap:
  n_neighbors: 15
  min_dist: 0.1
  contours:
    # NOTE(review): with a thin-plate-spline kernel, a smoothing of 0.0
    # presumably means exact interpolation through the data points - confirm
    # against the interpolation routine used by the consumer.
    smoothing: 0.0
    kernel: thin_plate_spline
    grid_resolution: 100
    n_levels: 10

# Plot selection.
plotting:
  # NOTE(review): each pair is presumably a pair of indices into data.properties,
  # so [0, 1] would be qed vs. sas - confirm.
  property_pairs:
    - [0, 1]
  # Per-plot boolean toggles; every plot listed here is enabled in this config.
  umap_plots:
    pretrain_vs_finetuned: true
    pretrain_vs_finetuned_contours: true
    real_analysis: true
    property_hue_uncond: true
    property_hue_cond: true
    multitype_overlay: true
    splits_overlay: true

# Ablation switches.
# NOTE(review): with both use_* flags true and the skip_* flag false, this is
# presumably the full pipeline with nothing ablated - confirm.
ablation:
  use_surrogate: true
  skip_latent_organizing: false
  use_flow: true