# Dataset settings.
dataset:
  dataset_name: '0.1'
  vocab_file: 'vocab.txt'
  num_buckets: 5
  # One entry per bucket (5 buckets here); adjust as needed.
  batch_sizes: [1024, 1024, 1024, 1024, 1024]
  # Alternative values recorded next to this key: [25, 19, 12, 6, 3]
  val_batch_sizes: [1024, 1024, 1024, 1024, 1024]
  # NOTE(review): this scalar coexists with the val_batch_sizes list above;
  # confirm which of the two the consumer actually reads.
  val_batch_size: 5
  num_workers: 7
  prefetch_factor: 2

predictor_type: 'neural_network'

property: 'sa_score'

# Number of unique tokens in smi_tokenizer (or in the uspto_full dataset).
alphabet_size: 242

# Model hyperparameters.
model:
  num_layers: 8
  num_heads: 8
  hidden_dim: 1024
  dropout: 0.1

# Training settings.
train:
  # Exponent-notation floats are written with an explicit '.' in the mantissa.
  # YAML 1.1 resolvers (e.g. PyYAML) only treat a scalar as a float when the
  # dot is present; a bare `1e-3` is loaded as the string '1e-3'.
  learning_rate: 1.0e-3
  weight_decay: 1.0e-5
  use_scheduler: false
  curriculum_learning: true
  num_epochs: 500
  weighted_loss: true
  # NOTE(review): units of the two intervals below (epochs vs. steps) are not
  # visible from this file; confirm against the training loop.
  eval_interval: 10
  print_every: 1

# Evaluation settings.
eval:
  property_weight: 25
  checkpoint_path: 'checkpoint_10.pt'