# Dataset selection, input file names, and data-loader settings.
# NOTE(review): the meaning of each key is inferred from its name only;
# confirm against the code that consumes this section.
dataset:
  # For 5 buckets (the list has five entries and num_buckets below is 5);
  # adjust as needed.
  batch_sizes: [1024, 1024, 1024, 1024, 1024]
  # Alternative values left by the author (purpose not stated):
  # [25, 19, 12, 6, 3]
  val_batch_sizes: [1024, 1024, 1024, 1024, 1024]
  dataset_name: 'uspto_50k'
  fraction: 0.1
  # NOTE(review): this scalar coexists with the val_batch_sizes list above;
  # confirm which of the two the loader actually reads.
  val_batch_size: 5
  num_buckets: 5
  num_workers: 7
  prefetch_factor: 2
  completion_lower_limit: 0.8
  max_augmentations: 0
  vocab_file: 'vocab_rsmiles_50k.txt'
  val_file: 'val_no_overlap.csv'
  train_file: 'train_no_overlap.csv'
  test_file: 'test_no_overlap.csv'
  # Plain string "debug"; presumably a directory name — TODO confirm.
  data_dir: debug
  # NOTE(review): fraction, train_fraction and val_fraction are all 0.1;
  # how they interact is not visible from this file.
  train_fraction: 0.1
  start_idx: 0
  end_idx: 1
  separator: '<unk>'
  # Same file name as test_file above.
  subset: 'test_no_overlap.csv'
  val_fraction: 0.1
  conditional_starting_material_column: 'most_sm'

# Top-level predictor / guidance settings.
# NOTE(review): semantics are inferred from key names only; confirm against
# the code that reads this file.
debug_print_ground_truth_reactants: false
starting_material_key: 'route_most_similar_starting_material'
readjust_translations: false
adjusted_score_weight: 1
adjusted_original_score_weight: 1
name: similarity
true_property: 'similarity_to_starting_material'
target_and_starting_material_combination_weight: 0.7
with_reactants_as_starting_material: false
with_starting_material: true
similarity_type: tanimoto
combination_weight: 1
predictor_type: neural_network
# Negative value; presumably a score penalty tied to the end-of-sequence
# token — TODO confirm.
eos_penalty: -10.
as_regression: true
experiment_name: 'tanimoto'
# Same file name as eval.checkpoint_path further down in this file.
checkpoint_path: 'checkpoint_10.pt'
prediction_threshold: 0.7
sigmoid_steepness: 10
# Explicit null; presumably disables depth-based enforcement — TODO confirm.
enforce_starting_material_at_depth: null
enforce_starting_material_scale: 100
enforce_starting_material_min_length: 0
use_ground_truth_node_depth: false

# Target selection and candidate-scoring settings.
# NOTE(review): semantics are inferred from key names only; confirm against
# the code that reads this file.
# The trailing note "main_target" is presumably the alternative accepted
# value — TODO confirm.
similarity_target: 'starting_material'  # main_target
combine_renormalize: true
# NOTE(review): absolute path under one user's home directory; it is
# machine-specific and will need overriding on any other machine.
onmt_checkpoint_path: /Users/laabidn1/multiguide/checkpoints/rsmiles_50k_checkpoints/USPTO_50K_PtoR.pt
guidance_scale: 0.1
n_candidates_to_evaluate: 10
debug_classifier_scores: false
are_scores_close_tolerance: 0.01
are_scores_close_debug: false

# Length / confidence bounds for guidance and the search batch size
# (roles inferred from key names — TODO confirm).
# Set min_length_for_guidance based on the dataset of each task, or treat it
# as a tunable parameter.
min_length_for_guidance: 5
max_length_for_guidance: 500
min_confidence_for_guidance: 0.01
search_batch_size: 1024

# NOTE(review): -1 is presumably a sentinel (e.g. "no specific class" or
# "last class"); which one is not visible from this file — confirm.
target_class_index: -1

# Predicted property and whether its prediction is normalized
# (roles inferred from key names — TODO confirm).
property: max_tanimoto
normalize_prediction: true
# Model hyperparameters.
# NOTE(review): the key names suggest an attention-based network
# (num_heads, num_layers); the architecture itself is not visible here.
model:
  hidden_dim: 128
  num_heads: 4
  num_layers: 4
  dropout: 0.1
  # NOTE(review): this file also sets as_regression: true and
  # train.loss: mse; confirm whether num_classes is used in that mode.
  num_classes: 11

# Training-loop settings.
# NOTE(review): semantics are inferred from key names only; confirm against
# the training script that reads this section.
train:
  loss: mse
  model_log_var: false
  stratified_evaluation: true
  classifier_evaluation: false
  curriculum_learning: false
  # Mantissas are written with a decimal point on purpose: YAML 1.1 loaders
  # such as PyYAML only resolve exponent notation to a float when the
  # mantissa contains ".", and otherwise load a value like 1e-3 as a string.
  # This form is a float under both YAML 1.1 and YAML 1.2 loaders.
  learning_rate: 1.0e-3
  weight_decay: 1.0e-5
  use_scheduler: false
  weighted_loss: false
  resume: false
  # Explicit null: no checkpoint to resume from (resume is false above).
  resume_path: null
  num_epochs: 500
  eval_interval: 50
  print_every: 1

# Evaluation settings: checkpoint to load and per-term score weights.
# NOTE(review): how the weights are combined is not visible from this file;
# confirm against the evaluation code.
eval:
  # Same file name as the top-level checkpoint_path in this file.
  checkpoint_path: 'checkpoint_10.pt'
  property_weight: 25
  original_score_weight: 1
  reaction_type_weight: 1
  tanimoto_weight: 1