# Data loading / dataset construction settings.
dataset:
  # Training batch size per bucket: one entry for each of the 5 buckets
  # (adjust as needed; presumably the entry count should match num_buckets).
  batch_sizes:
    - 1024
    - 1024
    - 1024
    - 1024
    - 1024
  # Validation batch size per bucket.
  # Alternative values kept for reference: [25, 19, 12, 6, 3]
  val_batch_sizes:
    - 1024
    - 1024
    - 1024
    - 1024
    - 1024
  dataset_name: 'completion1._aug1'
  fraction: 0.1
  # NOTE(review): this scalar coexists with the per-bucket val_batch_sizes
  # list above; confirm which one the loader actually reads.
  val_batch_size: 5
  num_buckets: 5
  num_workers: 7
  prefetch_factor: 2
  vocab_file: 'vocab.txt'
  test_ratio: 0.2
  # Alternative value kept for reference: 10000
  max_num_expressions: 50000
  data_dir: 'toy_experiment'
  max_num: 3
  max_depth: 2
  partial_sequence_completion_lower_limit: 0.5
  # Only include partial sequences above this length.
  partial_sequence_min_length_limit: 3
# --- Run identity and task-mode flags ---
name: toy_experiment
# Upper bound paired with min_length_for_guidance further down.
max_length_for_guidance: 500
multi_step_classes: false
normalize_prediction: false
as_regression: false
onmt_out_dir: training
experiment_name: 'new_experiment'
# Top-level checkpoint; a separate eval.checkpoint_path is defined under `eval`.
checkpoint_path: 'checkpoint_7999.pt'

# --- Predictor / guidance settings ---
readjust_translations: false
combine_renormalize: true
predictor_type: neural_network
phase: train_classifier
# NOTE(review): the name suggests a 24-layer, 16-head OpenNMT run from
# 2025-07-21; this is inferred from the string only — confirm.
onmt_checkpoint_path: 'onmt_train_layers_24_heads_16_time_stamp_20250721_171226'
prediction_threshold: 3
guidance_scale: 0.3
n_candidates_to_evaluate: 14
debug_classifier_scores: false
are_scores_close_tolerance: 0.01
are_scores_close_debug: false
sigmoid_steepness: 100

# --- Guidance gating and search ---
min_confidence_for_guidance: 0.01
# NOTE(review): "3" and "7" below presumably are the target property values
# each class index stands for (see `property: length`) — confirm.
target_class_index: 1 # 0 for 3 and 1 for 7
min_length_for_guidance: 3 # set based on the dataset of each task or as tunable parameter
eos_penalty: -10.0
search_batch_size: 1024

# Name of the property being targeted.
property: length

alphabet_size: 240 # number of unique tokens in smi_tokenizer (or in uspto_full dataset)

# Network hyper-parameters (presumably for the classifier trained in the
# `train_classifier` phase — confirm against the model constructor).
model:
  hidden_dim: 16
  # NOTE(review): if hidden_dim is split evenly across heads, 16 / 8 gives
  # only 2 dimensions per head — confirm this is intended.
  num_heads: 8
  num_layers: 6
  dropout: 0.1
  num_classes: 2

# Optimisation and training-loop settings.
train:
  # NOTE(review): `mse` is selected while the top-level `as_regression` is
  # false and model.num_classes is 2 — confirm this combination is intended.
  loss: mse
  model_log_var: false
  stratified_evaluation: true
  curriculum_learning: true
  # Exponent floats are written with an explicit decimal point. YAML 1.1
  # loaders (e.g. PyYAML) resolve a bare `1e-3` to the *string* "1e-3", not a
  # float; `1.0e-3` is parsed as a float by both YAML 1.1 and 1.2 loaders.
  learning_rate: 1.0e-3
  weight_decay: 1.0e-5
  use_scheduler: false
  weighted_loss: true
  # Resuming is disabled; resume_path is left unset (null) accordingly.
  resume: false
  resume_path: null
  num_epochs: 2000
  # NOTE(review): units of eval_interval / print_every (epochs vs. steps) are
  # not visible in this file — confirm with the training loop.
  eval_interval: 10
  print_every: 100
  batch_size: 8192

# Evaluation-time settings.
eval:
  # Nested key, separate from the top-level checkpoint_path
  # ('checkpoint_7999.pt').
  checkpoint_path: 'checkpoint_10.pt'
  # NOTE(review): meaning/scale of property_weight is not visible in this
  # file — confirm with the evaluation code.
  property_weight: 25