# Name of this run.
experiment_name: baseline

# Dataset, trainer and loop-level settings.
dataset: file_loader_qm9
num_epochs: 250
batch_size: 16
trainer: optimal_transport
log_iterations: 10
# Top-level patience (30); lr_scheduler_params has its own patience (5).
patience: 30
loss_func: MSELoss
num_train: 10000
required_data: [pyg_multiple_conformers, dgl_graph]
collate_function: pyg_and_dgl_graph_collate
# Same count as n_model_confs / n_true_confs in model_parameters.hyperparams.
num_conformers: 10
main_metric: loss

# Optimizer selected by name; optimizer_params holds its settings
# (only the learning rate is set here).
optimizer: Adam
optimizer_params:
  lr: 0.001  # same value as 1.0e-3

# Learning-rate schedule: a WarmUpWrapper configured with warmup settings plus
# the name and parameters of the scheduler it wraps.
scheduler_step_per_batch: false
lr_scheduler: WarmUpWrapper
lr_scheduler_params:
  # Warmup settings; warmup_steps is given as a one-element list.
  warmup_steps: [700]
  interpolation: 'linear'
  # Scheduler to run after warmup, followed by that scheduler's parameters.
  # NOTE(review): these keys match the arguments of
  # torch.optim.lr_scheduler.ReduceLROnPlateau -- confirm how they are forwarded.
  wrapped_scheduler: ReduceLROnPlateau
  factor: 0.7
  patience: 5  # distinct from the top-level `patience`
  min_lr: 1.0e-5
  mode: 'min'
  verbose: true



# Model parameters
model_type: 'OptimalTransportModel'
model_parameters:
  # GNN selected by name; gnn_params holds its settings.
  gnn_model: PNAGNNRandomEdgeUpdate
  gnn_params:
    hidden_dim: 50
    mid_batch_norm: false
    last_batch_norm: false
    readout_batchnorm: true
    # Rule of thumb for a running-statistics decay factor:
    #   decay = forgetfulness^(1 / steps_per_epoch), forgetfulness ~0.001
    #   e.g. 0.001^(1/100) = e^(ln(0.001)/100) ~= 0.933
    #   (e^(log10(0.001)/100) would give 0.970, but that mixes log bases).
    # NOTE(review): the value below is 0.1, not such a decay. If it is passed
    # to torch.nn.BatchNorm*, momentum weights the *new* batch statistic, i.e.
    # it plays the role of (1 - decay) -- confirm against the model code.
    batch_norm_momentum: 0.1
    dropout: 0.0
    propagation_depth: 3
    aggregators:
      - sum
    scalers:
      - identity
    pretrans_layers: 2
    posttrans_layers: 2
    residual: false

  # Settings nested per sub-module (each sets n_layers or n_head), followed by
  # scalar options.
  hyperparams:
    alpha_mlp:
      n_layers: 2
    c_mlp:
      n_layers: 1
    coord_pred:
      n_layers: 2
    d_mlp:
      n_layers: 1
    encoder:
      n_head: 2
    global_transformer: false
    h_mol_mlp:
      n_layers: 1
    loss_type: ot_emd
    # Same value as gnn_params.hidden_dim above.
    hidden_dim: 50
    # Both conformer counts equal the top-level num_conformers (10).
    n_model_confs: 10
    n_true_confs: 10
    random_alpha: false
    random_vec_dim: 10
    random_vec_std: 1.0
    teacher_force: false



# To continue training from a checkpoint, uncomment the line below and set the path:
#checkpoint: runs/PNAReadout_2_layer_03-04_15-29-07/last_checkpoint.pt