# Run identifier. It repeats the target (homo) and the pretraining details
# (qmugs, NTXentMultiplePositives, 620000, 35 epochs) that also appear in the
# pretrain_checkpoint path.
# NOTE(review): presumably used to name the run's log/output directory — confirm.
experiment_name: homo_tune_qmugs_NTXentMultiplePositives_620000_35epochs

# Checkpoint whose weights are used as the starting point for this run.
# NOTE(review): judging by the key names, parameters matching an entry in
# transfer_layers are copied from the checkpoint, except those matching an
# entry in exclude_from_transfer — confirm against the checkpoint-loading code.
pretrain_checkpoint: backbone_pretrained_models/ThreeDInfomax/PNA_qmugs_NTXentMultiplePositives_620000_123_25-08_09-19-52/best_checkpoint_35epochs.pt
transfer_layers: [gnn]
exclude_from_transfer: [batch_norm]

# Training-loop settings. NOTE(review): the per-key notes below are inferred
# from the key names — confirm against the training script.
num_epochs: 1000
batch_size: 128
log_iterations: 30
# Separate setting from lr_scheduler_params.patience (25), which belongs to
# the wrapped LR scheduler.
patience: 30
loss_func: L1Loss
# Assuming num_train is the number of training samples used, 50000 samples at
# batch_size 128 give 391 batches per epoch (the last one partial).
num_train: 50000
targets: [homo]
required_data: [dgl_graph, targets]
metrics: [mae_denormalized, pearsonr, rsquared, qm9_properties]

# Optimizer selection and its keyword arguments.
# NOTE(review): presumably `optimizer` names a torch.optim class and
# optimizer_params is forwarded to its constructor — confirm in the training script.
optimizer: Adam
optimizer_params:
  # Keep the decimal point in the mantissa and the explicit sign in the
  # exponent: YAML 1.1 parsers such as PyYAML only resolve this spelling as a
  # float (e.g. `7e-5` would be loaded as a string).
  lr: 7.0e-5
  weight_decay: 1.0e-11

# Learning-rate schedule: a WarmUpWrapper around ReduceLROnPlateau.
# NOTE(review): presumably `false` means the scheduler is not stepped after
# every batch — confirm against the training loop.
scheduler_step_per_batch: false
lr_scheduler: WarmUpWrapper
lr_scheduler_params:
  # Warm-up settings. NOTE(review): three step counts are given, presumably
  # one per optimizer parameter group — confirm against WarmUpWrapper.
  warmup_steps: [700, 700, 390]
  interpolation: linear
  # Scheduler that runs once warm-up has finished, followed by its parameters.
  wrapped_scheduler: ReduceLROnPlateau
  factor: 0.5
  patience: 25
  min_lr: 1.0e-6
  mode: min
  verbose: true



# Model definition
model_type: PNA
model_parameters:
  # A single output, matching the single entry in `targets` (homo).
  target_dim: 1
  hidden_dim: 200
  mid_batch_norm: true
  last_batch_norm: true
  readout_batchnorm: true
  # Rule of thumb from the original note: pick the momentum so that a
  # statistic's weight decays to ~0.001 over one epoch, i.e.
  #   momentum = 0.001^(1 / steps_per_epoch) = e^(ln(0.001) / steps_per_epoch)
  # For 100 steps per epoch this gives ~0.933 (the 0.970 quoted in the original
  # note corresponds to using log10 instead of ln inside the exponent).
  # NOTE(review): the value below (0.1) does not follow from this formula —
  # confirm which momentum convention the batch-norm layers expect.
  batch_norm_momentum: 0.1
  readout_hidden_dim: 200
  readout_layers: 2
  dropout: 0.0
  propagation_depth: 7
  # Entry order in the three lists below is unchanged.
  # NOTE(review): the order may determine how features are concatenated and
  # therefore checkpoint compatibility — confirm before reordering.
  aggregators: [mean, max, min, std]
  scalers: [identity, amplification, attenuation]
  readout_aggregators: [min, max, mean, sum]
  pretrans_layers: 2
  posttrans_layers: 1
  residual: true





# To continue training from an existing checkpoint, uncomment the line below and adjust the path:
#checkpoint: runs/PNAReadout_2_layer_03-04_15-29-07/last_checkpoint.pt