# Configuration for the `pcqm.finetune` scheme, model `tgt_at_tp_rdkit`.
# Starts from the pretrained weights referenced by `pretrained_weights_file`
# and reads precomputed bins from `bins_input_path` (both near the end).

# --- Run identification ---
scheme: pcqm.finetune
model_name: tgt_at_tp_rdkit
model_prefix: tgt_at_200m

# --- Hardware / batching ---
# The reference distributed setup used 32 GPUs (V100).
distributed: true
# Per-GPU batch size; total batch size in the reference setup = 64 * 32 = 2048.
# NOTE(review): with a different GPU count the total batch size changes —
# confirm whether batch_size / max_lr should be adjusted to compensate.
batch_size: 64

# --- Model size ---
model_height: 24
node_width: 768
edge_width: 256
num_heads: 64

# --- Learning-rate schedule and regularization ---
max_lr: 0.0002
source_dropout: 0.3
drop_path: 0.2
node_act_dropout: 0.1
edge_act_dropout: 0.1
lr_warmup_steps: 3000
lr_total_steps: 50000

# --- Remaining architecture options ---
node_ffn_multiplier: 1.0
edge_ffn_multiplier: 1.0
scale_degree: true
upto_hop: 32
activation: gelu

# --- Evaluation / prediction ---
evaluation_samples: 10
# Set to 10 if you want to use 10 samples for prediction.
# NOTE(review): presumably `bins_input_path` below must be switched to the
# matching bins file (bins10) at the same time — confirm.
prediction_samples: 50
prediction_bmult: 2
dataloader_workers: 1
evaluation_type: prediction

# --- Optimizer / numerics ---
# If apex is not installed, set this to "Adam".
optimizer: apex_FusedAdam
num_dist_bins: 512
range_dist_bins: 8
dist_loss_weight: 0.1
mixed_precision: true

# --- Input artifacts ---
# Path to the *pretrained* model state file.
pretrained_weights_file: models/pcqm/tgt_at_200m/pretrain/tgt_at_tp/checkpoint/model_state.pt
# Path to the bins file; point it at `bins10` instead of `bins50` for the
# 10 variant.
# NOTE(review): the original comment said "for 10 bins", but the 50/10 suffix
# appears to track `prediction_samples` rather than `num_dist_bins` — confirm.
bins_input_path: models/pcqm/tgt_at_200m/dist_pred/tgt_at_dp_rdkit/predictions/bins50
bins_shift_half: true
bins_zero_diag: true

# --- Triplet interaction ---
# Set to "aggregate" for triplet aggregation.
triplet_type: attention
triplet_heads: 16
