---
# Top-level run settings.
# NOTE(review): `metric_best` presumably names the metric used to pick the
# best epoch -- confirm against the config consumer.
metric_best: accuracy
out_dir: results
# Weights & Biases section; `use` is false in this config.
wandb:
  project: GPSE-MNIST
  use: false
# Dataset selection, task definition and input encoders.
dataset:
  # Which dataset to load and from which format/loader.
  name: MNIST
  format: PyG-GNNBenchmarkDataset
  # Task definition: graph-level classification, non-transductive.
  task_type: classification
  task: graph
  transductive: false
  # Node encoder settings.
  node_encoder_name: GPSE
  node_encoder: true
  node_encoder_bn: false
  # Edge encoder settings.
  edge_encoder_name: LinearEdge
  edge_encoder: true
  edge_encoder_bn: false
# Settings for the `posenc_GPSE` section (enabled in this config).
posenc_GPSE:
  enable: true
  # Checkpoint file referenced by this section.
  model_dir: pretrained_models/gpse_molpcba.pt
  rand_type: NormalSE
  # Representation options.
  repr_type: no_post_mp
  use_repr: true
  # Encoder applied in the downstream model: type, depth and output width.
  model: Linear
  layers: 2
  dim_pe: 8
  raw_norm_type: none
  # Input dropout rates (`_be` / `_ae` variants).
  # NOTE(review): exact meaning of the suffixes is not visible here -- confirm.
  input_dropout_ae: 0.0
  input_dropout_be: 0.5
  virtual_node: true
  # Nested GNN configuration for this section.
  gnn_cfg:
    head: inductive_hybrid_multi
    layer_type: resgatedgcnconv
    stage_type: skipsum
    # Layer counts per stage.
    layers_pre_mp: 1
    layers_mp: 20
    layers_post_mp: 2
    # Widths.
    dim_inner: 512
    # use dim_inner if None
    multi_head_dim_inner: 32
    # Activation, normalization and regularization.
    act: relu
    batchnorm: true
    dropout: 0.2
    # Aggregation.
    agg: mean
    normalize_adj: false
# Training-loop settings: mode, batch size and eval/checkpoint periods.
train:
  mode: custom
  batch_size: 16
  # Periods for evaluation and checkpointing.
  ckpt_period: 100
  eval_period: 1
# Model selection, loss function and pooling/decoding options.
model:
  type: GPSModel
  loss_fun: cross_entropy
  graph_pooling: mean
  edge_decoding: dot
# Hyperparameters optimized for ~100k budget.
gt:
  layer_type: CustomGatedGCN+Transformer
  # `gt.dim_hidden` must match `gnn.dim_inner`
  dim_hidden: 52
  n_heads: 4
  layers: 3
  # Normalization switches.
  batch_norm: true
  layer_norm: false
  # Dropout rates.
  attn_dropout: 0.5
  dropout: 0.0
# Settings for the `gnn` section (head, layer counts, width, regularization).
gnn:
  head: default
  # `gt.dim_hidden` must match `gnn.dim_inner`
  dim_inner: 52
  # Layer counts before and after message passing.
  layers_post_mp: 3
  layers_pre_mp: 0
  # Activation, normalization and dropout.
  act: relu
  batchnorm: false
  dropout: 0.0
  # Aggregation.
  agg: mean
  normalize_adj: false
# Optimizer and learning-rate schedule settings.
optim:
  clip_grad_norm: true
  optimizer: adamW
  # Written with an explicit mantissa dot: YAML 1.1 loaders such as PyYAML
  # resolve a bare `1e-4` as the string "1e-4" rather than a float, so the
  # value would only be numeric if the consumer re-parses strings.
  weight_decay: 1.0e-4
  base_lr: 0.001
  max_epoch: 100
  # Cosine schedule with warmup; warmup length is given in epochs.
  scheduler: cosine_with_warmup
  num_warmup_epochs: 5
