---
# Run-level settings.
# `metric_best` names the metric (here F1) — presumably the one used to
# select the best epoch; confirm against the training loop.
out_dir: /path/to/output
metric_best: f1
seed: 18
# Experiment-tracking (wandb) settings.
wandb:
  use: true
  project: iclr
  # Same placeholder path as the top-level `out_dir`.
  save_dir: /path/to/output
# Dataset location, task definition, and feature-encoder choices.
dataset:
  # Where the data lives and which dataset/format to load.
  dir: /path/to/data
  format: ETH
  name: Kaggle

  # Graph preprocessing switches.
  # NOTE(review): exact semantics live in the loader — confirm there.
  reverse_mp: true
  add_ports: true

  # Task definition: classification with `node` as the task entity.
  task: hetero_node
  task_type: classification
  task_entity: node
  transductive: true

  # Node feature encoder (enabled, batch norm disabled).
  node_encoder: true
  node_encoder_name: Hetero_Raw
  node_encoder_bn: false

  # Edge feature encoder (enabled, batch norm disabled).
  edge_encoder: true
  edge_encoder_name: Hetero_Raw
  edge_encoder_bn: false
# Top-level parallelism settings: a thread count and a worker count.
# NOTE(review): presumably CPU threads and data-loader workers — confirm
# against the code that reads these keys.
num_threads: 24
num_workers: 18
# Training-loop and mini-batch sampling settings.
train:
  mode: custom

  # Sampling: two entries of 100 in `neighbor_sizes`.
  # NOTE(review): presumably one fan-out per hop — confirm in the sampler.
  sampler: hetero_neighbor
  neighbor_sizes: [100, 100]
  add_ego_id: true

  # Epoch shape and batch size.
  iter_per_epoch: 1024
  batch_size: 4096

  # Evaluation / checkpoint cadence.
  # NOTE(review): units are presumably epochs — confirm.
  eval_period: 4
  ckpt_period: 25

  # Progress display and loader flags.
  tqdm: true
  persistent_workers: true
  pin_memory: true
# Validation-time sampling settings.
val:
  # Same sampler name as `train.sampler`.
  sampler: hetero_neighbor
  # NOTE(review): -1 presumably means "no per-epoch iteration cap" — confirm.
  iter_per_epoch: -1
# Model type and loss configuration.
model:
  type: GTModel
  loss_fun: weighted_cross_entropy
  # Two-entry weight list for the weighted loss.
  # NOTE(review): presumably one weight per class — confirm the ordering.
  loss_fun_weight: [1, 6.27]
  edge_decoding: dot
  graph_pooling: mean
# Graph-transformer (`gt`) hyperparameters.
gt:
  layer_type: SparseNodeTransformer

  # Layer counts.
  layers_pre_gt: 1
  layers: 2
  layers_post_gt: 2

  # Width and attention heads.
  attn_heads: 8
  dim_hidden: 64  # `gt.dim_hidden` must match `gnn.dim_inner`

  # Dropout rates.
  input_dropout: 0.0
  dropout: 0.5  # global transformer dropout
  attn_dropout: 0.3

  # Normalization: only layer norm is enabled.
  batch_norm: false
  layer_norm: true
  l2_norm: false

  # Activation and architecture variants.
  # NOTE(review): `Edge` / `Fixed` / `Type` are option names whose meaning
  # is defined by the model code — confirm there.
  act: gelu
  attn_mask: Edge
  residual: Fixed
  ffn: Type
  jumping_knowledge: false
# GNN settings; only `dropout` is set in this file.
# NOTE(review): the comment on `gt.dim_hidden` says it must match
# `gnn.dim_inner`, which is not set here — verify that the value picked up
# from defaults equals 64.
gnn:
  dropout: 0.5  # local MP-GNN dropout
# Optimizer and learning-rate schedule settings.
optim:
  # NOTE(review): presumably the number of batches whose gradients are
  # accumulated before each optimizer step — confirm in the training loop.
  batch_accumulation: 8
  clip_grad_norm: true
  optimizer: adamW
  # Written with an explicit decimal point: YAML 1.1 loaders (e.g. PyYAML)
  # resolve a bare `1e-5` as the string '1e-5', not a float.
  weight_decay: 1.0e-5
  base_lr: 0.001
  max_epoch: 500
  # Cosine schedule with 5 warm-up epochs.
  scheduler: cosine_with_warmup
  num_warmup_epochs: 5
