# Run-level settings.
# `accelerator` holds the same value as the top-level `device` key (cuda:0).
# NOTE(review): presumably the two must be kept in sync — confirm with the consumer.
accelerator: cuda:0
benchmark: false
# NOTE(review): presumably batch-norm epsilon and momentum, judging by the key names.
bn:
  eps: 1.0e-05
  mom: 0.1
# NOTE(review): looks like the file name the resolved config is written to — confirm.
cfg_dest: config.yaml
# Empty list: no custom metrics are configured.
custom_metrics: []
# Settings for the `dag` group.
# NOTE(review): presumably the DAG-specific model options; confirm each key
# against the code that reads this group.
dag:
  bidirectional: true
  # Nested options for the `compare` sub-group; `do_limit` is on, with a
  # margin of 0.5 and a max compare ratio of 8.
  compare:
    do_limit: true
    margin: 0.5
    max_compare_ratio: 8
  conv_type: fnb
  # dim_in is 5 here while share.dim_in is 1.
  # TODO confirm the two are independent settings.
  dim_in: 5
  ff: true
  mlp: false
  pe: true
# Dataset selection, encoders and splitting.
dataset:
  cache_load: false
  cache_save: false
  # Relative path; resolved against the working directory of the run.
  dir: ./data/nb101.pt
  edge_dim: 128
  # Edge encoder is enabled and named `DummyEdge`; its batch norm is off.
  edge_encoder: true
  edge_encoder_bn: false
  edge_encoder_name: DummyEdge
  edge_encoder_num_types: 0
  edge_message_ratio: 0.8
  edge_negative_sampling_ratio: 1.0
  edge_train_mode: all
  # Generic encoder is enabled with batch norm; encoder_dim equals edge_dim (128).
  encoder: true
  encoder_bn: true
  encoder_dim: 128
  encoder_name: db
  # Plain `None` is parsed by YAML as the string "None", not as null (YAML
  # null is spelled `null` or `~`). Other keys in this group use lowercase
  # `none`. NOTE(review): whether the consumer converts either spelling to a
  # real null is not visible here — confirm before changing them.
  format: None
  infer_link_label: None
  label_column: none
  label_table: none
  location: local
  name: nb101
  # Node encoder is disabled.
  # NOTE(review): the node_encoder_* keys below are presumably ignored while
  # node_encoder is false — confirm.
  node_encoder: false
  node_encoder_bn: true
  node_encoder_name: DAGNode
  node_encoder_num_types: 0
  remove_feature: false
  resample_disjoint: false
  resample_negative: false
  shuffle_split: true
  slic_compactness: 10
  # Three split fractions summing to 1.0.
  # NOTE(review): presumably train/val/test, in that order — confirm.
  split:
  - 0.8
  - 0.1
  - 0.1
  split_dir: ./splits
  split_index: 0
  split_mode: standard
  # Graph-level regression task.
  task: graph
  task_type: regression
  to_undirected: false
  transductive: false
  transform: none
  tu_simple: true
# Same value as the top-level `accelerator` key (cuda:0).
device: cuda:0
devices: 1
# NOTE(review): `example_arg` and `example_group` look like template
# placeholders — confirm they are still read before relying on them.
example_arg: example
example_group:
  example_arg: example
# Settings for the `gnn` group.
gnn:
  act: relu
  agg: add
  att_final_linear: false
  att_final_linear_bn: false
  att_heads: 1
  batchnorm: true
  clear_feature: true
  # Same value as gt.dim_hidden (64).
  # NOTE(review): presumably the two must match — confirm with the model code.
  dim_inner: 64
  dropout: 0.0
  # `graph` here matches dataset.task (`graph`).
  head: graph
  keep_edge: 0.5
  l2norm: true
  layer_type: generalconv
  # Layer counts: 1 pre-MP, 2 MP, 1 post-MP.
  layers_mp: 2
  layers_post_mp: 1
  layers_pre_mp: 1
  msg_direction: single
  normalize_adj: false
  residual: false
  self_msg: concat
  skip_every: 1
  stage_type: stack
# Top-level flag, not part of the `gnn` group.
gpu_mem: true
# Settings for the `gt` group.
# NOTE(review): presumably the transformer layers of the model selected by
# model.type (TEFormer) — confirm.
gt:
  # Attention dropout is 0.5 while the general `dropout` below is 0.0.
  attn_dropout: 0.5
  # batch_norm is on; layer_norm (below) is off.
  batch_norm: true
  # NOTE(review): presumably only read by a BigBird layer type; layer_type
  # below is FLOW+HA — confirm whether this sub-group is used at all.
  bigbird:
    add_cross_attention: false
    attention_type: block_sparse
    block_size: 3
    chunk_size_feed_forward: 0
    hidden_act: relu
    is_decoder: false
    layer_norm_eps: 1.0e-06
    max_position_embeddings: 128
    num_random_blocks: 3
    use_bias: false
  # Same value as gnn.dim_inner (64).
  dim_hidden: 64
  dropout: 0.0
  full_graph: true
  gamma: 1.0e-05
  layer_norm: false
  layer_type: FLOW+HA
  layers: 10
  # 16 heads with dim_hidden 64.
  # Assumes the hidden size is split across heads (4 per head) — TODO confirm.
  n_heads: 16
  pna_degrees: []
  residual: true
mem:
  inplace: false
# NOTE(review): read together, these presumably select the best epoch as the
# one with the lowest (`argmin`) `mae` — confirm with the consumer.
metric_agg: argmin
metric_best: mae
# Model selection and loss settings.
model:
  # NOTE(review): edge_decoding, match_upper and thresh look unrelated to the
  # graph regression task set under `dataset` — presumably unused defaults;
  # confirm.
  edge_decoding: dot
  graph_pooling: mean
  # `l1` loss is consistent with metric_best (`mae`) and
  # dataset.task_type (`regression`).
  loss_fun: l1
  match_upper: true
  size_average: mean
  thresh: 0.5
  type: TEFormer
# Empty string: no name tag is set.
name_tag: ''
# Both counts are set to 6.
num_threads: 6
num_workers: 6
# Optimiser and learning-rate schedule.
optim:
  base_lr: 0.0008
  batch_accumulation: 1
  # Gradient-norm clipping is on, with a clip value of 1.0.
  clip_grad_norm: true
  clip_grad_norm_value: 1.0
  # NOTE(review): lr_decay, reduce_factor, schedule_patience and steps are
  # presumably only read by step/plateau schedulers, not by the
  # `cosine_with_warmup` scheduler selected below — confirm.
  lr_decay: 0.1
  max_epoch: 200
  min_lr: 0.0
  # NOTE(review): presumably unused with the `adamW` optimiser — confirm.
  momentum: 0.9
  # 20 warmup epochs out of max_epoch 200.
  num_warmup_epochs: 20
  optimizer: adamW
  reduce_factor: 0.1
  schedule_patience: 10
  scheduler: cosine_with_warmup
  steps:
  - 30
  - 60
  - 90
  weight_decay: 4.0e-05
# Relative output directory; differs from the top-level `run_dir` (`results`).
# NOTE(review): how the two relate is not visible here — confirm.
out_dir: results/teformer


# Settings for the `posenc_RWSE` group.
# `enable` is false.
# NOTE(review): presumably the remaining keys are inactive while it stays
# disabled — confirm.
posenc_RWSE:
  dim_pe: 16
  enable: false
  kernel:
    # Seven integer entries, 0 through 6.
    times: [0, 1, 2, 3, 4, 5, 6]
    # Empty string: no times function is set.
    times_func: ''
  layers: 2
  model: mlp
  n_heads: 4
  pass_as_var: false
  post_layers: 0
  raw_norm_type: none

# Pretrained-weights settings.
# `dir` is the empty string.
# NOTE(review): presumably an empty dir means no pretrained checkpoint is
# loaded — confirm.
pretrained:
  dir: ''
  freeze_main: false
  reset_prediction_head: true
print: both
round: 5
# Differs from the top-level `out_dir` (`results/teformer`).
run_dir: results
# Empty list: no extra splits are listed.
run_multiple_splits: []
seed: 0
# Settings for the `share` group.
# NOTE(review): these look like values derived from the dataset at run time
# rather than hand-set — confirm.
share:
  # dim_in is 1 here while dag.dim_in is 5.
  dim_in: 1
  dim_out: 1
  num_splits: 1
# Aggregated TensorBoard output is on; per-run output is off.
tensorboard_agg: true
tensorboard_each_run: false
# Training-loop settings.
train:
  auto_resume: false
  batch_size: 32
  # Checkpointing is enabled with a period of 100 (optim.max_epoch is 200);
  # ckpt_best is off and ckpt_clean is on.
  ckpt_best: false
  ckpt_clean: true
  ckpt_period: 100
  enable_ckpt: true
  epoch_resume: -1
  eval_period: 1
  iter_per_epoch: 32
  mode: custom
  # NOTE(review): neighbor_sizes and walk_length are presumably only read by
  # neighbour or random-walk samplers, not by the `full_batch` sampler set
  # below — confirm.
  neighbor_sizes:
  - 20
  - 15
  - 10
  - 5
  node_per_graph: 32
  radius: extend
  sample_node: false
  sampler: full_batch
  skip_train_eval: false
  walk_length: 4
# Validation sampling settings.
# All four values equal the same-named keys under `train`.
val:
  node_per_graph: 32
  radius: extend
  sample_node: false
  sampler: full_batch
view_emb: false
# Weights & Biases settings; `use` is false.
wandb:
  # Plain `None` is parsed by YAML as the string "None", not as null.
  # NOTE(review): whether the consumer converts it is not visible here —
  # confirm.
  entity: None
  name: TEFormer
  project: NB101
  use: false
