# Run-level settings: output location, split selection, tagging, and seeding.
# The path is quoted so the leading "~" is unambiguously a string, not YAML null.
out_dir: '~/GraphFM'
run_multiple_splits:
  - 0
cfg_dest: orig_config.yaml
name_tag: debugging
metric_best: auto
seed: 0
# Experiment-tracking options (presumably Weights & Biases; confirm against the consumer).
wandb:
  use: true
  project: Graph_Perceiver
# Multi-dataset selection.
dataset_multi:
  # Dataset identifiers, one per line.
  name_list:
    - reddit
    - reddit2
    - flickr
    - yelp
    - agd_wiki
    - agd_blog_catalog
    - agd_ppi
    - agd_facebook
    - hgd_roman_empire
    - hgd_amazon_ratings
    - hgd_minesweeper
    - hgd_tolokers
    - hgd_questions
    - twitch_de
    - twitch_en
    - twitch_es
    - twitch_fr
    - twitch_pt
    - twitch_ru
    - deezer_europe
    - github
    - facebook_page_page
    - last_fm_asia
    - airports_usa
    - airports_brazil
    - airports_europe
    - pol_blogs
    - email_eu_core
    - lxd_penn94
    - lxd_reed98
    - lxd_amherst41
    - lxd_johnshopkins55
    - lxd_genius
    - citationfull_cora_ml
    - citationfull_citeseer
    - citationfull_pubmed
  # Synthetic-data options; use_synthetic is currently switched off.
  use_synthetic: false
  synthetic_data_dir:
    - "~/graph-datasets/graphworld/"
  num_synthetic_samples: 20
  
# SignNet positional-encoding settings.
posenc_SignNet:
  enable: true
  eigen:
    laplacian_norm: sym
    eigvec_norm: L2
    max_freqs: 32
  model: DeepSet
  # Note: in the original SignNet codebase dim_pos_emb is always equal to max_freqs.
  dim_pos_emb: 32
  # Number of layers in the \phi model.
  layers: 2
  # Number of layers in the \rho model; the original uses the same count as in \phi.
  post_layers: 3
  phi_hidden_dim: 16
  phi_out_dim: 16
# Feature-encoder settings.
feenc:
  dim_feat_emb: 64
# Training-loop settings.
train:
  mode: custom_multi_dataset_node_class_distributed
  # Alternative mode kept for reference:
  # mode: custom_multi_dataset_node_class
  accum_gradient_steps: 8
  # Alternative batch size kept for reference:
  # batch_size: 1728
  batch_size: 320
  sampler: full_batch
  sampler_graph_limit: 10
  eval_period: 1
  ckpt_period: 20
  auto_resume: false
# Model settings; keys are grouped by concern, values are unchanged.
model:
  type: PerceiverGraph_MultiDataset_NodeClass
  loss_fun: multi
  # Encoder selection.
  node_pos_encoder_name: SignNet
  node_feat_encoder_name: MLP2Node
  # Sizes and embedding settings.
  num_latents: 512
  latent_dim: 512
  tok_emb_dim: 64
  data_emb_init_scale: 0.02
  hop_cutoff: 15
  # Dropout rates.
  ffn_dropout: 0.2
  attn_dropout: 0.2
  lin_dropout: 0.4
# "ca" settings (presumably cross-attention, given cross_heads; confirm).
ca:
  layer_type: default
  cross_heads: 4
# "sa" settings (presumably self-attention; confirm).
sa:
  depth: 12
  n_heads: 8
  # NOTE(review): node_decoder is nested under sa; confirm the consumer reads sa.node_decoder.
  node_decoder:
    depth: 4
    n_heads: 8
# Optimizer and learning-rate schedule settings.
optim:
  clip_grad_norm: true
  optimizer: lamb
  # Exponent-notation floats are written with an explicit decimal point: YAML 1.1
  # resolvers (e.g. PyYAML) load "1e-5" as a string, whereas "1.0e-5" is a float
  # under every loader.
  weight_decay: 1.0e-5
  # NOTE(review): base_lr (4.0e-8) is below dataset_min_lr (8.0e-8); confirm this is intended.
  base_lr: 4.0e-8
  dataset_min_lr: 8.0e-8
  dataset_max_lr: 2.0e-6
  max_epoch: 400
  scheduler: cosine_with_warmup
  num_warmup_epochs: 2