# Run-level settings: run name, logging, device count, and the cadence and
# volume of validation-time sampling. The meaning of each key is defined by
# the code that consumes this file, which is not visible here, so the notes
# below are hedged where they go beyond the literal values.
general:
  name: graph-tf-model
  # Plain string, not a boolean.
  # NOTE(review): presumably a logger mode -- confirm the accepted values.
  wandb: disabled
  gpus: 1
  # Explicit nulls: neither key carries a value in this configuration.
  resume: null
  test_only: null
  # NOTE(review): this key and val_check_interval below are both 500. The names
  # suggest two different validation cadences -- confirm that setting both is
  # intended and which unit (epochs vs. steps) each one uses.
  check_val_every_n_epochs: 500
  sample_every_val: 1
  val_check_interval: 500
  samples_to_generate: 1000
  samples_to_save: 1000
  # NOTE(review): with chains_to_save at 0, number_chain_steps below presumably
  # has no effect -- confirm against the consumer.
  chains_to_save: 0
  log_every_steps: 1
  number_chain_steps: 50
  # The final_model_* values repeat the three sampling values set above
  # (1000 / 1000 / 0).
  final_model_samples_to_generate: 1000
  final_model_samples_to_save: 1000
  final_model_chains_to_save: 0
  evaluate_all_checkpoints: false
# Model and diffusion settings. The meaning and valid choices of each key are
# defined by the consuming code, which is not visible in this file.
model:
  diffusion_steps: 500
  n_layers: 5
  # String selectors; the set of accepted values lives in the consumer.
  type: discrete
  transition: marginal
  model: graph_tf
  diffusion_noise_schedule: cosine
  extra_features: all
  # One integer per component key (X, E, y).
  # NOTE(review): presumably node, edge and graph-level parts -- confirm.
  hidden_mlp_dims:
    X: 256
    E: 128
    # Quoted so the key stays the string "y" under loaders that treat a bare
    # y as a YAML 1.1 boolean.
    'y': 128
  # NOTE(review): dx/de/dy and dim_ffX/dim_ffE/dim_ffy look like per-component
  # widths parallel to X/E/y above, and n_head like an attention head count --
  # confirm any divisibility constraints with the model code.
  hidden_dims:
    dx: 256
    de: 64
    dy: 64
    n_head: 8
    dim_ffX: 256
    dim_ffE: 128
    dim_ffy: 128
  # Two-element integer list.
  # NOTE(review): presumably loss weights; the second entry is 0 -- confirm
  # which term each position scales.
  lambda_train:
  - 5
  - 0
# Training-loop settings: epoch count, optimizer choice and its parameters,
# seed, batch size and checkpoint saving. Semantics are defined by the
# consuming code, which is not visible in this file.
train:
  n_epochs: 1000
  lr: 0.0002
  # Exponent notation with a decimal point and a signed exponent, which keeps
  # the value a float under YAML 1.1 loaders that reject forms such as 1e-12.
  weight_decay: 1.0e-12
  seed: 4
  batch_size: 500
  # Explicit null: no value is set.
  # NOTE(review): presumably disables gradient clipping -- confirm.
  clip_grad: null
  save_model: true
  num_workers: 0
  # Integer zero.
  # NOTE(review): presumably turns EMA off -- confirm how the consumer reads 0.
  ema_decay: 0
  progress_bar: false
  optimizer: adamw
# Dataset selection. name and datadir share the identifier full_tend_6_16;
# keep them in sync when switching datasets.
dataset:
  name: full_tend_6_16
  # Explicit null: no value is set.
  # NOTE(review): confirm how the consumer treats null here.
  remove_h: null
  # Relative path with a trailing slash.
  # NOTE(review): the base directory it is resolved against is not visible
  # here -- confirm.
  datadir: data/full_tend_6_16/
