---
# Network architecture hyperparameters.
model:
  hidden_dim: 512
  ff_dim: 2048
  n_heads: 32
  n_layers: 12
  # The use_* entries are OmegaConf relative interpolations: `..` climbs out
  # of `model`, so each one resolves to the root-level key of the same name
  # (`alibi`, `coords`, `random_ids`, `rope`, `ssmax`). Change the root-level
  # values (or override them) instead of editing these lines.
  use_alibi: ${..alibi}
  use_coords: ${..coords}
  use_random_ids: ${..random_ids}
  use_rope: ${..rope}
  use_ssmax: ${..ssmax}

# Root-level feature switches. Each is read by the matching `model.use_*`
# interpolation (`alibi` -> `model.use_alibi`, and so on).
# NOTE(review): presumably kept at the root so they can be overridden with
# short names on the command line (e.g. `rope=true`) — confirm against the
# launch scripts.
alibi: false
coords: false
random_ids: true
rope: false
ssmax: true

# Dataset locations: one training file and a list of evaluation files.
# NOTE(review): these paths are relative, and `hydra.job.chdir` is true in this
# file, so they resolve against the Hydra run directory unless the loading
# code maps them back to the original working directory — confirm.
data:
  training: ./data/tsp-train.npz
  evaluation:
    - ./data/tsp-val.npz
    - ./data/tsp-250.npz
    - ./data/tsp-500.npz

# Training / evaluation loop settings.
trainer:
  evaluation_batch_size: 1024
  # NOTE(review): presumably measured in training iterations — confirm.
  evaluation_every: 5000
  evaluation_iters: 1
  # Floats are written with an explicit mantissa dot (`1.0e-4`, not `1e-4`):
  # YAML 1.1 resolvers such as plain PyYAML read the dot-less form as a
  # string. The numeric values are unchanged.
  learning_rate: 1.0e-4
  learning_rate_min: 1.0e-5
  training_batch_size: 448
  # Written without `_` digit separators (was 2_285_715): separators are a
  # YAML 1.1 feature and the YAML 1.2 core schema reads them as a string.
  training_iters: 2285715
  # Relative interpolation: resolves to the root-level `resume` key.
  resume: ${..resume}

# Root-level value read by `trainer.resume`; null by default.
# NOTE(review): presumably a checkpoint/run to resume from — confirm the
# expected value format against the trainer code.
resume: null

# Weights & Biases settings.
wandb:
  entity: neuralcombopt
  # Relative interpolations: both resolve to the root-level keys of the same
  # name (`group`, `mode`).
  group: ${..group}
  mode: ${..mode}

# Root-level values read by `wandb.group` and `wandb.mode`.
# NOTE: plain `none` is parsed as the string "none", not as YAML null (only
# `null`/`~` are null), so the group name is literally "none".
# NOTE(review): if "no group" was intended, confirm how the consumer treats
# the string "none".
group: none
mode: online

# Hydra runtime settings.
hydra:
  job:
    # Makes Hydra switch the process working directory to the job's output
    # directory before the task runs; relative paths used by the job are then
    # interpreted from that directory.
    chdir: true
