---
# Model hyperparameters.
model:
  hidden_dim: 64
  # 3 x hidden_dim.
  ff_dim: 192
  n_heads: 4
  n_layers: 6
  # Each `use_*` entry is a relative interpolation: `..` steps out of `model`,
  # so the value is read from the top-level key of the same name (for example
  # `use_alibi` takes the value of the top-level `alibi`).
  use_alibi: ${..alibi}
  use_coords: ${..coords}
  use_random_ids: ${..random_ids}
  use_rope: ${..rope}
  use_ssmax: ${..ssmax}

# Top-level feature flags. The `model.use_*` entries interpolate these values.
# Presumably kept at the root so they can be overridden by their short names
# (e.g. `rope=true`) -- confirm against the launch scripts.
alibi: false
coords: false
random_ids: true
rope: false
ssmax: true

# Dataset locations: one training file and a list of evaluation files.
# NOTE(review): these are relative paths and `hydra.job.chdir` is true in this
# file, so they would resolve against the job's output directory unless the
# code maps them back to the original working directory -- confirm.
data:
  training: ./data/tsp-20-train.npz
  evaluation:
    - ./data/tsp-20-val.npz
    - ./data/tsp-50.npz
    # NOTE(review): unlike its siblings, this name has no numeric part --
    # confirm it is the intended file and not a typo.
    - ./data/tsp-val.npz

# Training and evaluation loop settings.
trainer:
  evaluation_batch_size: 1024
  # Presumably counted in training iterations -- confirm against the trainer.
  evaluation_every: 5000
  evaluation_iters: 1
  # Floats carry an explicit decimal point: a YAML 1.1 loader such as PyYAML
  # reads a bare `1e-3` as a string, whereas `1.0e-3` is a float everywhere.
  learning_rate: 1.0e-3
  learning_rate_min: 1.0e-5
  training_batch_size: 1024
  # 500k iterations. Written without `_` digit separators, which are an
  # integer only under YAML 1.1 and a string under the YAML 1.2 core schema.
  training_iters: 500000
  # Relative interpolation: resolves to the top-level `resume` key.
  resume: ${..resume}

# Value interpolated by `trainer.resume`; null by default.
# Presumably a checkpoint to continue from -- confirm against the trainer.
resume: null

# Weights & Biases settings (judging by the key name -- confirm in the logging
# code). `group` and `mode` are relative interpolations that resolve to the
# top-level keys of the same name.
wandb:
  entity: neuralcombopt
  group: ${..group}
  mode: ${..mode}

# Top-level values interpolated by `wandb.group` and `wandb.mode`.
group: ablations
# Presumably passed through as the W&B run mode -- confirm the accepted values
# in the logging code.
mode: online

# Hydra runtime settings.
hydra:
  job:
    # Make Hydra change the working directory to the job's output directory
    # before the task runs. Relative paths in this config (e.g. under `data`)
    # are then interpreted from that directory unless the code resolves them
    # against the original working directory.
    chdir: true
