# Model architecture settings.
# NOTE(review): `_target_` presumably follows the Hydra instantiate convention
# (dotted import path of the class to build, with the sibling keys passed as
# keyword arguments) — confirm against the code that loads this config.
architecture:
  _target_: krt.models.tnp.tnpa.TNPA
  # NOTE(review): the meaning and valid range of each value below is defined
  # by the target class, which is not visible from this file.
  d_model: 64
  emb_depth: 4
  dim_feedforward: 128
  nhead: 4
  dropout: 0.0
  num_layers: 6
  # Booleans are written in canonical lowercase so every YAML schema and
  # linter (yamllint `truthy`) agrees on the type.
  permute: true
# Optimisation settings.
# NOTE(review): how these keys are consumed is not visible from this file —
# confirm against the training code.
training:
  # Keep the explicit decimal point: YAML 1.1 resolvers (e.g. stock PyYAML)
  # load a bare `5e-4` as the string '5e-4' rather than a float.
  learning_rate: 5.0e-4
  weight_decay: 0.0
  # Canonical lowercase boolean (yamllint `truthy`).
  lr_schedule: true
