# Model configuration.
# NOTE(review): `_target_` looks like the Hydra `instantiate` convention (a
# dotted import path to the class to build, with the sibling keys passed as
# constructor keyword arguments) — confirm against the code that loads this.
architecture:
  _target_: krt.models.tnp.TNP
  embedding_dim: 16
  # NOTE(review): if embedding_dim is split across heads this gives
  # 16 / 4 = 4 dims per head — confirm that is intended.
  n_heads: 4
  transformer_depth: 4
  decoder_depth: 2
  dim_feedforward: 256
# Training-loop and optimiser settings.
training:
  epochs: 5000
  # NOTE(review): patience equals `epochs`, so early stopping presumably can
  # never trigger before the run ends — confirm this is intentional.
  early_stop_patience: 5000
  # Written with an explicit decimal point: YAML 1.1 loaders such as PyYAML
  # resolve a bare `3e-4` as the string "3e-4" rather than a float.
  learning_rate: 3.0e-4
  weight_decay: 0.0
