# Model configuration. `_target_` holds the dotted import path of the model
# class; the remaining keys are presumably forwarded to that class as
# constructor arguments -- TODO confirm against the code that loads this file.
architecture:
  _target_: krt.models.tnp.tnpnd.TNPND
  # NOTE(review): d_model / dim_feedforward / nhead / dropout / num_layers look
  # like standard transformer hyperparameters -- confirm against TNPND.
  d_model: 64
  emb_depth: 4
  dim_feedforward: 128
  nhead: 4
  dropout: 0.0
  num_layers: 6
  num_std_layers: 2
  # Covariance parameterization: either 'cholesky' or 'lowrank'.
  cov_approx: 'cholesky'
  prj_dim: 20
  prj_depth: 4
  # Only used when cov_approx is 'lowrank'.
  diag_depth: 4
# Optimisation settings.
training:
  epochs: 5000
  # Same value as `epochs`; presumably this means early stopping never
  # triggers before the final epoch -- TODO confirm against the training loop.
  early_stop_patience: 5000
  # Written with an explicit decimal point on purpose: YAML 1.1 loaders such as
  # PyYAML resolve a bare `5e-4` to the string '5e-4' rather than a float.
  learning_rate: 5.0e-4
  weight_decay: 0.0
  # Canonical lowercase boolean; parses to the same value as `True`.
  lr_schedule: true
