# Top-level model node. `_target_` gives the dotted path of the class to
# build (Hydra-style instantiation); the other keys are filled by
# interpolation from the top-level nodes `pttnp_encoder`, `tnp_decoder` and
# `likelihood` defined in this file.
model:
  _target_: tnp.models.pttnp.PTTNP
  encoder: ${pttnp_encoder}
  decoder: ${tnp_decoder}
  likelihood: ${likelihood}

# Encoder node referenced by `model.encoder`. It combines three sub-nodes
# from this file: the transformer encoder (`ist_encoder`) and the
# `xy_encoder` / `x_encoder` networks.
pttnp_encoder:
  _target_: tnp.models.pttnp.PTTNPEncoder
  transformer_encoder: ${ist_encoder}
  xy_encoder: ${xy_encoder}
  x_encoder: ${x_encoder}

# Transformer encoder node referenced by `pttnp_encoder.transformer_encoder`.
# It receives three cross-attention layer configs plus the latent count and
# layer count from `params`.
# NOTE(review): the ctoq / qtoc / qtot suffixes presumably name the direction
# of each cross-attention step (which token set attends to which) - confirm
# against the ISTEncoder implementation.
ist_encoder:
  _target_: tnp.networks.transformer.ISTEncoder
  num_latents: ${params.num_latents}
  mhca_ctoq_layer: ${mhca_ctoq_layer}
  mhca_qtoc_layer: ${mhca_qtoc_layer}
  mhca_qtot_layer: ${mhca_qtot_layer}
  num_layers: ${params.num_layers}

# Cross-attention layer config consumed by `ist_encoder.mhca_ctoq_layer`.
# Its settings are identical to `mhca_qtoc_layer` and `mhca_qtot_layer`; all
# values come from `params`.
mhca_ctoq_layer:
  _target_: tnp.networks.attention_layers.MultiHeadCrossAttentionLayer
  embed_dim: ${params.embed_dim}
  num_heads: ${params.num_heads}
  head_dim: ${params.head_dim}
  # The feed-forward width is tied to the embedding width (no separate
  # hyperparameter).
  feedforward_dim: ${params.embed_dim}
  norm_first: ${params.norm_first}

# Cross-attention layer config consumed by `ist_encoder.mhca_qtoc_layer`.
# Its settings are identical to `mhca_ctoq_layer` and `mhca_qtot_layer`; all
# values come from `params`.
mhca_qtoc_layer:
  _target_: tnp.networks.attention_layers.MultiHeadCrossAttentionLayer
  embed_dim: ${params.embed_dim}
  num_heads: ${params.num_heads}
  head_dim: ${params.head_dim}
  # The feed-forward width is tied to the embedding width (no separate
  # hyperparameter).
  feedforward_dim: ${params.embed_dim}
  norm_first: ${params.norm_first}

# Cross-attention layer config consumed by `ist_encoder.mhca_qtot_layer`.
# Its settings are identical to `mhca_ctoq_layer` and `mhca_qtoc_layer`; all
# values come from `params`.
mhca_qtot_layer:
  _target_: tnp.networks.attention_layers.MultiHeadCrossAttentionLayer
  embed_dim: ${params.embed_dim}
  num_heads: ${params.num_heads}
  head_dim: ${params.head_dim}
  # The feed-forward width is tied to the embedding width (no separate
  # hyperparameter).
  feedforward_dim: ${params.embed_dim}
  norm_first: ${params.norm_first}

# Input-location encoder referenced by `pttnp_encoder.x_encoder`: an MLP
# preceded by the two embeddings `x1_encoder` and `x2_encoder`.
x_encoder:
  _target_: tnp.networks.mlp.MLPWithEmbedding
  embeddings:
    - ${x1_encoder}
    - ${x2_encoder}
  # The input width is the sum of the two embeddings' `num_wavelengths`.
  # NOTE(review): assumes each FourierEmbedding emits exactly
  # `num_wavelengths` features - confirm against its implementation.
  # NOTE(review): relies on an `eval` resolver being registered by the code
  # that loads this config; it is not defined in this file.
  in_dim: ${eval:'${x1_encoder.num_wavelengths} + ${x2_encoder.num_wavelengths}'}
  out_dim: ${params.x_embed_dim}
  # NOTE(review): num_layers is 0 here, presumably meaning no hidden layers
  # (a single linear map from in_dim to out_dim) - confirm against MLP.
  num_layers: 0
  width: ${params.x_embed_dim}

# Fourier embedding for input dimension 0 (`active_dim: 0`); the first entry
# of `x_encoder.embeddings`.
x1_encoder:
  _target_: tnp.networks.embeddings.FourierEmbedding
  lower: 0.01
  # `upper` is the extent of the first axis of `params.grid_range`, computed
  # as grid_range[0][1] - grid_range[0][0].
  # NOTE(review): `params.grid_range` is not defined in this file's `params`
  # block; it must be supplied by another config composed with this one.
  upper: ${eval:'${params.grid_range}[0][1] - ${params.grid_range}[0][0]'}
  assert_range: False
  num_wavelengths: ${params.num_wavelengths}
  active_dim: 0

# Fourier embedding for input dimension 1 (`active_dim: 1`); the second entry
# of `x_encoder.embeddings`.
x2_encoder:
  _target_: tnp.networks.embeddings.FourierEmbedding
  lower: 0.01
  # `upper` is the extent of the second axis of `params.grid_range`, computed
  # as grid_range[1][1] - grid_range[1][0].
  # NOTE(review): `params.grid_range` is not defined in this file's `params`
  # block; it must be supplied by another config composed with this one.
  upper: ${eval:'${params.grid_range}[1][1] - ${params.grid_range}[1][0]'}
  assert_range: False
  num_wavelengths: ${params.num_wavelengths}
  active_dim: 1

# MLP referenced by `pttnp_encoder.xy_encoder`; its output width is the model
# embedding size (`params.embed_dim`).
xy_encoder:
  _target_: tnp.networks.mlp.MLP
  # Input width = 1 + dim_y + x_embed_dim.
  # NOTE(review): the extra 1 is presumably a flag channel (e.g. marking
  # context vs target tokens) - confirm against PTTNPEncoder.
  # NOTE(review): `params.dim_y` is not defined in this file's `params`
  # block; it must be supplied by another config composed with this one.
  in_dim: ${eval:'1 + ${params.dim_y} + ${params.x_embed_dim}'}
  out_dim: ${params.embed_dim}
  num_layers: 2
  width: ${params.embed_dim}

# Decoder node referenced by `model.decoder`; wraps the `z_decoder` MLP
# defined in this file.
tnp_decoder:
  _target_: tnp.models.tnp.TNPDecoder
  z_decoder: ${z_decoder}

# MLP referenced by `tnp_decoder.z_decoder`; maps embeddings of width
# `params.embed_dim` to an output of width 2 * dim_y.
z_decoder:
  _target_: tnp.networks.mlp.MLP
  in_dim: ${params.embed_dim}
  # NOTE(review): the factor of 2 presumably provides two parameters per
  # output dimension (e.g. mean and scale) for the likelihood configured in
  # this file - confirm against HeteroscedasticNormalLikelihood.
  out_dim: ${eval:'${params.dim_y} * 2'}
  num_layers: 2
  width: ${params.embed_dim}

# Likelihood node referenced by `model.likelihood`; no constructor arguments
# are set here.
likelihood:
  _target_: tnp.likelihoods.gaussian.HeteroscedasticNormalLikelihood

# Optimiser config: AdamW with a learning rate of 5.0e-4.
optimiser:
  _target_: torch.optim.AdamW
  # Hydra-style partial instantiation: presumably yields a callable with `lr`
  # bound, so the model parameters can be passed in later by the training
  # code - confirm at the call site.
  _partial_: True
  lr: 5.0e-4

# Shared hyperparameters, referenced throughout this file as `${params.*}`.
# NOTE(review): `params.dim_y` and `params.grid_range` are also referenced in
# this file but are not defined here; they must come from another config
# composed with this one.
params:
  # Not referenced elsewhere in this file; presumably read by the training
  # script.
  epochs: 500
  # Width used by the attention layers, `xy_encoder` and `z_decoder`.
  embed_dim: 128
  # With these values, num_heads * head_dim = 8 * 16 = 128 = embed_dim.
  num_heads: 8
  head_dim: 16
  norm_first: True
  # Passed to `ist_encoder` as `num_layers` and `num_latents`.
  num_layers: 5
  num_latents: 256
  # Per-dimension setting for `x1_encoder` and `x2_encoder`.
  num_wavelengths: 64
  # Output width of `x_encoder`; also a term in `xy_encoder.in_dim`.
  x_embed_dim: 128

# Run-level settings. None of these keys are referenced elsewhere in this
# file; presumably they are read by the training/experiment script.
misc:
  # Run name built from layers, heads, embedding width and latent count.
  # With the `params` values in this file it resolves to
  # "IST-L5-H8-D128-M256".
  name: IST-L${params.num_layers}-H${params.num_heads}-D${params.embed_dim}-M${params.num_latents}
  resume_from_checkpoint: null
  plot_interval: 10
  gradient_clip_val: 0.5
  logging: True
