# Defaults list: pulls the tabular-data generator config group into this
# experiment config.
# NOTE(review): `params.dim_x` and `params.dim_y` are interpolated further down
# but are not defined in this file; presumably the generator config supplies
# them. Confirm before reusing this file with a different generator.
defaults:
  - /generators/tabular_data

# Top-level model node: a CNP built from the encoder, decoder and likelihood
# nodes declared elsewhere in this file (wired in via ${...} interpolation).
model:
  _target_: tnp.models.cnp.CNP
  encoder: ${cnp_encoder}
  # NOTE(review): the decoder node targets tnp.models.tnp.TNPDecoder rather
  # than a class from tnp.models.cnp. Confirm this pairing is intentional.
  decoder: ${tnp_decoder}
  likelihood: ${likelihood}

# Encoder node referenced by `model.encoder`; its `deepset` entry is wired to
# the top-level `deepset` node.
cnp_encoder:
  _target_: tnp.models.cnp.CNPEncoder
  deepset: ${deepset}

# DeepSet node referenced by `cnp_encoder.deepset`. It is given the `z_encoder`
# MLP and the aggregation name `mean`.
# NOTE(review): presumably `agg` selects how per-point encodings are pooled;
# verify the accepted values against tnp.networks.deepset.DeepSet.
deepset:
  _target_: tnp.networks.deepset.DeepSet
  z_encoder: ${z_encoder}
  agg: mean

# MLP node used by the DeepSet.
#   in_dim  = params.dim_x + params.dim_y (computed by the `eval` resolver;
#             assumes that resolver evaluates the quoted arithmetic expression)
#   out_dim = params.embed_dim
#   width   = params.embed_dim (same value as out_dim)
# `num_layers` is shared with `z_decoder` through params.num_layers.
z_encoder:
  _target_: tnp.networks.mlp.MLP
  in_dim: ${eval:'${params.dim_x} + ${params.dim_y}'}
  out_dim: ${params.embed_dim}
  num_layers: ${params.num_layers}
  width: ${params.embed_dim}

# Decoder node referenced by `model.decoder`; its `z_decoder` entry is wired to
# the top-level `z_decoder` MLP node.
tnp_decoder:
  _target_: tnp.models.tnp.TNPDecoder
  z_decoder: ${z_decoder}

# MLP node used by the decoder.
#   in_dim  = params.dim_x + params.embed_dim
#   out_dim = 2 * params.dim_y
#   width   = params.embed_dim
# NOTE(review): the factor of 2 presumably provides two values per output
# dimension (e.g. a mean and a noise term) for the heteroscedastic likelihood;
# confirm against the likelihood implementation.
z_decoder:
  _target_: tnp.networks.mlp.MLP
  in_dim: ${eval:'${params.dim_x} + ${params.embed_dim}'}
  out_dim: ${eval:'${params.dim_y} * 2'}
  num_layers: ${params.num_layers}
  width: ${params.embed_dim}

# Likelihood node referenced by `model.likelihood`.
# NOTE(review): `min_noise` is presumably a lower bound on the predicted noise;
# confirm units and semantics in tnp.likelihoods.gaussian.
likelihood:
  _target_: tnp.likelihoods.gaussian.HeteroscedasticNormalLikelihood
  min_noise: 1.0e-04

# Optimiser node: AdamW created as a partial (`_partial_: true`), so the
# remaining constructor arguments (presumably the model parameters) are
# supplied by the training code.
# `lr` is also interpolated into `misc.name`, so changing it renames the run.
optimiser:
  _target_: torch.optim.AdamW
  _partial_: true
  lr: 5.0e-04
  betas: [0.9, 0.999]  # default
  eps: 1.0e-8  # default
  weight_decay: 0.01  # default

# Learning-rate schedule settings.
scheduler:
  # Options: "constant", "warmup", "cosine", "warmup_cosine"
  type: 'warmup_cosine'
  warmup:
    # Specific number of warmup steps (overrides fraction)
    steps: null
    # Fraction of total training steps for warmup
    fraction: 0.1
  cosine:
    # Minimum learning rate for cosine annealing
    eta_min: 1.0e-6
    # Max steps for cosine (null = auto-calculate)
    T_max: null

# Shared hyperparameters, referenced elsewhere in this file as ${params.*}.
# NOTE(review): `params.dim_x` and `params.dim_y` are interpolated by the MLP
# nodes but are not defined here; presumably they come from the generator
# config in the defaults list. Confirm.
params:
  # Model + training params
  epochs: 680
  # Used for both the output size and the hidden width of the MLP nodes.
  embed_dim: 128
  # NOTE(review): num_heads, head_dim and norm_first are not referenced by any
  # node in this file; they are kept in case other code or configs read them.
  num_heads: 8
  head_dim: 16
  norm_first: true
  # Depth shared by z_encoder and z_decoder; also part of misc.name.
  num_layers: 5

# Run bookkeeping: experiment naming, checkpointing, plotting, evaluation and
# data-loading settings.
misc:
  project: incTNP-tab
  # Run name embeds the layer count, embedding size and learning rate so that
  # runs with different settings get distinct names.
  name: CNP-LRSched-L${params.num_layers}-D${params.embed_dim}-LR${optimiser.lr}
  resume_from_checkpoint: null
  gradient_clip_val: 0.5
  plot_interval: 10

  # Evaluation and reproducibility
  eval_name: test
  seed: 1

  # Plotting
  only_plots: false
  num_plots: 10
  subplots: true
  savefig: true

  # Logging, validation cadence and checkpointing
  logging: true
  check_val_every_n_epoch: 10
  checkpoint_interval: 100

  # Data-loader workers
  num_workers: 0
  num_val_workers: 0

  # NOTE(review): null presumably means no custom plotting function; confirm.
  plot_fn: null
