# Experiment selection
experiment:
  # Name of the experiment to run. The set of accepted names is defined by the
  # consuming code, which is not visible in this file.
  name: "posterior_comparison"


# Model selection
model:
  # Accepted values are listed in the inline comment. Presumably this picks
  # which of the `gfsvi`, `gp`, or `fvi` sections of this file is applied --
  # confirm against the loader.
  name: "GFSVI" # GFSVI, GP, FVI


# GFSVI model configuration
gfsvi:
  # Network-level settings sit directly under this key; optimisation settings
  # are under `training` and prior settings under `prior`.
  likelihood_scale: 1  # tuned during training
  architecture: [100, 100, 1]
  # Same length as `architecture` (presumably one flag per layer -- confirm).
  stochastic_layers: [true, true, true]
  activation_fn: "tanh"
  # NOTE(review): looks like the range used to initialise `rho` -- confirm.
  init_rho_minval: -10
  init_rho_maxval: -8

  # Training configuration
  training:
    mc_samples: 10
    nb_epochs: 100000
    lr: 0.0005
    # b1 / b2 / eps: presumably Adam-style optimiser settings -- confirm.
    b1: 0.9
    b2: 0.999
    # Keep the decimal point in the exponent form: some YAML 1.1 loaders
    # (e.g. PyYAML) read a bare `1e-8` as a string rather than a float.
    eps: 1.e-8
    kl_gamma: 1.e-15
    n_context_points: 500
    context_selection: "random"  # random, grid
    min_context_val: -2
    max_context_val: 2
    patience: 5000
    max_grad_val: null  # null for no clipping

  # Prior
  prior:
    kernel: "RBF"
    parameter_tuning: true
    # NOTE(review): nb_epochs / lr / b1 / b2 / eps here presumably drive the
    # prior parameter tuning enabled above -- confirm.
    nb_epochs: 2000
    lr: 0.005
    b1: 0.9
    b2: 0.999
    eps: 1.e-8
    parameters:
      lengthscale: 0.25
      variance: 1
      alpha: 4.3


# GP model configuration
gp:
  # Model-level settings sit directly under this key; prior settings are under
  # `prior` and optimisation settings under `training`.
  sparse: true
  # Number of inducing points; keep this at or below 1000.
  # NOTE(review): presumably only relevant when `sparse` is true -- confirm.
  n_inducing_pts: 100
  likelihood_scale: 1.0  # tuned during training

  # Prior
  prior:
    kernel: "RBF"
    # NOTE(review): this section spells the key `params`, whereas gfsvi.prior
    # and fvi.prior use `parameters`. The key is left as-is because the code
    # that reads it is not visible here.
    params:
      lengthscale: 0.25
      variance: 1.0
      alpha: 1.0

  # Training configuration
  training:
    nb_epochs: 1000
    lr: 0.1
    # b1 / b2 / eps: presumably Adam-style optimiser settings -- confirm.
    b1: 0.9
    b2: 0.999
    # Keep the decimal point in the exponent form: some YAML 1.1 loaders
    # (e.g. PyYAML) read a bare `1e-8` as a string rather than a float.
    eps: 1.e-8


# FVI model configuration
fvi:
  # Network-level settings sit directly under this key; optimisation settings
  # are under `training` and prior settings under `prior`.
  likelihood_scale: 1  # tuned during training
  architecture: [100, 100, 1]
  # Same length as `architecture` (presumably one flag per layer -- confirm).
  stochastic_layers: [true, true, true]
  activation_fn: "tanh"

  # Training configuration
  training:
    mc_samples: 10
    nb_epochs: 100000
    lr: 0.0005
    # b1 / b2 / eps: presumably Adam-style optimiser settings -- confirm.
    b1: 0.9
    b2: 0.999
    # Keep the decimal point in the exponent form: some YAML 1.1 loaders
    # (e.g. PyYAML) read a bare `1e-8` as a string rather than a float.
    eps: 1.e-8
    n_context_points: 50
    context_selection: "random"  # random, grid
    min_context_val: -2
    max_context_val: 2
    patience: 5000
    max_grad_val: null  # null for no clipping

  # Prior
  prior:
    kernel: "RBF"
    parameter_tuning: true
    # NOTE(review): nb_epochs / lr / b1 / b2 / eps here presumably drive the
    # prior parameter tuning enabled above -- confirm.
    nb_epochs: 2000
    lr: 0.005
    b1: 0.9
    b2: 0.999
    eps: 1.e-8
    parameters:
      lengthscale: 0.25
      variance: 1
      alpha: 4.3


# Dataset configuration 
data:
  # Available dataset names: truncated_sine, GP_RBF, boston, concrete, energy,
  # kin8nm, naval, power, protein, wine, yacht, wave, denmark
  name: "denmark"
  # feature_dim and n_samples apply only to truncated_sine and GP_RBF; they
  # are set automatically for UCI datasets.
  feature_dim: 1
  n_samples: 300
  batch_size: 2000
  k_folds: 5  # only for UCI datasets