# Experiment selection
experiment:
  # Options: toy_regression, uci_regression, ood_detection
  name: "toy_regression"


# Model selection
model:
  # Options: GFSVI, MFVI, GP, Laplace, TFSVI, FVI
  # NOTE(review): presumably decides which per-model section of this file
  # (gfsvi, mfvi, gp, laplace, tfsvi, fvi) is read - confirm in the loader.
  name: "TFSVI"


# GFSVI configuration
# NOTE(review): presumably read when model.name is "GFSVI" - confirm in the loader.
gfsvi:
  likelihood_scale: 1  # tuned during training
  architecture: [30, 30, 1]
  stochastic_layers: [true, true, true]  # same length as `architecture`
  activation_fn: "tanh"
  # Lower / upper bound used when initialising rho.
  init_rho_minval: -10
  init_rho_maxval: -8

  # Training configuration
  training:
    mc_samples: 10
    nb_epochs: 100000
    lr: 0.0005
    # b1 / b2 / eps: presumably Adam-style optimizer coefficients - confirm.
    b1: 0.9
    b2: 0.999
    eps: 1.0e-8
    kl_gamma: 1.0e-10
    n_context_points: 500
    # Options: random, grid
    context_selection: "random"
    min_context_val: -2
    max_context_val: 2
    patience: 5000
    max_grad_val: null  # null (empty) means no clipping

  # Prior
  prior:
    # Options: RBF, Matern12, Matern32, Matern52, RationalQuadratic
    kernel: "RBF"
    parameter_tuning: true
    nb_epochs: 2000
    lr: 0.005
    b1: 0.9
    b2: 0.999
    eps: 1.0e-8
    # Kernel parameters
    parameters:
      lengthscale: 0.25
      variance: 1
      alpha: 4.3


# MFVI configuration
# NOTE(review): presumably read when model.name is "MFVI" - confirm in the loader.
mfvi:
  likelihood_scale: 1  # tuned during training
  architecture: [30, 30, 1]
  stochastic_layers: [true, true, true]  # same length as `architecture`
  activation_fn: "tanh"

  # Prior
  prior_scale: 0.75

  # Training configuration
  training:
    mc_samples: 10
    nb_epochs: 100000
    lr: 0.0005
    # b1 / b2 / eps: presumably Adam-style optimizer coefficients - confirm.
    b1: 0.9
    b2: 0.98
    eps: 1.0e-9
    patience: 5000
    max_grad_norm: null  # null (empty) means no clipping


# GP configuration
# NOTE(review): presumably read when model.name is "GP" - confirm in the loader.
gp:
  sparse: false
  # NOTE(review): presumably only relevant when `sparse` is true - confirm.
  n_inducing_pts: 1000
  likelihood_scale: 1.0  # tuned during training

  # Prior
  prior:
    # Options: RBF, Matern12, Matern32, Matern52, Linear, RationalQuadratic,
    # PoweredExponential
    kernel: "Matern12"
    # Kernel parameters
    params:
      lengthscale: 0.25
      variance: 1.0
      alpha: 1.0

  # Training configuration
  training:
    nb_epochs: 1000
    lr: 0.1
    # b1 / b2 / eps: presumably Adam-style optimizer coefficients - confirm.
    b1: 0.9
    b2: 0.999
    eps: 1.0e-8


# Laplace configuration
# NOTE(review): presumably read when model.name is "Laplace" - confirm in the loader.
laplace:
  likelihood_scale: 1  # tuned during training
  architecture: [30, 30, 1]
  stochastic_layers: [true, true, true]  # same length as `architecture`
  activation_fn: "tanh"
  cov_type: "diag"

  # Prior
  prior_scale: 0.75

  # Training configuration
  training:
    mc_samples: 10
    nb_epochs: 100000
    lr: 0.0005
    # b1 / b2 / eps: presumably Adam-style optimizer coefficients - confirm.
    b1: 0.9
    b2: 0.98
    eps: 1.0e-9
    patience: 5000
    max_grad_norm: null  # null (empty) means no clipping


# TFSVI configuration
# NOTE(review): presumably read when model.name is "TFSVI" - confirm in the loader.
tfsvi:
  likelihood_scale: 1  # tuned during training
  architecture: [30, 30, 1]
  stochastic_layers: [true, true, true]  # same length as `architecture`
  activation_fn: "tanh"

  # Prior
  prior_scale: 0.75

  # Training configuration
  training:
    mc_samples: 10
    n_context_points: 500
    n_context_sets: 5
    nb_epochs: 100000
    lr: 0.0005
    # b1 / b2 / eps: presumably Adam-style optimizer coefficients - confirm.
    b1: 0.9
    b2: 0.98
    eps: 1.0e-9
    patience: 5000
    max_grad_norm: null  # null (empty) means no clipping


# FVI configuration
# NOTE(review): presumably read when model.name is "FVI" - confirm in the loader.
fvi:
  likelihood_scale: 1  # tuned during training
  architecture: [30, 30, 1]
  stochastic_layers: [true, true, true]  # same length as `architecture`
  activation_fn: "tanh"

  # Training configuration
  training:
    mc_samples: 10
    nb_epochs: 100000
    lr: 0.0005
    # b1 / b2 / eps: presumably Adam-style optimizer coefficients - confirm.
    b1: 0.9
    b2: 0.999
    eps: 1.0e-8
    n_context_points: 50
    # Options: random, grid
    context_selection: "random"
    min_context_val: -2
    max_context_val: 2
    patience: 5000
    max_grad_val: null  # null (empty) means no clipping

  # Prior
  prior:
    # Options: RBF, Matern12, Matern32, Matern52, RationalQuadratic
    kernel: "RBF"
    parameter_tuning: true
    nb_epochs: 2000
    lr: 0.005
    b1: 0.9
    b2: 0.999
    eps: 1.0e-8
    # Kernel parameters
    parameters:
      lengthscale: 0.25
      variance: 0.5
      alpha: 4.3


# Dataset configuration
data:
  # Options: truncated_sine, GP_RBF, boston, concrete, energy, kin8nm, naval,
  # power, protein, wine, yacht, wave, denmark
  name: "denmark"
  feature_dim: 1  # only for truncated_sine and GP_RBF, automatically set for UCI datasets
  n_samples: 300  # only for truncated_sine and GP_RBF, automatically set for UCI datasets
  batch_size: 2000
  k_folds: 5  # only for UCI datasets