# RNG seed for the run.
# NOTE(review): presumably null leaves the run unseeded -- confirm in the loading code.
seed: null

# Dataset definition and data-loading settings.
data:
  between_minus_1_1: true
  quantile_cutoff: 0.99999
  data_alpha: 2.0
  # Dataset name; the names mentioned for this key are gmm_grid and sas_grid.
  dataset: gmm_grid
  isotropic: true
  # Must be a perfect square. Set to None if not a grid mixture.
  # NOTE(review): YAML spells its null value `null`; a bare None loads as the
  # string "None" -- confirm which one the consuming code expects.
  n_mixture: 9
  # If data_type is 2d and this is 1, the data is just projected onto its
  # first dimension.
  dim: 2
  normalized: true
  nsamples: 100000
  # Batch size.
  bs: 4096
  # Number of workers for data loading.
  num_workers: 0
  std: 0.05
  theta: 1.0
  # Nine entries, matching n_mixture above; they sum to 1.0.
  # A 4-entry alternative is kept here for reference: 0.1, 0.4, 0.4, 0.1
  weights:
    - 0.01
    - 0.1
    - 0.3
    - 0.2
    - 0.02
    - 0.15
    - 0.02
    - 0.15
    - 0.05

# Noising process to use. Listed options: pdmp, diffusion, None.
# NOTE(review): a bare `None` is parsed by YAML as the string "None", not as
# null -- confirm which spelling the consuming code expects.
noising_process: 'pdmp' # pdmp, diffusion, None

# Settings for the `diffusion` noising process.
# NOTE(review): presumably only read when noising_process is `diffusion` -- confirm.
diffusion:
  alpha: 2.0
  clamp_a: null
  clamp_eps: null
  # Our diffusion models are trained with the uniform distribution over
  # [1, diffusion_steps] instead of [0, 1]; the step count is therefore part
  # of the training parameters.
  reverse_steps: 100 # training parameter, see the note just above
  LIM: false
  isotropic: true
  rescale_timesteps: true
  mean_predict: EPSILON
  var_predict: FIXED
  loss_type: LP_EPS_LOSS

# Settings for the `pdmp` noising process.
pdmp:
  # Sampler to use; listed options: ZigZag, BPS, HMC.
  sampler: HMC
  time_horizon: 10
  refresh_rate: 1.0
  # Loss terms to add. Written as an explicit null (nothing selected), the same
  # way ema_rates is written under `training`; a bare `add_losses:` parses to
  # the same null but is easy to misread as an empty list.
  # To enable entries, replace `null` with a block list built from the options
  # listed below.
  add_losses: null
    # - hyvarinen # usually we choose it by default. Works on reflections. Mandatory for ZigZag.
    # - ml # maximum likelihood
    # - square
    # - kl
    # - logistic

# Evaluation settings.
eval:
  data_to_generate: 10000
  batch_size: 128
  real_data: 10000 # in case of images, number of real images to store and compare against

  # Evaluation options for pdmp.
  pdmp:
    backward_scheme: splitting # alternative noted: euler
    reverse_steps: 100 # reverse_steps are only part of evaluation, not of training
    exponent: 2 # might need to be removed for hashes to match afterwards
    get_sample_history: false
    # Commented-out options, kept for reference:
    #clip_denoised: false
    #new_time_spacing
  # Evaluation options for diffusion.
  diffusion:
    reverse_steps: 100
    clip_denoised: false
    ddim: false
    eta: 1.0
    # Commented-out option, kept for reference:
    #new_time_spacing

# Model architecture settings.
model:
  # MLP settings: this is for 2d ZigZag and 2d diffusion.
  mlp:
    a_emb_size: 32
    a_pos_emb: false
    act: silu
    compute_gamma: false
    dropout_rate: 0.0
    group_norm: true
    nblocks: 8
    no_a: true
    nunits: 256
    skip_connection: true
    time_emb_size: 16
    time_emb_type: learnable
    use_a_t: false
  # Normalizing-flow settings: this is for the other samplers.
  normalizing_flow:
    transforms: 3
    hidden_width: 256
    hidden_depth: 8
    time_emb_type: learnable
    time_emb_size: 8
    x_emb_size: 8
    x_emb_type: mlp
    # Commented-out option, kept for reference:
    #flow_blocks: 8
  vae: false

# Training settings, one sub-section per noising process.
training:
  pdmp:
    # null here; a list such as `- 0.9` is the noted alternative.
    ema_rates: null
    # null here; 1.0 is the noted alternative.
    grad_clip: null
    # For ZigZag.
    subsamples: 2
    # Asserted to be one of: VAE, RATIO, NORMAL, NORMAL_WITH_VAE.
    train_type: NORMAL
  diffusion:
    # null here; a list such as `- 0.9` is the noted alternative.
    ema_rates: null
    # null here; 1.0 is the noted alternative.
    grad_clip: null
    # Loss to apply on a batch of M a's; can be mean or median.
    loss_monte_carlo: mean
    lploss: 2.0
    # For each t, x_0, z_t: number of different a_t_1, a_t' to generate.
    monte_carlo_steps: 1
    # Number of groups: takes the median of the means over
    # monte_carlo_groups groups of a's.
    monte_carlo_groups: 1

# Optimizer and learning-rate schedule settings.
optim:
  optimizer: adamw
  schedule: null # options noted: steplr, linear
  lr: 0.002
  warmup: 0 # alternative value noted: 1000
  # NOTE(review): presumably the lr_* keys below only matter when `schedule`
  # is set -- confirm against the training code.
  lr_steps: 300000
  lr_step_size: 400
  lr_gamma: 0.99

# Run-level settings.
run:
  epochs: 1000
  # NOTE(review): presumably null disables periodic evaluation and
  # checkpointing -- confirm against the training loop.
  eval_freq: null
  checkpoint_freq: null
  progress: true # print a progress bar