# NOTE(review): presumably the random seed for the whole run — confirm against the consumer.
seed: 42

# Dataset settings (keys kept in alphabetical order).
data:
  between_minus_1_1: true
  data_alpha: 2.0
  dataset: gmm_2
  isotropic: true
  # Must be a square number; set to null (None) when the mixture is not a grid.
  # NOTE(review): 2 is not a perfect square — confirm this value is intended.
  n_mixture: 2
  # If data_type is 2d and this is 1, the data is just projected onto the
  # first dimension.
  nfeatures: 2
  normalized: true
  nsamples: 4096
  # Number of workers used for data loading.
  num_workers: 0
  quantile_cutoff: 0.98
  std: 0.1
  theta: 1.0
  weights:
    - 0.01

# Diffusion settings (keys kept in case-insensitive alphabetical order).
diffusion:
  alpha: 1.8
  clamp_a: null
  clamp_eps: null
  diffusion_steps: 100
  isotropic: true
  LIM: false
  mean_predict: EPSILON
  rescale_timesteps: true
  var_predict: FIXED

# Evaluation settings (keys kept in alphabetical order).
eval:
  clip_denoised: true
  data_to_generate: 1000
  ddim: false
  eval_eta: 1.0
  reduce_timesteps: 1.0

# Settings grouped under the "model" key.
# NOTE(review): no_a is true and use_a_t is false here, so a_emb_size and
# a_pos_emb presumably have no effect in this configuration — confirm against
# the model code.
model:
  a_emb_size: 32
  a_pos_emb: false
  act: silu
  compute_gamma: false
  dropout_rate: 0.0
  group_norm: true
  nblocks: 2
  no_a: true
  nunits: 64
  skip_connection: true
  time_emb_size: 8
  time_emb_type: learnable
  use_a_t: false

# Training settings (keys kept in alphabetical order).
training:
  bs: 1024
  # null here; to supply rates instead, use a list, e.g.:
  #   ema_rates:
  #     - 0.9
  ema_rates: null
  # null here; 1.0 is noted as an alternative value.
  grad_clip: null
  # Reduction applied to the loss over the batch of M sampled a's:
  # either "mean" or "median".
  loss_monte_carlo: mean
  loss_type: LP_EPS_LOSS
  lploss: 2.0
  # Number of groups: the median of the means of monte_carlo_groups groups
  # of a's is taken.
  monte_carlo_groups: 1
  # For each (t, x_0, z_t), the number of different a_t_1, a_t' to generate.
  monte_carlo_steps: 1

# Settings grouped under the "optim" key.
optim:
  lr: 0.01
  lr_steps: 2000
  optimizer: adamw
  schedule: null # alternative value noted by the author: linear
  warmup: 0

# Run-level settings (keys kept in alphabetical order).
run:
  checkpoint_freq: null
  epochs: 300
  eval_freq: 25
  # Print a progress bar.
  progress: true