seed: 42

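# Synthetic data settings. The keys below appear to describe a grid mixture
# ("sas_grid", presumably symmetric alpha-stable components) with the
# per-component weights listed under `weights`.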
data:
  between_minus_1_1: true
  quantile_cutoff: 0.98
  data_alpha: 1.8
  dataset: sas_grid
  isotropic: false
  n_mixture: 9 # must be a perfect square; set to null if the data are not a grid mixture
  nfeatures: 2 # if data_type is 2d and this is 1, the data are projected onto the first dimension
  normalized: true
  nsamples: 10000
  num_workers: 0 # number of workers for data loading
  std: 0.01
  theta: 1.0
  weights:
  - 0.01
  - 0.1
  - 0.3
  - 0.2
  - 0.02
  - 0.15
  - 0.02
  - 0.15
  - 0.05

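# Diffusion process settings: number of steps, alpha (presumably the stability
# index of the driving noise), and what the network predicts
# (mean_predict / var_predict).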
diffusion:
  alpha: 1.8
  clamp_a: null
  clamp_eps: null
  diffusion_steps: 100
  LIM: false
  isotropic: false
  mean_predict: EPSILON
  rescale_timesteps: true
  var_predict: FIXED

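# Evaluation / sampling settings: how many samples to generate, whether to use
# DDIM-style sampling, and a timestep-reduction factor (1.0 appears to mean no
# reduction).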
eval:
  data_to_generate: 5000
  ddim: false
  eval_eta: 1.0
  reduce_timesteps: 1.0
  clip_denoised: true

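# Denoiser network architecture: nblocks blocks of nunits units, silu
# activations, group norm, skip connections, and learnable time embeddings.
# The a_* / no_a / use_a_t keys presumably control conditioning on the
# auxiliary variable a.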
model:
  a_emb_size: 32
  a_pos_emb: false
  act: silu
  compute_gamma: false
  dropout_rate: 0.0
  group_norm: true
  nblocks: 2
  no_a: true
  nunits: 64
  skip_connection: true
  time_emb_size: 8
  time_emb_type: learnable
  use_a_t: false

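# Training settings: batch size, optional EMA rates and gradient clipping
# (both disabled here), the loss type, and the Monte Carlo reduction over
# sampled a's described in the inline comments below.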
training:
  bs: 256
  ema_rates: null
  #- 0.9
  grad_clip: null #1.0
  loss_monte_carlo: mean # reduction applied over the batch of M sampled a's; can be mean or median
  loss_type: LP_EPS_LOSS
  lploss: 1.0
  monte_carlo_steps: 1 # for each (t, x_0, z_t), number of different a_t_1, a_t' values to generate
  monte_carlo_groups: 1 # number of groups; takes the median over groups of the per-group means of the a's

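# Optimizer settings: AdamW with a fixed learning rate (no schedule, no warmup).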
optim:
  lr: 0.01
  lr_steps: 2000
  optimizer: adamw
  schedule: null #linear
  warmup: 0

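# Run control: total epochs, evaluation frequency (presumably in epochs),
# checkpointing (disabled here), and progress-bar display.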
run:
  epochs: 500
  eval_freq: 20
  checkpoint_freq: null
  progress: true # print progress bar