# Dataset and input-pipeline settings.
# NOTE(review): the meaning of each key is inferred from its name; the code
# that consumes this file is not visible here — confirm against the loader.
data:
    # Relative path; presumably resolved against the working directory.
    path: ./data
    dataset: CIFAR10
    image_size: 32
    # Same value as model.in_channels below (3).
    channels: 3
    logit_transform: false
    uniform_dequantization: false
    gaussian_dequantization: false
    random_flip: true
    rescaled: true
    num_workers: 4
    n_classes: 10

# Network architecture and EMA settings.
# NOTE(review): the meaning of each key is inferred from its name; confirm
# against the code that builds the model from this section.
model:
    type: simple
    # Same value as data.channels above (3); presumably the two must agree.
    in_channels: 3
    out_ch: 3
    ch: 128
    # Four entries; presumably one channel multiplier per resolution level.
    ch_mult: [1, 2, 2, 2]
    num_res_blocks: 2
    # Single-element list.
    attn_resolutions: [16]
    dropout: 0.1
    var_type: fixedlarge
    ema_rate: 0.9999
    ema: true
    resamp_with_conv: true
    cond_drop_prob: 0.1

# Noise-schedule settings: schedule name "linear", endpoints 0.0001 and 0.02,
# and 1000 timesteps.
# NOTE(review): how the consumer turns these into a schedule is not visible
# here — confirm against the code that reads this section.
diffusion:
    beta_schedule: linear
    beta_start: 0.0001
    beta_end: 0.02
    num_diffusion_timesteps: 1000

# Training-loop settings.
# NOTE(review): units (iterations vs. epochs, samples vs. batches) are inferred
# from the key names; confirm against the training script.
training:
    batch_size: 128
    n_iters: 20000
    snapshot_freq: 1000
    log_freq: 50
    visualization_samples: 100
    train_embeddings: false
    # Weight of the GR term; leave it at 1.
    gamma: 1
    # Weight (lambda) of the FIM term; this is the value to tune.
    lmbda: 10

# Sampling settings.
# NOTE(review): `last_only` presumably keeps only the final sample of each
# chain — confirm against the sampler.
sampling:
    batch_size: 128
    last_only: true

# Optimizer settings.
# NOTE(review): only `beta1` is set in this section; any second-moment
# coefficient presumably comes from the consumer's default — confirm.
optim:
    # Zero, i.e. no weight decay.
    weight_decay: 0.000
    optimizer: "Adam"
    lr: 0.0001
    beta1: 0.9
    amsgrad: false
    # Equals 1e-8.
    eps: 0.00000001
    grad_clip: 1.0