# Training-loop settings.
# NOTE(review): the meaning of each key is defined by the training code, which
# is not part of this file; notes that go beyond the literal values are
# assumptions to confirm.
training:
  # presumably toggles an L1 variant of the loss — confirm against the consumer
  L1: false
  batch_size: 128
  # Both an epoch limit and an iteration limit are configured here; which one
  # actually ends training is decided by the consumer — TODO confirm.
  n_epochs: 500000
  # One more than the end_ckpt value (210000) used in the fast_fid and test
  # sections of this file.
  n_iters: 210001
  # snapshot_freq and sample_freq are set to the same value (50000).
  snapshot_freq: 50000
  snapshot_sampling: true
  sample_freq: 50000
  val_freq: 100
  anneal_power: 2
  log_all_sigmas: false

# Sampling / evaluation settings.
# NOTE(review): the meaning of each key is defined by the sampling code, which
# is not part of this file; notes that go beyond the literal values are
# assumptions to confirm.
sampling:
  batch_size: 64
  data_init: false
  # step_lr and n_steps_each carry the same values as in the fast_fid section.
  step_lr: 0.0000033
  n_steps_each: 5
  ckpt_id: 0
  final_only: true
  # presumably metric toggles: fid is off, ssim and fvd are on.
  # NOTE(review): fvd is enabled while data.num_frames is 1 in this file —
  # confirm this combination is intended.
  fid: false
  ssim: true
  fvd: true
  denoise: true
  num_samples4fid: 10000
  # inpainting and interpolation are both switched off in this config.
  inpainting: false
  interpolation: false
  # presumably only read when interpolation is true — confirm
  n_interpolations: 15
  clip_before: true
  max_data_iter: 100000
  # The value below (-1.0) is not > 0, so the behaviour described in the inline
  # note is presumably disabled in this config — confirm.
  init_prev_t: -1.0 # if > 0, next_frame starts at prev_frame, beginning from noise level t=init_prev_t
  one_frame_at_a_time: false
  preds_per_test: 1

# Settings for the "fast_fid" pass.
# NOTE(review): presumably a quick FID evaluation over a range of saved
# checkpoints — confirm against the consumer code (not part of this file).
fast_fid:
  # batch_size equals num_samples (1000) in this config.
  batch_size: 1000
  num_samples: 1000
  # Same values as sampling.step_lr and sampling.n_steps_each.
  step_lr: 0.0000033
  n_steps_each: 5
  # Same begin_ckpt / end_ckpt values as the test section; end_ckpt is one less
  # than training.n_iters (210001).
  begin_ckpt: 5000
  end_ckpt: 210000
  verbose: false
  ensemble: false

# Settings for the test pass.
# NOTE(review): presumably evaluates saved checkpoints within the range below —
# confirm against the consumer code (not part of this file).
test:
  # Same begin_ckpt / end_ckpt values as the fast_fid section.
  begin_ckpt: 5000
  end_ckpt: 210000
  batch_size: 100

# Dataset and preprocessing settings.
# NOTE(review): the meaning of each key is defined by the data-loading code,
# which is not part of this file; notes that go beyond the literal values are
# assumptions to confirm.
data:
  dataset: "CELEBA"
  # presumably 64x64 inputs with 3 colour channels — confirm
  image_size: 64
  channels: 3
  # All three transform/dequantization switches below are off.
  logit_transform: false
  uniform_dequantization: false
  gaussian_dequantization: false
  random_flip: true
  rescaled: false
  num_workers: 32
  # Frame layout: num_frames is 1, with 0 conditioning and 0 future frames.
  num_frames: 1
  num_frames_cond: 0
  num_frames_future: 0
  # Both masking probabilities are 0.0 and prob_mask_sync is false.
  prob_mask_cond: 0.0
  prob_mask_future: 0.0
  prob_mask_sync: false

# Network and noise-schedule settings.
# NOTE(review): the meaning of each key is defined by the model code, which is
# not part of this file; notes that go beyond the literal values are
# assumptions to confirm.
model:
  depth: deep
  # Noise settings: sigma_begin (90) and sigma_end (0.01, set further below)
  # with sigma_dist: geometric. presumably num_classes is the number of noise
  # levels — confirm.
  sigma_begin: 90
  num_classes: 500
  ema: true
  ema_rate: 0.999
  spec_norm: false
  sigma_dist: geometric
  sigma_end: 0.01
  normalization: InstanceNorm++
  nonlinearity: elu
  ngf: 128
  # presumably per-level channel multipliers applied to ngf — confirm
  ch_mult:
    - 1
    - 2
    - 2
    - 2
  num_res_blocks: 1  # 8 for traditional
  attn_resolutions:
    - 8
    - 16
    - 32  # can use only 16 for traditional
  n_head_channels: 64  # -1 for traditional
  # conditional, noise_in_cond, output_all_frames, cond_emb and spade are all
  # switched off in this config.
  conditional: false
  noise_in_cond: false
  output_all_frames: false  # could be useful especially for 3d models
  cond_emb: false
  spade: false
  # presumably only read when spade is true — confirm
  spade_dim: 128

# Optimizer settings.
# NOTE(review): presumably forwarded to the optimizer named by `optimizer` —
# confirm against the consumer code (not part of this file).
optim:
  weight_decay: 0.000
  optimizer: "Adam"
  lr: 0.0001
  # Only beta1 is set in this section; no beta2 key is present here.
  beta1: 0.9
  amsgrad: false
  # Written in plain decimal notation; numerically equal to 1e-8.
  eps: 0.00000001
