# Quantizer settings.
# NOTE(review): presumably `init_args` is forwarded to the quantizer's
# constructor - confirm against the code that loads this file.
quantizer:
  init_args:
    codebook_size: 1024
    # The backbone section notes that its z_channels refers to this value.
    channels: 32
# Backbone settings: covers both the encoder and the decoder.
backbone:
  init_args:
    in_channels: 3
    out_ch: 3
    ch: 128
    # Spatial size: (256 ->) 128 -> 64 -> 32 -> 16 -> 16
    # NOTE(review): the size sequence above is carried over from the original
    # comment; verify it against the backbone implementation.
    ch_mult: [1,1,2,2,4]
    # z_channels is intentionally not set here: it refers to the quantizer's
    # `channels` value (32).
    num_res_blocks: 2
    attn_resolutions: [16]
    resolution: 256
    dropout: 0.0
    double_z: false
    depthwise: true
# Optimizer hyperparameters.
optimizer:
  # The scheduler section notes that its initial_lr refers to this value.
  # Keep the decimal point and signed exponent in these literals: some
  # YAML 1.1 loaders read a shortened form such as `1e-4` as a string.
  lr: 1.0e-4
  weight_decay: 1.0e-4
  betas: [0.9, 0.99]
# Scheduler settings.
scheduler:
  # initial_lr is intentionally not set here: it refers to the optimizer's
  # `lr` value.
  warmup_iters: 16000  # 2000 * 8
  decay_iters: 800000  # 100000 * 8
  # NOTE(review): the meaning and unit of this constant are not evident from
  # this file - confirm against the scheduler implementation.
  min_lr_constant: 2
# Run-level settings: batch size and number of epochs.
batch_size: 64
num_epochs: 25