# Model section: base learning rate, the class path given in `target`, and the
# `params` mapping that accompanies it (including the nested loss and
# ddconfig sub-mappings).
model:
  base_learning_rate: 4.5e-6
  target: ldm.models.autoencoder.MuxVQModel
  params:
    # NOTE(review): presumably the metric name watched during validation;
    # confirm against the consuming training code.
    monitor: 'val/rec_loss'
    embed_dim: 3
    n_embed: 8192

    # Loss definition, expressed as its own target/params pair.
    lossconfig:
      target: ldm.modules.losses.vqperceptual.VQLPIPSWithDiscriminator
      params:
        disc_conditional: false
        disc_in_channels: 3
        disc_start: 50001
        disc_weight: 0.8
        codebook_weight: 1.0
        # Same value as n_embed above.
        # NOTE(review): presumably these must stay in sync; confirm.
        n_classes: 8192

    ddconfig:
      double_z: false
      # Same value as embed_dim above.
      z_channels: 3
      # Same value as the dataset `size` entries in the data section.
      resolution: 256
      in_channels: 3
      out_ch: 3
      ch: 128
      # num_down = len(ch_mult) - 1
      ch_mult: [1, 2, 4]
      num_res_blocks: 2
      attn_resolutions: []
      dropout: 0.0
      # Same value as K under lightning.callbacks.image_logger.params.
      K: 8
      expand: 8
      mod: 'nonlinear-expand-one'
      demux: 'upsample'
      demux_mod: 'channel-conv'
# Data section: a target class path plus params holding the batch size, the
# `wrap` flag, and the train / validation dataset entries.
data:
  target: main.DataModuleFromConfig
  params:
    # Same value as batch_size under lightning.callbacks.image_logger.params.
    batch_size: 24
    wrap: true
    # Both dataset entries use size 256, the same value as
    # model.params.ddconfig.resolution.
    train:
      target: taming.data.faceshq.FFHQTrain
      params:
        size: 256
    validation:
      target: taming.data.faceshq.FFHQValidation
      params:
        size: 256

# Lightning section: callback definitions and trainer options.
lightning:
  callbacks:
    image_logger:
      target: main.ImageLogger
      params:
        batch_frequency: 1000
        max_images: 16
        # K and batch_size repeat the values given in model.params.ddconfig
        # (K: 8) and data.params (batch_size: 24).
        # NOTE(review): presumably they should be changed together; confirm.
        K: 8
        batch_size: 24
        increase_log_steps: true

  trainer:
    benchmark: true
    accumulate_grad_batches: 2
