# Summary settings: two interval values and one image count.
# NOTE(review): presumably these control how often scalar / image summaries
# are logged and how many images go into each image summary; the interval
# unit (steps vs. epochs) is not visible in this file — confirm against the
# code that consumes it.
summ_scalar_intvl: 1
summ_image_intvl: 1
summ_image_count: 4

# Per-phase parameters, keyed by phase name (train / valid / test).
# All three phases currently set num_slots to the same value (10).
# NOTE(review): num_slots is presumably the number of slots used per image
# in that phase — confirm against the model code.
phase_param:
  train:
    num_slots: 10
  valid:
    num_slots: 10
  test:
    num_slots: 10

# Run-level settings: batch size, epoch count, image size and two separate
# learning rates.
# NOTE(review): image_size is presumably the side length in pixels of a
# square input — confirm.
# NOTE(review): lr_enc / lr_dec presumably apply to the encoder and decoder
# parameter groups respectively — confirm against the training script.
batch_size: 8
num_epochs: 20
image_size: 256
lr_enc: 0.00003
lr_dec: 0.0001

# Feature and slot dimensions.
feat_dim: 128
# Four channel multipliers.
# NOTE(review): the "SA_" prefix presumably refers to the slot-attention
# encoder, as opposed to the separate top-level ch_mult — confirm.
SA_ch_mult: [1, 1, 2, 4]
# Anchored as &slot_size so that DiT.hidden_size (aliased as *slot_size
# further down) always resolves to this same value; edit it here only.
slot_size: &slot_size 768

# NOTE(review): the key names in this stanza (T, beta_1, beta_T, mean_type,
# var_type) look like DDPM-style diffusion / U-Net hyperparameters — the
# per-key notes below are assumptions to confirm against the model code.
# Presumably the number of diffusion timesteps.
T: 1000
# Presumably the indices of the resolution levels that get attention.
attn: [0, 1, 2]
# Presumably the start and end values of the noise (beta) schedule.
beta_1: 0.0001
beta_T: 0.02
# Presumably the base channel width, scaled per level by ch_mult.
ch: 128
ch_mult: [1, 2, 4]
dropout: 0.1
mean_type: epsilon
num_res_blocks: 2
var_type: fixedlarge
# NOTE(review): linear_mean / linear_scale semantics are not visible in this
# file — presumably an affine normalization of inputs or latents; confirm.
linear_mean: 0.5
linear_scale: 2

# VQGAN references.
# NOTE(review): VQGAN_PATH and VQGAN_CONFIG_PATH look like unfilled
# placeholders rather than real paths — presumably they must be replaced
# with the checkpoint path and its config path before use; confirm.
VQGAN: VQGAN_PATH
VQGAN_config: VQGAN_CONFIG_PATH


# DiT settings.
# hidden_size is an alias of the top-level slot_size anchor (&slot_size), so
# it resolves to whatever slot_size is set to (currently 768) and cannot be
# changed independently here.
# NOTE(review): 768 is evenly divisible by num_heads (12); if slot_size is
# changed, presumably it must remain divisible by num_heads — confirm.
# NOTE(review): input_size / in_channels presumably describe the latent grid
# produced by the VQGAN rather than the raw image — confirm.
DiT:
  input_size: 32
  patch_size: 2
  in_channels: 4
  hidden_size: *slot_size
  depth: 10
  num_heads: 12
  class_dropout_prob: 0




