# Top-level model settings.
name: vae
model_type: vae
bundle_seq_length: 1
latent_dim: 512
act_fn: GELU
decouple_mu: true
# Conditioning inputs. Use `conditioning: []` for no conditioning.
# NOTE(review): "timestep" is noted as a further candidate entry — confirm the
# consumer accepts it before adding it to the list.
conditioning: ['itg', 'dg', 's_hat', 'q']
extra_zf_loss: false
# Weight-initialisation scheme names.
# Alternatives noted for init_weights: "xavier_uniform", "truncnormal", "torch".
init_weights: 'kaiming_uniform'
patching_init_weights: 'kaiming_uniform'
# Alternatives noted for cond_init_weights: "kaiming_uniform",
# "normal_smallvar", "xavier_uniform".
cond_init_weights: 'kaiming_uniform'

# KL-term settings.
# NOTE(review): names suggest a beta-VAE weight, a warm-up length in epochs and
# a free-bits threshold — confirm exact semantics against the training code.
beta_vae: 1.0
kl_warmup_epochs: 10
free_bits: 0.0

# Per-loss-term schedule entries; every term is currently null.
# NOTE(review): presumably null means "no schedule for this term" — confirm
# with the consumer.
# Commented-out template for a non-null entry (shown for flux_int; the meaning
# of each field is defined by the consumer):
#   flux_int:
#     start: 0.0
#     end: 1.0
#     start_fraction: 0.5
#     end_fraction: 0.7
loss_scheduler:
  df: null
  phi: null
  flux: null
  phi_int: null
  flux_int: null
  phi_cross: null
  flux_cross: null
  kl_div: null
# Weights of the active loss terms, keyed by term name.
# NOTE(review): kl_div is weighted here and beta_vae is also set elsewhere in
# this file — verify how the two interact in the training code.
loss_weights:
  df: 1.0
  kl_div: 1.0
# Optional additional loss terms and their weights.
# Explicitly null while no extra term is enabled: a bare `extra_loss_weights:`
# also parses as null, but reads as if a mapping were intended.
# To enable terms, replace `null` with a mapping, e.g.:
#   extra_loss_weights:
#     flux_int: 1.0e-4
#     phi_int: 1.0e-4
# Write the mantissa with a dot (1.0e-4): YAML 1.1 resolvers such as PyYAML
# read `1e-4` as a string, not a float.
extra_loss_weights: null

# Patching settings.
patch:
  # patch_size and window_size are both 5-element lists.
  # NOTE(review): index 1 is 0 in both — presumably that axis is left
  # unpatched; verify against the consumer.
  patch_size: [4, 0, 2, 5, 4]
  window_size: [4, 0, 4, 9, 4]
  merging_hidden_ratio: 1.0
  unmerging_hidden_ratio: 1.0
  c_multiplier: 1.0

# Transformer (ViT) settings.
vit:
  # num_heads and depth each have two entries — presumably one per stage;
  # TODO confirm against the model code.
  num_heads: [8, 16]
  depth: [4, 4]
  gradient_checkpoint: false
  # NOTE(review): presumably alternative positional-encoding switches; only
  # use_rpb is enabled here.
  use_abs_pe: false
  use_rpb: true
  use_rope: false
  gated_attention: false
  modulation: dit  # "dit", "film"
  drop_path: 0.1

# Bottleneck settings.
# NOTE(review): dim (32) differs from the top-level latent_dim (512) — confirm
# how the two relate in the model code.
bottleneck:
  dim: 32
  norm_learnable: true
  num_heads: 2
  depth: 2
