# Top-level settings for the configuration named `xnet`.
# The meaning of each key is defined by whatever code consumes this file.
name: xnet
bundle_seq_length: 1
latent_dim: 256
num_layers: 1
decouple_mu: false

# Four conditioning entries, referenced by name.
conditioning:
  - "timestep"
  - "itg"
  - "s_hat"
  - "q"

# Three loss names; each one has a same-named weight under `loss_weights`.
losses:
  - "df"
  - "phi"
  - "flux"

# One weight per name listed in `losses`.
loss_weights:
  df: 0.6
  phi: 0.4
  flux: 0.001
# Weights for terms that do not appear in the `losses` list.
# NOTE(review): a weight of 0.0 presumably disables the term — confirm
# against the code that reads this mapping.
extra_loss_weights:
  phi_int: 0.0
  # Written with an explicit decimal point: YAML 1.1 loaders (e.g. PyYAML's
  # default resolver) read a bare `1e-4` as the string "1e-4", not a float.
  flux_int: 1.0e-4
  phi_cross: 0.0
  flux_cross: 0.0

# Settings grouped under `swin`. Key semantics are defined by the consumer of
# this file; comments below only describe what is visible here.
swin:
  # Patch and window shapes: five entries for the main pair, three entries
  # for the `phi_*` pair.
  patch_size: [4, 2, 4, 8, 4]
  phi_patch_size: [8, 2, 4]
  window_size: [4, 2, 2, 8, 4]
  phi_window_size: [7, 4, 12]

  # Single-element lists.
  # NOTE(review): presumably one entry per stage, in line with the top-level
  # `num_layers: 1` — confirm against the consumer.
  num_heads: [8]
  depth: [2]

  gradient_checkpoint: false

  # The unmerging ratio is twice the merging ratio.
  merging_hidden_ratio: 4.0
  unmerging_hidden_ratio: 8.0
  c_multiplier: 2

  norm_output: false
  use_abs_pe: false
  act_fn: GELU
  patch_skip: true
  modulation: "film"
  drop_path: 0.1

  swin_bottleneck: true
  latent_cross_attn: true

  # `use_rpb` is on and `use_rope` is off.
  # NOTE(review): these look like alternative positional schemes — verify
  # whether the consumer allows both to be enabled together.
  use_rpb: true
  use_rope: false