# Hydra defaults list: selects one option from each config group below.
# `_self_` is listed last, so keys defined in this file are merged after the
# selected group configs and take precedence on conflict.
defaults:
  - probe_encoding: mlp_probe_encoding
  - conditioner: timestep_conditioner
  - patch_embedding: conv3d
  - _self_

# Dotted path of the class Hydra instantiates for this config.
# Alternative target, currently disabled:
# _target_: src.models.flow_matching.dit_aupt.DiTAUPT
_target_: src.models.flow_matching.cond_ar_vit.CondVit
# Have Hydra return a functools.partial of the target instead of calling it,
# so the caller can supply the remaining arguments later.
_partial_: true

# `${...}` values are OmegaConf interpolations: they are resolved against the
# composed config rather than being set in this file.
transformer_dim: ${model_dim}
transformer_depth: 12
transformer_attn_heads: 3
x_dim: ${x_dim}
condition_dim: ${condition_dim}
output_ln: true
init_weights: torch

image_size: ${data.dataset.image_size}
patch_size: ${patch_size}

# Attention constructor handed to the model as a partial (not called here).
attn_ctor:
  _target_: src.models.kappa_overrides.dpa.DotProductAttention
  _partial_: true
  do_attn_gating: false
  n_attn_sinks: 4  # TODO: compare with the register-token paper
