# Separate Fusion Module Configuration
# This module decomposes intermediate features into structure and content
# Dotted path of the class this config is for (Hydra-style `_target_` key);
# the remaining keys are presumably passed to it as keyword arguments -- confirm
# against the Separate_Fusion constructor.
_target_: models.hierarchical_model_Dinov2.Separate_Fusion

# Interpolated from the STTransformer config node, so this value follows
# whatever `STTransformer.intermediate_dim` is set to in the composed config.
intermediate_dim: ${STTransformer.intermediate_dim}

# Size of the structure part of the decomposition.
# History: 64 -> 32 (decreased for dynamics capacity).
structure_dim: 32

# Size of the content part of the decomposition.
# NOTE(review): the earlier note on this key read "128 -> 64 (decreased to
# force identity-only encoding)", but the value set here is 256. Confirm which
# value is intended and update either the value or this note.
content_dim: 256

# Presumably patching / attention settings of the module (names suggest
# per-head width and head count) -- TODO confirm against Separate_Fusion.
patch_size: 1
spatial_depth: 3
# Disabled option, kept for reference:
# temporal_depth: 4
dim_head: 32
heads: 8

# `ssm_*` keys: presumably state-space-model settings (depth, state size,
# conv width, expansion factor, dropout) -- TODO confirm against Separate_Fusion.
ssm_depth: 2
ssm_d_state: 32
ssm_d_conv: 4
ssm_expand: 2
ssm_dropout: 0.0

# Interpolated from the top-level `batch_first` key of the composed config.
batch_first: ${batch_first}

# Disabled option, kept for reference. Names listed in the original note:
# cross_attention, enhanced_cross_attention, gated, film, hierarchical,
# adaptive, concat.
# fusion_type: film