# Model configuration for the class named by `_target_`.
# NOTE(review): the `_target_` key follows the Hydra `instantiate` convention,
# so the remaining keys are presumably passed to the CViT constructor as
# keyword arguments — confirm against the loader. Groupings and per-key notes
# below are inferred from key names only; verify against the CViT signature.
_target_: the_well.benchmark.models.CViT

# --- Input / patching ---
# Both grid_size entries are exact multiples of 16 (128 = 8 * 16,
# 384 = 24 * 16), matching the last two patch_size entries.
# NOTE(review): presumably grid_size is the spatial resolution and patch_size
# is (time, height, width) — TODO confirm axis order.
grid_size:
  - 128
  - 384
patch_size:
  - 1
  - 16
  - 16
input_time: 6
flexi: true

# --- Encoder (assumed from the un-prefixed key names) ---
# emb_dim / num_heads = 384 / 6 = 64.
emb_dim: 384
num_heads: 6
depth: 5
mlp_ratio: 1

# --- Latent / decoder (assumed from the `dec_` prefix) ---
# dec_emb_dim / dec_num_heads = 512 / 16 = 32.
latent_dim: 512
dec_emb_dim: 512
dec_num_heads: 16
dec_depth: 1
out_dim: 4
