# Top-level settings for the equivariant autoencoder model.
# `_target_` is the dotted path of the class these keys configure
# (presumably built through Hydra's `instantiate` - confirm at the entry point).
_target_: src.model.eae.LitEquivariantAE
# Presumably the size of the latent representation - confirm in LitEquivariantAE.
latent_dim: 32
# Equals (img_size / patch_size)^2 = (28 / 4)^2 = 49 for the encoder values
# in this file. NOTE(review): this is a literal, not an interpolation, so it
# looks hand-maintained - update it if img_size or patch_size changes.
num_patches: 49
# Embedding widths; the encoder and decoder sections read these through
# ${model.enc_embed_dim} and ${model.dec_embed_dim}. The `model.` prefix in
# those paths means this file is expected to sit under the `model` key of the
# composed config.
enc_embed_dim: 128
dec_embed_dim: 64
# Pooling mode name passed to the model; the accepted values are defined by
# LitEquivariantAE and are not visible from this file.
latent_pooling: average
# Disabled alternative adapter configuration, kept for reference:
# adapter: lowrank
# rank_ratio: 4
adapter: straight

# Encoder sub-config; the decoder section below reuses img_size, patch_size
# and img_channels from here via ${model.encoder.*} interpolations.
encoder:
  _target_: src.model.network.ViTEncoder
  # 28 / 4 = 7 patches per side, i.e. 49 patches in total, which is the
  # value of the top-level `num_patches`.
  img_size: 28
  patch_size: 4
  mlp_ratio: 4
  depth: 3
  # Resolves to the top-level `enc_embed_dim` (128 in this file).
  embed_dim: ${model.enc_embed_dim}
  # NOTE(review): 128 is not divisible by 6 (128 / 6 ~= 21.33). Many attention
  # implementations require embed_dim % num_heads == 0 - confirm that
  # ViTEncoder accepts this combination.
  num_heads: 6
  img_channels: 1

# Decoder sub-config; image geometry is tied to the encoder through
# interpolation so the two sections cannot drift apart.
decoder:
  _target_: src.model.network.ViTDecoder
  # Mirror the encoder's image geometry (28 and 4 in this file).
  img_size: ${model.encoder.img_size}
  patch_size: ${model.encoder.patch_size}
  mlp_ratio: 4
  depth: 3
  # Resolves to the top-level `dec_embed_dim` (64 in this file).
  embed_dim: ${model.dec_embed_dim}
  # NOTE(review): 64 is not divisible by 3 (64 / 3 ~= 21.33). Many attention
  # implementations require embed_dim % num_heads == 0 - confirm that
  # ViTDecoder accepts this combination.
  num_heads: 3
  # Mirrors the encoder's channel count (1 in this file).
  img_channels: ${model.encoder.img_channels}