# Instantiation config for the NeoMLP model (Hydra-style `_target_` node).
# `_recursive_: false` asks Hydra's `instantiate` not to build nested
# `_target_` nodes itself, so the `neomlp_attention` sub-config below is
# presumably constructed by the target class -- confirm in nn/neomlp.py.
_target_: nn.neomlp.NeoMLP
_recursive_: false

# Input/output sizes are interpolated from the `data` config node.
num_inputs: ${data.num_dims}
num_outputs: ${data.num_channels}

num_nodes: 16
trainable_features: true
requires_grad: true
embed_dim: 128

# Positional-embedding options.
use_pos_embedding: true
pos_embedding_dim: 128
pos_embedding_sigma: 20.0

single_input_embedding: false
squeeze_output: false

# Interpolated from the `data` config node.
signals_to_fit: ${data.num_images}

# Embedding-sharing switches (all disabled here).
shared_hidden_embeddings: false
shared_output_embeddings: false
shared_head_embeddings: false

# Interpolated from the `data` config node.
num_classes: ${data.num_classes}

# NOTE(review): 0 presumably disables embedding compression -- confirm.
compressed_embed_dim: 0

# Initialisation scales.
init_sigma: 0.001
input_init_sigma: 1.0

# Nested attention config; it carries its own `_target_`.
neomlp_attention:
  _target_: nn.neomlp_attention.NeoMLPAttention
  # Absolute interpolation: assumes this file is composed under the `model`
  # key, so this tracks `embed_dim` above -- TODO confirm.
  embed_dim: ${model.embed_dim}
  num_heads: 4
  num_layers: 3
  shared_weights: false
  use_linear_attention: true
  use_ffn: true
  # Same absolute interpolation as `embed_dim` above.
  ffn_dim: ${model.embed_dim}
  dropout: 0.0
  use_layer_norm: false
