# @package model
# Settings for the attention module named in `_target_` (PerformerAttention).
# Presumably consumed through Hydra's instantiate mechanism, in which case the
# remaining keys are passed as constructor keyword arguments -- confirm against
# the code that reads this config.
t2tattn1_cfg:
  _target_: src.models.attention.performer_attention.PerformerAttention
  dim_heads: 64
  # Sequence length at this stage is 3136; the feature count is chosen so that
  # attention uses roughly 1/32 of the memory (original author's figure).
  # NOTE(review): 49 == 3136 / 64, so the 1/32 figure presumably assumes the
  # feature map doubles the number of features -- confirm in PerformerAttention.
  nb_features: 49
  # Both eps values are deliberately zero: approximation quality matters here,
  # so no stabilizing epsilon is added.
  softmax_eps: 0.0
  normalization_eps: 0.0
# Second attention config. Every field except `nb_features` is a relative
# interpolation into the sibling `t2tattn1_cfg`, so overriding a value there
# changes it here as well.
t2tattn2_cfg:
  _target_: ${..t2tattn1_cfg._target_}
  dim_heads: ${..t2tattn1_cfg.dim_heads}
  # Sequence length at this stage is 784; the feature count is again chosen to
  # use roughly 1/32 of the memory (original author's figure).
  # NOTE(review): 784 / 64 == 12.25, so 12 looks like the rounded-down value --
  # confirm the intended rounding.
  nb_features: 12
  softmax_eps: ${..t2tattn1_cfg.softmax_eps}
  normalization_eps: ${..t2tattn1_cfg.normalization_eps}
