# Config for the project's Bart encoder-decoder sequence-to-sequence model.
# `_target_` gives the dotted path of the class to build (Hydra-style
# instantiation; presumably resolved via hydra.utils.instantiate - confirm).
_target_: src.models.modules.sequence_to_sequence_models.encoder_decoder.Bart
# NOTE(review): presumably a short lookup/registry name for this model
# variant; its consumer is not visible in this file - confirm.
key: bart

# Hyperparameters used to build the underlying transformers.BartConfig.
config:
  _target_: transformers.BartConfig
  # Number of distinct token ids the model can represent.
  vocab_size: 20
  # Width of the encoder/decoder layers (embedding / hidden dimension).
  d_model: 256
  # Number of stacked layers in the encoder and in the decoder.
  encoder_layers: 8
  decoder_layers: 8
  # Attention heads per layer; d_model must be divisible by this value
  # (256 / 4 = 64 dimensions per head).
  encoder_attention_heads: 4
  decoder_attention_heads: 4
  # Width of the feed-forward sub-layer in each decoder / encoder layer.
  # Here it equals d_model (no expansion).
  decoder_ffn_dim: 256
  encoder_ffn_dim: 256
  # Non-linearity used inside the layers.
  activation_function: 'gelu'
  # Maximum sequence length that this model might ever be used with.
  max_position_embeddings: 100

  # Extra sub-configs exposing `hidden_size` for the encoder and the decoder.
  # Both are OmegaConf interpolations of `d_model` above, so they always track
  # it. The absolute path assumes this file is composed at
  # `model.sequence_to_sequence_model` in the root config.
  # NOTE(review): `encoder` / `decoder` are not documented BartConfig
  # arguments; presumably they are stored as extra attributes and read by
  # project code - confirm they are meant to be nested under `config`.
  encoder:
    hidden_size: ${model.sequence_to_sequence_model.config.d_model}
  decoder:
    hidden_size: ${model.sequence_to_sequence_model.config.d_model}