# Sequence-to-sequence model pairing a small BERT encoder with a small GPT-2
# decoder, each built from scratch from its transformers config object.
bert_gpt2:
  _target_: src.models.modules.sequence_to_sequence_models.encoder_decoder.EncoderDecoder

  config_encoder:
    _target_: transformers.BertConfig

    vocab_size: 20                # size of the token vocabulary
    hidden_size: 300              # encoder embedding/hidden dimension; must be divisible by num_attention_heads (300 / 4 = 75)
    num_hidden_layers: 2          # number of transformer encoder layers
    num_attention_heads: 4        # attention heads per layer
    intermediate_size: 512        # inner dimension of the feed-forward layers
    max_position_embeddings: 100  # maximum sequence length that this model might ever be used with
    hidden_act: 'gelu'            # activation used in the feed-forward layers

  config_decoder:
    _target_: transformers.GPT2Config

    vocab_size: 20                   # kept equal to the encoder's vocab_size
    n_positions: 100                 # maximum sequence length (GPT-2's counterpart of max_position_embeddings)
    n_embd: 400                      # decoder embedding/hidden dimension; must be divisible by n_head (400 / 4 = 100).
                                     # Note it differs from the encoder's hidden_size (300), so the EncoderDecoder
                                     # wrapper must project between the two (HF's EncoderDecoderModel does this automatically).
    n_layer: 2                       # number of transformer decoder layers
    n_head: 4                        # attention heads per layer
    activation_function: 'gelu_new'  # GPT-2's default GELU variant
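
# A minimal usage sketch of how this config might be instantiated. It assumes
# Hydra-style recursive instantiation (each nested _target_ is built first and
# passed upward), and that EncoderDecoder accepts the two configs as the
# `config_encoder` / `config_decoder` keyword arguments shown above; the exact
# signature depends on the EncoderDecoder module in this repo.
#
#   from hydra.utils import instantiate
#   from omegaconf import OmegaConf
#
#   cfg = OmegaConf.load("path/to/this/file.yaml")  # hypothetical path
#   model = instantiate(cfg.bert_gpt2)  # builds BertConfig and GPT2Config, then EncoderDecoder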