# Language-model configuration named "nanogpt".
#
# The size settings below are re-used by the model.lm section through
# ${lm.*} interpolations, so this file is presumably composed under an `lm`
# package/config group — TODO confirm against the config directory layout.
name: nanogpt
n_embd: 768
n_head: 6
n_layer: 12

# Object spec for the LM wrapper; `_target_` names the class to build.
model:
  _target_: mtp.models.lm.LM
  # Nested spec for the GPT network, supplied under the `lm` key.
  lm:
    _target_: mtp.models.gpt.GPT
    # Vocabulary size is taken from the data config rather than set here.
    vocab_size: ${data.vocab_size}
    # Size settings resolve to the top-level values of this file (see the
    # hedge in the header about the `lm` package).
    n_embd: ${lm.n_embd}
    n_head: ${lm.n_head}
    n_layer: ${lm.n_layer}
  encoder_only: true
  # No pretrained weights are configured: both sources are null.
  from_checkpoint: null
  from_huggingface: null
  # Adaptor hyperparameters are pulled from the `adaptor` config.
  adaptor_kwargs: ${adaptor.hyperparameters}
  # NOTE(review): these look like attribute names used to locate the encoder
  # and head sub-modules — confirm against mtp.models.lm.LM.
  ref_enc: encoder
  ref_head: head
  freeze: false
