# These are the Hugging Face GPT-2 parameters

model_type: "gpt2"
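# model_type identifies the architecture for Hugging Face's Auto* classes (AutoConfig / AutoModel).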

n_ctx: 1024
n_embd: 768
n_head: 12
n_layer: 12
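# 12 layers, 12 heads, and 768-dim embeddings correspond to the GPT-2 "small" model (~124M parameters).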
n_positions: ${data.seq_length} # max sequence length the positional embeddings are instantiated for


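# "gelu_new" is GPT-2's tanh-approximated GELU; the dropout probabilities (attention, residual,
# embedding), weight-init stddev, and layer-norm epsilon below are the standard GPT-2 defaults.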
activation_function: "gelu_new"
attn_pdrop: 0.1
resid_pdrop: 0.1
embd_pdrop: 0.1
initializer_range: 0.02
layer_norm_epsilon: 1e-05

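# Sequence-summary head settings; these only matter for models that pool hidden states into a
# single vector (e.g. GPT2DoubleHeadsModel), not for plain language modeling.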
summary_activation: null
summary_first_dropout: 0.1
summary_proj_to_labels: true
summary_type: "cls_index"
summary_use_proj: true

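# 50256 is GPT-2's <|endoftext|> token; it serves as both the beginning- and end-of-sequence id.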
bos_token_id: 50256
eos_token_id: 50256

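# A minimal sketch of how this file might be consumed, assuming it is loaded with Hydra/OmegaConf
# (implied by the ${...} interpolation above) and forwarded to Hugging Face's GPT2Config. The
# hard-coded n_positions=1024 below is an illustrative assumption; at runtime it is resolved from
# data.seq_length. This is not taken from the repo's actual loading code:
#
#   from transformers import GPT2Config, GPT2LMHeadModel
#
#   config = GPT2Config(
#       n_ctx=1024, n_positions=1024,       # n_positions comes from data.seq_length at runtime
#       n_embd=768, n_head=12, n_layer=12,
#       activation_function="gelu_new",
#       attn_pdrop=0.1, resid_pdrop=0.1, embd_pdrop=0.1,
#       initializer_range=0.02, layer_norm_epsilon=1e-5,
#       bos_token_id=50256, eos_token_id=50256,
#   )
#   model = GPT2LMHeadModel(config)         # randomly initialized GPT-2 small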