# config.yaml

name: "ptb_text_med_j"  # Top-level name of this configuration. NOTE(review): presumably used to label the run's outputs — confirm against the consumer.

# Learning rates, one entry per optimizer name.
# NOTE(review): which entry is read presumably depends on the optimizer the
# training code selects — confirm against the consumer.
optimizer_params:
  # NOTE(review): "iamsadam" / "iams" look like project-specific optimizers —
  # TODO confirm what they refer to.
  iamsadam_learning_rate: 1.0
  # Keep both the decimal point and the signed exponent: YAML 1.1 loaders such
  # as PyYAML only resolve exponent notation to a float when both are present.
  adam_learning_rate: 1.0e-3
  sgd_learning_rate: 0.05
  iams_learning_rate: 1.0

# Training-loop settings.
training_params:
  batch_size: 8
  num_epochs: 1
  # NOTE(review): presumably the maximum sequence length in tokens — confirm.
  max_length: 512
  # NOTE(review): despite the "percent" in the key, the value looks like a
  # fraction (0.2, presumably 20% of training spent warming up) — confirm units.
  warm_up_percent: 0.2
  # NOTE(review): presumably the peak learning-rate multiplier reached during
  # warm-up — confirm against the scheduler.
  warm_up_peak_mult: 5.0

# Model settings: a teacher checkpoint plus GPT-style architecture sizes.
# NOTE(review): the n_* and vocab_size values presumably describe the model
# being trained (the student), not the teacher — confirm against the model
# builder.
gpt_model:
  # NOTE(review): presumably a Hugging Face Hub model id; other candidates
  # mentioned by the author: 'distilgpt2', 'gpt2'.
  teacher_model: "EleutherAI/gpt-j-6B"
  # Hidden size; 768 is the same width as distilgpt2 / gpt2.
  n_embd: 768
  # Number of transformer layers. This is NOT distilgpt2's value (it has 6).
  n_layer: 4
  # Number of attention heads. This is NOT distilgpt2's value (it has 12).
  # With n_embd = 768 this gives 768 / 8 = 96 dimensions per head.
  n_head: 8
  # NOTE(review): 50304 = 393 * 128, so it is larger than the 50257-token
  # GPT-2 vocabulary and presumably padded for alignment. GPT-J-6B's published
  # config uses vocab_size 50400 — confirm teacher and student vocab sizes are
  # reconciled wherever their logits are compared.
  vocab_size: 50304

# Dataset selection.
dataset:
  # NOTE(review): presumably a Hugging Face Hub dataset id (Penn Treebank,
  # text only) — confirm against the data loader.
  name: "ptb_text_only"