# config.yaml
# Top-level label for this configuration.
# NOTE(review): this differs from dataset.name ("wikitext-2-raw-v1");
# confirm which of the two the code actually uses to load data.
name: "wikitext-2"

# Learning rates, one entry per `<prefix>_learning_rate` key.
# NOTE(review): the key prefixes (iamsadam, adam, sgd, iams) presumably name
# the optimizer each rate applies to -- confirm against the training code.
optimizer_params:
  iamsadam_learning_rate: 1.0
  # Keep the decimal point and the signed exponent: YAML 1.1 loaders such as
  # PyYAML resolve a bare `1e-3` as a string, not a float.
  adam_learning_rate: 1.e-3
  sgd_learning_rate: 0.05
  iams_learning_rate: 1.0

# Training-loop settings.
training_params:
  batch_size: 8
  num_epochs: 1
  # NOTE(review): presumably the maximum sequence length in tokens -- confirm.
  max_length: 512
  # NOTE(review): warm_up_percent looks like a fraction (0.2 = 20%) of training
  # spent warming up, and warm_up_peak_mult a multiplier applied to the base
  # learning rate at the warm-up peak -- confirm against the scheduler code.
  warm_up_percent: 0.2
  warm_up_peak_mult: 2.0
  
# Model settings.
# NOTE(review): teacher_model is "gpt2-large", but n_embd/n_layer/n_head below
# are annotated as distilgpt2's sizes; presumably they describe a smaller
# student model rather than the teacher -- confirm against the model code.
gpt_model:
  teacher_model: "gpt2-large"  # Or 'distilgpt2', 'gpt2', etc.
  n_embd: 768    # Hidden size used in distilgpt2
  n_layer: 6    # Number of layers in distilgpt2
  n_head: 12    # Number of attention heads in distilgpt2
  # NOTE(review): 50304 is larger than GPT-2's 50257-token vocabulary; it is
  # the next multiple of 64 above it, presumably deliberate padding -- confirm.
  vocab_size: 50304

# Dataset selection.
# NOTE(review): "wikitext-2-raw-v1" looks like the Hugging Face `wikitext`
# configuration id -- confirm how the data-loading code uses this value.
dataset:
  name: "wikitext-2-raw-v1"