# config.yaml

# Identifier for this configuration.
# NOTE(review): presumably used as the run/experiment name by the training
# script — confirm against the code that loads this file.
name: "tiny_shakespeare_med"

# Base learning rates, keyed by what look like optimizer names.
# NOTE(review): which entry is read presumably depends on the optimizer the
# training script selects — confirm against the code that loads this file.
optimizer_params:
  iamsadam_learning_rate: 1.0
  # Spelled with an explicit fractional digit and a signed exponent so that
  # YAML 1.1 loaders also resolve it as a float (0.001), not a string.
  adam_learning_rate: 1.0e-3
  sgd_learning_rate: 0.05
  iams_learning_rate: 1.0


# Training-loop settings: batch size, epoch count, length limit and warm-up.
training_params:
  batch_size: 8
  num_epochs: 1
  # NOTE(review): presumably the maximum sequence length in tokens — confirm.
  max_length: 512
  # NOTE(review): named "percent", but 0.2 reads like a fraction (i.e. 20%)
  # — confirm the unit the consumer expects.
  warm_up_percent: 0.2
  # Candidate values for grid search: 1.2, 1.5, 2, 3, 5.
  warm_up_peak_mult: 3.0
  # NOTE(review): looks like the Adam-specific counterpart of
  # warm_up_peak_mult — confirm.
  adam_warm_up_peak_mult: 1.5

# Model settings: a teacher checkpoint name plus transformer dimensions.
# NOTE(review): the dimensions presumably describe the student model that is
# trained against the teacher — confirm.
gpt_model:
  # Name of the teacher checkpoint, e.g. "gpt2-medium".
  teacher_model: "gpt2-medium"
  # Hidden (embedding) size; 768 matches distilgpt2's hidden size.
  n_embd: 768
  # Number of transformer layers.
  # NOTE(review): this is not distilgpt2's value (its published config has
  # 6 layers) — confirm that 4 is intended.
  n_layer: 4
  # Number of attention heads; assuming heads split n_embd evenly,
  # 768 / 8 gives 96 dimensions per head.
  # NOTE(review): this is not distilgpt2's value (its published config has
  # 12 heads) — confirm that 8 is intended.
  n_head: 8
  # NOTE(review): GPT-2's tokenizer has 50257 entries; 50304 is the next
  # multiple of 64 above that, so this is presumably a padded size — confirm.
  vocab_size: 50304

# Dataset selection.
dataset:
  # Dataset identifier.
  # NOTE(review): presumably resolved by the data-loading code — confirm.
  name: "tiny_shakespeare"