# Run-wide settings. These keys precede the first table header, so they
# belong to the root table.
# NOTE(review): the meanings noted below are inferred from the key names only;
# confirm against the code that loads this file.
save_weights = true
# presumably the number of repeated runs per configuration — TODO confirm
trials = 3
# presumably selects how each run's RNG seed is chosen — TODO confirm
seed_source = "random"

[data]
# Input file patterns: three corpora (pubmed, random-words, random-bytes),
# each at three sizes (100, 1000 and 10000 tokens).
# NOTE(review): "(0...10)" is not TOML syntax; each entry is a plain string.
# Presumably the consuming application expands it into a range of file
# indices — confirm, including whether the upper bound is inclusive.
file = [
  "data/pubmed/100-tokens/(0...10).txt",
  "data/pubmed/1000-tokens/(0...10).txt",
  "data/pubmed/10000-tokens/(0...10).txt",
  "data/random-words/100-tokens/(0...10).txt",
  "data/random-words/1000-tokens/(0...10).txt",
  "data/random-words/10000-tokens/(0...10).txt",
  "data/random-bytes/100-tokens/(0...10).txt",
  "data/random-bytes/1000-tokens/(0...10).txt",
  "data/random-bytes/10000-tokens/(0...10).txt",
]
# NOTE(review): the set of accepted prompt types is not visible here — see the loader.
prompt_type = "uuid"

[training]
maximum_epochs = 10_000
learning_rate = 2e-8
# NOTE(review): unit (epochs vs. optimizer steps) is not visible here — confirm.
report_interval = 50
batch_size = 2
# Learning-rate scheduler settings.
lr_scheduler_type = "reduce-on-plateau"
# With a factor of 1/sqrt(10), two consecutive reductions amount to a 10x
# decrease, assuming the factor multiplies the learning rate — TODO confirm.
plateau_reduce_factor = 0.31622776602  # 1/sqrt(10)

# NOTE(review): presumably gradient clipping, with `value` as the threshold
# for the chosen algorithm — confirm against the training code.
[training.clipping]
algorithm = "norm"
value = 1e5  # exponent notation makes this a TOML float (100000.0), not an integer

[model]
language_model_name_or_path = "gpt2"
# Five values from 1e3 to 1e5, alternating 1x and 3x within each decade.
intrinsic_dimension = [1_000, 3_000, 10_000, 30_000, 100_000]
dropout = 0.0
normalized = false

[tokenizer]
variety = "pretrained"
# Same identifier as model.language_model_name_or_path ("gpt2").
# NOTE(review): presumably the two must be kept in sync — confirm.
pretrained = "gpt2"
