# Boolean flag. Presumably enables bfloat16 precision — confirm against the training code.
bf16: true
# Data settings. Meanings below are inferred from key names only; confirm
# against the code that consumes this section.
data:
  # NOTE(review): written as a float. Presumably the concentration parameter
  # for `prior_type: dirichlet` — confirm.
  alpha: 100000.0
  batch_size: 128
  # Nested settings; the n_* keys look like per-category sizes — TODO confirm.
  config:
    n_adjectives: 10
    n_adverbs: 10
    n_conjunctions: 10
    n_nouns: 10
    # The only n_* entry set to 0; every other n_* entry here is 10.
    n_prepositions: 0
    n_pronouns: 10
    n_verbs: 10
    # NOTE(review): presumably a probability (value is within [0, 1]) — confirm.
    p_conjunctions: 0.15
    relative_clauses: false
    transitivity: false
  language: english
  # As written, this is half of model.context_size (256) in this file.
  max_sample_length: 128
  # NOTE(review): written as a float (1000000.0), not an integer. If the
  # consumer needs an integer count it must cast — verify.
  num_iters: 1000000.0
  num_workers: 0
  prior_type: dirichlet
# Top-level run settings.
# NOTE(review): the effect of `deploy` is not evident from this file — TODO document.
deploy: true
# Plain scalar resolved as the string "cuda:1": the colon is not followed by a
# space, so no nested mapping is created. Presumably a device identifier with
# index 1 — confirm that this device exists on the target machine.
device: cuda:1
epochs: 1
# Evaluation toggles (both booleans). Exact behavior is defined by the
# consumer — TODO confirm what each flag enables.
eval:
  grammar: true
  save_tables: false
# Logging / checkpoint cadence. Units of the *_interval keys are presumably
# training iterations — confirm.
log:
  # Same value as save_interval below.
  eval_interval: 100
  # NOTE(review): meaning of `free` is not evident from this file — TODO document.
  free: false
  log_interval: 10
  save_interval: 100
  save_multiple: false
# Model hyperparameters. Key names suggest a small transformer; confirm
# against the model definition.
model:
  bias: false
  compile: false
  # Twice data.max_sample_length (128) as written in this file.
  context_size: 256
  dropout: 0.0
  mlp: true
  # NOTE(review): 128 / n_head (2) = 64, assuming the embedding is split
  # evenly across heads — confirm.
  n_embd: 128
  n_head: 2
  n_layer: 2
  # NOTE(review): despite the "_dir" name, the value is a path to a .pt file.
  # Presumably unused while use_pretrained is false — confirm.
  pretrain_dir: ./pretrained/latest_ckpt.pt
  use_pretrained: false
# Optimizer and learning-rate schedule settings.
optimizer:
  # NOTE(review): presumably Adam-style moment coefficients — confirm.
  beta1: 0.9
  beta2: 0.95
  decay_lr: true
  grad_clip: 1.0
  learning_rate: 0.0001
  # 9.0e-05 is 90% of learning_rate (0.0001). Presumably the floor of the
  # schedule when decay_lr is true — confirm.
  min_lr: 9.0e-05
  warmup_iters: 200
  weight_decay: 0.0001
# Run identification. `seed` is presumably the RNG seed and `tag` a free-form
# run label — confirm how each is used.
seed: 42
tag: scratch
