# Hydra defaults list: the order of entries matters, so keep it as-is unless
# the composition order is meant to change.
defaults:
  # This file's own keys. Listed first, so (per Hydra's defaults-list rules)
  # the entries below are merged after it.
  - _self_
  # Selects the `small` option of the `model` config group.
  - model: small
  # Overrides the `hydra/launcher` group with the `submitit_slurm` option.
  # NOTE(review): this option normally comes from the Hydra submitit launcher
  # plugin — confirm it is installed in the target environment.
  - override hydra/launcher: submitit_slurm

# Top-level scalars that are not nested under any section.
# NOTE(review): presumably the number of GPUs used by a run — confirm against
# the code that reads it.
ngpus: 1
# NOTE(review): presumably the vocabulary size (50257 equals the GPT-2 BPE
# vocabulary size) — confirm against the tokenizer actually used.
tokens: 50257

# Training-loop settings.
# The meaning of each key is defined by the consuming code, which is not
# visible from this file; the notes below are hedged accordingly.
training:
  batch_size: 512
  # Same value as `batch_size` in this config.
  # NOTE(review): confirm whether the two are required to match.
  diffusion_batch_size: 512
  # NOTE(review): presumably the gradient-accumulation step count — confirm.
  accum: 1
  # NOTE(review): 400001 rather than 400000 looks deliberate (e.g. so that
  # step 400000 is still reached by the loop) — confirm.
  n_iters: 400001
  # Frequencies below are presumably expressed in training iterations —
  # TODO confirm.
  snapshot_freq: 1000
  log_freq: 50
  eval_freq: 1000
  snapshot_freq_for_preemption: 500
  weight: standard
  # Booleans are written in canonical lowercase form (`true` / `false`).
  snapshot_sampling: false
  # NOTE(review): an EMA decay this close to 1 averages over a very long
  # horizon — confirm it is intended.
  ema: 0.99999
  use_optimal_transport: true
  # NOTE(review): presumably only relevant when `use_optimal_transport` is
  # true, together with `ot_metric` — confirm.
  chunksize: 512
  ot_metric: input_embedding_l2

# Dataset selection.
data:
  # Name of the training dataset; also interpolated into the Hydra output
  # directories via `${data.train}`.
  train: openwebtext
  # Name of the validation dataset.
  valid: wikitext103
  # NOTE(review): relative path — presumably resolved against the process
  # working directory; confirm, since Hydra may change the working directory.
  cache_dir: data

# Graph settings.
# NOTE(review): the accepted values for `type` and `loss_type` are defined by
# the consuming code, which is not visible from this file.
graph:
  type: IED
  # Same literal value as `data.cache_dir` in this config.
  # NOTE(review): confirm whether this is meant to be the same directory.
  file: data
  # Boolean written in canonical lowercase form.
  report_all: false
  p_m: 0.85
  loss_type: cedd

# Noise schedule settings.
noise:
  type: loglinear
  # Written with an explicit decimal point: YAML 1.1 resolvers (e.g. stock
  # PyYAML) read `1e-4` as a string, while `1.0e-4` is a float everywhere.
  sigma_min: 1.0e-4
  sigma_max: 20

# Sampling settings.
sampling:
  # NOTE(review): the set of valid predictor names is defined by the consuming
  # code — confirm before changing.
  predictor: euler
  steps: 128
  # Boolean written in canonical lowercase form.
  noise_removal: true

# Evaluation settings.
eval:
  batch_size: 32
  # Boolean written in canonical lowercase form.
  perplexity: false
  # NOTE(review): presumably only used when `perplexity` is true — confirm.
  perplexity_batch_size: 32

# Optimizer settings.
optim:
  weight_decay: 0
  optimizer: AdamW
  # Exponent-form floats carry an explicit decimal point: YAML 1.1 resolvers
  # (e.g. stock PyYAML) read `3e-4` / `1e-8` as strings, not floats.
  lr: 3.0e-4
  beta1: 0.9
  beta2: 0.999
  eps: 1.0e-8
  # NOTE(review): presumably the number of warmup steps — confirm.
  warmup: 2500
  grad_clip: 1.0

# Hydra output-directory layout.
# The interpolation strings are single-quoted so the `{`, `:` and `%`
# characters are never interpreted as YAML syntax; the resulting string
# values are unchanged.
hydra:
  run:
    # Output directory for a single run: dataset name, then date, then time.
    dir: 'exp_local/${data.train}/${now:%Y.%m.%d}/${now:%H%M%S}'
  sweep:
    # Root output directory for a sweep (multirun).
    dir: 'exp/${data.train}/${now:%Y.%m.%d}/${now:%H%M%S}'
    # One subdirectory per job, named by the job number.
    subdir: '${hydra.job.num}'
