# Hydra defaults list: the configs composed together with this file.
defaults:
  # `_self_` marks where this file's own keys sit in the composition order.
  - _self_
  # Select the `small` option from the `model` config group.
  - model: small
  # Swap Hydra's launcher for `submitit_slurm`; its parameters are set under
  # `hydra.launcher` in this file.
  - override hydra/launcher: submitit_slurm

# GPU count; interpolated into `hydra.launcher.gpus_per_node` via `${ngpus}`.
ngpus: 4
# NOTE(review): not referenced anywhere else in this file. Presumably a
# token/sequence-length setting read by the consuming code — confirm.
tokens: 32

# Training settings. These keys are read by code outside this file, so the
# notes below are limited to what the names and values themselves show.
training:
  batch_size: 512
  accum: 1  # presumably gradient-accumulation steps — confirm with consumer
  n_iters: 1300001

  # Periodic actions; units are presumably training iterations — confirm.
  snapshot_freq: 50000
  log_freq: 50
  eval_freq: 100
  snapshot_freq_for_preemption: 2000

  weight: standard
  snapshot_sampling: true
  ema: 0.9999

# Dataset selection. `data.train` is also interpolated into
# `hydra.sweep.dir` in this file.
data:
  train: sem
  valid: sem
  cache_dir: data
  hf_dataset: false

# Graph settings, read by code outside this file.
graph:
  type: absorb
  file: data  # same value as `data.cache_dir` in this file
  report_all: false

# Noise-schedule settings, read by code outside this file.
noise:
  type: loglinear
  # Written with an explicit decimal point in the mantissa: YAML 1.1 loaders
  # such as plain PyYAML read a bare `1e-4` as the string "1e-4", not a float.
  sigma_min: 1.0e-4
  sigma_max: 20

# Sampler settings, read by code outside this file.
sampling:
  predictor: euler
  steps: 128
  noise_removal: true

# Evaluation settings, read by code outside this file.
eval:
  batch_size: 512
  perplexity: true
  # Separate (smaller) batch size given for the perplexity setting above.
  perplexity_batch_size: 32

# Optimizer settings, read by code outside this file.
#
# Floats in scientific notation carry an explicit decimal point in the
# mantissa: YAML 1.1 loaders such as plain PyYAML read a bare `3e-4` or
# `1e-8` as a string, not a float.
optim:
  weight_decay: 0
  optimizer: AdamW
  lr: 3.0e-4
  beta1: 0.9
  beta2: 0.999
  eps: 1.0e-8
  warmup: 2500  # presumably a count of warmup iterations — confirm
  grad_clip: 1.0


# Hydra runtime settings: output directories and parameters for the
# `submitit_slurm` launcher selected in the defaults list.
hydra:
  run:
    # Output directory for a single (non-multirun) run.
    dir: dir/0
  sweep:
    # Output directory for multirun sweeps, built from the training dataset
    # name and `model.name` (expected to come from the selected `model`
    # config, which is not part of this file).
    dir: 'dir/${data.train}-${model.name}/'
  launcher:
    # max_num_timeout: 1440
    timeout_min: 180
    partition: short-unkillable
    mem_gb: 96
    cpus_per_task: 8
    # Tracks the top-level `ngpus` value.
    gpus_per_node: '${ngpus}'
    constraint: hopper
