# Hydra defaults list for this config.
defaults:
  # `_self_` stands for this file's own contents within the defaults list.
  - _self_
  # Select the `submitit_slurm` option of the `hydra/launcher` group; its
  # parameters are set under `hydra.launcher` further down in this file.
  - override hydra/launcher: submitit_slurm

# Compute resources. Both values are interpolated into `hydra.launcher`.
compute:
  nodes: 1  # read as ${compute.nodes} by hydra.launcher.nodes
  ngpus: 1  # read as ${compute.ngpus} by hydra.launcher.gpus_per_node

# Logging and experiment-tracking settings.
logging:
  log_freq: 100
  # Follows `log_freq` unless overridden; `optim.log_lr_every` reads this value.
  log_lr_every: ${logging.log_freq}
  log_file_name: stdout.log
  # Canonical lowercase boolean (was `False`).
  enable_wandb: false
  # NOTE(review): the three keys below look like Weights & Biases run
  # metadata used when `enable_wandb` is true — confirm against the consumer.
  entity: zrfnz
  project: flow_matching
  group: null

# Dataset selection and loading settings.
data:
  train: wikitext
  valid: wikitext
  # Explicit null instead of a bare empty value; presumably the consumer
  # falls back to its own default cache location — confirm.
  cache_dir: null
  num_workers: 16

# Training-loop settings.
# NOTE(review): the three periodic values are presumably counted in
# optimizer steps (compare `optim.n_iters`) — confirm against the trainer.
training:
  seed: 123
  batch_size: 8
  # Periodic actions.
  eval_freq: 10000
  perplexity_freq: 10000
  snapshot: 50000

# Evaluation settings.
eval:
  batch_size: 8
  sample_batch_size: 8
  # Canonical lowercase boolean (was `True`).
  perplexity: true
  perplexity_batch_size: 8

# Optimizer and learning-rate schedule settings.
# Floats carry an explicit decimal point (`3.0e-4`, not `3e-4`) so that
# YAML 1.1 loaders also read them as numbers rather than strings.
optim:
  weight_decay: 0.03
  optimizer: AdamW
  lr: 3.0e-4
  beta1: 0.9
  beta2: 0.95
  eps: 1.0e-8
  warmup: 2500
  grad_clip: 1.0
  # NOTE(review): presumably the final LR as a fraction of `lr` — confirm.
  eta_min_ratio: 0.1
  fused: false
  n_iters: 200000
  # Mirrors `logging.log_lr_every`, which itself follows `logging.log_freq`.
  log_lr_every: ${logging.log_lr_every}

# Flow settings.
flow:
  # Options: uniform | mask
  source_distribution: uniform
  # Options: cross_entropy | generalized_kl
  loss_function: cross_entropy
  # NOTE(review): `exponent` presumably parameterizes the `polynomial`
  # scheduler selected below — confirm against the scheduler code.
  exponent: 1.0
  scheduler_type: polynomial
  sampling_steps: 1024

# Model hyperparameters.
model:
  # Depth and attention heads.
  n_blocks: 12
  n_heads: 12
  # Widths.
  hidden_size: 384
  cond_dim: 128
  img_cond_dim: 1024
  # NOTE(review): presumably the sequence length in tokens — confirm.
  length: 1024
  dropout: 0.1
  # NOTE(review): presumably toggles model compilation — confirm.
  compile: true

# Root output directory, interpolated into `hydra.run.dir` and
# `hydra.sweep.dir`. No default is set in this file, so it presumably has to
# be supplied as an override (e.g. `hydra_dir=/path/to/outputs`) — confirm.
hydra_dir: null

# Hydra runtime settings: output directories and launcher parameters.
hydra:
  run:
    # Single runs: <hydra_dir>/<YYYY.MM.DD>/<HHMMSS>
    dir: ${hydra_dir}/${now:%Y.%m.%d}/${now:%H%M%S}
  sweep:
    # Multirun sweeps share the same layout, with one subdirectory per job
    # named after the job number.
    dir: ${hydra_dir}/${now:%Y.%m.%d}/${now:%H%M%S}
    subdir: ${hydra.job.num}
  # Parameters for the launcher selected in `defaults` (submitit_slurm).
  launcher:
    max_num_timeout: 100000
    timeout_min: 4320  # 4320 minutes = 72 hours
    partition: learn
    # TODO: set this to your own SLURM QOS.
    qos: null
    gpus_per_node: ${compute.ngpus}
    mem_gb: 12
    cpus_per_task: 24
    nodes: ${compute.nodes}
