# Hydra defaults list. `_self_` stands for this file's own contents; no other
# config groups are listed here, so nothing else is composed in via this list.
defaults:
  - _self_


# Model settings. The code that consumes these keys is not visible from this
# file, so the notes below are inferred from key names — confirm against the
# model builder before relying on them.
model:
  name: "transformer"
  tokenizer: "numerical"
  # presumably the maximum input sequence length, in tokens — TODO confirm
  max_length: 128
  # presumably the maximum target/response length, in tokens — TODO confirm
  target_max_length: 4
  depth: 4
  width: 384
  # 384 / 6 = 64; assumes width has to split evenly across heads — TODO confirm
  num_heads: 6
  dropout: 0.0
  # NOTE(review): `use_abs_pos_emb` and `pos_emb: "abs"` look like two switches
  # for the same choice. Check which one the model actually reads and keep both
  # in sync when overriding either.
  use_abs_pos_emb: True
  pos_emb: "abs"
  glu: False
  causal: True
  autoregressive: True
  # null by default; presumably a checkpoint path to restore from — TODO confirm
  load_model: null


# Run-wide settings (meanings inferred from names — confirm at the entry point).
debug: False
# presumably enables logging to the service configured under `wandb:` — TODO confirm
use_wandb: False
# presumably the base directory that relative paths are resolved against — TODO confirm
root: "./"
seed: 1

# Dataset selection. How these keys are consumed is not visible from this file;
# only the interpolation behaviour noted below is established here.
data:
  name: "combo"
  # Interpolates `data.name`; resolves to "data/combo" with the default above.
  data_dir: "data/${data.name}"
  # Interpolation: mirrors `val_data_name`, so this is null unless either this
  # key or `val_data_name` is overridden.
  train_data_name: ${data.val_data_name}
  val_data_name: null
  edge_shuffle_rule: "bylayer"
  params: null

# Training settings. The training loop is not visible from this file, so the
# notes below are inferred from key names — confirm against the trainer.
train:
  # NOTE(review): presumably selects between the `online` and `offline`
  # sub-sections below — confirm which keys are read in each mode.
  train_online: True
  # NOTE(review): both an iteration cap and an epoch count are set; check which
  # one bounds training in each mode.
  max_iters: 100000
  num_paths: 1
  batch_size: 128
  num_epochs: 10
  # null by default; presumably lets the data loader pick its own worker count
  # — TODO confirm
  num_workers: null
  offline:
    sample_size: 180000
    load_data: False
  online:
    # 64000 * 8 = 512000 samples if chunks are generated in full — assumes
    # chunk_size counts samples; TODO confirm
    chunk_size: 64000
    num_chunks: 8
    generate_chunks_first: True
    shuffle_input: False
    sample_response: True

# Optimizer and learning-rate schedule settings. The consuming code is not
# visible from this file; notes on meaning are hedged accordingly.
optimizer:
  # Floats in scientific notation are written with an explicit "." in the
  # mantissa: YAML 1.1 loaders (e.g. PyYAML) only recognise floats containing a
  # dot and would otherwise load a bare `1e-4` as a string.
  learning_rate: 1.0e-4
  weight_decay: 0.01
  gradient_accumulation_steps: 1
  max_grad_norm: 1.0
  warmup_steps: 1
  # NOTE(review): with decay disabled, `lr_decay_iters` and `min_lr` are
  # presumably unused — confirm in the scheduler code.
  decay_lr: False
  lr_decay_iters: 25000
  min_lr: 1.0e-5
  betas: [0.9, 0.95]
  # presumably toggles automatic mixed precision — TODO confirm
  use_amp: True

# Evaluation, logging and checkpointing settings. The evaluation code is not
# visible from this file, so notes on meaning are inferred from key names.
eval:
  load_data: False
  evaluate_train: False
  sample_size: 1024
  prompt_batch_size: 128
  batch_size: null
  # The *_interval keys are presumably measured in training iterations — TODO confirm
  log_interval: 100
  eval_interval: 1000
  logprob_interval: 1000
  # -1 presumably disables periodic generation — TODO confirm
  generation_interval: -1
  save_generations: False
  # NOTE(review): both directories interpolate `wandb.run_name`, which defaults
  # to null in this file. Set `wandb.run_name` for any run that writes here,
  # otherwise the path is built from a null value — confirm how that renders.
  logprob_dir: "logprobs/${wandb.run_name}"
  checkpoint_dir: "checkpoints/${wandb.run_name}"
  n_responses: 4
  # Interpolation: follows `eval.n_responses` (4 by default) unless overridden.
  n_table: ${eval.n_responses}
  # -1 presumably disables periodic checkpoint saving — TODO confirm
  save_model_interval: -1


# Weights & Biases settings; presumably only used when `use_wandb` is enabled
# — TODO confirm.
wandb:
  project: "coverage-experiments"
  # null by default. Other keys in this file (`eval.logprob_dir`,
  # `eval.checkpoint_dir`) interpolate this value, so set it per run.
  run_name: null
  host: https://api.wandb.ai
  # null by default. Supply the API key via a command-line override or the
  # environment rather than committing it to this file.
  key: null

# Hydra output-directory layout. `${now:...}` is Hydra's timestamp resolver, so
# each launch gets its own date/time-stamped directory.
hydra:
  # Single-run output directory.
  run:
    dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}
  # Multirun (sweep) output directory; each job writes to a subdirectory named
  # after its job number.
  sweep:
    dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S}
    subdir: ${hydra.job.num}