# Model section: sets the model name to llama3_1_70b_instruct.
# NOTE(review): `_set_` looks like an override directive interpreted by the
# config loader (apply these keys on top of defaults) - confirm its semantics.
model:
  _set_:
    name: llama3_1_70b_instruct
# Gang section: sets the tensor-parallel size to 8.
# NOTE(review): presumably the model is sharded across 8 devices per replica -
# confirm this matches the device count the job is launched with.
gang:
  _set_:
    tensor_parallel_size: 8
# Common section: configures the wandb metric recorder's run name.
common:
  metric_recorders:
    wandb:
      _set_:
        # NOTE(review): "test_dpo" reads like a placeholder run name - confirm
        # it is the intended name before launching a real experiment.
        run: test_dpo
# Dataset section: encoding mode plus batching and sequence-length limits.
dataset:
  _set_:
    # NOTE(review): "as_is" presumably means the source text is encoded
    # without additional templating - confirm against the dataset reader.
    source_encode_mode: as_is
    # Same value as max_seq_len below (4096).
    max_num_tokens: 4096
    # NOTE(review): batch_size and max_num_tokens are both set; verify which
    # one the data loader honors when both are present.
    batch_size: 1
    # Accepted sequence lengths are bounded by min_seq_len and max_seq_len.
    min_seq_len: 1
    max_seq_len: 4096
# Criterion section: selects the "dpo" criterion and its settings.
criterion:
  _set_:
    name: dpo
    config:
      # The reference model uses the same name as the model configured under
      # the top-level `model` key (llama3_1_70b_instruct).
      reference_model:
        name: llama3_1_70b_instruct
      # NOTE(review): presumably the dtype the reference model is loaded or
      # run in - confirm.
      reference_dtype: bfloat16
      beta: 0.1
      # NOTE(review): a scale of 0.0 presumably disables any auxiliary NLL
      # term in the loss - confirm against the criterion implementation.
      nll_scale: 0.0
      length_normalization: false
# Trainer section: sets gradient accumulation to 8.
# NOTE(review): presumably 8 batches are accumulated per optimizer step, so
# the effective batch size is 8x the per-step batch - confirm.
trainer:
  _set_:
    gradient_accumulation: 8
# Optimizer section: hyperparameter overrides applied under `config`.
# NOTE(review): the optimizer type itself is not selected here; these keys are
# presumably applied to the recipe's default optimizer - confirm.
optimizer:
  config:
    _set_:
      # Two-element coefficient pair, written as a short flow sequence.
      betas: [0.9, 0.95]
      eps: 1.0e-08
      # Starting point for the schedule configured under `lr_scheduler`.
      lr: 1.0e-06
      weight_decay: 0.1
# LR scheduler section: final learning rate and warmup length.
# NOTE(review): the scheduler type is not selected here; these keys are
# presumably applied to the recipe's default scheduler - confirm.
lr_scheduler:
  config:
    _set_:
      # Absolute final learning rate; 1.0e-07 is one tenth of the optimizer's
      # lr of 1.0e-06 in this file.
      final_lr: 1.0e-07
      # Explicitly null. NOTE(review): presumably final_lr and final_lr_scale
      # are mutually exclusive ways to specify the end value - confirm.
      final_lr_scale: null
      num_warmup_steps: 50
