# Training run configuration.
# NOTE(review): judging by output_dir and the dataset, this is presumably a
# 7B DPO run; confirm against the script that consumes this file.

# Model
# NOTE(review): "your_name" looks like a placeholder; replace it with the
# real model path or hub id before launching.
model_name_or_path: your_name
model_revision: main
use_flash_attn: true
gradient_checkpointing: true

# Tokenizer
# NOTE(review): same placeholder value as model_name_or_path; update both.
tokenizer_name: your_name
use_slow_tokenizer: true

# Data
dataset_name: HuggingFaceH4/ultrafeedback_binarized
max_seq_length: 2048
preprocessing_num_workers: 16

# Batching
# Designed for 8 GPUs: 1 sample per device x 4 accumulation steps x 8 GPUs
# gives a batch size of 32. Rescale gradient_accumulation_steps if the GPU
# count differs and the batch size of 32 should be kept.
per_device_train_batch_size: 1
gradient_accumulation_steps: 4

# Optimization
# Keep the decimal point and the signed exponent in learning_rate: YAML 1.1
# loaders such as PyYAML only resolve that spelling to a float (a bare
# "5e-7" would be loaded as a string).
learning_rate: 5.0e-7
lr_scheduler_type: linear
warmup_ratio: 0.1
weight_decay: 0.0
num_train_epochs: 3

# Output and experiment tracking
output_dir: output/dpo_7b_recreate2/
with_tracking: true
report_to:
  - wandb
logging_steps: 1