#!/bin/bash

# Launch dpo.py with a fixed set of command-line flags.
# The flags are gathered in an array so each group can carry a comment; they
# are handed to dpo.py in exactly the order listed here.
dpo_args=(
  # Dataset, base model and output locations.
  # NOTE(review): the /path/to/... values look like placeholders; replace
  # them with real paths before running.
  --dataset_name /path/to/preference_pairs.csv
  --pretrained_model_name_or_path /path/to/stable-diffusion-model
  --output_dir /path/to/output_directory

  # Batch size, accumulation, total step count and checkpoint interval.
  --train_batch_size 1
  --gradient_accumulation_steps 1
  --max_train_steps 3000
  --checkpointing_steps 500

  # Base learning rate.
  # NOTE(review): --scale_lr presumably makes dpo.py rescale this value;
  # confirm the effective rate against dpo.py.
  --learning_rate=1e-8
  --scale_lr

  # Number of data-loader workers requested.
  --dataloader_num_workers=16

  # Scheduler name and warmup step count.
  --lr_scheduler="constant_with_warmup"
  --lr_warmup_steps=500

  # NOTE(review): presumably the DPO beta coefficient; confirm in dpo.py.
  --beta_dpo 5000

  # Precision mode and image resolution passed through to dpo.py.
  --mixed_precision="fp16"
  --resolution 512
)

python dpo.py "${dpo_args[@]}"