#!/usr/bin/env bash
#
# Launches train_text_to_image.py through `accelerate launch` on a single GPU.
#
# Usage:
#   ./<this-script>              # uses GPU index 3 (the historical default)
#   GPU_ID=0 ./<this-script>     # pick a different GPU index
#
# Notes:
#   * train_text_to_image.py and DATASET_NAME are relative paths, so run this
#     from the directory that contains them.
#   * MODEL_NAME, OUTPUT_DIR and DATASET_NAME are exported, so the launched
#     process also sees them in its environment.

# -e: stop on the first failing command; -u: treat unset variables as errors.
# `pipefail` is deliberately omitted: there are no pipelines here and older
# /bin/sh implementations reject it.
set -eu

export MODEL_NAME="stable-diffusion-v1-5/stable-diffusion-v1-5"
export OUTPUT_DIR="sft-out/pokemon"
export DATASET_NAME="./pokemon-blip-captions"

# GPU index exposed to the training process; override by setting GPU_ID.
CUDA_VISIBLE_DEVICES="${GPU_ID:-3}" accelerate launch --mixed_precision="bf16" train_text_to_image.py \
  --pretrained_model_name_or_path="${MODEL_NAME}" \
  --dataset_name="${DATASET_NAME}" \
  --dataloader_num_workers=8 \
  --resolution=512 --center_crop --random_flip \
  --train_batch_size=1 \
  --gradient_accumulation_steps=2 \
  --max_train_steps=7500 \
  --learning_rate=1e-04 \
  --max_grad_norm=1 \
  --lora_nums=3 \
  --lr_scheduler="cosine" --lr_warmup_steps=0 \
  --output_dir="${OUTPUT_DIR}" \
  --checkpointing_steps=500 \
  --seed=1337
