# Launch run_dpo.py through the `deepspeed` launcher, using the DeepSpeed
# config at ./dpo_config/deepspeed/zero3.json, the model checkpoint at
# /models/Qwen/Qwen-VL-Chat and the hadpo `pope+desc_data.json` data file.
# fp16 is enabled (bf16/tf32 disabled), LoRA is requested via --use_lora, and
# outputs go to /output/qwen-vl/dpo_hadpo_local.
#
# Required environment:
#   DATA_FOLDER  Dataset root directory that contains `llava_data/` and
#                `hadpo-data/`. Use an absolute path with no trailing slash.
#
# Why a variable: the dataset root was previously a bare `<DATA_FOLDER>` token.
# Unquoted, the shell treats `<DATA_FOLDER` as an input redirection and
# `>/some/path` as an output redirection, so the flags lost their values and
# the data file could have been truncated. A quoted expansion passes the paths
# through as ordinary arguments.
: "${DATA_FOLDER:?set DATA_FOLDER to the dataset root directory}"

deepspeed run_dpo.py \
        --deepspeed ./dpo_config/deepspeed/zero3.json \
        --model_name_or_path /models/Qwen/Qwen-VL-Chat \
        --image_folder "${DATA_FOLDER}/llava_data/" \
        --data_path "${DATA_FOLDER}/hadpo-data/hadpo/llava-v1.5/pope+desc_data.json" \
        --fp16 True \
        --bf16 False \
        --tf32 False \
        --output_dir /output/qwen-vl/dpo_hadpo_local \
        --num_train_epochs 1 \
        --per_device_train_batch_size 1 \
        --per_device_eval_batch_size 1 \
        --gradient_accumulation_steps 1 \
        --evaluation_strategy "steps" \
        --eval_steps 10 \
        --save_strategy "steps" \
        --save_steps 10 \
        --save_total_limit 10 \
        --learning_rate 1e-5 \
        --weight_decay 0.05 \
        --warmup_ratio 0.1 \
        --adam_beta2 0.98 \
        --lr_scheduler_type "cosine" \
        --logging_steps 10 \
        --model_max_length 2048 \
        --gradient_checkpointing True \
        --use_lora True \
        --report_to wandb \
        --logging_first_step True \
        --remove_unused_columns False \
        --dataset_debug True
        # --unk_data_path <DATA_FOLDER>/vqav2/vqa_k_trainval_noun_100k_gpt4_clean_dedup_sampled_1_sft_llava.dpo_debug \
        # --unk_image_folder <DATA_FOLDER>/vqav2/remove_anything/lama-gpt4v_gen_q \

        # --data_path /<DATA_FOLDER>/MMInstruction/vlfeedback_80k_debug.jsonl \
        # --image_folder /<DATA_FOLDER>/MMInstruction/merged_images \
        # --unk_data_path <DATA_FOLDER>/vqav2/vqa_k_trainval_noun_100k_gpt4_clean_dedup_sampled_1_sft_llava.dpo_debug \
        # --unk_image_folder <DATA_FOLDER>/vqav2/remove_anything/lama-gpt4v_gen_q \
