#!/usr/bin/env bash
#
# Launch LoRA supervised fine-tuning of Llama-3.2-11B-Vision-Instruct on the
# llava-instruct-mix-vsft dataset via examples/sft/sft_vlm.py.
#
# The job is submitted with `srun` to the `mllm_safety` partition, requesting
# one GPU and eight CPUs per task, and is started through `accelerate launch`
# with the single-GPU config and a single process.
#
# Notes:
#   * WANDB_MODE=offline keeps Weights & Biases from syncing to the network;
#     it is set as a per-command environment assignment so it only applies to
#     this launch. (The variable was previously misspelled as WANDB_MDOE, which
#     W&B ignores.)
#   * --time=30000 is a bare number, which Slurm interprets as minutes.
#   * --quotatype=reserved is not a stock Slurm flag; presumably a
#     cluster-specific extension -- confirm against the site documentation.
#   * Dataset and model are read from local paths on the shared filesystem;
#     outputs are written to ./model/ relative to the working directory.
WANDB_MODE=offline srun -p mllm_safety --quotatype=reserved --gres=gpu:1 --cpus-per-task=8 --time=30000 accelerate launch \
    --config_file scripts/accelerate_configs/single_gpu.yaml --num_processes 1 \
    examples/sft/sft_vlm.py \
    --dataset_name /mnt/lustrenew/mllm_safety-shared/datasets/huggingface/HuggingFaceH4/llava-instruct-mix-vsft \
    --model_name_or_path /mnt/lustrenew/mllm_safety-shared/models/huggingface/meta-llama/Llama-3.2-11B-Vision-Instruct \
    --use_peft \
    --lora_target_modules all-linear \
    --per_device_train_batch_size 1 \
    --gradient_accumulation_steps 1 \
    --output_dir ./model/ \
    --bf16 \
    --torch_dtype bfloat16 \
    --logging_strategy steps \
    --logging_steps 1
