#!/usr/bin/env bash
#
# Runs encoder_based_method.py through `accelerate launch` with --task="infer".
#
# Usage: bash <this-script> [extra args...]
#   Extra arguments are appended after the defaults below and forwarded
#   unchanged to encoder_based_method.py. With no extra arguments the command
#   line is exactly the default set listed in main().

#######################################
# Build the default argument list and launch encoder_based_method.py.
# Arguments: "$@" - optional extra arguments appended after the defaults
# Returns:   exit status of `accelerate launch`
#######################################
main() {
  # One argument per array element: no line-continuation backslashes to get
  # wrong, and each group can carry its own comment.
  local -a launch_args=(
    --model-path="llava-v1.5-7b"

    # Reference and target images (both currently point at the same file).
    --ref_img "data/CelebAMask-HQ/CelebA-HQ-img/1.jpg"
    --tgt_img "data/CelebAMask-HQ/CelebA-HQ-img/1.jpg"

    --load-8bit
    --prompt="Can you spot <sks> in this photo?"

    # Output and logging directories.
    --output_dir="exp/encoder_based_v3_0.5/"
    --logging_dir="exp/encoder_based_v3_0.5/log"

    # NOTE(review): these look like training options; presumably unused when
    # --task="infer" -- confirm against encoder_based_method.py.
    --gradient_accumulation_steps=1
    --mixed_precision="fp16"
    --num_train_steps=10000
    --img_dir="data/CelebAMask-HQ/CelebA-HQ-img"
    --importance_weight=3.0

    # Inference inputs.
    --infer_ref_img="ref_img.png"
    --infer_query_img="query_img.png"
    # NOTE(review): the file name says 100000 while --num_train_steps above is
    # 10000 -- confirm this is the intended checkpoint.
    --checkpoint_path="checkpoint_100000.ckpt"
    --question="What is the reason why you think this person is <sks>?"

    --task="infer"
  )

  accelerate launch encoder_based_method.py "${launch_args[@]}" "$@"
}

main "$@"