# SVD-XT model card:     https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt
# SVD-XT 1.1 model card: https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt-1-1
 # Launch train_light_control.py as a single-node, 4-process distributed job.
 #
 # - CUDA_VISIBLE_DEVICES=4,5,6,7 restricts the job to GPUs 4-7, and
 #   --nproc_per_node=4 starts four worker processes to match.
 # - --master_port=29501 is the port torch.distributed.launch uses for
 #   inter-process communication; change it if another job already holds it.
 # - Every option after the script name is forwarded to train_light_control.py;
 #   their exact semantics are defined by that script, not by this file.
 # - The dataset and output paths are hard-coded; edit them for your machine.
 #
 # NOTE(review): torch.distributed.launch is deprecated upstream in favour of
 #   torchrun. Switching changes how the local rank is passed to the script
 #   (--local_rank argument vs. LOCAL_RANK env var), so confirm the training
 #   script supports it before migrating.
 # NOTE(review): --main_process_port looks like an `accelerate launch` option;
 #   confirm train_light_control.py actually accepts it.
 # NOTE(review): the checkpoint below is the non-XT "img2vid" model; confirm
 #   that is the intended base model.
 #
 # Each continued line ends with " \" (space, then backslash) and the LAST
 # argument has no backslash, so the command ends on its own line and cannot
 # accidentally absorb whatever is written below it.
 CUDA_VISIBLE_DEVICES=4,5,6,7 python -m torch.distributed.launch --nproc_per_node=4 --master_port=29501 train_light_control.py \
 --pretrained_model_name_or_path="stabilityai/stable-video-diffusion-img2vid" \
 --output_dir="model_out/ControlSVD-2024.12.15_light_control_reference_cross_atten" \
 --video_folder="/users/zeyuzhu/dataset_project/Datasets/fallowshow/datasets" \
 --ann_folder="/users/zeyuzhu/dataset_project/Datasets/fallowshow/datasets" \
 --width=576 \
 --height=320 \
 --num_frames=16 \
 --learning_rate=2e-5 \
 --per_gpu_batch_size=1 \
 --num_train_epochs=500 \
 --mixed_precision="fp16" \
 --gradient_accumulation_steps=2 \
 --checkpointing_steps=2000 \
 --gradient_checkpointing \
 --report_to=wandb \
 --use_reference_image=True \
 --main_process_port 0


