# Reward-model training launch: runs train_reward.py under the deepspeed
# launcher with --master_port=28500.
#
# The flag list is accumulated in the positional parameters ("$@"), one group
# at a time, so that every group can carry its own comment. Arguments given to
# this script itself are overwritten by the first `set --` and are never
# forwarded to the trainer.
#
# NOTE(review): the /path/to/your_code/... values look like placeholders --
# presumably they must be edited before a real run; confirm.

# LoRA on/off plus the freeze/tune switches for the individual sub-modules.
set -- \
    --lora_enable True --vision_lora False \
    --freeze_vision_tower False --freeze_llm False \
    --tune_merger True

# Video frame sampling.
set -- "$@" \
    --fps 2 --max_frame_pixels 200704 \
    --sample_type "uniform"

# LoRA hyperparameters and numeric precision.
# The exclude list is passed as ONE argument (a quoted list literal).
set -- "$@" \
    --lora_r 64 --lora_alpha 128 \
    --lora_namespan_exclude "['lm_head', 'rm_head', 'embed_tokens']" \
    --bf16 True --torch_dtype "bfloat16" \
    --num_lora_modules -1

# Base model, dataset locations and output directory.
# --meta_data and --meta_data_test currently point at the same CSV file.
set -- "$@" \
    --model_name_or_path Qwen/Qwen2-VL-2B-Instruct \
    --meta_data "/path/to/your_code/videoreward/datasets/train/example.csv" \
    --meta_data_test "/path/to/your_code/videoreward/datasets/train/example.csv" \
    --data_dir "/path/to/your_code/videoreward/datasets/train" \
    --output_dir rm_output

# Reward dimensions, reward token, loss and prompt template.
# --eval_dim takes three separate values, matching --output_dim 3.
set -- "$@" \
    --eval_dim "VQ" "MQ" "TA" --output_dim 3 \
    --use_special_tokens True --reward_token "special" \
    --loss_type "btt" --use_tied_data True \
    --prompt_template_type "detailed_special"

# Batch sizes, epoch count, learning rates, reporting backend and LR schedule.
set -- "$@" \
    --per_device_train_batch_size 1 --per_device_eval_batch_size 4 \
    --gradient_accumulation_steps 4 \
    --num_train_epochs 3 \
    --learning_rate 2e-6 --merger_lr 2e-6 \
    --vision_lr 2e-6 --special_token_lr 2e-6 \
    --report_to tensorboard \
    --warmup_ratio 0.05 --lr_scheduler_type "constant_with_warmup"

# Logging / evaluation / checkpoint cadence.
# NOTE(review): the *_epochs values are presumably fractions of an epoch --
# confirm against train_reward.py.
set -- "$@" \
    --eval_strategy "steps" \
    --logging_epochs 0.01 --eval_epochs 0.1 --save_epochs 0.25

# Sequence length, memory/runtime options, DeepSpeed config and saving.
set -- "$@" \
    --max_length 6144 \
    --gradient_checkpointing False \
    --deepspeed ds_config/zero0.json \
    --save_only_model True --save_full_model False \
    --dataloader_num_workers 8

# Launch with the accumulated flags, in the order they were appended above.
deepspeed --master_port=28500 train_reward.py "$@"
