#!/bin/bash
#
# Launch supervised fine-tuning (./train/sft.py) on a single node via torchrun.
#
# Usage: run from the repository root; every path below is relative to the
# current working directory (./train, ./pretrained, ./data, ./deepspeed,
# ./checkpoints).
#
# Environment exported for the child processes:
#   DEBUG_MODE, LOG_PATH          - presumably read by the training code to
#                                   enable debug logging; verify in sft.py.
#   FORCE_QWENVL_VIDEO_READER     - presumably selects the video decoding
#                                   backend used by the Qwen-VL utilities;
#                                   TODO confirm against the installed version.

# Abort on command failure, on use of an unset variable, and on a failure in
# any stage of a pipeline.
set -euo pipefail

export DEBUG_MODE="true"
export LOG_PATH="./debug_log.txt"

export FORCE_QWENVL_VIDEO_READER=decord

readonly MODEL_NAME="Qwen2.5-VL-7B-Instruct"
readonly LR=1e-5

# Used both for the checkpoint directory and for --run_name, so define it once.
readonly RUN_NAME="VideoTrace-R1-${MODEL_NAME}-sft"

# GPUs made visible to torchrun. The worker count is derived from this list so
# that --nproc_per_node can never disagree with CUDA_VISIBLE_DEVICES.
readonly GPU_IDS="0,1,2,3,4,5,6,7"
IFS=',' read -r -a gpu_id_list <<< "${GPU_IDS}"
readonly NUM_GPUS="${#gpu_id_list[@]}"

# JSON object handed verbatim to --video_path. Single-quoted so the shell
# performs no expansion on it. The keys look like dataset/source names and the
# values like video root directories; how they are used is up to sft.py.
readonly VIDEO_PATHS='{"m-a-p/ScaleLong": "./data/videos/ScaleLong/", "STAR": "./data/videos/Charades_v1_480/", "ActivityNet": "./data/videos/ActivityNet_Captions/", "YouCook2": "./data/videos/YouCook2/", "LVBench": "./data/videos/LVBench/", "TutorialVQA": "./data/videos/TutorialVQA/"}'

# Options consumed by torchrun itself (single node, local rendezvous).
torchrun_args=(
    --nproc_per_node="${NUM_GPUS}"
    --nnodes=1
    --node_rank=0
    --master_addr=127.0.0.1
    --master_port=12349
)

# Options forwarded to ./train/sft.py. Kept in an array so each value stays a
# single word regardless of its content.
train_args=(
    --output_dir "./checkpoints/${RUN_NAME}"
    --model_name_or_path "./pretrained/${MODEL_NAME}"
    --dataset_name "./data/train/videotrace_r1_train_10k.jsonl"
    --video_path "${VIDEO_PATHS}"
    --deepspeed "./deepspeed/zero2.json"
    --per_device_train_batch_size 1
    --gradient_accumulation_steps 2
    --learning_rate "${LR}"
    --logging_steps 1
    --bf16
    --report_to tensorboard
    --gradient_checkpointing true
    --attn_implementation flash_attention_2
    --num_train_epochs 1
    --run_name "${RUN_NAME}"
    --save_strategy epoch
    --max_grad_norm 5
    --save_only_model true
    --max_length 8192
)

CUDA_VISIBLE_DEVICES="${GPU_IDS}" torchrun "${torchrun_args[@]}" \
    ./train/sft.py "${train_args[@]}"