#!/bin/bash

# Stop the script as soon as any command exits with a non-zero status.
set -o errexit

# Debug settings handed to child processes through the environment.
# NOTE(review): presumably read by the training code to enable debug logging
# and choose the log file; the consumer is not visible here -- confirm.
DEBUG_MODE="true"
LOG_PATH="./debug_log.txt"

# NOTE(review): looks like this pins the Qwen-VL video loading backend to
# decord; confirm against the library that reads this variable.
FORCE_QWENVL_VIDEO_READER=decord

export DEBUG_MODE LOG_PATH FORCE_QWENVL_VIDEO_READER

# Launch ./train/grpo.py under torchrun on a single node, with 8 worker
# processes and GPUs 0-7 made visible to them. Every relative path below is
# resolved against the directory the script is started from.

# Options consumed by torchrun itself (single node, local rendezvous).
launcher_opts=(
    --nproc_per_node=8
    --nnodes=1
    --node_rank=0
    --master_addr=127.0.0.1
    --master_port=12365
)

# JSON object passed verbatim as the --video_path value. Its keys are
# dataset/source names and its values are local video directories.
# NOTE(review): how grpo.py matches keys to samples is not visible here.
video_roots='{"m-a-p/ScaleLong": "./data/videos/ScaleLong/", "STAR": "./data/videos/Charades_v1_480/", "ActivityNet": "./data/videos/ActivityNet_Captions/", "YouCook2": "./data/videos/YouCook2/", "LVBench": "./data/videos/LVBench/", "TutorialVQA": "./data/videos/TutorialVQA/"}'

# Options forwarded to the training script, kept in the original order.
trainer_opts=(
    # Checkpoint output, starting model, training data and DeepSpeed config.
    --output_dir "./checkpoints/videotrace-r1-7b"
    --model_name_or_path "./checkpoints/VideoTrace-R1-Qwen2.5-VL-7B-Instruct-sft"
    --dataset_name "./data/train/videotrace_r1_train_10k.jsonl"
    --video_path "${video_roots}"
    --deepspeed "./deepspeed/zero3.json"

    # Sequence length limits and batching.
    --max_prompt_length 16384
    --max_completion_length 1024
    --per_device_train_batch_size 1
    --gradient_accumulation_steps 1

    # Optimizer and schedule.
    --learning_rate 5e-6
    --lr_scheduler_type "cosine"
    --weight_decay 0.01

    # Precision, logging, memory and attention settings.
    --bf16
    --logging_steps 1
    --gradient_checkpointing true
    --attn_implementation flash_attention_2
    --max_pixels 401408

    # Run length, run name and checkpoint cadence.
    --num_train_epochs 2
    --run_name videotrace-r1-grpo
    --save_strategy steps
    --save_steps 500

    # Remaining trainer options.
    # NOTE(review): beta / num_generations look like GRPO hyperparameters
    # (KL weight, completions per prompt) -- confirm against grpo.py.
    --beta 0.04
    --max_grad_norm 5
    --save_only_model false
    --num_generations 8
)

# The GPU list is set for this one command only; torchrun's exit status
# becomes the script's exit status.
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 torchrun "${launcher_opts[@]}" \
    ./train/grpo.py "${trainer_opts[@]}"
